mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 05:32:52 +00:00
improvements in subcolumns reading
This commit is contained in:
parent
06d5b87bc9
commit
6de8b05b7c
@ -149,21 +149,42 @@ namespace
|
||||
offset_values.resize(i);
|
||||
}
|
||||
|
||||
MutableColumnPtr getArraySizesPositionIndependent(const ColumnArray & column_array)
|
||||
/// Converts a column of cumulative array offsets into a column of per-row sizes:
/// sizes[0] = offsets[0], sizes[i] = offsets[i] - offsets[i - 1].
/// `column` is cast with assert_cast to ColumnArray::ColumnOffsets.
/// For an empty input an empty clone of the offsets column is returned.
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
|
||||
{
|
||||
const auto & offset_values = column_array.getOffsets();
|
||||
MutableColumnPtr new_offsets = column_array.getOffsetsColumn().cloneEmpty();
|
||||
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
|
||||
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
|
||||
|
||||
if (offset_values.empty())
|
||||
return new_offsets;
|
||||
/// Early return keeps the unconditional access to element 0 below safe.
if (column_offsets.empty())
|
||||
return column_sizes;
|
||||
|
||||
auto & new_offsets_values = assert_cast<ColumnVector<ColumnArray::Offset> &>(*new_offsets).getData();
|
||||
new_offsets_values.reserve(offset_values.size());
|
||||
new_offsets_values.push_back(offset_values[0]);
|
||||
for (size_t i = 1; i < offset_values.size(); ++i)
|
||||
new_offsets_values.push_back(offset_values[i] - offset_values[i - 1]);
|
||||
const auto & offsets_data = column_offsets.getData();
|
||||
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
|
||||
|
||||
return new_offsets;
|
||||
sizes_data.resize(offsets_data.size());
|
||||
/// The first size equals the first offset; every later size is the difference of neighbouring offsets.
sizes_data[0] = offsets_data[0];
|
||||
for (size_t i = 1; i < offsets_data.size(); ++i)
|
||||
sizes_data[i] = offsets_data[i] - offsets_data[i - 1];
|
||||
|
||||
return column_sizes;
|
||||
}
|
||||
|
||||
/// Converts a column of per-row array sizes into a column of cumulative offsets:
/// offsets[i] = sizes[0] + ... + sizes[i].
/// `column` is cast with assert_cast to ColumnArray::ColumnOffsets.
/// For an empty input an empty clone of the sizes column is returned.
ColumnPtr arraySizesToOffsets(const IColumn & column)
{
    const auto & column_sizes = assert_cast<const ColumnArray::ColumnOffsets &>(column);
    MutableColumnPtr column_offsets = column_sizes.cloneEmpty();

    if (column_sizes.empty())
        return column_offsets;

    const auto & sizes_data = column_sizes.getData();
    auto & offsets_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_offsets).getData();

    offsets_data.resize(sizes_data.size());

    /// Accumulate in a local instead of reading offsets_data[i - 1]:
    /// the previous loop started at i == 0 and therefore indexed element -1.
    ColumnArray::Offset prev_offset = 0;
    for (size_t i = 0; i < sizes_data.size(); ++i)
    {
        prev_offset += sizes_data[i];
        offsets_data[i] = prev_offset;
    }

    return column_offsets;
}
|
||||
}
|
||||
|
||||
@ -263,12 +284,11 @@ void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
|
||||
|
||||
settings.path.push_back(Substream::ArraySizes);
|
||||
|
||||
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
column_array.getOffsetsPtr() = cached_column;
|
||||
column_array.getOffsetsPtr() = arraySizesToOffsets(*cached_column);
|
||||
}
|
||||
else if (auto * stream = settings.getter(settings.path))
|
||||
{
|
||||
@ -277,7 +297,7 @@ void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
|
||||
else
|
||||
DataTypeNumber<ColumnArray::Offset>().deserializeBinaryBulk(column_array.getOffsetsColumn(), *stream, limit, 0);
|
||||
|
||||
addToSubstreamsCache(cache, settings.path, column_array.getOffsetsPtr());
|
||||
addToSubstreamsCache(cache, settings.path, arrayOffsetsToSizes(column_array.getOffsetsColumn()));
|
||||
}
|
||||
|
||||
settings.path.back() = Substream::ArrayElements;
|
||||
@ -547,24 +567,24 @@ DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name
|
||||
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn)) : subcolumn);
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, IColumn & column) const
|
||||
/// Entry point for subcolumn extraction: forwards to getSubcolumnImpl with level 0.
ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
|
||||
{
|
||||
return getSubcolumnImpl(subcolumn_name, column, 0);
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, IColumn & column, size_t level) const
|
||||
/// Extracts a subcolumn from an array column.
/// - If the name is "size" followed by `level`, the result is the per-row sizes
///   computed from this array's offsets column.
/// - Otherwise the request is forwarded to the nested type (with level + 1 when the
///   nested type is itself a DataTypeArray) and the result is wrapped into a
///   ColumnArray that uses this array's offsets.
/// `column` is cast with assert_cast to ColumnArray.
ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const
|
||||
{
|
||||
auto & column_array = assert_cast<ColumnArray &>(column);
|
||||
const auto & column_array = assert_cast<const ColumnArray &>(column);
|
||||
if (subcolumn_name == "size" + std::to_string(level))
|
||||
return getArraySizesPositionIndependent(column_array);
|
||||
return arrayOffsetsToSizes(column_array.getOffsetsColumn());
|
||||
|
||||
MutableColumnPtr subcolumn;
|
||||
ColumnPtr subcolumn;
|
||||
/// Nested arrays recurse through the Impl function so that `level` keeps counting dimensions.
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
|
||||
subcolumn = nested_array->getSubcolumnImpl(subcolumn_name, column_array.getData(), level + 1);
|
||||
else
|
||||
subcolumn = nested->getSubcolumn(subcolumn_name, column_array.getData());
|
||||
|
||||
return ColumnArray::create(std::move(subcolumn), column_array.getOffsetsPtr()->assumeMutable());
|
||||
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
|
||||
}
|
||||
|
||||
size_t DataTypeArray::getNumberOfDimensions() const
|
||||
|
@ -113,7 +113,7 @@ public:
|
||||
}
|
||||
|
||||
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
|
||||
MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const override;
|
||||
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
|
||||
|
||||
const DataTypePtr & getNestedType() const { return nested; }
|
||||
|
||||
@ -121,7 +121,7 @@ public:
|
||||
size_t getNumberOfDimensions() const;
|
||||
|
||||
private:
|
||||
MutableColumnPtr getSubcolumnImpl(const String & subcolumn_name, IColumn & column, size_t level) const;
|
||||
ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const;
|
||||
DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const;
|
||||
};
|
||||
|
||||
|
@ -542,9 +542,9 @@ DataTypePtr DataTypeNullable::tryGetSubcolumnType(const String & subcolumn_name)
|
||||
return nested_data_type->tryGetSubcolumnType(subcolumn_name);
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, IColumn & column) const
|
||||
ColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
|
||||
{
|
||||
auto & column_nullable = assert_cast<ColumnNullable &>(column);
|
||||
const auto & column_nullable = assert_cast<const ColumnNullable &>(column);
|
||||
if (subcolumn_name == "null")
|
||||
return column_nullable.getNullMapColumnPtr()->assumeMutable();
|
||||
|
||||
|
@ -99,7 +99,7 @@ public:
|
||||
bool onlyNull() const override;
|
||||
bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
|
||||
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
|
||||
MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const override;
|
||||
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
|
||||
|
||||
const DataTypePtr & getNestedType() const { return nested_data_type; }
|
||||
|
||||
|
@ -22,7 +22,7 @@ private:
|
||||
bool escape_delimiter;
|
||||
|
||||
public:
|
||||
DataTypeOneElementTupleStreams(const DataTypePtr & nested_, const String & name_, bool escape_delimiter_ = true)
|
||||
/// Stores nested_, name_ and escape_delimiter_ in the members of the same names (without the trailing underscore).
DataTypeOneElementTupleStreams(const DataTypePtr & nested_, const String & name_, bool escape_delimiter_)
|
||||
: nested(nested_), name(name_), escape_delimiter(escape_delimiter_) {}
|
||||
|
||||
void enumerateStreams(
|
||||
@ -99,7 +99,7 @@ private:
|
||||
DataTypePtr createOneElementTuple(const DataTypePtr & type, const String & name, bool escape_delimiter)
|
||||
{
|
||||
auto custom_desc = std::make_unique<DataTypeCustomDesc>(
|
||||
std::make_unique<DataTypeCustomFixedName>(type->getName()), nullptr,
|
||||
std::make_unique<DataTypeCustomFixedName>(type->getName()),nullptr,
|
||||
std::make_unique<DataTypeOneElementTupleStreams>(type, name, escape_delimiter));
|
||||
|
||||
return DataTypeFactory::instance().getCustom(std::move(custom_desc));
|
||||
|
@ -635,14 +635,14 @@ DataTypePtr DataTypeTuple::tryGetSubcolumnType(const String & subcolumn_name) co
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MutableColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, IColumn & column) const
|
||||
ColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
|
||||
{
|
||||
for (size_t i = 0; i < names.size(); ++i)
|
||||
{
|
||||
if (startsWith(subcolumn_name, names[i]))
|
||||
{
|
||||
size_t name_length = names[i].size();
|
||||
auto & subcolumn = extractElementColumn(column, i);
|
||||
const auto & subcolumn = extractElementColumn(column, i);
|
||||
|
||||
if (subcolumn_name.size() == name_length)
|
||||
return subcolumn.assumeMutable();
|
||||
|
@ -100,7 +100,7 @@ public:
|
||||
size_t getSizeOfValueInMemory() const override;
|
||||
|
||||
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
|
||||
MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const override;
|
||||
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
|
||||
|
||||
const DataTypes & getElements() const { return elems; }
|
||||
const Strings & getElementNames() const { return names; }
|
||||
|
@ -156,7 +156,7 @@ DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
||||
}
|
||||
|
||||
MutableColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, IColumn &) const
|
||||
/// Base implementation: ignores the column argument and always throws ILLEGAL_COLUMN,
/// naming the requested subcolumn and this type.
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
|
||||
}
|
||||
@ -173,11 +173,7 @@ Names IDataType::getSubcolumnNames() const
|
||||
new_path.push_back(elem);
|
||||
auto subcolumn_name = getSubcolumnNameForStream(new_path);
|
||||
if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name))
|
||||
{
|
||||
/// Not all of substreams have its subcolumn.
|
||||
if (tryGetSubcolumnType(subcolumn_name))
|
||||
res.insert(subcolumn_name);
|
||||
}
|
||||
res.insert(subcolumn_name);
|
||||
}
|
||||
});
|
||||
|
||||
@ -329,7 +325,7 @@ void IDataType::deserializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
|
||||
/// Do not cache complex type, because they can be constructed
|
||||
/// their subcolumns, which are in cache.
|
||||
/// from their subcolumns, which are in cache.
|
||||
if (!haveSubtypes())
|
||||
{
|
||||
auto cached_column = getFromSubstreamsCache(cache, settings.path);
|
||||
@ -340,7 +336,7 @@ void IDataType::deserializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
}
|
||||
|
||||
auto mutable_column = IColumn::mutate(std::move(column));
|
||||
auto mutable_column = column->assumeMutable();
|
||||
deserializeBinaryBulkWithMultipleStreamsImpl(*mutable_column, limit, settings, state, cache);
|
||||
column = std::move(mutable_column);
|
||||
|
||||
|
@ -126,7 +126,7 @@ public:
|
||||
|
||||
virtual DataTypePtr tryGetSubcolumnType(const String & /* subcolumn_name */) const { return nullptr; }
|
||||
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
|
||||
virtual MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const;
|
||||
virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const;
|
||||
Names getSubcolumnNames() const;
|
||||
|
||||
using OutputStreamGetter = std::function<WriteBuffer*(const SubstreamPath &)>;
|
||||
|
@ -220,7 +220,7 @@ void MergeTreeReaderCompact::readData(
|
||||
|
||||
storage_type->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
|
||||
storage_type->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state);
|
||||
column = storage_type->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column->assumeMutable());
|
||||
column = storage_type->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -47,7 +47,7 @@ static ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair &
|
||||
|
||||
const auto & column = block.getByName(storage_name).column;
|
||||
if (name_and_type.isSubcolumn())
|
||||
return name_and_type.getStorageType()->getSubcolumn(name_and_type.getSubcolumnName(), *column->assumeMutable());
|
||||
return name_and_type.getStorageType()->getSubcolumn(name_and_type.getSubcolumnName(), *column);
|
||||
|
||||
return column;
|
||||
}
|
||||
|
@ -121,7 +121,7 @@ protected:
|
||||
{
|
||||
const auto & current_column = buffer.data.getByName(elem.getStorageName()).column;
|
||||
if (elem.isSubcolumn())
|
||||
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column->assumeMutable()));
|
||||
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column));
|
||||
else
|
||||
columns.emplace_back(std::move(current_column));
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ protected:
|
||||
{
|
||||
auto current_column = src.getByName(elem.getStorageName()).column;
|
||||
if (elem.isSubcolumn())
|
||||
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column->assumeMutable()));
|
||||
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column));
|
||||
else
|
||||
columns.emplace_back(std::move(current_column));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user