improvements in subcolumns reading

This commit is contained in:
Anton Popov 2020-12-17 20:10:33 +03:00
parent 06d5b87bc9
commit 6de8b05b7c
13 changed files with 59 additions and 43 deletions

View File

@ -149,21 +149,42 @@ namespace
offset_values.resize(i);
}
/// NOTE: this span is a rendered diff. Lines of the removed helper
/// getArraySizesPositionIndependent and of its replacement arrayOffsetsToSizes
/// are interleaved below without +/- markers.
///
/// Both versions convert cumulative array offsets into per-row array sizes:
///     sizes[0] = offsets[0]
///     sizes[i] = offsets[i] - offsets[i - 1]   for i >= 1
/// The old version takes a ColumnArray and reads its offsets; the new version
/// takes the offsets column itself (as IColumn) and casts it to ColumnOffsets.
/// For an empty offsets column, an empty column of the same kind is returned.
MutableColumnPtr getArraySizesPositionIndependent(const ColumnArray & column_array)
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
{
const auto & offset_values = column_array.getOffsets();
MutableColumnPtr new_offsets = column_array.getOffsetsColumn().cloneEmpty();
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
/// Early return for empty input: the code below reads element 0 unconditionally.
if (offset_values.empty())
return new_offsets;
if (column_offsets.empty())
return column_sizes;
/// Old version: reserve, then append the sizes one by one.
auto & new_offsets_values = assert_cast<ColumnVector<ColumnArray::Offset> &>(*new_offsets).getData();
new_offsets_values.reserve(offset_values.size());
new_offsets_values.push_back(offset_values[0]);
for (size_t i = 1; i < offset_values.size(); ++i)
new_offsets_values.push_back(offset_values[i] - offset_values[i - 1]);
const auto & offsets_data = column_offsets.getData();
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
return new_offsets;
/// New version: resize once, then fill by index.
sizes_data.resize(offsets_data.size());
/// The first size equals the first offset; the loop therefore starts at 1.
sizes_data[0] = offsets_data[0];
for (size_t i = 1; i < offsets_data.size(); ++i)
sizes_data[i] = offsets_data[i] - offsets_data[i - 1];
return column_sizes;
}
/// Converts a column of per-row array sizes into a column of cumulative offsets
/// (the inverse of arrayOffsetsToSizes):
///     offsets[0] = sizes[0]
///     offsets[i] = offsets[i - 1] + sizes[i]   for i >= 1
/// `column` must be a ColumnArray::ColumnOffsets (checked by assert_cast).
/// For an empty input column, an empty column of the same kind is returned.
ColumnPtr arraySizesToOffsets(const IColumn & column)
{
    const auto & column_sizes = assert_cast<const ColumnArray::ColumnOffsets &>(column);
    MutableColumnPtr column_offsets = column_sizes.cloneEmpty();

    /// Early return for empty input: element 0 is read unconditionally below.
    if (column_sizes.empty())
        return column_offsets;

    const auto & sizes_data = column_sizes.getData();
    auto & offsets_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_offsets).getData();

    offsets_data.resize(sizes_data.size());
    offsets_data[0] = sizes_data[0];

    /// Start at 1: element 0 is already set above, and starting at 0 would read
    /// offsets_data[i - 1] with i == 0, i.e. an index outside the resized range.
    for (size_t i = 1; i < sizes_data.size(); ++i)
        offsets_data[i] = offsets_data[i - 1] + sizes_data[i];

    return column_offsets;
}
}
@ -263,12 +284,11 @@ void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
SubstreamsCache * cache) const
{
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
settings.path.push_back(Substream::ArraySizes);
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
{
column_array.getOffsetsPtr() = cached_column;
column_array.getOffsetsPtr() = arraySizesToOffsets(*cached_column);
}
else if (auto * stream = settings.getter(settings.path))
{
@ -277,7 +297,7 @@ void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
else
DataTypeNumber<ColumnArray::Offset>().deserializeBinaryBulk(column_array.getOffsetsColumn(), *stream, limit, 0);
addToSubstreamsCache(cache, settings.path, column_array.getOffsetsPtr());
addToSubstreamsCache(cache, settings.path, arrayOffsetsToSizes(column_array.getOffsetsColumn()));
}
settings.path.back() = Substream::ArrayElements;
@ -547,24 +567,24 @@ DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn)) : subcolumn);
}
/// NOTE: this span is a rendered diff. Old (MutableColumnPtr / non-const IColumn)
/// and new (ColumnPtr / const IColumn) lines are interleaved without +/- markers.
///
/// getSubcolumn: public entry point; forwards to getSubcolumnImpl with level 0.
MutableColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, IColumn & column) const
ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
{
return getSubcolumnImpl(subcolumn_name, column, 0);
}
/// getSubcolumnImpl: extracts the subcolumn `subcolumn_name` from an array column.
/// `level` is the current array nesting depth; it is appended to "size" to form
/// the name of the sizes subcolumn at this depth ("size0", "size1", ...).
MutableColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, IColumn & column, size_t level) const
ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const
{
auto & column_array = assert_cast<ColumnArray &>(column);
const auto & column_array = assert_cast<const ColumnArray &>(column);
/// Sizes subcolumn of this level: derived from this array's offsets.
if (subcolumn_name == "size" + std::to_string(level))
return getArraySizesPositionIndependent(column_array);
return arrayOffsetsToSizes(column_array.getOffsetsColumn());
MutableColumnPtr subcolumn;
ColumnPtr subcolumn;
/// Otherwise ask the nested type: a nested array recurses with level + 1,
/// any other nested type resolves the name through its own getSubcolumn.
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
subcolumn = nested_array->getSubcolumnImpl(subcolumn_name, column_array.getData(), level + 1);
else
subcolumn = nested->getSubcolumn(subcolumn_name, column_array.getData());
/// Wrap the nested subcolumn in an array that uses this column's offsets.
/// The old line casts the offsets with assumeMutable(); the new line passes them as-is.
return ColumnArray::create(std::move(subcolumn), column_array.getOffsetsPtr()->assumeMutable());
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
}
size_t DataTypeArray::getNumberOfDimensions() const

View File

@ -113,7 +113,7 @@ public:
}
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
const DataTypePtr & getNestedType() const { return nested; }
@ -121,7 +121,7 @@ public:
size_t getNumberOfDimensions() const;
private:
MutableColumnPtr getSubcolumnImpl(const String & subcolumn_name, IColumn & column, size_t level) const;
ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const;
DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const;
};

View File

@ -542,9 +542,9 @@ DataTypePtr DataTypeNullable::tryGetSubcolumnType(const String & subcolumn_name)
return nested_data_type->tryGetSubcolumnType(subcolumn_name);
}
MutableColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, IColumn & column) const
ColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
{
auto & column_nullable = assert_cast<ColumnNullable &>(column);
const auto & column_nullable = assert_cast<const ColumnNullable &>(column);
if (subcolumn_name == "null")
return column_nullable.getNullMapColumnPtr()->assumeMutable();

View File

@ -99,7 +99,7 @@ public:
bool onlyNull() const override;
bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
const DataTypePtr & getNestedType() const { return nested_data_type; }

View File

@ -22,7 +22,7 @@ private:
bool escape_delimiter;
public:
DataTypeOneElementTupleStreams(const DataTypePtr & nested_, const String & name_, bool escape_delimiter_ = true)
DataTypeOneElementTupleStreams(const DataTypePtr & nested_, const String & name_, bool escape_delimiter_)
: nested(nested_), name(name_), escape_delimiter(escape_delimiter_) {}
void enumerateStreams(
@ -99,7 +99,7 @@ private:
DataTypePtr createOneElementTuple(const DataTypePtr & type, const String & name, bool escape_delimiter)
{
auto custom_desc = std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>(type->getName()), nullptr,
std::make_unique<DataTypeCustomFixedName>(type->getName()),nullptr,
std::make_unique<DataTypeOneElementTupleStreams>(type, name, escape_delimiter));
return DataTypeFactory::instance().getCustom(std::move(custom_desc));

View File

@ -635,14 +635,14 @@ DataTypePtr DataTypeTuple::tryGetSubcolumnType(const String & subcolumn_name) co
return nullptr;
}
MutableColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, IColumn & column) const
ColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
{
for (size_t i = 0; i < names.size(); ++i)
{
if (startsWith(subcolumn_name, names[i]))
{
size_t name_length = names[i].size();
auto & subcolumn = extractElementColumn(column, i);
const auto & subcolumn = extractElementColumn(column, i);
if (subcolumn_name.size() == name_length)
return subcolumn.assumeMutable();

View File

@ -100,7 +100,7 @@ public:
size_t getSizeOfValueInMemory() const override;
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
const DataTypes & getElements() const { return elems; }
const Strings & getElementNames() const { return names; }

View File

@ -156,7 +156,7 @@ DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
}
MutableColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, IColumn &) const
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
}
@ -173,11 +173,7 @@ Names IDataType::getSubcolumnNames() const
new_path.push_back(elem);
auto subcolumn_name = getSubcolumnNameForStream(new_path);
if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name))
{
/// Not all of substreams have its subcolumn.
if (tryGetSubcolumnType(subcolumn_name))
res.insert(subcolumn_name);
}
res.insert(subcolumn_name);
}
});
@ -329,7 +325,7 @@ void IDataType::deserializeBinaryBulkWithMultipleStreams(
}
/// Do not cache complex type, because they can be constructed
/// their subcolumns, which are in cache.
/// from their subcolumns, which are in cache.
if (!haveSubtypes())
{
auto cached_column = getFromSubstreamsCache(cache, settings.path);
@ -340,7 +336,7 @@ void IDataType::deserializeBinaryBulkWithMultipleStreams(
}
}
auto mutable_column = IColumn::mutate(std::move(column));
auto mutable_column = column->assumeMutable();
deserializeBinaryBulkWithMultipleStreamsImpl(*mutable_column, limit, settings, state, cache);
column = std::move(mutable_column);

View File

@ -126,7 +126,7 @@ public:
virtual DataTypePtr tryGetSubcolumnType(const String & /* subcolumn_name */) const { return nullptr; }
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
virtual MutableColumnPtr getSubcolumn(const String & subcolumn_name, IColumn & column) const;
virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const;
Names getSubcolumnNames() const;
using OutputStreamGetter = std::function<WriteBuffer*(const SubstreamPath &)>;

View File

@ -220,7 +220,7 @@ void MergeTreeReaderCompact::readData(
storage_type->deserializeBinaryBulkStatePrefix(deserialize_settings, state);
storage_type->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state);
column = storage_type->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column->assumeMutable());
column = storage_type->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column);
}
else
{

View File

@ -47,7 +47,7 @@ static ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair &
const auto & column = block.getByName(storage_name).column;
if (name_and_type.isSubcolumn())
return name_and_type.getStorageType()->getSubcolumn(name_and_type.getSubcolumnName(), *column->assumeMutable());
return name_and_type.getStorageType()->getSubcolumn(name_and_type.getSubcolumnName(), *column);
return column;
}

View File

@ -121,7 +121,7 @@ protected:
{
const auto & current_column = buffer.data.getByName(elem.getStorageName()).column;
if (elem.isSubcolumn())
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column->assumeMutable()));
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column));
else
columns.emplace_back(std::move(current_column));
}

View File

@ -69,7 +69,7 @@ protected:
{
auto current_column = src.getByName(elem.getStorageName()).column;
if (elem.isSubcolumn())
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column->assumeMutable()));
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column));
else
columns.emplace_back(std::move(current_column));
}