diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 7df4a956c1a..d4acb877e87 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -224,7 +224,11 @@ String ISerialization::getSubcolumnNameForStream(const SubstreamPath & path, siz void ISerialization::addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column) { - if (cache && !path.empty()) + if (!cache || path.empty()) + return; + + auto subcolumn_name = getSubcolumnNameForStream(path); + if (!subcolumn_name.empty()) cache->emplace(getSubcolumnNameForStream(path), column); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 45e5d9d92de..d4d3844de6c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -448,12 +448,7 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns) for (const auto & column : columns) column_name_to_position.emplace(column.name, pos++); - /// For wide parts convert plain arrays to Nested for - /// more convinient managing of shared offsets column. - if (part_type == Type::Wide) - columns_description = ColumnsDescription(Nested::collect(columns)); - else - columns_description = ColumnsDescription(columns); + columns_description = ColumnsDescription(columns); } NameAndTypePair IMergeTreeDataPart::getColumn(const String & column_name) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index b6e8b75147d..bf47ced2a77 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -136,6 +136,7 @@ public: void setColumns(const NamesAndTypesList & new_columns); const NamesAndTypesList & getColumns() const { return columns; } + const ColumnsDescription & getColumnsDescription() const { return columns_description; } NameAndTypePair getColumn(const String & name) const; std::optional tryGetColumn(const String & column_name) const; diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 78cc96ccd27..f844946fe22 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -55,16 +55,14 @@ public: const NamesAndTypesList & getColumns() const { return columns; } size_t numColumnsInResult() const { return columns.size(); } - size_t getFirstMarkToRead() const - { - return all_mark_ranges.front().begin; - } + size_t getFirstMarkToRead() const { return all_mark_ranges.front().begin; } MergeTreeData::DataPartPtr data_part; protected: /// Returns actual column name in part, which can differ from table metadata. String getColumnNameInPart(const NameAndTypePair & required_column) const; + /// Returns actual column name and type in part, which can differ from table metadata. NameAndTypePair getColumnInPart(const NameAndTypePair & required_column) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 3e5576b6bdf..b70d67de3b4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -49,7 +49,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( { auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - ptr, Nested::convertToSubcolumns(columns_to_read), + ptr, columns_to_read, metadata_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, avg_value_size_hints, profile_callback); @@ -66,7 +66,7 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( { return std::make_unique( shared_from_this(), data_part_storage_builder, - Nested::convertToSubcolumns(columns_list), metadata_snapshot, indices_to_recalc, + columns_list, metadata_snapshot, indices_to_recalc, index_granularity_info.marks_file_extension, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index e098e76d4ee..aed9a906a5c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,19 @@ MergeTreeReaderWide::MergeTreeReaderWide( } } +String MergeTreeReaderWide::getNameForSubstreamCache(const NameAndTypePair & column) const +{ + if (!column.isSubcolumn() && isArray(column.type)) + { + auto split = Nested::splitName(column.name); + const auto & part_columns = data_part->getColumnsDescription(); + + if (!split.second.empty() && part_columns.hasNested(split.first)) + return split.first; + } + + return column.getNameInStorage(); +} size_t MergeTreeReaderWide::readRows( size_t from_mark, size_t current_task_last_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns) @@ -86,7 +100,7 @@ size_t MergeTreeReaderWide::readRows( auto column_from_part = getColumnInPart(*name_and_type); try { - auto & cache = caches[column_from_part.getNameInStorage()]; + auto & cache = caches[getNameForSubstreamCache(column_from_part)]; prefetch(column_from_part, from_mark, continue_reading, current_task_last_mark, cache, prefetched_streams); } catch (Exception & e) @@ -117,7 +131,7 @@ size_t MergeTreeReaderWide::readRows( try { size_t column_size_before_reading = column->size(); - auto & cache = caches[column_from_part.getNameInStorage()]; + auto & cache = caches[getNameForSubstreamCache(column_from_part)]; readData( column_from_part, column, from_mark, continue_reading, current_task_last_mark, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 7bb1ccfd173..e382f3f1dde 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -38,6 +38,8 @@ public: private: FileStreams streams; + String getNameForSubstreamCache(const NameAndTypePair & column) const; + void addStreams(const NameAndTypePair & name_and_type, const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type);