From c2a40940dbc6a2bac3623bef2ac56bc842d837fd Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 29 Jul 2024 17:02:33 +0200 Subject: [PATCH] Revert "Slightly better calculation of primary index" --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 19 +----- .../MergeTreeDataPartWriterOnDisk.cpp | 65 +++++++++---------- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 9 ++- .../02993_lazy_index_loading.reference | 2 +- ..._system_unload_primary_key_table.reference | 8 +-- .../03128_system_unload_primary_key.reference | 4 +- 6 files changed, 45 insertions(+), 62 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index c87f66b64f3..6152da78395 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -72,21 +71,9 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - /// The memory for index was allocated without thread memory tracker. - /// We need to deallocate it in shrinkToFit without memory tracker as well. - MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; - - Columns result; - result.reserve(index_columns.size()); - - for (auto & column : index_columns) - { - column->shrinkToFit(); - result.push_back(std::move(column)); - } - - index_columns.clear(); - return result; + return Columns( + std::make_move_iterator(index_columns.begin()), + std::make_move_iterator(index_columns.end())); } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 6dc7e649b06..46dd766139a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -255,12 +255,6 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } - - const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; - index_serializations.reserve(primary_key_types.size()); - - for (const auto & type : primary_key_types) - index_serializations.push_back(type->getDefaultSerialization()); } } @@ -306,33 +300,22 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } - skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } -void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) -{ - chassert(index_block.columns() == index_serializations.size()); - auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; - - for (size_t i = 0; i < index_block.columns(); ++i) - { - const auto & column = index_block.getByPosition(i).column; - - index_columns[i]->insertFrom(*column, row); - index_serializations[i]->serializeBinary(*column, row, index_stream, {}); - } -} - void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - if (!metadata_snapshot->hasPrimaryKey()) - return; - + size_t primary_columns_num = primary_index_block.columns(); if (index_columns.empty()) - index_columns = primary_index_block.cloneEmptyColumns(); + { + index_types = primary_index_block.getDataTypes(); + index_columns.resize(primary_columns_num); + last_block_index_columns.resize(primary_columns_num); + for (size_t i = 0; i < primary_columns_num; ++i) + index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); + } { /** While filling index (index_columns), disable memory tracker. @@ -346,14 +329,22 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (granule.mark_on_start) - calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); + if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) + { + for (size_t j = 0; j < primary_columns_num; ++j) + { + const auto & primary_column = primary_index_block.getByPosition(j); + index_columns[j]->insertFrom(*primary_column.column, granule.start_row); + primary_column.type->getDefaultSerialization()->serializeBinary( + *primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); + } + } } } - /// Store block with last index row to write final mark at the end of column - if (with_final_mark) - last_index_block = primary_index_block; + /// store last index row to write final mark at the end of column + for (size_t j = 0; j < primary_columns_num; ++j) + last_block_index_columns[j] = primary_index_block.getByPosition(j).column; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -430,11 +421,17 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark && last_index_block) + if (write_final_mark) { - MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; - calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); - last_index_block.clear(); + for (size_t j = 0; j < index_columns.size(); ++j) + { + const auto & column = *last_block_index_columns[j]; + size_t last_row_number = column.size() - 1; + index_columns[j]->insertFrom(column, last_row_number); + index_types[j]->getDefaultSerialization()->serializeBinary( + column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); + } + last_block_index_columns.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 8d84442981e..bdf0fdb7f32 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - /// Last block with index columns. - /// It's written to index file in the `writeSuffixAndFinalizePart` method. - Block last_index_block; - Serializations index_serializations; + DataTypes index_types; + /// Index columns from the last block + /// It's written to index file in the `writeSuffixAndFinalizePart` method + Columns last_block_index_columns; bool data_written = false; @@ -193,7 +193,6 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; - void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 08f07a92815..5bc329ae4eb 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 100000000 +100000000 140000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 2d33f7f6683..3ac6127fb21 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 100000000 -100000000 100000000 -100000000 100000000 +100000000 140000000 +100000000 140000000 +100000000 140000000 0 0 -100000000 100000000 +100000000 140000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index 2646dc7247f..c7b40ae5b06 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 100000000 -100000000 100000000 +100000000 140000000 +100000000 140000000 0 0 0 0