From f225649332d022ec5f8572d994038c01aee6f1ab Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 27 May 2024 21:09:11 +0000 Subject: [PATCH 01/48] calculate skip indexes on vertical merge --- src/Storages/MergeTree/MergeTask.cpp | 217 ++++++++++++++++----------- src/Storages/MergeTree/MergeTask.h | 10 +- 2 files changed, 141 insertions(+), 86 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e43b6c615b3..bfe2f4673db 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -5,8 +5,15 @@ #include #include +#include "Common/DateLUT.h" #include #include +#include "Core/NamesAndTypes.h" +#include "Storages/ColumnsDescription.h" +#include "Storages/IndicesDescription.h" +#include "Storages/MergeTree/MergeTreeIndices.h" +#include "Storages/ProjectionsDescription.h" +#include "Storages/StorageInMemoryMetadata.h" #include #include #include @@ -48,59 +55,37 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } - -/// PK columns are sorted and merged, ordinary columns are gathered using info from merge step -static void extractMergingAndGatheringColumns( - const NamesAndTypesList & storage_columns, - const ExpressionActionsPtr & sorting_key_expr, - const IndicesDescription & indexes, - const MergeTreeData::MergingParams & merging_params, - NamesAndTypesList & gathering_columns, Names & gathering_column_names, - NamesAndTypesList & merging_columns, Names & merging_column_names) +static Statistics getStatisticsForColumns( + const NamesAndTypesList & columns_to_read, + const StorageMetadataPtr & metadata_snapshot) { - Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns(); - std::set key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend()); - for (const auto & index : indexes) + Statistics statistics; + const auto & all_columns = metadata_snapshot->getColumns(); + + for (const auto & column : columns_to_read) { - Names index_columns_vec = index.expression->getRequiredColumns(); - std::copy(index_columns_vec.cbegin(), index_columns_vec.cend(), - std::inserter(key_columns, key_columns.end())); - } - - /// Force sign column for Collapsing mode - if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) - key_columns.emplace(merging_params.sign_column); - - /// Force version column for Replacing mode - if (merging_params.mode == MergeTreeData::MergingParams::Replacing) - { - key_columns.emplace(merging_params.is_deleted_column); - key_columns.emplace(merging_params.version_column); - } - - /// Force sign column for VersionedCollapsing mode. Version is already in primary key. - if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) - key_columns.emplace(merging_params.sign_column); - - /// Force to merge at least one column in case of empty key - if (key_columns.empty()) - key_columns.emplace(storage_columns.front().name); - - /// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns - - for (const auto & column : storage_columns) - { - if (key_columns.contains(column.name)) + const auto & desc = all_columns.get(column.name); + if (desc.stat) { - merging_columns.emplace_back(column); - merging_column_names.emplace_back(column.name); - } - else - { - gathering_columns.emplace_back(column); - gathering_column_names.emplace_back(column.name); + auto statistic = MergeTreeStatisticsFactory::instance().get(*desc.stat); + statistics.push_back(std::move(statistic)); } } + return statistics; +} + +static void addSkipIndexesExpressions( + QueryPipelineBuilder & builder, + const IndicesDescription & indexes, + const StorageMetadataPtr & metadata_snapshot, + const ContextPtr & context) +{ + builder.addTransform(std::make_shared( + builder.getHeader(), + indexes.getSingleExpressionForIndices(metadata_snapshot->getColumns(), + context))); + + builder.addTransform(std::make_shared(builder.getHeader())); } static void addMissedColumnsToSerializationInfos( @@ -129,6 +114,76 @@ static void addMissedColumnsToSerializationInfos( } } +/// PK columns are sorted and merged, ordinary columns are gathered using info from merge step +void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColumns() +{ + const auto & sorting_key_expr = global_ctx->metadata_snapshot->getSortingKey().expression; + Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns(); + + std::set key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend()); + + /// Force sign column for Collapsing mode + if (ctx->merging_params.mode == MergeTreeData::MergingParams::Collapsing) + key_columns.emplace(ctx->merging_params.sign_column); + + /// Force version column for Replacing mode + if (ctx->merging_params.mode == MergeTreeData::MergingParams::Replacing) + { + key_columns.emplace(ctx->merging_params.is_deleted_column); + key_columns.emplace(ctx->merging_params.version_column); + } + + /// Force sign column for VersionedCollapsing mode. Version is already in primary key. + if (ctx->merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) + key_columns.emplace(ctx->merging_params.sign_column); + + /// Force to merge at least one column in case of empty key + if (key_columns.empty()) + key_columns.emplace(global_ctx->storage_columns.front().name); + + const auto & skip_indexes = global_ctx->metadata_snapshot->getSecondaryIndices(); + + for (const auto & index : skip_indexes) + { + auto index_columns = index.expression->getRequiredColumns(); + + if (index_columns.size() == 1) + { + const auto & column_name = index_columns.front(); + global_ctx->skip_indexes_by_column[column_name].push_back(index); + } + else + { + std::ranges::copy(index_columns, std::inserter(key_columns, key_columns.end())); + global_ctx->merging_skip_indexes.push_back(index); + } + } + + /// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns + + for (const auto & column : global_ctx->storage_columns) + { + if (key_columns.contains(column.name)) + { + global_ctx->merging_columns.emplace_back(column); + global_ctx->merging_column_names.emplace_back(column.name); + + auto it = global_ctx->skip_indexes_by_column.find(column.name); + if (it != global_ctx->skip_indexes_by_column.end()) + { + for (auto && index : it->second) + global_ctx->merging_skip_indexes.push_back(std::move(index)); + + global_ctx->skip_indexes_by_column.erase(it); + } + } + else + { + global_ctx->gathering_columns.emplace_back(column); + global_ctx->gathering_column_names.emplace_back(column.name); + } + } +} bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { @@ -204,19 +259,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() extendObjectColumns(global_ctx->storage_columns, object_columns, false); global_ctx->storage_snapshot = std::make_shared(*global_ctx->data, global_ctx->metadata_snapshot, std::move(object_columns)); - extractMergingAndGatheringColumns( - global_ctx->storage_columns, - global_ctx->metadata_snapshot->getSortingKey().expression, - global_ctx->metadata_snapshot->getSecondaryIndices(), - ctx->merging_params, - global_ctx->gathering_columns, - global_ctx->gathering_column_names, - global_ctx->merging_columns, - global_ctx->merging_column_names); - global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition()); global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr; + /// In case of replicated merge tree with zero copy replication /// Here Clickhouse claims that this new part can be deleted in temporary state without unlocking the blobs /// The blobs have to be removed along with the part, this temporary part owns them and does not share them yet. @@ -226,10 +272,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->force_ttl = false; if (enabledBlockNumberColumn(global_ctx)) - addGatheringColumn(global_ctx, BlockNumberColumn::name, BlockNumberColumn::type); + addStorageColumn(global_ctx, BlockNumberColumn::name, BlockNumberColumn::type); if (enabledBlockOffsetColumn(global_ctx)) - addGatheringColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type); + addStorageColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type); SerializationInfo::Settings info_settings = { @@ -299,17 +345,18 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { global_ctx->merging_columns = global_ctx->storage_columns; global_ctx->merging_column_names = global_ctx->all_column_names; - global_ctx->gathering_columns.clear(); - global_ctx->gathering_column_names.clear(); + global_ctx->merging_skip_indexes = global_ctx->metadata_snapshot->getSecondaryIndices(); break; } case MergeAlgorithm::Vertical: { + extractMergingAndGatheringColumns(); + ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->createRawStream(); ctx->rows_sources_write_buf = std::make_unique(*ctx->rows_sources_uncompressed_write_buf); std::map local_merged_column_to_size; - for (const MergeTreeData::DataPartPtr & part : global_ctx->future_part->parts) + for (const auto & part : global_ctx->future_part->parts) part->accumulateColumnSizes(local_merged_column_to_size); ctx->column_sizes = ColumnSizeEstimator( @@ -376,8 +423,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->new_data_part, global_ctx->metadata_snapshot, global_ctx->merging_columns, - MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), - MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()), + MergeTreeIndexFactory::instance().getMany(global_ctx->merging_skip_indexes), + getStatisticsForColumns(global_ctx->merging_columns, global_ctx->metadata_snapshot), ctx->compression_codec, global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, @@ -401,7 +448,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() return false; } -void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type) +void MergeTask::addStorageColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type) { if (global_ctx->storage_columns.contains(name)) return; @@ -575,7 +622,6 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const Names column_names{column_name}; ctx->progress_before = global_ctx->merge_list_element_ptr->progress.load(std::memory_order_relaxed); - global_ctx->column_progress = std::make_unique(ctx->progress_before, ctx->column_sizes->columnWeight(column_name)); Pipes pipes; @@ -598,7 +644,6 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const } auto pipe = Pipe::unitePipes(std::move(pipes)); - ctx->rows_sources_read_buf->seek(0, 0); const auto data_settings = global_ctx->data->getSettings(); @@ -609,9 +654,20 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const data_settings->merge_max_block_size, data_settings->merge_max_block_size_bytes); - pipe.addTransform(std::move(transform)); + QueryPipelineBuilder builder; + builder.init(std::move(pipe)); + builder.addTransform(std::move(transform)); - ctx->column_parts_pipeline = QueryPipeline(std::move(pipe)); + MergeTreeIndices indexes_to_recalc; + auto indexes_it = global_ctx->skip_indexes_by_column.find(column_name); + + if (indexes_it != global_ctx->skip_indexes_by_column.end()) + { + indexes_to_recalc = MergeTreeIndexFactory::instance().getMany(indexes_it->second); + addSkipIndexesExpressions(builder, indexes_it->second, global_ctx->metadata_snapshot, global_ctx->data->getContext()); + } + + ctx->column_parts_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); /// Dereference unique_ptr ctx->column_parts_pipeline.setProgressCallback(MergeProgressCallback( @@ -621,7 +677,6 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const /// Is calculated inside MergeProgressCallback. ctx->column_parts_pipeline.disableProfileEventUpdate(); - ctx->executor = std::make_unique(ctx->column_parts_pipeline); ctx->column_to = std::make_unique( @@ -629,11 +684,8 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const global_ctx->metadata_snapshot, ctx->executor->getHeader(), ctx->compression_codec, - /// we don't need to recalc indices here - /// because all of them were already recalculated and written - /// as key part of vertical merge - std::vector{}, - std::vector{}, /// TODO: think about it + indexes_to_recalc, + getStatisticsForColumns({*ctx->it_name_and_type}, global_ctx->metadata_snapshot), &global_ctx->written_offset_columns, global_ctx->to->getIndexGranularity()); @@ -1117,13 +1169,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() builder->addTransform(std::move(transform)); } - if (global_ctx->metadata_snapshot->hasSecondaryIndices()) - { - const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices(); - builder->addTransform(std::make_shared( - builder->getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext()))); - builder->addTransform(std::make_shared(builder->getHeader())); - } + if (!global_ctx->merging_skip_indexes.empty()) + addSkipIndexesExpressions(*builder, global_ctx->merging_skip_indexes, global_ctx->metadata_snapshot, global_ctx->data->getContext()); if (!subqueries.empty()) builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), global_ctx->context); @@ -1172,7 +1219,7 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm ctx->merging_params.mode == MergeTreeData::MergingParams::Replacing || ctx->merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing; - bool enough_ordinary_cols = global_ctx->gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate; + bool enough_columns = global_ctx->gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate; bool enough_total_rows = total_rows_count >= data_settings->vertical_merge_algorithm_min_rows_to_activate; @@ -1180,7 +1227,7 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm bool no_parts_overflow = global_ctx->future_part->parts.size() <= RowSourcePart::MAX_PARTS; - auto merge_alg = (is_supported_storage && enough_total_rows && enough_total_bytes && enough_ordinary_cols && no_parts_overflow) ? + auto merge_alg = (is_supported_storage && enough_total_rows && enough_total_bytes && enough_columns && no_parts_overflow) ? MergeAlgorithm::Vertical : MergeAlgorithm::Horizontal; return merge_alg; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index c8b0662e3eb..ae7e13dd244 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -2,8 +2,11 @@ #include #include +#include #include +#include "Storages/MergeTree/MergeTreeIndices.h" +#include "Storages/Statistics/Statistics.h" #include #include @@ -170,6 +173,9 @@ private: Names all_column_names{}; MergeTreeData::DataPart::Checksums checksums_gathered_columns{}; + IndicesDescription merging_skip_indexes; + std::unordered_map skip_indexes_by_column; + MergeAlgorithm chosen_merge_algorithm{MergeAlgorithm::Undecided}; size_t gathering_column_names_size{0}; @@ -260,12 +266,14 @@ private: MergeAlgorithm chooseMergeAlgorithm() const; void createMergedStream(); + void extractMergingAndGatheringColumns(); void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) override { ctx = static_pointer_cast(local); global_ctx = static_pointer_cast(global); } + StageRuntimeContextPtr getContextForNextStage() override; ExecuteAndFinalizeHorizontalPartRuntimeContextPtr ctx; @@ -414,7 +422,7 @@ private: return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); } - static void addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type); + static void addStorageColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type); }; /// FIXME From 2f6a86f3a11426b82acdba4d485581f1d7c5e1f7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 28 May 2024 11:45:12 +0000 Subject: [PATCH 02/48] remove unneeded fields --- src/Core/NamesAndTypes.cpp | 12 ++++ src/Core/NamesAndTypes.h | 3 + src/Storages/MergeTree/ColumnSizeEstimator.h | 10 +-- src/Storages/MergeTree/MergeTask.cpp | 70 ++++++++----------- src/Storages/MergeTree/MergeTask.h | 8 +-- .../MergeTreeDataPartWriterOnDisk.cpp | 1 + 6 files changed, 51 insertions(+), 53 deletions(-) diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index d6380a632f1..49ab822c738 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -188,6 +188,18 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const return filter(NameSet(names.begin(), names.end())); } +NamesAndTypesList NamesAndTypesList::eraseNames(const NameSet & names) const +{ + NamesAndTypesList res; + for (const auto & column : *this) + { + if (!names.contains(column.name)) + res.push_back(column); + } + return res; +} + + NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const { /// NOTE: It's better to make a map in `IStorage` than to create it here every time again. diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index 915add9b7bc..29f40c45938 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -111,6 +111,9 @@ public: /// Leave only the columns whose names are in the `names`. In `names` there can be superfluous columns. NamesAndTypesList filter(const Names & names) const; + /// Leave only the columns whose names are not in the `names`. + NamesAndTypesList eraseNames(const NameSet & names) const; + /// Unlike `filter`, returns columns in the order in which they go in `names`. NamesAndTypesList addTypes(const Names & names) const; diff --git a/src/Storages/MergeTree/ColumnSizeEstimator.h b/src/Storages/MergeTree/ColumnSizeEstimator.h index 1307a5f493e..59a635a00fb 100644 --- a/src/Storages/MergeTree/ColumnSizeEstimator.h +++ b/src/Storages/MergeTree/ColumnSizeEstimator.h @@ -19,18 +19,18 @@ public: size_t sum_index_columns = 0; size_t sum_ordinary_columns = 0; - ColumnSizeEstimator(ColumnToSize && map_, const Names & key_columns, const Names & ordinary_columns) + ColumnSizeEstimator(ColumnToSize && map_, const NamesAndTypesList & key_columns, const NamesAndTypesList & ordinary_columns) : map(std::move(map_)) { - for (const auto & name : key_columns) + for (const auto & [name, _] : key_columns) if (!map.contains(name)) map[name] = 0; - for (const auto & name : ordinary_columns) + for (const auto & [name, _] : ordinary_columns) if (!map.contains(name)) map[name] = 0; - for (const auto & name : key_columns) + for (const auto & [name, _] : key_columns) sum_index_columns += map.at(name); - for (const auto & name : ordinary_columns) + for (const auto & [name, _] : ordinary_columns) sum_ordinary_columns += map.at(name); sum_total = std::max(static_cast(1), sum_index_columns + sum_ordinary_columns); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index bfe2f4673db..9dc72172a88 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -166,7 +166,6 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu if (key_columns.contains(column.name)) { global_ctx->merging_columns.emplace_back(column); - global_ctx->merging_column_names.emplace_back(column.name); auto it = global_ctx->skip_indexes_by_column.find(column.name); if (it != global_ctx->skip_indexes_by_column.end()) @@ -180,7 +179,6 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu else { global_ctx->gathering_columns.emplace_back(column); - global_ctx->gathering_column_names.emplace_back(column.name); } } } @@ -251,8 +249,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (!global_ctx->parent_part) global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename); - global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical(); + extractMergingAndGatheringColumns(); auto object_columns = MergeTreeData::getConcreteObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns()); @@ -272,10 +270,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->force_ttl = false; if (enabledBlockNumberColumn(global_ctx)) - addStorageColumn(global_ctx, BlockNumberColumn::name, BlockNumberColumn::type); + addGatheringColumn(global_ctx, BlockNumberColumn::name, BlockNumberColumn::type); if (enabledBlockOffsetColumn(global_ctx)) - addStorageColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type); + addGatheringColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type); SerializationInfo::Settings info_settings = { @@ -324,6 +322,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->sum_input_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count; ctx->sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed; + global_ctx->chosen_merge_algorithm = chooseMergeAlgorithm(); global_ctx->merge_list_element_ptr->merge_algorithm.store(global_ctx->chosen_merge_algorithm, std::memory_order_relaxed); @@ -344,14 +343,13 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() case MergeAlgorithm::Horizontal: { global_ctx->merging_columns = global_ctx->storage_columns; - global_ctx->merging_column_names = global_ctx->all_column_names; global_ctx->merging_skip_indexes = global_ctx->metadata_snapshot->getSecondaryIndices(); + global_ctx->gathering_columns.clear(); + global_ctx->skip_indexes_by_column.clear(); break; } case MergeAlgorithm::Vertical: { - extractMergingAndGatheringColumns(); - ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->createRawStream(); ctx->rows_sources_write_buf = std::make_unique(*ctx->rows_sources_uncompressed_write_buf); @@ -361,8 +359,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->column_sizes = ColumnSizeEstimator( std::move(local_merged_column_to_size), - global_ctx->merging_column_names, - global_ctx->gathering_column_names); + global_ctx->merging_columns, + global_ctx->gathering_columns); break; } @@ -370,9 +368,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge algorithm must be chosen"); } - assert(global_ctx->gathering_columns.size() == global_ctx->gathering_column_names.size()); - assert(global_ctx->merging_columns.size() == global_ctx->merging_column_names.size()); - /// If merge is vertical we cannot calculate it ctx->blocks_are_granules_size = (global_ctx->chosen_merge_algorithm == MergeAlgorithm::Vertical); @@ -389,28 +384,25 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() /// resources for this). if (!ctx->need_remove_expired_values) { - size_t expired_columns = 0; auto part_serialization_infos = global_ctx->new_data_part->getSerializationInfos(); + NameSet columns_to_remove; for (auto & [column_name, ttl] : global_ctx->new_data_part->ttl_infos.columns_ttl) { if (ttl.finished()) { global_ctx->new_data_part->expired_columns.insert(column_name); LOG_TRACE(ctx->log, "Adding expired column {} for part {}", column_name, global_ctx->new_data_part->name); - std::erase(global_ctx->gathering_column_names, column_name); - std::erase(global_ctx->merging_column_names, column_name); - std::erase(global_ctx->all_column_names, column_name); + columns_to_remove.insert(column_name); part_serialization_infos.erase(column_name); - ++expired_columns; } } - if (expired_columns) + if (!columns_to_remove.empty()) { - global_ctx->gathering_columns = global_ctx->gathering_columns.filter(global_ctx->gathering_column_names); - global_ctx->merging_columns = global_ctx->merging_columns.filter(global_ctx->merging_column_names); - global_ctx->storage_columns = global_ctx->storage_columns.filter(global_ctx->all_column_names); + global_ctx->gathering_columns = global_ctx->gathering_columns.eraseNames(columns_to_remove); + global_ctx->merging_columns = global_ctx->merging_columns.eraseNames(columns_to_remove); + global_ctx->storage_columns = global_ctx->storage_columns.eraseNames(columns_to_remove); global_ctx->new_data_part->setColumns( global_ctx->storage_columns, @@ -448,15 +440,13 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() return false; } -void MergeTask::addStorageColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type) +void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type) { if (global_ctx->storage_columns.contains(name)) return; global_ctx->storage_columns.emplace_back(name, type); - global_ctx->all_column_names.emplace_back(name); global_ctx->gathering_columns.emplace_back(name, type); - global_ctx->gathering_column_names.emplace_back(name); } @@ -470,7 +460,6 @@ MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::g new_ctx->compression_codec = std::move(ctx->compression_codec); new_ctx->tmp_disk = std::move(ctx->tmp_disk); new_ctx->it_name_and_type = std::move(ctx->it_name_and_type); - new_ctx->column_num_for_vertical_merge = std::move(ctx->column_num_for_vertical_merge); new_ctx->read_with_direct_io = std::move(ctx->read_with_direct_io); new_ctx->need_sync = std::move(ctx->need_sync); @@ -557,7 +546,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const size_t sum_input_rows_exact = global_ctx->merge_list_element_ptr->rows_read; size_t input_rows_filtered = *global_ctx->input_rows_filtered; - global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size(); + global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_columns.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); ctx->rows_sources_write_buf->next(); @@ -592,14 +581,12 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const /// Move ownership from std::unique_ptr to std::unique_ptr for CompressedReadBufferFromFile. /// First, release ownership from unique_ptr to base type. reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw` + /// Then, move ownership to unique_ptr to concrete type. std::unique_ptr reread_buffer_from_file(reread_buffer_raw); + /// CompressedReadBufferFromFile expects std::unique_ptr as argument. ctx->rows_sources_read_buf = std::make_unique(std::move(reread_buffer_from_file)); - - /// For external cycle - global_ctx->gathering_column_names_size = global_ctx->gathering_column_names.size(); - ctx->column_num_for_vertical_merge = 0; ctx->it_name_and_type = global_ctx->gathering_columns.cbegin(); const auto & settings = global_ctx->context->getSettingsRef(); @@ -743,8 +730,7 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const global_ctx->merge_list_element_ptr->bytes_written_uncompressed += bytes; global_ctx->merge_list_element_ptr->progress.store(ctx->progress_before + ctx->column_sizes->columnWeight(column_name), std::memory_order_relaxed); - /// This is the external cycle increment. - ++ctx->column_num_for_vertical_merge; + /// This is the external loop increment. ++ctx->it_name_and_type; } @@ -776,9 +762,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c LOG_DEBUG(ctx->log, "Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.", global_ctx->merge_list_element_ptr->rows_read, - global_ctx->all_column_names.size(), - global_ctx->merging_column_names.size(), - global_ctx->gathering_column_names.size(), + global_ctx->storage_columns.size(), + global_ctx->merging_columns.size(), + global_ctx->gathering_columns.size(), elapsed_seconds, global_ctx->merge_list_element_ptr->rows_read / elapsed_seconds, ReadableSize(global_ctx->merge_list_element_ptr->bytes_read_uncompressed / elapsed_seconds)); @@ -915,7 +901,7 @@ bool MergeTask::VerticalMergeStage::executeVerticalMergeForAllColumns() const return false; /// This is the external cycle condition - if (ctx->column_num_for_vertical_merge >= global_ctx->gathering_column_names_size) + if (ctx->it_name_and_type == global_ctx->gathering_columns.end()) return false; switch (ctx->vertical_merge_one_column_state) @@ -996,6 +982,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() global_ctx->horizontal_stage_progress = std::make_unique( ctx->column_sizes ? ctx->column_sizes->keyColumnsWeight() : 1.0); + auto merging_column_names = global_ctx->merging_columns.getNames(); + for (const auto & part : global_ctx->future_part->parts) { Pipe pipe = createMergeTreeSequentialSource( @@ -1003,7 +991,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() *global_ctx->data, global_ctx->storage_snapshot, part, - global_ctx->merging_column_names, + merging_column_names, /*mark_ranges=*/ {}, /*apply_deleted_mask=*/ true, ctx->read_with_direct_io, @@ -1143,12 +1131,12 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() /// If deduplicate_by_columns is empty, add all columns except virtuals. if (global_ctx->deduplicate_by_columns.empty()) { - for (const auto & column_name : global_ctx->merging_column_names) + for (const auto & column : global_ctx->merging_columns) { - if (virtuals.tryGet(column_name, VirtualsKind::Persistent)) + if (virtuals.tryGet(column.name, VirtualsKind::Persistent)) continue; - global_ctx->deduplicate_by_columns.emplace_back(column_name); + global_ctx->deduplicate_by_columns.emplace_back(column.name); } } diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index ae7e13dd244..7c509699903 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -167,17 +167,13 @@ private: NamesAndTypesList gathering_columns{}; NamesAndTypesList merging_columns{}; - Names gathering_column_names{}; - Names merging_column_names{}; NamesAndTypesList storage_columns{}; - Names all_column_names{}; MergeTreeData::DataPart::Checksums checksums_gathered_columns{}; IndicesDescription merging_skip_indexes; std::unordered_map skip_indexes_by_column; MergeAlgorithm chosen_merge_algorithm{MergeAlgorithm::Undecided}; - size_t gathering_column_names_size{0}; std::unique_ptr horizontal_stage_progress{nullptr}; std::unique_ptr column_progress{nullptr}; @@ -238,7 +234,6 @@ private: /// Dependencies for next stages std::list::const_iterator it_name_and_type; - size_t column_num_for_vertical_merge{0}; bool need_sync{false}; }; @@ -292,7 +287,6 @@ private: CompressionCodecPtr compression_codec; TemporaryDataOnDiskPtr tmp_disk{nullptr}; std::list::const_iterator it_name_and_type; - size_t column_num_for_vertical_merge{0}; bool read_with_direct_io{false}; bool need_sync{false}; /// End dependencies from previous stages @@ -422,7 +416,7 @@ private: return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); } - static void addStorageColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type); + static void addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type); }; /// FIXME diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 0a8920790e0..3754b3beab1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -176,6 +176,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( if (settings.rewrite_primary_key) initPrimaryIndex(); + initSkipIndices(); initStatistics(); } From 1bedd6192e06e414cd99c9b1939eb153ac679115 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 28 May 2024 14:01:53 +0000 Subject: [PATCH 03/48] add test --- ...3166_skip_indexes_vertical_merge.reference | 32 +++++++++++++++++ .../03166_skip_indexes_vertical_merge.sql | 34 +++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 tests/queries/0_stateless/03166_skip_indexes_vertical_merge.reference create mode 100644 tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.reference b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.reference new file mode 100644 index 00000000000..02d5765102c --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.reference @@ -0,0 +1,32 @@ +200 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Filter ((WHERE + Change column names to column identifiers)) + ReadFromMergeTree (default.t_ind_merge) + Indexes: + PrimaryKey + Condition: true + Parts: 2/2 + Granules: 32/32 + Skip + Name: idx_b + Description: minmax GRANULARITY 1 + Parts: 2/2 + Granules: 4/32 +200 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + Filter ((WHERE + Change column names to column identifiers)) + ReadFromMergeTree (default.t_ind_merge) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 32/32 + Skip + Name: idx_b + Description: minmax GRANULARITY 1 + Parts: 1/1 + Granules: 4/32 diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql new file mode 100644 index 00000000000..b894c054f8a --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS t_ind_merge; + +CREATE TABLE t_ind_merge (a UInt64, b UInt64, c UInt64, d UInt64, INDEX idx_b b TYPE minmax) +ENGINE = MergeTree +ORDER BY a SETTINGS + index_granularity = 64, + vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + min_bytes_for_wide_part = 0; + +INSERT INTO t_ind_merge SELECT number, number, rand(), rand() from numbers(1000); +INSERT INTO t_ind_merge SELECT number, number, rand(), rand() from numbers(1000); + +SELECT count() FROM t_ind_merge WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; +EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge WHERE b < 100; + +OPTIMIZE TABLE t_ind_merge FINAL; + +SELECT count() FROM t_ind_merge WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; +EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge WHERE b < 100; + +DROP TABLE t_ind_merge; +SYSTEM FLUSH LOGS; + +WITH + (SELECT uuid FROM system.tables WHERE database = currentDatabase() AND table = 't_ind_merge') AS uuid, + extractAllGroupsVertical(message, 'containing (\\d+) columns \((\\d+) merged, (\\d+) gathered\)')[1] AS groups +SELECT + groups[1] AS total, + groups[2] AS merged, + groups[3] AS gathered +FROM system.text_log +WHERE query_id = uuid || '::all_1_2_1' AND notEmpty(groups) +ORDER BY event_time_microseconds; From 1175c3d27ed157d68246db5b46a4dac638e2e474 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 29 May 2024 14:26:16 +0000 Subject: [PATCH 04/48] slightly better --- src/Storages/MergeTree/MergeTask.cpp | 54 +++++++++++++--------------- src/Storages/MergeTree/MergeTask.h | 4 +-- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 84567281291..d59501f033f 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -5,15 +5,9 @@ #include #include -#include "Common/DateLUT.h" #include #include #include "Core/NamesAndTypes.h" -#include "Storages/ColumnsDescription.h" -#include "Storages/IndicesDescription.h" -#include "Storages/MergeTree/MergeTreeIndices.h" -#include "Storages/ProjectionsDescription.h" -#include "Storages/StorageInMemoryMetadata.h" #include #include #include @@ -74,20 +68,6 @@ static Statistics getStatisticsForColumns( return statistics; } -static void addSkipIndexesExpressions( - QueryPipelineBuilder & builder, - const IndicesDescription & indexes, - const StorageMetadataPtr & metadata_snapshot, - const ContextPtr & context) -{ - builder.addTransform(std::make_shared( - builder.getHeader(), - indexes.getSingleExpressionForIndices(metadata_snapshot->getColumns(), - context))); - - builder.addTransform(std::make_shared(builder.getHeader())); -} - static void addMissedColumnsToSerializationInfos( size_t num_rows_in_parts, const Names & part_columns, @@ -147,6 +127,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu { auto index_columns = index.expression->getRequiredColumns(); + /// Calculate indexes that depend only on one column on vertical + /// stage and other indexes on horizonatal stage of merge. if (index_columns.size() == 1) { const auto & column_name = index_columns.front(); @@ -167,6 +149,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu { global_ctx->merging_columns.emplace_back(column); + /// If column is in horizontal stage we need to calculate its indexes on horizontal stage as well auto it = global_ctx->skip_indexes_by_column.find(column.name); if (it != global_ctx->skip_indexes_by_column.end()) { @@ -630,12 +613,12 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const pipes.emplace_back(std::move(pipe)); } - bool is_result_sparse = global_ctx->new_data_part->getSerialization(column_name)->getKind() == ISerialization::Kind::SPARSE; - auto pipe = Pipe::unitePipes(std::move(pipes)); ctx->rows_sources_read_buf->seek(0, 0); const auto data_settings = global_ctx->data->getSettings(); + bool is_result_sparse = global_ctx->new_data_part->getSerialization(column_name)->getKind() == ISerialization::Kind::SPARSE; + auto transform = std::make_unique( pipe.getHeader(), pipe.numOutputPorts(), @@ -644,9 +627,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const data_settings->merge_max_block_size_bytes, is_result_sparse); - QueryPipelineBuilder builder; - builder.init(std::move(pipe)); - builder.addTransform(std::move(transform)); + pipe.addTransform(std::move(transform)); MergeTreeIndices indexes_to_recalc; auto indexes_it = global_ctx->skip_indexes_by_column.find(column_name); @@ -654,10 +635,16 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const if (indexes_it != global_ctx->skip_indexes_by_column.end()) { indexes_to_recalc = MergeTreeIndexFactory::instance().getMany(indexes_it->second); - addSkipIndexesExpressions(builder, indexes_it->second, global_ctx->metadata_snapshot, global_ctx->data->getContext()); + + pipe.addTransform(std::make_shared( + pipe.getHeader(), + indexes_it->second.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), + global_ctx->data->getContext()))); + + pipe.addTransform(std::make_shared(pipe.getHeader())); } - ctx->column_parts_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); + ctx->column_parts_pipeline = QueryPipeline(std::move(pipe)); /// Dereference unique_ptr ctx->column_parts_pipeline.setProgressCallback(MergeProgressCallback( @@ -1161,7 +1148,14 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() } if (!global_ctx->merging_skip_indexes.empty()) - addSkipIndexesExpressions(*builder, global_ctx->merging_skip_indexes, global_ctx->metadata_snapshot, global_ctx->data->getContext()); + { + builder->addTransform(std::make_shared( + builder->getHeader(), + global_ctx->merging_skip_indexes.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), + global_ctx->data->getContext()))); + + builder->addTransform(std::make_shared(builder->getHeader())); + } if (!subqueries.empty()) builder = addCreatingSetsTransform(std::move(builder), std::move(subqueries), global_ctx->context); @@ -1210,7 +1204,7 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm ctx->merging_params.mode == MergeTreeData::MergingParams::Replacing || ctx->merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing; - bool enough_columns = global_ctx->gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate; + bool enough_ordinary_cols = global_ctx->gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate; bool enough_total_rows = total_rows_count >= data_settings->vertical_merge_algorithm_min_rows_to_activate; @@ -1218,7 +1212,7 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm bool no_parts_overflow = global_ctx->future_part->parts.size() <= RowSourcePart::MAX_PARTS; - auto merge_alg = (is_supported_storage && enough_total_rows && enough_total_bytes && enough_columns && no_parts_overflow) ? + auto merge_alg = (is_supported_storage && enough_total_rows && enough_total_bytes && enough_ordinary_cols && no_parts_overflow) ? MergeAlgorithm::Vertical : MergeAlgorithm::Horizontal; return merge_alg; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 7c509699903..a2f18f8807f 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -2,11 +2,8 @@ #include #include -#include #include -#include "Storages/MergeTree/MergeTreeIndices.h" -#include "Storages/Statistics/Statistics.h" #include #include @@ -27,6 +24,7 @@ #include #include #include +#include namespace DB From 6ab029b465eb4567840b2e4b354d9be4a24f2325 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 30 May 2024 16:12:52 +0000 Subject: [PATCH 05/48] fix block number columns --- src/Storages/MergeTree/MergeTask.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 5b06338cbd2..dc14f737955 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -58,10 +58,10 @@ static Statistics getStatisticsForColumns( for (const auto & column : columns_to_read) { - const auto & desc = all_columns.get(column.name); - if (desc.stat) + const auto * desc = all_columns.tryGet(column.name); + if (desc && desc->stat) { - auto statistic = MergeTreeStatisticsFactory::instance().get(*desc.stat); + auto statistic = MergeTreeStatisticsFactory::instance().get(*desc->stat); statistics.push_back(std::move(statistic)); } } From 450bcd7f70ef04cd6dfdb482350302f706fdfbf8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 30 May 2024 16:23:23 +0000 Subject: [PATCH 06/48] add test --- .../03166_skip_indexes_vertical_merge.sql | 34 --------------- ...6_skip_indexes_vertical_merge_1.reference} | 5 ++- .../03166_skip_indexes_vertical_merge_1.sql | 35 ++++++++++++++++ ...66_skip_indexes_vertical_merge_2.reference | 1 + .../03166_skip_indexes_vertical_merge_2.sql | 41 +++++++++++++++++++ 5 files changed, 80 insertions(+), 36 deletions(-) delete mode 100644 tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql rename tests/queries/0_stateless/{03166_skip_indexes_vertical_merge.reference => 03166_skip_indexes_vertical_merge_1.reference} (88%) create mode 100644 tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql create mode 100644 tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.reference create mode 100644 tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql deleted file mode 100644 index b894c054f8a..00000000000 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.sql +++ /dev/null @@ -1,34 +0,0 @@ -DROP TABLE IF EXISTS t_ind_merge; - -CREATE TABLE t_ind_merge (a UInt64, b UInt64, c UInt64, d UInt64, INDEX idx_b b TYPE minmax) -ENGINE = MergeTree -ORDER BY a SETTINGS - index_granularity = 64, - vertical_merge_algorithm_min_rows_to_activate = 1, - vertical_merge_algorithm_min_columns_to_activate = 1, - min_bytes_for_wide_part = 0; - -INSERT INTO t_ind_merge SELECT number, number, rand(), rand() from numbers(1000); -INSERT INTO t_ind_merge SELECT number, number, rand(), rand() from numbers(1000); - -SELECT count() FROM t_ind_merge WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; -EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge WHERE b < 100; - -OPTIMIZE TABLE t_ind_merge FINAL; - -SELECT count() FROM t_ind_merge WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; -EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge WHERE b < 100; - -DROP TABLE t_ind_merge; -SYSTEM FLUSH LOGS; - -WITH - (SELECT uuid FROM system.tables WHERE database = currentDatabase() AND table = 't_ind_merge') AS uuid, - extractAllGroupsVertical(message, 'containing (\\d+) columns \((\\d+) merged, (\\d+) gathered\)')[1] AS groups -SELECT - groups[1] AS total, - groups[2] AS merged, - groups[3] AS gathered -FROM system.text_log -WHERE query_id = uuid || '::all_1_2_1' AND notEmpty(groups) -ORDER BY event_time_microseconds; diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.reference b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.reference similarity index 88% rename from tests/queries/0_stateless/03166_skip_indexes_vertical_merge.reference rename to tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.reference index 02d5765102c..86f79bea4ba 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge.reference +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.reference @@ -3,7 +3,7 @@ Expression ((Project names + Projection)) Aggregating Expression (Before GROUP BY) Filter ((WHERE + Change column names to column identifiers)) - ReadFromMergeTree (default.t_ind_merge) + ReadFromMergeTree (default.t_ind_merge_1) Indexes: PrimaryKey Condition: true @@ -19,7 +19,7 @@ Expression ((Project names + Projection)) Aggregating Expression (Before GROUP BY) Filter ((WHERE + Change column names to column identifiers)) - ReadFromMergeTree (default.t_ind_merge) + ReadFromMergeTree (default.t_ind_merge_1) Indexes: PrimaryKey Condition: true @@ -30,3 +30,4 @@ Expression ((Project names + Projection)) Description: minmax GRANULARITY 1 Parts: 1/1 Granules: 4/32 +4 1 3 diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql new file mode 100644 index 00000000000..a605f9dea81 --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS t_ind_merge_1; + +CREATE TABLE t_ind_merge_1 (a UInt64, b UInt64, c UInt64, d UInt64, INDEX idx_b b TYPE minmax) +ENGINE = MergeTree +ORDER BY a SETTINGS + index_granularity = 64, + vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + min_bytes_for_wide_part = 0; + +INSERT INTO t_ind_merge_1 SELECT number, number, rand(), rand() FROM numbers(1000); +INSERT INTO t_ind_merge_1 SELECT number, number, rand(), rand() FROM numbers(1000); + +SELECT count() FROM t_ind_merge_1 WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; +EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge_1 WHERE b < 100; + +OPTIMIZE TABLE t_ind_merge_1 FINAL; + +SELECT count() FROM t_ind_merge_1 WHERE b < 100 SETTINGS force_data_skipping_indices = 'idx_b'; +EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge_1 WHERE b < 100; + +SYSTEM FLUSH LOGS; + +WITH + (SELECT uuid FROM system.tables WHERE database = currentDatabase() AND table = 't_ind_merge_1') AS uuid, + extractAllGroupsVertical(message, 'containing (\\d+) columns \((\\d+) merged, (\\d+) gathered\)')[1] AS groups +SELECT + groups[1] AS total, + groups[2] AS merged, + groups[3] AS gathered +FROM system.text_log +WHERE query_id = uuid || '::all_1_2_1' AND notEmpty(groups) +ORDER BY event_time_microseconds; + +DROP TABLE t_ind_merge_1; diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.reference b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.reference new file mode 100644 index 00000000000..4c2f01294a4 --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.reference @@ -0,0 +1 @@ +6 3 3 diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql new file mode 100644 index 00000000000..656694242b7 --- /dev/null +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql @@ -0,0 +1,41 @@ +DROP TABLE IF EXISTS t_ind_merge_2; + +CREATE TABLE t_ind_merge_2 ( + a UInt64, + b UInt64, + c UInt64, + d UInt64, + e UInt64, + f UInt64, + INDEX idx_a a TYPE minmax, + INDEX idx_b b TYPE minmax, + INDEX idx_cd c * d TYPE minmax, + INDEX idx_d1 d TYPE minmax, + INDEX idx_d2 d TYPE set(3), + INDEX idx_e e TYPE set(3)) +ENGINE = MergeTree +ORDER BY a SETTINGS + index_granularity = 64, + vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + min_bytes_for_wide_part = 0; + +INSERT INTO t_ind_merge_2 SELECT number, number, rand(), rand(), rand(), rand() FROM numbers(1000); +INSERT INTO t_ind_merge_2 SELECT number, number, rand(), rand(), rand(), rand() FROM numbers(1000); + +OPTIMIZE TABLE t_ind_merge_2 FINAL; +SYSTEM FLUSH LOGS; + +--- merged: a, c, d; gathered: b, e, f +WITH + (SELECT uuid FROM system.tables WHERE database = currentDatabase() AND table = 't_ind_merge_2') AS uuid, + extractAllGroupsVertical(message, 'containing (\\d+) columns \((\\d+) merged, (\\d+) gathered\)')[1] AS groups +SELECT + groups[1] AS total, + groups[2] AS merged, + groups[3] AS gathered +FROM system.text_log +WHERE query_id = uuid || '::all_1_2_1' AND notEmpty(groups) +ORDER BY event_time_microseconds; + +DROP TABLE t_ind_merge_2; From 7f6d7f34630fde83f3378ce1f9217c32ca183e0e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 30 May 2024 17:55:53 +0000 Subject: [PATCH 07/48] fix indexes with expressions --- src/Storages/MergeTree/MergeTask.cpp | 5 +++-- src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp | 7 +++---- src/Storages/MergeTree/MergedColumnOnlyOutputStream.h | 6 +----- src/Storages/MergeTree/MutateTask.cpp | 2 +- .../0_stateless/03166_skip_indexes_vertical_merge_2.sql | 4 ++-- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index dc14f737955..2660602d652 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -679,14 +679,15 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const /// Is calculated inside MergeProgressCallback. ctx->column_parts_pipeline.disableProfileEventUpdate(); ctx->executor = std::make_unique(ctx->column_parts_pipeline); + NamesAndTypesList columns_list = {*ctx->it_name_and_type}; ctx->column_to = std::make_unique( global_ctx->new_data_part, global_ctx->metadata_snapshot, - ctx->executor->getHeader(), + columns_list, ctx->compression_codec, indexes_to_recalc, - getStatisticsForColumns({*ctx->it_name_and_type}, global_ctx->metadata_snapshot), + getStatisticsForColumns(columns_list, global_ctx->metadata_snapshot), &global_ctx->written_offset_columns, global_ctx->to->getIndexGranularity()); diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 674a9bd498f..307c5231761 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -13,15 +13,14 @@ namespace ErrorCodes MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, - const Block & header_, + const NamesAndTypesList & columns_list_, CompressionCodecPtr default_codec, const MergeTreeIndices & indices_to_recalc, const Statistics & stats_to_recalc_, WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) - , header(header_) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, /*reset_columns=*/ true) { const auto & global_settings = data_part->storage.getContext()->getSettings(); @@ -37,7 +36,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), data_part_storage, data_part->index_granularity_info, storage_settings, - header.getNamesAndTypesList(), + columns_list_, data_part->getColumnPositions(), metadata_snapshot_, data_part->storage.getVirtualsPtr(), diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index ad3cabe459e..99100d3d8fe 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -17,7 +17,7 @@ public: MergedColumnOnlyOutputStream( const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, - const Block & header_, + const NamesAndTypesList & columns_list_, CompressionCodecPtr default_codec_, const MergeTreeIndices & indices_to_recalc_, const Statistics & stats_to_recalc_, @@ -25,16 +25,12 @@ public: const MergeTreeIndexGranularity & index_granularity = {}, const MergeTreeIndexGranularityInfo * index_granularity_info_ = nullptr); - Block getHeader() const { return header; } void write(const Block & block) override; MergeTreeData::DataPart::Checksums fillChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums); void finish(bool sync); - -private: - Block header; }; using MergedColumnOnlyOutputStreamPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 43238c5bcbc..0b50ad4066b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1901,7 +1901,7 @@ private: ctx->out = std::make_shared( ctx->new_data_part, ctx->metadata_snapshot, - ctx->updated_header, + ctx->updated_header.getNamesAndTypesList(), ctx->compression_codec, std::vector(ctx->indices_to_recalc.begin(), ctx->indices_to_recalc.end()), Statistics(ctx->stats_to_recalc.begin(), ctx->stats_to_recalc.end()), diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql index 656694242b7..e29653a2e77 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql @@ -11,8 +11,8 @@ CREATE TABLE t_ind_merge_2 ( INDEX idx_b b TYPE minmax, INDEX idx_cd c * d TYPE minmax, INDEX idx_d1 d TYPE minmax, - INDEX idx_d2 d TYPE set(3), - INDEX idx_e e TYPE set(3)) + INDEX idx_d2 d + 7 TYPE set(3), + INDEX idx_e e * 3 TYPE set(3)) ENGINE = MergeTree ORDER BY a SETTINGS index_granularity = 64, From f716b131d77bd23545d7748e30bddaff49b2bb39 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 3 Jun 2024 14:23:34 +0000 Subject: [PATCH 08/48] fix tests --- src/Storages/MergeTree/MergeTask.cpp | 6 +++--- src/Storages/MergeTree/MergeTask.h | 2 +- .../0_stateless/03166_skip_indexes_vertical_merge_1.sql | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 2660602d652..0fcb3b0b0e7 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -95,7 +95,7 @@ static void addMissedColumnsToSerializationInfos( } /// PK columns are sorted and merged, ordinary columns are gathered using info from merge step -void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColumns() +void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColumns() const { const auto & sorting_key_expr = global_ctx->metadata_snapshot->getSortingKey().expression; Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns(); @@ -233,13 +233,13 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename); global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical(); - extractMergingAndGatheringColumns(); auto object_columns = MergeTreeData::getConcreteObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns()); - extendObjectColumns(global_ctx->storage_columns, object_columns, false); global_ctx->storage_snapshot = std::make_shared(*global_ctx->data, global_ctx->metadata_snapshot, std::move(object_columns)); + extractMergingAndGatheringColumns(); + global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition()); global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 13de2b9da11..0e13d3aef62 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -259,7 +259,7 @@ private: MergeAlgorithm chooseMergeAlgorithm() const; void createMergedStream(); - void extractMergingAndGatheringColumns(); + void extractMergingAndGatheringColumns() const; void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) override { diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql index a605f9dea81..ba770656532 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql @@ -1,9 +1,12 @@ DROP TABLE IF EXISTS t_ind_merge_1; +SET allow_experimental_analyzer = 1; + CREATE TABLE t_ind_merge_1 (a UInt64, b UInt64, c UInt64, d UInt64, INDEX idx_b b TYPE minmax) ENGINE = MergeTree ORDER BY a SETTINGS index_granularity = 64, + merge_max_block_size = 8192, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, min_bytes_for_wide_part = 0; From 7590845bf4d2dc8510479a6c8bdb4e10c0058110 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jun 2024 14:23:24 +0000 Subject: [PATCH 09/48] slightly better --- src/Storages/MergeTree/MergeTask.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index acbcecb8b7e..25e0d825778 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -7,7 +7,6 @@ #include #include -#include "Core/NamesAndTypes.h" #include #include #include @@ -153,7 +152,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu auto it = global_ctx->skip_indexes_by_column.find(column.name); if (it != global_ctx->skip_indexes_by_column.end()) { - for (auto && index : it->second) + for (auto & index : it->second) global_ctx->merging_skip_indexes.push_back(std::move(index)); global_ctx->skip_indexes_by_column.erase(it); @@ -997,8 +996,6 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() global_ctx->horizontal_stage_progress = std::make_unique( ctx->column_sizes ? ctx->column_sizes->keyColumnsWeight() : 1.0); - auto merging_column_names = global_ctx->merging_columns.getNames(); - for (const auto & part : global_ctx->future_part->parts) { Pipe pipe = createMergeTreeSequentialSource( @@ -1006,7 +1003,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() *global_ctx->data, global_ctx->storage_snapshot, part, - merging_column_names, + global_ctx->merging_columns.getNames(), /*mark_ranges=*/ {}, global_ctx->input_rows_filtered, /*apply_deleted_mask=*/ true, From 687b38e52d978c36eb7cfd13990b7f700a5d23df Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jun 2024 14:46:28 +0000 Subject: [PATCH 10/48] fix test with ordinary database --- .../queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql | 2 +- .../queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql index ba770656532..4f2fb8dea0c 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql @@ -32,7 +32,7 @@ SELECT groups[2] AS merged, groups[3] AS gathered FROM system.text_log -WHERE query_id = uuid || '::all_1_2_1' AND notEmpty(groups) +WHERE (query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_1::all_1_2_1') AND notEmpty(groups) ORDER BY event_time_microseconds; DROP TABLE t_ind_merge_1; diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql index e29653a2e77..0710b1069a8 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql @@ -35,7 +35,7 @@ SELECT groups[2] AS merged, groups[3] AS gathered FROM system.text_log -WHERE query_id = uuid || '::all_1_2_1' AND notEmpty(groups) +WHERE (query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_2::all_1_2_1') AND notEmpty(groups) ORDER BY event_time_microseconds; DROP TABLE t_ind_merge_2; From d4294dae0f75f7a8990f0521e9d1e225b5cdfd25 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 6 Jun 2024 11:38:58 +0200 Subject: [PATCH 11/48] Document flameGraph aggregate function --- .../reference/flame_graph.md | 95 +++++++++++++++++++ .../aggregate-functions/reference/index.md | 1 + 2 files changed, 96 insertions(+) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/flame_graph.md diff --git a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md new file mode 100644 index 00000000000..4aa1cd0c8a8 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md @@ -0,0 +1,95 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/flamegraph +sidebar_position: 110 +--- + +# flameGraph + +Aggregate function which builds a [flamegraph](https://www.brendangregg.com/flamegraphs.html) using the list of stacktraces. Outputs an array of strings which can be used by [flamegraph.pl util](https://github.com/brendangregg/FlameGraph) to render an SVG of the flamegraph. + +## Syntax + +```sql +flameGraph(traces, [size], [ptr]) +``` + +## Parameters + +- `traces` — a stacktrace. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). +- `size` — an allocation size for memory profiling. (optional - default `1`). [UInt64](../../data-types/int-uint.md). +- `ptr` — an allocation address. (optional - default `0`). [UInt64](../../data-types/int-uint.md). + +:::note +In the case where `ptr != 0`, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr. +Only allocations which were not freed are shown. Non mapped deallocations are ignored. +::: + +## Returned value + +- An array of strings for use with [flamegraph.pl util](https://github.com/brendangregg/FlameGraph). [Array](../../data-types/array.md)([String](../../data-types/string.md)). + +## Examples + +### Building a flamegraph based on a CPU query profiler + +```sql +SET query_profiler_cpu_time_period_ns=10000000; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +```text +clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg +``` + +### Building a flamegraph based on a memory query profiler, showing all allocations + +```sql +SET memory_profiler_sample_probability=1, max_untracked_memory=1; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +```text +clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg +``` + +### Building a flamegraph based on a memory query profiler, showing allocations which were not deallocated in query context + +```sql +SET memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +```text +clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg +``` + +### Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time + +```sql +SET memory_profiler_sample_probability=1, max_untracked_memory=1; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +``` + +- 1 - Memory usage per second + +```sql +SELECT event_time, m, formatReadableSize(max(s) as m) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample') GROUP BY event_time ORDER BY event_time; +``` + +- 2 - Find a time point with maximal memory usage + +```sql +SELECT argMax(event_time, s), max(s) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample'); +``` + +- 3 - Fix active allocations at fixed point of time + +```text +clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time <= 'yyy' ORDER BY event_time)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg +``` + +- 4 - Find deallocations at fixed point of time + +```text +clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, -size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time > 'yyy' ORDER BY event_time desc)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index a56b1c97681..e3725b6a430 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -58,6 +58,7 @@ ClickHouse-specific aggregate functions: - [topKWeighted](../reference/topkweighted.md) - [deltaSum](../reference/deltasum.md) - [deltaSumTimestamp](../reference/deltasumtimestamp.md) +- [flameGraph](../reference/flame_graph.md) - [groupArray](../reference/grouparray.md) - [groupArrayLast](../reference/grouparraylast.md) - [groupUniqArray](../reference/groupuniqarray.md) From baf3b6329d358eb024bacb54a863470b60337ee5 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:07:45 +0200 Subject: [PATCH 12/48] Fix typo --- .../aggregate-functions/reference/flame_graph.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md index 4aa1cd0c8a8..e09769477f1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md +++ b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md @@ -5,7 +5,7 @@ sidebar_position: 110 # flameGraph -Aggregate function which builds a [flamegraph](https://www.brendangregg.com/flamegraphs.html) using the list of stacktraces. Outputs an array of strings which can be used by [flamegraph.pl util](https://github.com/brendangregg/FlameGraph) to render an SVG of the flamegraph. +Aggregate function which builds a [flamegraph](https://www.brendangregg.com/flamegraphs.html) using the list of stacktraces. Outputs an array of strings which can be used by [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph) to render an SVG of the flamegraph. ## Syntax @@ -92,4 +92,4 @@ clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGr ```text clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, -size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time > 'yyy' ORDER BY event_time desc)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg -``` \ No newline at end of file +``` From 3c4f4d89813bac39b4eefa2011a82009b07723f2 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:08:54 +0200 Subject: [PATCH 13/48] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 49f43615c7e..09df4d98e71 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -222,6 +222,7 @@ DatabaseOrdinaryThreadsActive DateTime DateTimes DbCL +deallocated Decrypted Deduplicate Deduplication @@ -293,6 +294,7 @@ FilesystemMainPathUsedBytes FilesystemMainPathUsedINodes FixedString FlameGraph +flameGraph Flink ForEach FreeBSD From dbc27f0c34beb0efd7cbb20fd79cbab3669643e0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 6 Jun 2024 13:20:31 +0000 Subject: [PATCH 14/48] fix statistics --- src/Storages/MergeTree/MergeTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 25e0d825778..56bd1181fef 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -58,7 +58,7 @@ static ColumnsStatistics getStatisticsForColumns( for (const auto & column : columns_to_read) { const auto * desc = all_columns.tryGet(column.name); - if (desc && desc->statistics.empty()) + if (desc && !desc->statistics.empty()) { auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics); all_statistics.push_back(std::move(statistics)); From 99de6bd6ecd71db4e132ca5fe997736affd3f306 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 6 Jun 2024 14:51:42 +0000 Subject: [PATCH 15/48] fix tests --- .../queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql | 2 +- .../queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql index 4f2fb8dea0c..ac987c9c75c 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql @@ -32,7 +32,7 @@ SELECT groups[2] AS merged, groups[3] AS gathered FROM system.text_log -WHERE (query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_1::all_1_2_1') AND notEmpty(groups) +WHERE ((query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_1::all_1_2_1')) AND notEmpty(groups) ORDER BY event_time_microseconds; DROP TABLE t_ind_merge_1; diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql index 0710b1069a8..2805059d918 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql @@ -35,7 +35,7 @@ SELECT groups[2] AS merged, groups[3] AS gathered FROM system.text_log -WHERE (query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_2::all_1_2_1') AND notEmpty(groups) +WHERE ((query_id = uuid || '::all_1_2_1') OR (query_id = currentDatabase() || '.t_ind_merge_2::all_1_2_1')) AND notEmpty(groups) ORDER BY event_time_microseconds; DROP TABLE t_ind_merge_2; From 18ced447efe9ab612362a584e05dd4edb02a3b87 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 7 Jun 2024 08:45:39 +0200 Subject: [PATCH 16/48] Fix typo II --- .../sql-reference/aggregate-functions/reference/flame_graph.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md index e09769477f1..ae17153085c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md +++ b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md @@ -26,7 +26,7 @@ Only allocations which were not freed are shown. Non mapped deallocations are ig ## Returned value -- An array of strings for use with [flamegraph.pl util](https://github.com/brendangregg/FlameGraph). [Array](../../data-types/array.md)([String](../../data-types/string.md)). +- An array of strings for use with [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph). [Array](../../data-types/array.md)([String](../../data-types/string.md)). ## Examples From c4605b690b1c757d72113352f0b8cc150b4686eb Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sun, 9 Jun 2024 22:15:38 +0000 Subject: [PATCH 17/48] fix tests --- .../0_stateless/03166_skip_indexes_vertical_merge_1.sql | 3 ++- .../0_stateless/03166_skip_indexes_vertical_merge_2.sql | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql index ac987c9c75c..d3e3b38a3cb 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql @@ -9,7 +9,8 @@ ORDER BY a SETTINGS merge_max_block_size = 8192, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, - min_bytes_for_wide_part = 0; + min_bytes_for_wide_part = 0, + min_bytes_for_full_part_storage = 0; INSERT INTO t_ind_merge_1 SELECT number, number, rand(), rand() FROM numbers(1000); INSERT INTO t_ind_merge_1 SELECT number, number, rand(), rand() FROM numbers(1000); diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql index 2805059d918..b749e0c84b0 100644 --- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql +++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql @@ -18,7 +18,8 @@ ORDER BY a SETTINGS index_granularity = 64, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, - min_bytes_for_wide_part = 0; + min_bytes_for_wide_part = 0, + min_bytes_for_full_part_storage = 0; INSERT INTO t_ind_merge_2 SELECT number, number, rand(), rand(), rand(), rand() FROM numbers(1000); INSERT INTO t_ind_merge_2 SELECT number, number, rand(), rand(), rand(), rand() FROM numbers(1000); From ee94d68cb963172dc6c783f5e69b6f00a5732d2b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 5 Jun 2024 08:38:12 +0000 Subject: [PATCH 18/48] Less aggressive logging --- src/Storages/MergeTree/RowOrderOptimizer.cpp | 25 ++++++++++---------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/RowOrderOptimizer.cpp b/src/Storages/MergeTree/RowOrderOptimizer.cpp index 34f9fed4500..76b0d6452ad 100644 --- a/src/Storages/MergeTree/RowOrderOptimizer.cpp +++ b/src/Storages/MergeTree/RowOrderOptimizer.cpp @@ -78,9 +78,8 @@ std::vector getOtherColumnIndexes(const Block & block, const SortDescrip /// -------- /// 2 1 a 3 /// ---------------------- -EqualRanges getEqualRanges(const Block & block, const SortDescription & sort_description, const IColumn::Permutation & permutation, const LoggerPtr & log) +EqualRanges getEqualRanges(const Block & block, const SortDescription & sort_description, const IColumn::Permutation & permutation) { - LOG_TRACE(log, "Finding equal ranges"); EqualRanges ranges; const size_t rows = block.rows(); if (sort_description.empty()) @@ -122,11 +121,10 @@ void updatePermutationInEqualRange( const std::vector & other_column_indexes, IColumn::Permutation & permutation, const EqualRange & equal_range, - const std::vector & cardinalities) + const std::vector & cardinalities, + const LoggerPtr & log) { - LoggerPtr log = getLogger("RowOrderOptimizer"); - - LOG_TRACE(log, "Starting optimization in equal range"); + LOG_TEST(log, "Starting optimization in equal range"); std::vector column_order(other_column_indexes.size()); iota(column_order.begin(), column_order.end(), 0); @@ -134,17 +132,17 @@ void updatePermutationInEqualRange( stable_sort(column_order.begin(), column_order.end(), cmp); std::vector ranges = {equal_range}; - LOG_TRACE(log, "equal_range: .from: {}, .to: {}", equal_range.from, equal_range.to); + LOG_TEST(log, "equal_range: .from: {}, .to: {}", equal_range.from, equal_range.to); for (size_t i : column_order) { const size_t column_id = other_column_indexes[i]; const ColumnPtr & column = block.getByPosition(column_id).column; - LOG_TRACE(log, "i: {}, column_id: {}, column->getName(): {}, cardinality: {}", i, column_id, column->getName(), cardinalities[i]); + LOG_TEST(log, "i: {}, column_id: {}, column type: {}, cardinality: {}", i, column_id, column->getName(), cardinalities[i]); column->updatePermutation( IColumn::PermutationSortDirection::Ascending, IColumn::PermutationSortStability::Stable, 0, 1, permutation, ranges); } - LOG_TRACE(log, "Finish optimization in equal range"); + LOG_TEST(log, "Finish optimization in equal range"); } } @@ -156,7 +154,10 @@ void RowOrderOptimizer::optimize(const Block & block, const SortDescription & so LOG_TRACE(log, "Starting optimization"); if (block.columns() == 0) + { + LOG_TRACE(log, "Finished optimization (block has no columns)"); return; /// a table without columns, this should not happen in the first place ... + } if (permutation.empty()) { @@ -165,17 +166,17 @@ void RowOrderOptimizer::optimize(const Block & block, const SortDescription & so iota(permutation.data(), rows, IColumn::Permutation::value_type(0)); } - const EqualRanges equal_ranges = getEqualRanges(block, sort_description, permutation, log); + const EqualRanges equal_ranges = getEqualRanges(block, sort_description, permutation); const std::vector other_columns_indexes = getOtherColumnIndexes(block, sort_description); - LOG_TRACE(log, "block.columns(): {}, block.rows(): {}, sort_description.size(): {}, equal_ranges.size(): {}", block.columns(), block.rows(), sort_description.size(), equal_ranges.size()); + LOG_TRACE(log, "columns: {}, sorting key columns: {}, rows: {}, equal ranges: {}", block.columns(), sort_description.size(), block.rows(), equal_ranges.size()); for (const auto & equal_range : equal_ranges) { if (equal_range.size() <= 1) continue; const std::vector cardinalities = getCardinalitiesInPermutedRange(block, other_columns_indexes, permutation, equal_range); - updatePermutationInEqualRange(block, other_columns_indexes, permutation, equal_range, cardinalities); + updatePermutationInEqualRange(block, other_columns_indexes, permutation, equal_range, cardinalities, log); } LOG_TRACE(log, "Finished optimization"); From ccdaf6f5a42fd5be5120addbd72d8f19a956cbb0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 9 Jun 2024 19:12:47 +0000 Subject: [PATCH 19/48] Restrict to MergeTree --- docs/en/operations/settings/merge-tree-settings.md | 2 ++ src/Storages/MergeTree/MergeTreeDataWriter.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index d791683ac2b..39d9cd69e8f 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -978,6 +978,8 @@ Default value: false Controls if the row order should be optimized during inserts to improve the compressability of the newly inserted table part. +Only has an effect for ordinary MergeTree-engine tables. Does nothing for specialized MergeTree engine tables (e.g. CollapsingMergeTree). + MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec). Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns. Long runs of the same value typically compress very well. diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 8e304936747..bb8f104c2bb 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -503,7 +503,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } - if (data.getSettings()->allow_experimental_optimized_row_order) + if (data.getSettings()->allow_experimental_optimized_row_order + && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if it the optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); perm_ptr = &perm; @@ -730,7 +731,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocksAlreadySorted); } - if (data.getSettings()->allow_experimental_optimized_row_order) + if (data.getSettings()->allow_experimental_optimized_row_order + && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if it the optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); perm_ptr = &perm; From 25fd51e5bc9b3849451037f71f89fb2d88f0bb1b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 9 Jun 2024 19:18:21 +0000 Subject: [PATCH 20/48] Mark row order optimization non-experimental --- docs/en/operations/settings/merge-tree-settings.md | 2 +- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- .../03166_optimize_row_order_during_insert.sql | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 39d9cd69e8f..b45dc290797 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -974,7 +974,7 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting -### allow_experimental_optimized_row_order +### optimize_row_order Controls if the row order should be optimized during inserts to improve the compressability of the newly inserted table part. diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index bb8f104c2bb..7aa9c12a24b 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -503,7 +503,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } - if (data.getSettings()->allow_experimental_optimized_row_order + if (data.getSettings()->optimize_row_order && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if it the optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); @@ -731,7 +731,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocksAlreadySorted); } - if (data.getSettings()->allow_experimental_optimized_row_order + if (data.getSettings()->optimize_row_order) && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if it the optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 026a1da7196..6ababefa530 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -94,6 +94,7 @@ struct Settings; M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background.", 0) \ M(Bool, add_implicit_sign_column_constraint_for_collapsing_engine, false, "If true, add implicit constraint for sign column for CollapsingMergeTree engine.", 0) \ M(Milliseconds, sleep_before_commit_local_part_in_replicated_table_ms, 0, "For testing. Do not change it.", 0) \ + M(Bool, optimize_row_order, false, "Allow reshuffling of rows during part inserts and merges to improve the compressibility of the new part", 0) \ \ /* Part removal settings. */ \ M(UInt64, simultaneous_parts_removal_limit, 0, "Maximum number of parts to remove during one CleanupThread iteration (0 means unlimited).", 0) \ @@ -199,7 +200,6 @@ struct Settings; M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ - M(Bool, allow_experimental_optimized_row_order, false, "Allow reshuffling of rows during part inserts and merges to improve the compressibility of the new part", 0) \ \ /** Compress marks and primary key. */ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ diff --git a/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql index bb2f5e94d05..5fc71598e47 100644 --- a/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql +++ b/tests/queries/0_stateless/03166_optimize_row_order_during_insert.sql @@ -14,7 +14,7 @@ CREATE TABLE tab ( event Int8 ) ENGINE = MergeTree ORDER BY name -SETTINGS allow_experimental_optimized_row_order = true; +SETTINGS optimize_row_order = true; INSERT INTO tab VALUES ('Igor', 3), ('Egor', 1), ('Egor', 2), ('Igor', 2), ('Igor', 1); SELECT * FROM tab ORDER BY name SETTINGS max_threads=1; @@ -34,7 +34,7 @@ CREATE TABLE tab ( flag String ) ENGINE = MergeTree ORDER BY () -SETTINGS allow_experimental_optimized_row_order = True; +SETTINGS optimize_row_order = True; INSERT INTO tab VALUES ('Bob', 4, 100, '1'), ('Nikita', 2, 54, '1'), ('Nikita', 1, 228, '1'), ('Alex', 4, 83, '1'), ('Alex', 4, 134, '1'), ('Alex', 1, 65, '0'), ('Alex', 4, 134, '1'), ('Bob', 2, 53, '0'), ('Alex', 4, 83, '0'), ('Alex', 1, 63, '1'), ('Bob', 2, 53, '1'), ('Alex', 4, 192, '1'), ('Alex', 2, 128, '1'), ('Nikita', 2, 148, '0'), ('Bob', 4, 177, '0'), ('Nikita', 1, 173, '0'), ('Alex', 1, 239, '0'), ('Alex', 1, 63, '0'), ('Alex', 2, 224, '1'), ('Bob', 4, 177, '0'), ('Alex', 2, 128, '1'), ('Alex', 4, 134, '0'), ('Alex', 4, 83, '1'), ('Bob', 4, 100, '0'), ('Nikita', 2, 54, '1'), ('Alex', 1, 239, '1'), ('Bob', 2, 187, '1'), ('Alex', 1, 65, '1'), ('Bob', 2, 53, '1'), ('Alex', 2, 224, '0'), ('Alex', 4, 192, '0'), ('Nikita', 1, 173, '1'), ('Nikita', 2, 148, '1'), ('Bob', 2, 187, '1'), ('Nikita', 2, 208, '1'), ('Nikita', 2, 208, '0'), ('Nikita', 1, 228, '0'), ('Nikita', 2, 148, '0'); SELECT * FROM tab SETTINGS max_threads=1; @@ -58,7 +58,7 @@ CREATE TABLE tab ( flag Nullable(Int32) ) ENGINE = MergeTree ORDER BY (flag, money) -SETTINGS allow_experimental_optimized_row_order = True, allow_nullable_key = True; +SETTINGS optimize_row_order = True, allow_nullable_key = True; INSERT INTO tab VALUES ('AB', 0, 42, Null), ('AB', 0, 42, Null), ('A', 1, 42, Null), ('AB', 1, 9.81, 0), ('B', 0, 42, Null), ('B', -1, 3.14, Null), ('B', 1, 2.7, 1), ('B', 0, 42, 1), ('A', 1, 42, 1), ('B', 1, 42, Null), ('B', 0, 2.7, 1), ('A', 0, 2.7, 1), ('B', 2, 3.14, Null), ('A', 0, 3.14, Null), ('A', 1, 2.7, 1), ('A', 1, 42, Null); SELECT * FROM tab ORDER BY (flag, money) SETTINGS max_threads=1; @@ -89,7 +89,7 @@ CREATE TABLE tab ( tuple_column Tuple(UInt256) ) ENGINE = MergeTree() ORDER BY (fixed_str, event_date) -SETTINGS allow_experimental_optimized_row_order = True; +SETTINGS optimize_row_order = True; INSERT INTO tab VALUES ('A', '2020-01-01', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('A', '2020-01-01', [0.0, 1.1], NULL, 'example', {}, (26)), ('A', '2020-01-01', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)), ('A', '2020-01-02', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('A', '2020-01-02', [0.0, 2.2], 10, 'example', {}, (26)), ('A', '2020-01-02', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)), ('B', '2020-01-04', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('B', '2020-01-04', [0.0, 2.2], Null, 'example', {}, (26)), ('B', '2020-01-04', [2.2, 1.1], 1, 'some string', {'key2':'value2'}, (5)), ('B', '2020-01-05', [0.0, 1.1], 10, 'some string', {'key':'value'}, (123)), ('B', '2020-01-05', [0.0, 2.2], Null, 'example', {}, (26)), ('B', '2020-01-05', [2.2, 1.1], 1, 'some other string', {'key':'value'}, (5)), ('C', '2020-01-04', [0.0, 1.1], 10, 'some string', {'key':'value'}, (5)), ('C', '2020-01-04', [0.0, 2.2], Null, 'example', {}, (26)), ('C', '2020-01-04', [2.2, 1.1], 1, 'some other string', {'key2':'value2'}, (5)); From 1cd61b804dd01e3b56b3f2714d7c9adffe248900 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 07:52:14 +0000 Subject: [PATCH 21/48] Fix build --- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 7aa9c12a24b..a5fbca111f3 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -731,7 +731,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocksAlreadySorted); } - if (data.getSettings()->optimize_row_order) + if (data.getSettings()->optimize_row_order && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if it the optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); From c6e43f7a7b74a8928c1a9bf0a572aadcb56e8c54 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:17:01 +0000 Subject: [PATCH 22/48] Bump absl to 2023-11-28 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 3bd86026c93..0c09fd0ff0d 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 3bd86026c93da5a40006fd53403dff9d5f5e30e3 +Subproject commit 0c09fd0ff0d502c30831ff2ccf59894e36d2b60a From 4d3d18cee71ecadf520868623130538c1d3179e3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:18:22 +0000 Subject: [PATCH 23/48] Bump absl to 2023-12-06 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 0c09fd0ff0d..8588e7d14dc 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 0c09fd0ff0d502c30831ff2ccf59894e36d2b60a +Subproject commit 8588e7d14dca32eb2c695a9cd49d272aa23cc483 From 70c0589675d3c3c7f9a17d805818601fc0bd698e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:19:20 +0000 Subject: [PATCH 24/48] Bump absl to 2023-12-12 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 8588e7d14dc..ad0a6d2faf8 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 8588e7d14dca32eb2c695a9cd49d272aa23cc483 +Subproject commit ad0a6d2faf803645c8126f0b67eee2eaad98bc3f From 1bca6b900bb55aaadecdb24f07a60b18b6677eb8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:20:25 +0000 Subject: [PATCH 25/48] Bump absl to 2023-12-20 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index ad0a6d2faf8..794352a92f0 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit ad0a6d2faf803645c8126f0b67eee2eaad98bc3f +Subproject commit 794352a92f09425714b9116974b29e58ce8f9ba9 From 8fe272f210c7d214cedfaffa8eb72f73cb7756be Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:22:27 +0000 Subject: [PATCH 26/48] Bump absl to 2024-01-02 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 794352a92f0..925a5e681ea 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 794352a92f09425714b9116974b29e58ce8f9ba9 +Subproject commit 925a5e681ea1958171ba580c4402e5ce76473cb5 From a0d8d5a37ca944f6cb135444112f4906deb03371 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:31:33 +0000 Subject: [PATCH 27/48] Bump absl to 2024-01-02 --- contrib/abseil-cpp | 2 +- contrib/abseil-cpp-cmake/CMakeLists.txt | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 925a5e681ea..4038192a57c 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 925a5e681ea1958171ba580c4402e5ce76473cb5 +Subproject commit 4038192a57cb75f7ee671f81a3378ff4c74c4f8e diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index 7372195bb0d..be42d98345e 100644 --- a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -1283,12 +1283,9 @@ absl_cc_library( absl_cc_library( NAME flags - SRCS - "${DIR}/flag.cc" HDRS "${DIR}/declare.h" "${DIR}/flag.h" - "${DIR}/internal/flag_msvc.inc" COPTS ${ABSL_DEFAULT_COPTS} LINKOPTS From d4a453aad5eaaceb993570bb369e9321bc17bcf8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:32:22 +0000 Subject: [PATCH 28/48] Bump absl to 2024-03-06 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 4038192a57c..6f0bb2747d0 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 4038192a57cb75f7ee671f81a3378ff4c74c4f8e +Subproject commit 6f0bb2747d0a910de4a958eeeab2b9d615156382 From ae7d8821a78b5ff7a2228ca463323ca90c385a96 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 08:46:40 +0000 Subject: [PATCH 29/48] Bump absl to 2024-04-04 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 6f0bb2747d0..1ec4a27e399 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 6f0bb2747d0a910de4a958eeeab2b9d615156382 +Subproject commit 1ec4a27e39944462a574abbfa040498ed2831cc8 From 61b464321759e8edf0fad69aa80c8b0daef1818c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 10:39:35 +0000 Subject: [PATCH 30/48] Bump absl to 2024-04-24 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 1ec4a27e399..08b21bd0379 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 1ec4a27e39944462a574abbfa040498ed2831cc8 +Subproject commit 08b21bd037990c18d44fda1691211e73835bf214 From 643444eb1134c9e3767efeb1698e1553b1c686af Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 10:41:47 +0000 Subject: [PATCH 31/48] Bump absl to 2024-05-03 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 08b21bd0379..c1e1b47d989 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 08b21bd037990c18d44fda1691211e73835bf214 +Subproject commit c1e1b47d989978cde8c5a2a219df425b785a0c47 From c95ed40d3eb7db5fcef1a5a51c3964e11cb77f56 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 11:32:29 +0000 Subject: [PATCH 32/48] Bump absl to 2024-05-06 --- contrib/abseil-cpp | 2 +- contrib/abseil-cpp-cmake/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index c1e1b47d989..a28ee5b51c9 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit c1e1b47d989978cde8c5a2a219df425b785a0c47 +Subproject commit a28ee5b51c9ea41707d9a5d2d358ad77850264c4 diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index be42d98345e..d026a7c78bc 100644 --- a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -1058,8 +1058,10 @@ absl_cc_library( demangle_internal HDRS "${DIR}/internal/demangle.h" + "${DIR}/internal/demangle_rust.h" SRCS "${DIR}/internal/demangle.cc" + "${DIR}/internal/demangle_rust.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS From da91dd64283de30172fca6cd30df4c711d291b44 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 11:35:07 +0000 Subject: [PATCH 33/48] Bump absl to 2024-06-07 --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index a28ee5b51c9..696b32788ca 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit a28ee5b51c9ea41707d9a5d2d358ad77850264c4 +Subproject commit 696b32788ca887881547380530926314c521ea7d From 4cb53e951369f24a6b2fa0c21e7c940f0733f82e Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 10 Jun 2024 14:46:39 +0200 Subject: [PATCH 34/48] Revert "Revert "Fix duplicating Delete events in blob_storage_log"" --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 5 +++-- src/Interpreters/SystemLog.cpp | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index ae719f5cde4..afc13251f5b 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -382,6 +382,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e { std::vector current_chunk; String keys; + size_t first_position = current_position; for (; current_position < objects.size() && current_chunk.size() < chunk_size_limit; ++current_position) { Aws::S3::Model::ObjectIdentifier obj; @@ -407,9 +408,9 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e { const auto * outcome_error = outcome.IsSuccess() ? nullptr : &outcome.GetError(); auto time_now = std::chrono::system_clock::now(); - for (const auto & object : objects) + for (size_t i = first_position; i < current_position; ++i) blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, - uri.bucket, object.remote_path, object.local_path, object.bytes_size, + uri.bucket, objects[i].remote_path, objects[i].local_path, objects[i].bytes_size, outcome_error, time_now); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 5e0ce2cb0de..3b25deeb59d 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -504,6 +504,10 @@ void SystemLog::flushImpl(const std::vector & to_flush, Block block(std::move(log_element_columns)); MutableColumns columns = block.mutateColumns(); + + for (auto & column : columns) + column->reserve(to_flush.size()); + for (const auto & elem : to_flush) elem.appendToBlock(columns); @@ -532,7 +536,8 @@ void SystemLog::flushImpl(const std::vector & to_flush, } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to flush system log {} with {} entries up to offset {}", + table_id.getNameForLogs(), to_flush.size(), to_flush_end)); } queue->confirm(to_flush_end); From 8af077f3d3cd891768b310b59b42696691578245 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 12:53:06 +0000 Subject: [PATCH 35/48] Update build descriptions --- contrib/abseil-cpp-cmake/CMakeLists.txt | 258 ++++++++++++++++++++---- 1 file changed, 218 insertions(+), 40 deletions(-) diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index d026a7c78bc..a9e79be4f09 100644 --- a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -1,6 +1,8 @@ set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}") +# This is a minimized version of the function definition in CMake/AbseilHelpers.cmake + # # Copyright 2017 The Abseil Authors. # @@ -16,7 +18,6 @@ set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}") # See the License for the specific language governing permissions and # limitations under the License. # - function(absl_cc_library) cmake_parse_arguments(ABSL_CC_LIB "DISABLE_INSTALL;PUBLIC;TESTONLY" @@ -76,6 +77,12 @@ function(absl_cc_library) add_library(absl::${ABSL_CC_LIB_NAME} ALIAS ${_NAME}) endfunction() +# The following definitions are an amalgamation of the CMakeLists.txt files in absl/*/ +# To refresh them when upgrading to a new version: +# - copy them over from upstream +# - remove calls of 'absl_cc_test' +# - remove calls of `absl_cc_library` that contain `TESTONLY` +# - append '${DIR}' to the file definitions set(DIR ${ABSL_ROOT_DIR}/absl/algorithm) @@ -102,12 +109,12 @@ absl_cc_library( absl::algorithm absl::core_headers absl::meta + absl::nullability PUBLIC ) set(DIR ${ABSL_ROOT_DIR}/absl/base) -# Internal-only target, do not depend on directly. absl_cc_library( NAME atomic_hook @@ -146,6 +153,18 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} ) +absl_cc_library( + NAME + no_destructor + HDRS + "${DIR}/no_destructor.h" + DEPS + absl::config + absl::nullability + COPTS + ${ABSL_DEFAULT_COPTS} +) + absl_cc_library( NAME nullability @@ -305,6 +324,8 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} LINKOPTS ${ABSL_DEFAULT_LINKOPTS} + $<$:-lrt> + $<$:-ladvapi32> DEPS absl::atomic_hook absl::base_internal @@ -312,6 +333,7 @@ absl_cc_library( absl::core_headers absl::dynamic_annotations absl::log_severity + absl::nullability absl::raw_logging_internal absl::spinlock_wait absl::type_traits @@ -357,6 +379,7 @@ absl_cc_library( absl::base absl::config absl::core_headers + absl::nullability PUBLIC ) @@ -467,10 +490,11 @@ absl_cc_library( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS - absl::container_common absl::common_policy_traits absl::compare absl::compressed_tuple + absl::config + absl::container_common absl::container_memory absl::cord absl::core_headers @@ -480,7 +504,6 @@ absl_cc_library( absl::strings absl::throw_delegate absl::type_traits - absl::utility ) # Internal-only target, do not depend on directly. @@ -523,7 +546,9 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::base_internal absl::compressed_tuple + absl::config absl::core_headers absl::memory absl::span @@ -548,18 +573,6 @@ absl_cc_library( PUBLIC ) -# Internal-only target, do not depend on directly. -absl_cc_library( - NAME - counting_allocator - HDRS - "${DIR}/internal/counting_allocator.h" - COPTS - ${ABSL_DEFAULT_COPTS} - DEPS - absl::config -) - absl_cc_library( NAME flat_hash_map @@ -570,7 +583,7 @@ absl_cc_library( DEPS absl::container_memory absl::core_headers - absl::hash_function_defaults + absl::hash_container_defaults absl::raw_hash_map absl::algorithm_container absl::memory @@ -586,7 +599,7 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS absl::container_memory - absl::hash_function_defaults + absl::hash_container_defaults absl::raw_hash_set absl::algorithm_container absl::core_headers @@ -604,7 +617,7 @@ absl_cc_library( DEPS absl::container_memory absl::core_headers - absl::hash_function_defaults + absl::hash_container_defaults absl::node_slot_policy absl::raw_hash_map absl::algorithm_container @@ -620,8 +633,9 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::container_memory absl::core_headers - absl::hash_function_defaults + absl::hash_container_defaults absl::node_slot_policy absl::raw_hash_set absl::algorithm_container @@ -629,6 +643,19 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + hash_container_defaults + HDRS + "${DIR}/hash_container_defaults.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::hash_function_defaults + PUBLIC +) + # Internal-only target, do not depend on directly. absl_cc_library( NAME @@ -655,9 +682,11 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS absl::config + absl::container_common absl::cord absl::hash absl::strings + absl::type_traits PUBLIC ) @@ -703,6 +732,7 @@ absl_cc_library( absl::base absl::config absl::exponential_biased + absl::no_destructor absl::raw_logging_internal absl::sample_recorder absl::synchronization @@ -756,7 +786,9 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config absl::container_memory + absl::core_headers absl::raw_hash_set absl::throw_delegate PUBLIC @@ -817,6 +849,7 @@ absl_cc_library( DEPS absl::config absl::core_headers + absl::debugging_internal absl::meta absl::strings absl::span @@ -931,6 +964,7 @@ absl_cc_library( absl::crc32c absl::config absl::strings + absl::no_destructor ) set(DIR ${ABSL_ROOT_DIR}/absl/debugging) @@ -954,6 +988,8 @@ absl_cc_library( "${DIR}/stacktrace.cc" COPTS ${ABSL_DEFAULT_COPTS} + LINKOPTS + $<$:${EXECINFO_LIBRARY}> DEPS absl::debugging_internal absl::config @@ -980,6 +1016,7 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} LINKOPTS ${ABSL_DEFAULT_LINKOPTS} + $<$:-ldbghelp> DEPS absl::debugging_internal absl::demangle_internal @@ -1254,6 +1291,7 @@ absl_cc_library( absl::strings absl::synchronization absl::flat_hash_map + absl::no_destructor ) # Internal-only target, do not depend on directly. @@ -1298,7 +1336,6 @@ absl_cc_library( absl::flags_config absl::flags_internal absl::flags_reflection - absl::base absl::core_headers absl::strings ) @@ -1378,6 +1415,9 @@ absl_cc_library( absl::synchronization ) +############################################################################ +# Unit tests in alphabetical order. + set(DIR ${ABSL_ROOT_DIR}/absl/functional) absl_cc_library( @@ -1430,6 +1470,18 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + overload + HDRS + "${DIR}/overload.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::meta + PUBLIC +) + set(DIR ${ABSL_ROOT_DIR}/absl/hash) absl_cc_library( @@ -1639,6 +1691,7 @@ absl_cc_library( absl::log_internal_conditions absl::log_internal_message absl::log_internal_strip + absl::absl_vlog_is_on ) absl_cc_library( @@ -1720,6 +1773,7 @@ absl_cc_library( absl::log_entry absl::log_severity absl::log_sink + absl::no_destructor absl::raw_logging_internal absl::synchronization absl::span @@ -1770,6 +1824,7 @@ absl_cc_library( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS + absl::core_headers absl::log_internal_message absl::log_internal_nullstream absl::log_severity @@ -1875,6 +1930,11 @@ absl_cc_library( PUBLIC ) +# Warning: Many linkers will strip the contents of this library because its +# symbols are only used in a global constructor. A workaround is for clients +# to link this using $ instead of +# the plain absl::log_flags. +# TODO(b/320467376): Implement the equivalent of Bazel's alwayslink=True. absl_cc_library( NAME log_flags @@ -1896,6 +1956,7 @@ absl_cc_library( absl::flags absl::flags_marshalling absl::strings + absl::vlog_config_internal PUBLIC ) @@ -1918,6 +1979,7 @@ absl_cc_library( absl::log_severity absl::raw_logging_internal absl::strings + absl::vlog_config_internal ) absl_cc_library( @@ -1951,6 +2013,7 @@ absl_cc_library( ${ABSL_DEFAULT_LINKOPTS} DEPS absl::log_internal_log_impl + absl::vlog_is_on PUBLIC ) @@ -2063,21 +2126,75 @@ absl_cc_library( ) absl_cc_library( - NAME - log_internal_fnmatch - SRCS - "${DIR}/internal/fnmatch.cc" - HDRS - "${DIR}/internal/fnmatch.h" - COPTS - ${ABSL_DEFAULT_COPTS} - LINKOPTS - ${ABSL_DEFAULT_LINKOPTS} - DEPS - absl::config - absl::strings + NAME + vlog_config_internal + SRCS + "${DIR}/internal/vlog_config.cc" + HDRS + "${DIR}/internal/vlog_config.h" + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + DEPS + absl::base + absl::config + absl::core_headers + absl::log_internal_fnmatch + absl::memory + absl::no_destructor + absl::strings + absl::synchronization + absl::optional ) +absl_cc_library( + NAME + absl_vlog_is_on + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + HDRS + "${DIR}/absl_vlog_is_on.h" + DEPS + absl::vlog_config_internal + absl::config + absl::core_headers + absl::strings +) + +absl_cc_library( + NAME + vlog_is_on + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + HDRS + "${DIR}/vlog_is_on.h" + DEPS + absl::absl_vlog_is_on +) + +absl_cc_library( + NAME + log_internal_fnmatch + SRCS + "${DIR}/internal/fnmatch.cc" + HDRS + "${DIR}/internal/fnmatch.h" + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + DEPS + absl::config + absl::strings +) + +# Test targets + set(DIR ${ABSL_ROOT_DIR}/absl/memory) absl_cc_library( @@ -2146,6 +2263,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::compare absl::config absl::core_headers absl::bits @@ -2175,6 +2293,8 @@ absl_cc_library( PUBLIC ) +set(DIR ${ABSL_ROOT_DIR}/absl/profiling) + absl_cc_library( NAME sample_recorder @@ -2187,8 +2307,6 @@ absl_cc_library( absl::synchronization ) -set(DIR ${ABSL_ROOT_DIR}/absl/profiling) - absl_cc_library( NAME exponential_biased @@ -2264,6 +2382,7 @@ absl_cc_library( LINKOPTS ${ABSL_DEFAULT_LINKOPTS} DEPS + absl::config absl::fast_type_id absl::optional ) @@ -2335,11 +2454,13 @@ absl_cc_library( DEPS absl::config absl::inlined_vector + absl::nullability absl::random_internal_pool_urbg absl::random_internal_salted_seed_seq absl::random_internal_seed_material absl::random_seed_gen_exception absl::span + absl::string_view ) # Internal-only target, do not depend on directly. @@ -2398,6 +2519,7 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} LINKOPTS ${ABSL_DEFAULT_LINKOPTS} + $<$:-lbcrypt> DEPS absl::core_headers absl::optional @@ -2657,6 +2779,29 @@ absl_cc_library( absl::config ) +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + random_internal_distribution_test_util + SRCS + "${DIR}/internal/chi_square.cc" + "${DIR}/internal/distribution_test_util.cc" + HDRS + "${DIR}/internal/chi_square.h" + "${DIR}/internal/distribution_test_util.h" + COPTS + ${ABSL_DEFAULT_COPTS} + LINKOPTS + ${ABSL_DEFAULT_LINKOPTS} + DEPS + absl::config + absl::core_headers + absl::raw_logging_internal + absl::strings + absl::str_format + absl::span +) + # Internal-only target, do not depend on directly. absl_cc_library( NAME @@ -2698,6 +2843,8 @@ absl_cc_library( absl::function_ref absl::inlined_vector absl::memory + absl::no_destructor + absl::nullability absl::optional absl::raw_logging_internal absl::span @@ -2723,8 +2870,11 @@ absl_cc_library( absl::base absl::config absl::core_headers + absl::has_ostream_operator + absl::nullability absl::raw_logging_internal absl::status + absl::str_format absl::strings absl::type_traits absl::utility @@ -2747,6 +2897,7 @@ absl_cc_library( absl::base absl::config absl::core_headers + absl::nullability absl::throw_delegate PUBLIC ) @@ -2761,6 +2912,7 @@ absl_cc_library( "${DIR}/has_absl_stringify.h" "${DIR}/internal/damerau_levenshtein_distance.h" "${DIR}/internal/string_constant.h" + "${DIR}/internal/has_absl_stringify.h" "${DIR}/match.h" "${DIR}/numbers.h" "${DIR}/str_cat.h" @@ -2804,6 +2956,7 @@ absl_cc_library( absl::endian absl::int128 absl::memory + absl::nullability absl::raw_logging_internal absl::throw_delegate absl::type_traits @@ -2823,6 +2976,18 @@ absl_cc_library( PUBLIC ) +absl_cc_library( + NAME + has_ostream_operator + HDRS + "${DIR}/has_ostream_operator.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + PUBLIC +) + # Internal-only target, do not depend on directly. absl_cc_library( NAME @@ -2850,11 +3015,16 @@ absl_cc_library( NAME str_format HDRS - "${DIR}/str_format.h" + "str_format.h" COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config + absl::core_headers + absl::nullability + absl::span absl::str_format_internal + absl::string_view PUBLIC ) @@ -2885,6 +3055,7 @@ absl_cc_library( absl::strings absl::config absl::core_headers + absl::fixed_array absl::inlined_vector absl::numeric_representation absl::type_traits @@ -2988,6 +3159,7 @@ absl_cc_library( DEPS absl::base absl::config + absl::no_destructor absl::raw_logging_internal absl::synchronization ) @@ -3078,6 +3250,7 @@ absl_cc_library( absl::endian absl::function_ref absl::inlined_vector + absl::nullability absl::optional absl::raw_logging_internal absl::span @@ -3245,6 +3418,8 @@ absl_cc_library( ${ABSL_DEFAULT_COPTS} DEPS Threads::Threads + # TODO(#1495): Use $ once our + # minimum CMake version >= 3.24 $<$:-Wl,-framework,CoreFoundation> ) @@ -3254,7 +3429,7 @@ absl_cc_library( NAME any HDRS - "${DIR}/any.h" + "any.h" COPTS ${ABSL_DEFAULT_COPTS} DEPS @@ -3285,8 +3460,8 @@ absl_cc_library( NAME bad_any_cast_impl SRCS - "${DIR}/bad_any_cast.h" - "${DIR}/bad_any_cast.cc" + "${DIR}/bad_any_cast.h" + "${DIR}/bad_any_cast.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS @@ -3306,6 +3481,7 @@ absl_cc_library( DEPS absl::algorithm absl::core_headers + absl::nullability absl::throw_delegate absl::type_traits PUBLIC @@ -3326,6 +3502,7 @@ absl_cc_library( absl::config absl::core_headers absl::memory + absl::nullability absl::type_traits absl::utility PUBLIC @@ -3388,6 +3565,7 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config absl::core_headers absl::type_traits PUBLIC From 598219c57dac54d9000d2fe338b523007e13be21 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 12:56:21 +0000 Subject: [PATCH 36/48] Minor update --- contrib/abseil-cpp-cmake/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index a9e79be4f09..4137547b736 100644 --- a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -3015,7 +3015,7 @@ absl_cc_library( NAME str_format HDRS - "str_format.h" + "${DIR}/str_format.h" COPTS ${ABSL_DEFAULT_COPTS} DEPS @@ -3429,7 +3429,7 @@ absl_cc_library( NAME any HDRS - "any.h" + "${DIR}/any.h" COPTS ${ABSL_DEFAULT_COPTS} DEPS From 1f17ddc6fe35be95736b448ebb3b73123c034196 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 13:06:44 +0000 Subject: [PATCH 37/48] Update .clang-tidy --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 896052915f7..de19059d09e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -37,7 +37,6 @@ Checks: [ '-cert-oop54-cpp', '-cert-oop57-cpp', - '-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667 '-clang-analyzer-optin.performance.Padding', '-clang-analyzer-unix.Malloc', From 46df67d1dec2865aecfbe319532216d9aec670d6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 11 Jun 2024 09:07:03 +0000 Subject: [PATCH 38/48] Fix infinite query duration in case of cyclic aliases. --- src/Analyzer/Resolve/ScopeAliases.h | 7 ++++++- .../0_stateless/02896_cyclic_aliases_crash.reference | 1 + tests/queries/0_stateless/02896_cyclic_aliases_crash.sql | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Resolve/ScopeAliases.h b/src/Analyzer/Resolve/ScopeAliases.h index baab843988b..830ae72144b 100644 --- a/src/Analyzer/Resolve/ScopeAliases.h +++ b/src/Analyzer/Resolve/ScopeAliases.h @@ -75,7 +75,12 @@ struct ScopeAliases if (jt == transitive_aliases.end()) return {}; - key = &(getKey(jt->second, find_option)); + const auto & new_key = getKey(jt->second, find_option); + /// Ignore potential cyclic aliases. + if (new_key == *key) + return {}; + + key = &new_key; it = alias_map.find(*key); } diff --git a/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference b/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference index caf11f5c15a..e537236478d 100644 --- a/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference +++ b/tests/queries/0_stateless/02896_cyclic_aliases_crash.reference @@ -1,2 +1,3 @@ 1 2 3 1 5 +300 diff --git a/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql b/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql index 5fb628eeb67..5440872e052 100644 --- a/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql +++ b/tests/queries/0_stateless/02896_cyclic_aliases_crash.sql @@ -30,3 +30,7 @@ WHERE (time_stamp_utc >= toDateTime('2024-04-25 00:00:00')) AND (time_stamp_utc GROUP BY time_stamp_utc ORDER BY Impressions DESC LIMIT 1000; + +drop table test_table; +create table test_table engine MergeTree order by sum as select 100 as sum union all select 200 as sum; +select sum as sum from (select sum(sum) as sum from test_table); From 61d9280e969ecc6e4cf38adee24090fe13d96112 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Jun 2024 12:10:23 +0200 Subject: [PATCH 39/48] Update src/Storages/MergeTree/MergeTreeDataWriter.cpp Co-authored-by: SmitaRKulkarni --- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index a5fbca111f3..1c24fd9525a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -504,7 +504,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( } if (data.getSettings()->optimize_row_order - && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if it the optimization messes up specialized MergeTree engines. + && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if this optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); perm_ptr = &perm; From a96513ff967706ac9c0de572da005cad6ace1e5e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Jun 2024 12:10:29 +0200 Subject: [PATCH 40/48] Update src/Storages/MergeTree/MergeTreeDataWriter.cpp Co-authored-by: SmitaRKulkarni --- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 1c24fd9525a..5c8aa32949d 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -732,7 +732,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( } if (data.getSettings()->optimize_row_order - && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if it the optimization messes up specialized MergeTree engines. + && data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if this optimization messes up specialized MergeTree engines. { RowOrderOptimizer::optimize(block, sort_description, perm); perm_ptr = &perm; From 2d2ebc918a172dfe76968210afa3b5cfb2cbfe96 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 11 Jun 2024 14:42:48 +0200 Subject: [PATCH 41/48] Update odbc-bridge.md --- docs/en/operations/utilities/odbc-bridge.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/utilities/odbc-bridge.md b/docs/en/operations/utilities/odbc-bridge.md index abb8860880e..eb849c6b6ae 100644 --- a/docs/en/operations/utilities/odbc-bridge.md +++ b/docs/en/operations/utilities/odbc-bridge.md @@ -18,7 +18,7 @@ This tool works via HTTP, not via pipes, shared memory, or TCP because: However it can be used as standalone tool from command line with the following parameters in POST-request URL: - `connection_string` -- ODBC connection string. -- `columns` -- columns in ClickHouse NamesAndTypesList format, name in backticks, +- `sample_block` -- columns description in ClickHouse NamesAndTypesList format, name in backticks, type as string. Name and type are space separated, rows separated with newline. - `max_block_size` -- optional parameter, sets maximum size of single block. From e47bbfb7f29a591d9b8e2432c4626054a6011dc2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Jun 2024 14:08:16 +0000 Subject: [PATCH 42/48] Remove upstream abseil repository --- .gitmodules | 3 --- contrib/abseil-cpp | 1 - 2 files changed, 4 deletions(-) delete mode 160000 contrib/abseil-cpp diff --git a/.gitmodules b/.gitmodules index 28696428e8c..a6ad00e434b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -161,9 +161,6 @@ [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz -[submodule "contrib/abseil-cpp"] - path = contrib/abseil-cpp - url = https://github.com/abseil/abseil-cpp [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse/dragonbox diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp deleted file mode 160000 index 696b32788ca..00000000000 --- a/contrib/abseil-cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 696b32788ca887881547380530926314c521ea7d From 182f85b25cfe2ac4589d4b3edc4d623e994b1932 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 11 Jun 2024 16:09:31 +0200 Subject: [PATCH 43/48] Test if unit-tests can be decreased --- docker/images.json | 10 +++++----- docker/test/unit/Dockerfile | 4 +--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/docker/images.json b/docker/images.json index 7439517379b..716b76ee217 100644 --- a/docker/images.json +++ b/docker/images.json @@ -41,8 +41,7 @@ "docker/test/stateless": { "name": "clickhouse/stateless-test", "dependent": [ - "docker/test/stateful", - "docker/test/unit" + "docker/test/stateful" ] }, "docker/test/stateful": { @@ -122,15 +121,16 @@ "docker/test/base": { "name": "clickhouse/test-base", "dependent": [ + "docker/test/clickbench", "docker/test/fuzzer", - "docker/test/libfuzzer", "docker/test/integration/base", "docker/test/keeper-jepsen", + "docker/test/libfuzzer", "docker/test/server-jepsen", "docker/test/sqllogic", "docker/test/sqltest", - "docker/test/clickbench", - "docker/test/stateless" + "docker/test/stateless", + "docker/test/unit" ] }, "docker/test/integration/kerberized_hadoop": { diff --git a/docker/test/unit/Dockerfile b/docker/test/unit/Dockerfile index cf5ba1eec7f..af44dc930b2 100644 --- a/docker/test/unit/Dockerfile +++ b/docker/test/unit/Dockerfile @@ -1,9 +1,7 @@ # rebuild in #33610 # docker build -t clickhouse/unit-test . ARG FROM_TAG=latest -FROM clickhouse/stateless-test:$FROM_TAG - -RUN apt-get install gdb +FROM clickhouse/test-base:$FROM_TAG COPY run.sh / CMD ["/bin/bash", "/run.sh"] From e5dcf75968b44a10e521bd9c1c106621a56ec7cb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Jun 2024 14:09:56 +0000 Subject: [PATCH 44/48] Add forked abseil submodule back --- .gitmodules | 3 +++ contrib/abseil-cpp | 1 + 2 files changed, 4 insertions(+) create mode 160000 contrib/abseil-cpp diff --git a/.gitmodules b/.gitmodules index a6ad00e434b..6d64c52ce00 100644 --- a/.gitmodules +++ b/.gitmodules @@ -161,6 +161,9 @@ [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz +[submodule "abseil"] + path = contrib/abseil-cpp + url = https://github.com/ClickHouse/abseil-cpp.git [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse/dragonbox diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp new file mode 160000 index 00000000000..3916ba76a98 --- /dev/null +++ b/contrib/abseil-cpp @@ -0,0 +1 @@ +Subproject commit 3916ba76a98d3082414a10977e10bdebfdf3b177 From af83bc92ced8c08325924b5a77bfad210eb48149 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Jun 2024 14:16:38 +0000 Subject: [PATCH 45/48] Switch to same HEAD as before but with s390x-breaking commit reverted --- contrib/abseil-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index 3916ba76a98..a3c4dd3e77f 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit 3916ba76a98d3082414a10977e10bdebfdf3b177 +Subproject commit a3c4dd3e77f28b526efbb0eb394b72e29c633936 From 1c5e935dfa380fbf7a474810fc8594f3227fdbc3 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 11 Jun 2024 16:38:19 +0200 Subject: [PATCH 46/48] Small fix for 02340_parts_refcnt_mergetree https://s3.amazonaws.com/clickhouse-test-reports/61112/f8e3e95b97920c4bd9a21101a2d664e9b3ed60e8/stateless_tests__debug__[1_5].html --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index b100f96befa..e7d95d8db72 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -58,7 +58,7 @@ function check_refcnt_for_table() $CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3" # Kill the query gracefully. - kill -INT $PID + kill -INT $PID ||: wait $PID grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED rm -f "${log_file:?}" From 7683f06188d8dc901bd912c4ace935a4b3f498e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 12 Jun 2024 11:26:21 +0200 Subject: [PATCH 47/48] Revert "S3: reduce retires time for queries, increase retries count for backups" --- src/Backups/BackupIO_S3.cpp | 6 +++--- src/Core/Settings.h | 1 - src/Core/SettingsChangesHistory.h | 1 - src/IO/S3/Client.h | 2 +- .../integration/test_mask_sensitive_info/configs/users.xml | 1 - 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 1ea59c1d38b..92f086295a0 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -54,9 +54,9 @@ namespace S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, context->getRemoteHostFilter(), - static_cast(local_settings.s3_max_redirects), - static_cast(local_settings.backup_restore_s3_retry_attempts), - local_settings.enable_s3_requests_logging, + static_cast(global_settings.s3_max_redirects), + static_cast(global_settings.s3_retry_attempts), + global_settings.enable_s3_requests_logging, /* for_disk_s3 = */ false, request_settings.get_request_throttler, request_settings.put_request_throttler, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7f99243e285..b3e83092a77 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -517,7 +517,6 @@ class IColumn; M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multi, 1000, "Maximum size of batch for multi request to [Zoo]Keeper during backup or restore", 0) \ - M(UInt64, backup_restore_s3_retry_attempts, 1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore.", 0) \ M(UInt64, max_backup_bandwidth, 0, "The maximum read speed in bytes per second for particular backup on server. Zero means unlimited.", 0) \ \ M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index b447421671e..69bc8c5d207 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -115,7 +115,6 @@ static const std::map& error, long attemptedRetries) const override; diff --git a/tests/integration/test_mask_sensitive_info/configs/users.xml b/tests/integration/test_mask_sensitive_info/configs/users.xml index f767216e907..f129a5bb3e3 100644 --- a/tests/integration/test_mask_sensitive_info/configs/users.xml +++ b/tests/integration/test_mask_sensitive_info/configs/users.xml @@ -2,7 +2,6 @@ 5 - 5 From 71d76aa4ac308e52e2663b409cf2e78c7d7b672f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 12 Jun 2024 11:27:56 +0200 Subject: [PATCH 48/48] Revert "Small fix for 02340_parts_refcnt_mergetree" --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index e7d95d8db72..b100f96befa 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -58,7 +58,7 @@ function check_refcnt_for_table() $CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3" # Kill the query gracefully. - kill -INT $PID ||: + kill -INT $PID wait $PID grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED rm -f "${log_file:?}"