From e9b0d3e4a2775bd3a9df5b167452e7971fcbc0ce Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Tue, 29 Aug 2023 09:41:18 +0000 Subject: [PATCH] do not implicitly read pk and version columns in lonely parts if nnot necessary --- .../QueryPlan/ReadFromMergeTree.cpp | 13 ++++---- src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- ...ross_partitions_final_all_lonely.reference | 21 ++++++++++++ ...rge_across_partitions_final_all_lonely.sql | 32 +++++++++++++++++++ 4 files changed, 60 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.reference create mode 100644 tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.sql diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0b5eb94dbac..02b3d7a07fa 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -979,7 +979,7 @@ static void addMergingFinal( Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & column_names, ActionsDAGPtr & out_projection) + RangesInDataParts && parts_with_ranges, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection) { const auto & settings = context->getSettingsRef(); const auto data_settings = data.getSettings(); @@ -1141,17 +1141,16 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( if (sum_marks_in_lonely_parts < num_streams_for_lonely_parts * min_marks_for_concurrent_read && lonely_parts.size() < num_streams_for_lonely_parts) num_streams_for_lonely_parts = std::max((sum_marks_in_lonely_parts + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, lonely_parts.size()); - auto pipe = read(std::move(lonely_parts), column_names, ReadFromMergeTree::ReadType::Default, + auto pipe = read(std::move(lonely_parts), partition_pipes.empty() ? origin_column_names : column_names, ReadFromMergeTree::ReadType::Default, num_streams_for_lonely_parts, min_marks_for_concurrent_read, info.use_uncompressed_cache); /// Drop temporary columns, added by 'sorting_key_expr' if (!out_projection) out_projection = createProjection(pipe.getHeader()); - pipe.addSimpleTransform([sorting_expr](const Block & header) - { - return std::make_shared(header, sorting_expr); - }); + if (!partition_pipes.empty()) + pipe.addSimpleTransform([sorting_expr](const Block & header) + { return std::make_shared(header, sorting_expr); }); partition_pipes.emplace_back(std::move(pipe)); } @@ -1742,7 +1741,7 @@ Pipe ReadFromMergeTree::spreadMarkRanges( ::sort(column_names_to_read.begin(), column_names_to_read.end()); column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end()); - return spreadMarkRangesAmongStreamsFinal(std::move(parts_with_ranges), num_streams, column_names_to_read, result_projection); + return spreadMarkRangesAmongStreamsFinal(std::move(parts_with_ranges), num_streams, result.column_names_to_read, column_names_to_read, result_projection); } else if (input_order_info) { diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index cb2a3a8ddf9..2a4b6022d49 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -314,7 +314,7 @@ private: const InputOrderInfoPtr & input_order_info); Pipe spreadMarkRangesAmongStreamsFinal( - RangesInDataParts && parts, size_t num_streams, const Names & column_names, ActionsDAGPtr & out_projection); + RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); ReadFromMergeTree::AnalysisResult getAnalysisResult() const; MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr; diff --git a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.reference b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.reference new file mode 100644 index 00000000000..9f4677b0f16 --- /dev/null +++ b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.reference @@ -0,0 +1,21 @@ +(Expression) +ExpressionTransform × 16 +Header: max(val) UInt64: max(val) UInt64 UInt64(size = 0) + count() UInt64: count() UInt64 UInt64(size = 0) + (Aggregating) + Resize 4 → 16 + Header × 16 : max(val) UInt64: max(val) UInt64 UInt64(size = 0) + count() UInt64: count() UInt64 UInt64(size = 0) + AggregatingTransform × 4 + Header: max(val) UInt64: max(val) UInt64 UInt64(size = 0) + count() UInt64: count() UInt64 UInt64(size = 0) + StrictResize 4 → 4 + Header × 4 : val UInt64: val UInt64 UInt64(size = 0) + (Expression) + ExpressionTransform × 4 + Header: val UInt64: val UInt64 UInt64(size = 0) + (ReadFromMergeTree) + ExpressionTransform × 4 + Header: val UInt64: val UInt64 UInt64(size = 0) + MergeTreeThread × 4 0 → 1 + Header: val UInt64: val UInt64 UInt64(size = 0) diff --git a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.sql b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.sql new file mode 100644 index 00000000000..d2c40bb002a --- /dev/null +++ b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_all_lonely.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS all_lonely; + +CREATE TABLE all_lonely +( + `id` UInt64, + `dt` Date, + `val` UInt64, + `version` UInt64 +) +ENGINE = ReplacingMergeTree(version) +PARTITION BY dt +ORDER BY (id); + +INSERT INTO all_lonely SELECT number, '2022-10-28', number*10, 0 FROM numbers(10000); +INSERT INTO all_lonely SELECT number+500000, '2022-10-28', number*10, 1 FROM numbers(10000); +OPTIMIZE TABLE all_lonely PARTITION '2022-10-28' FINAL; + + +INSERT INTO all_lonely SELECT number, '2022-10-29', number*10, 0 FROM numbers(10000); +INSERT INTO all_lonely SELECT number+500000, '2022-10-29', number*10, 1 FROM numbers(10000); +OPTIMIZE TABLE all_lonely PARTITION '2022-10-29' FINAL; + +INSERT INTO all_lonely SELECT number, '2022-10-30', number*10, 0 FROM numbers(10000); +INSERT INTO all_lonely SELECT number+500000, '2022-10-30', number*10, 1 FROM numbers(10000); +OPTIMIZE TABLE all_lonely PARTITION '2022-10-30' FINAL; + + +INSERT INTO all_lonely SELECT number, '2022-10-31', number*10, 0 FROM numbers(10000); +INSERT INTO all_lonely SELECT number+500000, '2022-10-31', number*10, 1 FROM numbers(10000); +OPTIMIZE TABLE all_lonely PARTITION '2022-10-31' FINAL; + +EXPLAIN PIPELINE header=1 SELECT max(val), count(*) FROM all_lonely FINAL SETTINGS do_not_merge_across_partitions_select_final = 1, max_threads = 16;