From cbbd8e521cd802885af25250f8c856e65cf1d6a4 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 7 May 2020 01:27:35 +0300
Subject: [PATCH] fix usage of primary key wrapped into a function with 'FINAL' modifier and 'ORDER BY' optimization

---
Notes (ignored by git-am):
  * createProjection() builds actions that project a pipe back to the column
    names of its current header. The projection is captured before the
    sorting-key expression is appended to the pipe and is applied after the
    merging step, so columns produced by that expression are dropped again.
  * Line breaks, indentation, tab separators in the .reference files and the
    template arguments of std::make_shared were reconstructed from a
    whitespace-collapsed copy of this patch. Verify against the commit named
    in the first line before applying; use `git apply --ignore-whitespace`
    if context indentation does not match.

 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 24 ++++++++++++++++++-
 .../0_stateless/01137_order_by_func.reference | 10 ++++++++
 .../0_stateless/01137_order_by_func.sql       | 19 +++++++++++++++
 .../01137_order_by_func_final.reference       |  3 +++
 .../0_stateless/01137_order_by_func_final.sql | 10 ++++++++
 5 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/01137_order_by_func.reference
 create mode 100644 tests/queries/0_stateless/01137_order_by_func.sql
 create mode 100644 tests/queries/0_stateless/01137_order_by_func_final.reference
 create mode 100644 tests/queries/0_stateless/01137_order_by_func_final.sql

diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 580c95b34dd..c0785899aab 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -813,6 +813,14 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
     return res;
 }
 
+static ExpressionActionsPtr createProjection(const Pipe & pipe, const MergeTreeData & data)
+{
+    const auto & header = pipe.getHeader();
+    auto projection = std::make_shared<ExpressionActions>(header.getNamesAndTypesList(), data.global_context);
+    projection->add(ExpressionAction::project(header.getNames()));
+    return projection;
+}
+
 Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
     RangesInDataParts && parts,
     size_t num_streams,
@@ -999,13 +1007,19 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
                 sort_description.emplace_back(data.sorting_key_columns[j],
                     input_sorting_info->direction, 1);
 
+            /// Project input columns to drop columns from sorting_key_prefix_expr
+            /// to allow execute the same expression later.
+            /// NOTE: It may lead to double computation of expression.
+            auto projection = createProjection(pipes.back(), data);
             for (auto & pipe : pipes)
                 pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), sorting_key_prefix_expr));
 
             auto merging_sorted = std::make_shared<MergingSortedTransform>(
                 pipes.back().getHeader(), pipes.size(), sort_description, max_block_size);
 
-            res.emplace_back(std::move(pipes), std::move(merging_sorted));
+            Pipe merged(std::move(pipes), std::move(merging_sorted));
+            merged.addSimpleTransform(std::make_shared<ExpressionTransform>(merged.getHeader(), projection));
+            res.emplace_back(std::move(merged));
         }
         else
             res.emplace_back(std::move(pipes.front()));
@@ -1051,6 +1065,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
         use_uncompressed_cache = false;
 
     Pipes pipes;
+    /// Project input columns to drop columns from sorting_key_expr
+    /// to allow execute the same expression later.
+    /// NOTE: It may lead to double computation of expression.
+    ExpressionActionsPtr projection;
 
     for (const auto & part : parts)
     {
@@ -1061,6 +1079,9 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
             virt_columns, part.part_index_in_query);
 
         Pipe pipe(std::move(source_processor));
+        if (!projection)
+            projection = createProjection(pipe, data);
+
         pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), data.sorting_key_expr));
         pipes.emplace_back(std::move(pipe));
     }
@@ -1133,6 +1154,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
     if (merged_processor)
     {
         Pipe pipe(std::move(pipes), std::move(merged_processor));
+        pipe.addSimpleTransform(std::make_shared<ExpressionTransform>(pipe.getHeader(), projection));
         pipes = Pipes();
         pipes.emplace_back(std::move(pipe));
     }
diff --git a/tests/queries/0_stateless/01137_order_by_func.reference b/tests/queries/0_stateless/01137_order_by_func.reference
new file mode 100644
index 00000000000..1dd2626a58f
--- /dev/null
+++ b/tests/queries/0_stateless/01137_order_by_func.reference
@@ -0,0 +1,10 @@
+2020-05-05 01:00:00	0
+2020-05-05 01:00:00	1
+2020-05-05 01:00:00	2
+2020-05-05 01:00:00	3
+2020-05-05 01:00:00	4
+999999
+999999
+999999
+999998
+999998
diff --git a/tests/queries/0_stateless/01137_order_by_func.sql b/tests/queries/0_stateless/01137_order_by_func.sql
new file mode 100644
index 00000000000..7fc2126b515
--- /dev/null
+++ b/tests/queries/0_stateless/01137_order_by_func.sql
@@ -0,0 +1,19 @@
+DROP TABLE IF EXISTS pk_func;
+CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = MergeTree ORDER BY toDate(d);
+
+INSERT INTO pk_func SELECT '2020-05-05 01:00:00', number FROM numbers(1000000);
+INSERT INTO pk_func SELECT '2020-05-06 01:00:00', number FROM numbers(1000000);
+INSERT INTO pk_func SELECT '2020-05-07 01:00:00', number FROM numbers(1000000);
+
+SELECT * FROM pk_func ORDER BY toDate(d), ui LIMIT 5;
+
+DROP TABLE pk_func;
+
+CREATE TABLE pk_func(i UInt32) ENGINE = MergeTree ORDER BY -i;
+INSERT INTO pk_func SELECT number FROM numbers(1000000);
+INSERT INTO pk_func SELECT number FROM numbers(1000000);
+INSERT INTO pk_func SELECT number FROM numbers(1000000);
+
+SELECT * FROM pk_func ORDER BY -i LIMIT 5;
+
+DROP TABLE pk_func;
diff --git a/tests/queries/0_stateless/01137_order_by_func_final.reference b/tests/queries/0_stateless/01137_order_by_func_final.reference
new file mode 100644
index 00000000000..c97316543da
--- /dev/null
+++ b/tests/queries/0_stateless/01137_order_by_func_final.reference
@@ -0,0 +1,3 @@
+2020-05-05	704982704
+2020-05-06	704982704
+2020-05-07	704982704
diff --git a/tests/queries/0_stateless/01137_order_by_func_final.sql b/tests/queries/0_stateless/01137_order_by_func_final.sql
new file mode 100644
index 00000000000..32a9085e9ee
--- /dev/null
+++ b/tests/queries/0_stateless/01137_order_by_func_final.sql
@@ -0,0 +1,10 @@
+DROP TABLE IF EXISTS pk_func;
+CREATE TABLE pk_func(d DateTime, ui UInt32) ENGINE = SummingMergeTree ORDER BY toDate(d);
+
+INSERT INTO pk_func SELECT '2020-05-05 01:00:00', number FROM numbers(100000);
+INSERT INTO pk_func SELECT '2020-05-06 01:00:00', number FROM numbers(100000);
+INSERT INTO pk_func SELECT '2020-05-07 01:00:00', number FROM numbers(100000);
+
+SELECT toDate(d), ui FROM pk_func FINAL;
+
+DROP TABLE pk_func;