From 67213b8ad4cf4bb7e87536f252cfae99192de04d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 May 2020 21:22:58 +0300 Subject: [PATCH] fix sample with final --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 42 +++++++++++-------- .../MergeTree/MergeTreeDataSelectExecutor.h | 6 ++- .../0_stateless/01137_sample_final.reference | 10 +++++ .../0_stateless/01137_sample_final.sql | 13 ++++++ 4 files changed, 51 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/01137_sample_final.reference create mode 100644 tests/queries/0_stateless/01137_sample_final.sql diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c0785899aab..ddd1e14381d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -601,6 +601,11 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( .save_marks_in_cache = true }; + /// Projection, that needed to drop columns, which have appeared by execution + /// of some extra expressions, and to allow execute the same expressions later. + /// NOTE: It may lead to double computation of expressions. + ExpressionActionsPtr result_projection; + if (select.final()) { /// Add columns needed to calculate the sorting expression and the sign. @@ -623,7 +628,8 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( query_info, virt_column_names, settings, - reader_settings); + reader_settings, + result_projection); } else if (settings.optimize_read_in_order && query_info.input_sorting_info) { @@ -644,7 +650,8 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( sorting_key_prefix_expr, virt_column_names, settings, - reader_settings); + reader_settings, + result_projection); } else { @@ -667,6 +674,13 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( pipe.getHeader(), filter_expression, filter_function->getColumnName(), false)); } + if (result_projection) + { + for (auto & pipe : res) + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), result_projection)); + } + /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. if (sample_factor_column_queried) { @@ -831,7 +845,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( const ExpressionActionsPtr & sorting_key_prefix_expr, const Names & virt_columns, const Settings & settings, - const MergeTreeReaderSettings & reader_settings) const + const MergeTreeReaderSettings & reader_settings, + ExpressionActionsPtr & out_projection) const { size_t sum_marks = 0; const InputSortingInfoPtr & input_sorting_info = query_info.input_sorting_info; @@ -1007,19 +1022,14 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( sort_description.emplace_back(data.sorting_key_columns[j], input_sorting_info->direction, 1); - /// Project input columns to drop columns from sorting_key_prefix_expr - /// to allow execute the same expression later. - /// NOTE: It may lead to double computation of expression. - auto projection = createProjection(pipes.back(), data); + out_projection = createProjection(pipes.back(), data); for (auto & pipe : pipes) pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), sorting_key_prefix_expr)); auto merging_sorted = std::make_shared( pipes.back().getHeader(), pipes.size(), sort_description, max_block_size); - Pipe merged(std::move(pipes), std::move(merging_sorted)); - merged.addSimpleTransform(std::make_shared(merged.getHeader(), projection)); - res.emplace_back(std::move(merged)); + res.emplace_back(std::move(pipes), std::move(merging_sorted)); } else res.emplace_back(std::move(pipes.front())); @@ -1037,7 +1047,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( const SelectQueryInfo & query_info, const Names & virt_columns, const Settings & settings, - const MergeTreeReaderSettings & reader_settings) const + const MergeTreeReaderSettings & reader_settings, + ExpressionActionsPtr & out_projection) const { const auto data_settings = data.getSettings(); size_t sum_marks = 0; @@ -1065,10 +1076,6 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( use_uncompressed_cache = false; Pipes pipes; - /// Project input columns to drop columns from sorting_key_expr - /// to allow execute the same expression later. - /// NOTE: It may lead to double computation of expression. - ExpressionActionsPtr projection; for (const auto & part : parts) { @@ -1079,8 +1086,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( virt_columns, part.part_index_in_query); Pipe pipe(std::move(source_processor)); - if (!projection) - projection = createProjection(pipe, data); + if (!out_projection) + out_projection = createProjection(pipe, data); pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), data.sorting_key_expr)); pipes.emplace_back(std::move(pipe)); @@ -1154,7 +1161,6 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (merged_processor) { Pipe pipe(std::move(pipes), std::move(merged_processor)); - pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), projection)); pipes = Pipes(); pipes.emplace_back(std::move(pipe)); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index e6eb26da7e3..05f330bf643 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -67,7 +67,8 @@ private: const ExpressionActionsPtr & sorting_key_prefix_expr, const Names & virt_columns, const Settings & settings, - const MergeTreeReaderSettings & reader_settings) const; + const MergeTreeReaderSettings & reader_settings, + ExpressionActionsPtr & out_projection) const; Pipes spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, @@ -77,7 +78,8 @@ private: const SelectQueryInfo & query_info, const Names & virt_columns, const Settings & settings, - const MergeTreeReaderSettings & reader_settings) const; + const MergeTreeReaderSettings & reader_settings, + ExpressionActionsPtr & out_projection) const; /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index. size_t getApproximateTotalRowsToRead( diff --git a/tests/queries/0_stateless/01137_sample_final.reference b/tests/queries/0_stateless/01137_sample_final.reference new file mode 100644 index 00000000000..91dfe010161 --- /dev/null +++ b/tests/queries/0_stateless/01137_sample_final.reference @@ -0,0 +1,10 @@ +8 8 +9 9 +10 10 +14 14 +15 15 +9140302661501632497 +9199082625845137542 +8769270213041934496 +4926958392161000708 +89922286675368115 diff --git a/tests/queries/0_stateless/01137_sample_final.sql b/tests/queries/0_stateless/01137_sample_final.sql new file mode 100644 index 00000000000..99fac514767 --- /dev/null +++ b/tests/queries/0_stateless/01137_sample_final.sql @@ -0,0 +1,13 @@ +drop table if exists tab; + +create table tab (x UInt64, v UInt64) engine = ReplacingMergeTree(v) order by (x, sipHash64(x)) sample by sipHash64(x); +insert into tab select number, number from numbers(1000); +select * from tab final sample 1/2 order by x limit 5; + +drop table tab; + +create table tab (x UInt64, v UInt64) engine = ReplacingMergeTree(v) order by (x, sipHash64(x)) sample by sipHash64(x); +insert into tab select number, number from numbers(1000); +select sipHash64(x) from tab sample 1/2 order by x, sipHash64(x) limit 5; + +drop table tab;