From ce40d84eefb0629ff49f55f99caef7c15392aa8b Mon Sep 17 00:00:00 2001 From: Nickita Taranov Date: Tue, 29 Mar 2022 21:16:05 +0200 Subject: [PATCH] more fixes --- .../Optimizations/liftUpFunctions.cpp | 36 +++++++++++++------ src/Processors/QueryPlan/SortingStep.cpp | 2 +- ...emerge_sort_lowered_memory_bytes_ratio.sql | 4 +-- .../01655_plan_optimizations.reference | 14 +++----- .../0_stateless/01655_plan_optimizations.sh | 8 ++--- 5 files changed, 35 insertions(+), 29 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp index 8e4242ea73e..80b82d989dd 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -2,35 +2,48 @@ #include #include #include +#include -#include -#include +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} +} -namespace DB::QueryPlanOptimizations +namespace { -void swapSortingAndUnneededCalculations(QueryPlan::Node * parent_node, ActionsDAGPtr && unneeded_for_sorting) +void swapSortingAndUnneededCalculations(DB::QueryPlan::Node * parent_node, DB::ActionsDAGPtr && unneeded_for_sorting) { - QueryPlan::Node * child_node = parent_node->children.front(); + DB::QueryPlan::Node * child_node = parent_node->children.front(); auto & parent_step = parent_node->step; auto & child_step = child_node->step; - auto * sorting_step = typeid_cast(parent_step.get()); + auto * sorting_step = typeid_cast(parent_step.get()); // Sorting -> Expression std::swap(parent_step, child_step); // Expression -> Sorting - sorting_step->updateInputStream(child_node->children.at(0)->step->getOutputStream()); - auto input_header = sorting_step->getInputStreams().at(0).header; + if (child_node->children.size() != 1) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "SortingStep is expected to have only one input stream."); + sorting_step->updateInputStream(child_node->children.front()->step->getOutputStream()); + auto input_header = sorting_step->getInputStreams().front().header; sorting_step->updateOutputStream(std::move(input_header)); auto description = parent_node->step->getStepDescription(); - parent_step = std::make_unique(child_step->getOutputStream(), std::move(unneeded_for_sorting)); + parent_step = std::make_unique(child_step->getOutputStream(), std::move(unneeded_for_sorting)); parent_step->setStepDescription(description + " [lifted up part]"); // UnneededCalculations -> Sorting } +} + +namespace DB::QueryPlanOptimizations +{ + size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -55,12 +68,15 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: if (unneeded_for_sorting->trivial()) return 0; + if (child_node->children.size() != 1) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "ExpressionStep is expected to have only one input stream."); + // Sorting (parent_node) -> Expression (child_node) auto & node_with_needed = nodes.emplace_back(); std::swap(node_with_needed.children, child_node->children); child_node->children = {&node_with_needed}; node_with_needed.step - = std::make_unique(node_with_needed.children.at(0)->step->getOutputStream(), std::move(needed_for_sorting)); + = std::make_unique(node_with_needed.children.front()->step->getOutputStream(), std::move(needed_for_sorting)); node_with_needed.step->setStepDescription(child_step->getStepDescription()); // Sorting (parent_node) -> so far the origin Expression (child_node) -> NeededCalculations (node_with_needed) diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 9cc242852bf..efefbad0ded 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -92,7 +92,7 @@ SortingStep::SortingStep( void SortingStep::updateInputStream(DataStream input_stream) { input_streams.clear(); - input_streams.push_back(std::move(input_stream)); + input_streams.emplace_back(std::move(input_stream)); } void SortingStep::updateOutputStream(Block result_header) diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql index 8646b40563e..6e23ab9cdb9 100644 --- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql +++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql @@ -10,8 +10,8 @@ set max_block_size=40960; -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption -- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB -- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0) -select repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 format Null; -- { serverError 241 } -select repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } -- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94) -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 218ff7bd8c9..bb9c614f728 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -143,17 +143,11 @@ Filter 2 3 2 3 > function calculation should be done after sorting and limit (if possible) -> the whole Expression node could be moved after Sorting -Expression -Limit -Expression -Sorting -Expression > Expression should be divided into two subexpressions and only one of them should be moved after Sorting -Expression -Limit -Expression +Expression (Before ORDER BY [lifted up part]) +FUNCTION sipHash64 Sorting -Expression +Expression (Before ORDER BY) +FUNCTION plus > this query should be executed without throwing an exception 0 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 1f5d88bd8bf..0b7f004a2ce 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -198,14 +198,10 @@ $CLICKHOUSE_CLIENT -q " ) where a != 1 settings enable_optimize_predicate_expression = 0" echo "> function calculation should be done after sorting and limit (if possible)" -echo "> the whole Expression node could be moved after Sorting" -$CLICKHOUSE_CLIENT -q " - explain select sipHash64(number) from numbers(100) order by number limit 5" | - sed 's/ //g' | grep -o "^ *\(Expression\|Limit\|Sorting\)" echo "> Expression should be divided into two subexpressions and only one of them should be moved after Sorting" $CLICKHOUSE_CLIENT -q " - explain select sipHash64(number) from numbers(100) order by number + 1 limit 5" | - sed 's/ //g' | grep -o "^ *\(Expression\|Limit\|Sorting\)" + explain actions = 1 select number as n, sipHash64(n) from numbers(100) order by number + 1 limit 5" | + sed 's/^ *//g' | grep -o "^ *\(Expression (Before ORDER BY.*)\|Sorting\|FUNCTION \w\+\)" echo "> this query should be executed without throwing an exception" $CLICKHOUSE_CLIENT -q " select throwIf(number = 5) from (select * from numbers(10)) order by number limit 1"