diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 25116f5145a..f06ac229e94 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1527,6 +1527,21 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & return res; } +ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameSet & sort_columns) const +{ + std::unordered_set split_nodes; + for (const auto & sort_column : sort_columns) + if (const auto * node = tryFindInIndex(sort_column)) + split_nodes.insert(node); + else + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Sorting column {} wasn't found in the ActionsDAG's index. DAG:\n{}", sort_column, dumpDAG()); + + auto res = split(split_nodes); + res.second->project_input = project_input; + return res; +} + ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const { const auto * node = tryFindInIndex(column_name); diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index b07ab08c997..1ff82c8ea60 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -274,6 +274,10 @@ public: /// Index of initial actions must contain column_name. SplitResult splitActionsForFilter(const std::string & column_name) const; + /// Splits actions into two parts. The first part contains all the calculations required to calculate sort_columns. + /// The second contains the rest. + SplitResult splitActionsBySortingDescription(const NameSet & sort_columns) const; + /// Create actions which may calculate part of filter using only available_inputs. /// If nothing may be calculated, returns nullptr. /// Otherwise, return actions which inputs are from available_inputs. diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 10bc6293537..1d5b83dc9d0 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -44,16 +44,20 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); /// May split FilterStep and push down only part of it. size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); +/// Move ExpressionStep after SortingStep if possible. +/// May split ExpressionStep and lift up only a part of it. +size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); + inline const auto & getOptimizations() { - static const std::array optimizations = - {{ + static const std::array optimizations = {{ {tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::optimize_plan}, {tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::optimize_plan}, {trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::optimize_plan}, {tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::optimize_plan}, {tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down}, - }}; + {tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::optimize_plan}, + }}; return optimizations; } diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp new file mode 100644 index 00000000000..2a415f8c5af --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} +} + +namespace +{ + +const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node) +{ + if (node.children.size() != 1) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Node \"{}\" is expected to have only one child.", node.step->getName()); + return node.children.front()->step->getOutputStream(); +} + +} + +namespace DB::QueryPlanOptimizations +{ + +size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) +{ + if (parent_node->children.size() != 1) + return 0; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent_step = parent_node->step; + auto & child_step = child_node->step; + auto * sorting_step = typeid_cast(parent_step.get()); + auto * expression_step = typeid_cast(child_step.get()); + + if (!sorting_step || !expression_step) + return 0; + + NameSet sort_columns; + for (const auto & col : sorting_step->getSortDescription()) + sort_columns.insert(col.column_name); + auto [needed_for_sorting, unneeded_for_sorting] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns); + + // No calculations can be postponed. + if (unneeded_for_sorting->trivial()) + return 0; + + // Sorting (parent_node) -> Expression (child_node) + auto & node_with_needed = nodes.emplace_back(); + std::swap(node_with_needed.children, child_node->children); + child_node->children = {&node_with_needed}; + + node_with_needed.step = std::make_unique(getChildOutputStream(node_with_needed), std::move(needed_for_sorting)); + node_with_needed.step->setStepDescription(child_step->getStepDescription()); + // Sorting (parent_node) -> so far the origin Expression (child_node) -> NeededCalculations (node_with_needed) + + std::swap(parent_step, child_step); + // so far the origin Expression (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed) + + sorting_step->updateInputStream(getChildOutputStream(*child_node)); + auto input_header = sorting_step->getInputStreams().at(0).header; + sorting_step->updateOutputStream(std::move(input_header)); + + auto description = parent_step->getStepDescription(); + parent_step = std::make_unique(child_step->getOutputStream(), std::move(unneeded_for_sorting)); + parent_step->setStepDescription(description + " [lifted up part]"); + // UneededCalculations (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed) + + return 3; +} +} diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 1e56c02504b..859c9fd9e19 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,11 +1,12 @@ +#include +#include +#include #include -#include +#include +#include #include #include -#include -#include -#include -#include +#include #include namespace DB @@ -88,6 +89,18 @@ SortingStep::SortingStep( output_stream->sort_mode = DataStream::SortMode::Stream; } +void SortingStep::updateInputStream(DataStream input_stream) +{ + input_streams.clear(); + input_streams.emplace_back(std::move(input_stream)); +} + +void SortingStep::updateOutputStream(Block result_header) +{ + output_stream = createOutputStream(input_streams.at(0), std::move(result_header), getDataStreamTraits()); + updateDistinctColumns(output_stream->header, output_stream->distinct_columns); +} + void SortingStep::updateLimit(size_t limit_) { if (limit_ && (limit == 0 || limit_ < limit)) diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 8e253e71f44..1738d8d4e45 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -49,6 +49,11 @@ public: /// Add limit or change it to lower value. void updateLimit(size_t limit_); + void updateInputStream(DataStream input_stream); + void updateOutputStream(Block result_header); + + SortDescription getSortDescription() const { return result_description; } + private: enum class Type diff --git a/tests/performance/function_calculation_after_sorting_and_limit.xml b/tests/performance/function_calculation_after_sorting_and_limit.xml new file mode 100644 index 00000000000..ddb8f860600 --- /dev/null +++ b/tests/performance/function_calculation_after_sorting_and_limit.xml @@ -0,0 +1,4 @@ + + SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5 + SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number + 1 LIMIT 5 + diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 11cc146dd62..68875735110 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -35,10 +35,11 @@ Expression (Projection) ReadFromMergeTree (default.test_table) Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - Sorting - Expression (Before ORDER BY) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree (default.test_table) + Expression (Before ORDER BY [lifted up part]) + Sorting + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromMergeTree (default.test_table) optimize_aggregation_in_order Expression ((Projection + Before ORDER BY)) Aggregating diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 655232fcdd4..c766bf16f19 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -925,10 +925,11 @@ Expression ((Projection + Before ORDER BY)) Window (Window step for window \'ORDER BY o ASC, number ASC\') Sorting (Sorting for window \'ORDER BY o ASC, number ASC\') Window (Window step for window \'ORDER BY number ASC\') - Sorting (Sorting for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + Expression ((Before window functions + (Projection + Before ORDER BY)) [lifted up part]) + Sorting (Sorting for window \'ORDER BY number ASC\') + Expression ((Before window functions + (Projection + Before ORDER BY))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) -- A test case for the sort comparator found by fuzzer. SELECT max(number) OVER (ORDER BY number DESC NULLS FIRST), diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql index 5de4210d3f2..6e23ab9cdb9 100644 --- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql +++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql @@ -10,8 +10,8 @@ set max_block_size=40960; -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption -- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB -- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0) -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 format Null; -- { serverError 241 } -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } -- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94) -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 33a7ff44b74..bb9c614f728 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -142,3 +142,12 @@ Filter Filter 2 3 2 3 +> function calculation should be done after sorting and limit (if possible) +> Expression should be divided into two subexpressions and only one of them should be moved after Sorting +Expression (Before ORDER BY [lifted up part]) +FUNCTION sipHash64 +Sorting +Expression (Before ORDER BY) +FUNCTION plus +> this query should be executed without throwing an exception +0 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index b66d788a338..0b7f004a2ce 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -196,3 +196,12 @@ $CLICKHOUSE_CLIENT -q " select a, b from ( select number + 1 as a, number + 2 as b from numbers(2) union all select number + 1 as b, number + 2 as a from numbers(2) ) where a != 1 settings enable_optimize_predicate_expression = 0" + +echo "> function calculation should be done after sorting and limit (if possible)" +echo "> Expression should be divided into two subexpressions and only one of them should be moved after Sorting" +$CLICKHOUSE_CLIENT -q " + explain actions = 1 select number as n, sipHash64(n) from numbers(100) order by number + 1 limit 5" | + sed 's/^ *//g' | grep -o "^ *\(Expression (Before ORDER BY.*)\|Sorting\|FUNCTION \w\+\)" +echo "> this query should be executed without throwing an exception" +$CLICKHOUSE_CLIENT -q " + select throwIf(number = 5) from (select * from numbers(10)) order by number limit 1" diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference index 9e24b7c6ea6..67a043d6646 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference @@ -7,13 +7,15 @@ ExpressionTransform (Limit) Limit - (Sorting) - MergingSortedTransform 2 → 1 - (Expression) - ExpressionTransform × 2 - (SettingQuotaAndLimits) - (ReadFromMergeTree) - MergeTreeInOrder × 2 0 → 1 + (Expression) + ExpressionTransform + (Sorting) + MergingSortedTransform 2 → 1 + (Expression) + ExpressionTransform × 2 + (SettingQuotaAndLimits) + (ReadFromMergeTree) + MergeTreeInOrder × 2 0 → 1 2020-10-01 9 2020-10-01 9 2020-10-01 9 @@ -23,16 +25,18 @@ ExpressionTransform ExpressionTransform (Limit) Limit - (Sorting) - MergingSortedTransform 2 → 1 - (Expression) - ExpressionTransform × 2 - (SettingQuotaAndLimits) - (ReadFromMergeTree) - ReverseTransform - MergeTreeReverse 0 → 1 - ReverseTransform - MergeTreeReverse 0 → 1 + (Expression) + ExpressionTransform + (Sorting) + MergingSortedTransform 2 → 1 + (Expression) + ExpressionTransform × 2 + (SettingQuotaAndLimits) + (ReadFromMergeTree) + ReverseTransform + MergeTreeReverse 0 → 1 + ReverseTransform + MergeTreeReverse 0 → 1 2020-10-01 9 2020-10-01 9 2020-10-01 9 @@ -42,15 +46,17 @@ ExpressionTransform ExpressionTransform (Limit) Limit - (Sorting) - FinishSortingTransform - PartialSortingTransform - MergingSortedTransform 2 → 1 - (Expression) - ExpressionTransform × 2 - (SettingQuotaAndLimits) - (ReadFromMergeTree) - MergeTreeInOrder × 2 0 → 1 + (Expression) + ExpressionTransform + (Sorting) + FinishSortingTransform + PartialSortingTransform + MergingSortedTransform 2 → 1 + (Expression) + ExpressionTransform × 2 + (SettingQuotaAndLimits) + (ReadFromMergeTree) + MergeTreeInOrder × 2 0 → 1 2020-10-11 0 2020-10-11 0 2020-10-11 0