diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp index c5a440aa51d..035e9958641 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp @@ -24,16 +24,51 @@ void printStepName(const char * prefix, const QueryPlan::Node * node) LOG_DEBUG(&Poco::Logger::get("RedundantOrderBy"), "{}: {}: {}", prefix, stepName(node), reinterpret_cast(node->step.get())); } +struct FrameWithParent +{ + QueryPlan::Node * node = nullptr; + QueryPlan::Node * parent_node = nullptr; + size_t next_child = 0; +}; + +using StackWithParent = std::vector; + +bool checkForStatefulFunctions(std::vector const & stack, const QueryPlan::Node * node_affect_order) +{ + chassert(!stack.empty()); + chassert(typeid_cast(stack.back().node->step.get())); + + /// skip element on top of stack since it's sorting + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const auto * node = it->node; + if (node == node_affect_order) + break; + + const auto * step = node->step.get(); + + const auto * expr = typeid_cast(step); + if (expr) + { + if (expr->getExpression()->hasStatefulFunctions()) + return true; + } + else + { + const auto * trans = typeid_cast(step); + if (!trans) + break; + + if (!trans->getDataStreamTraits().preserves_sorting) + break; + } + } + return false; +} + void tryRemoveRedundantOrderBy(QueryPlan::Node * root) { - struct Frame - { - QueryPlan::Node * node = nullptr; - QueryPlan::Node * parent_node = nullptr; - size_t next_child = 0; - }; - - std::vector stack; + StackWithParent stack; stack.push_back({.node = root}); std::vector nodes_affect_order; @@ -44,7 +79,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root) QueryPlan::Node * current_node = frame.node; QueryPlan::Node * parent_node = frame.parent_node; - IQueryPlanStep * current_step = frame.node->step.get(); + IQueryPlanStep * current_step = current_node->step.get(); printStepName("back", current_node); /// top-down visit @@ -66,7 +101,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root) return false; bool remove_sorting = false; - /// TODO: (0) check stateful functions + /// (1) aggregation if (const AggregatingStep * parent_aggr = typeid_cast(step_affect_order); parent_aggr) { @@ -88,6 +123,12 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root) if (remove_sorting) { + /// if there is expression with stateful function between current step + /// and step which affects order, then we need to keep sorting since + /// stateful function output can depend on order + if (checkForStatefulFunctions(stack, node_affect_order)) + return false; + chassert(typeid_cast(current_node->children.front()->step.get())); chassert(!current_node->children.front()->children.empty()); @@ -104,7 +145,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root) frame.next_child = frame.node->children.size(); /// current sorting step has been removed from plan, its parent has new children, need to visit them - auto next_frame = Frame{.node = parent_node->children[0], .parent_node = parent_node}; + auto next_frame = FrameWithParent{.node = parent_node->children[0], .parent_node = parent_node}; ++frame.next_child; printStepName("push", next_frame.node); stack.push_back(next_frame); @@ -126,7 +167,7 @@ void tryRemoveRedundantOrderBy(QueryPlan::Node * root) /// Traverse all children if (frame.next_child < frame.node->children.size()) { - auto next_frame = Frame{.node = current_node->children[frame.next_child], .parent_node = current_node}; + auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node}; ++frame.next_child; printStepName("push", next_frame.node); stack.push_back(next_frame); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 33d3d38205e..60d9c9bed97 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -447,10 +447,11 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings) { + QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); + if (optimization_settings.remove_redundant_order_by) QueryPlanOptimizations::tryRemoveRedundantOrderBy(root); - QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); } diff --git a/tests/queries/0_stateless/02496_remove_redundant_order_by.reference b/tests/queries/0_stateless/02496_remove_redundant_order_by.reference index 3c0bfc44e0c..05b167eb699 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_order_by.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_order_by.reference @@ -22,7 +22,7 @@ Expression (Project names) Header: number UInt64 Sorting (Sorting for ORDER BY) Header: number_0 UInt64 - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) Header: number_0 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -37,7 +37,7 @@ Header: number UInt64 Header: number_1 UInt64 Sorting (Sorting for ORDER BY) Header: number_1 UInt64 - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) Header: number_1 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -54,7 +54,7 @@ Header: number UInt64 Header: number_1 UInt64 Sorting (Sorting for ORDER BY) Header: number_1 UInt64 - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) Header: number_1 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -69,7 +69,7 @@ Header: t1.number UInt64 Header: t1.number_0 UInt64 Sorting (Sorting for ORDER BY) Header: number_2 UInt64 - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) Header: number_2 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -77,7 +77,7 @@ Header: t1.number UInt64 Header: t2.number_1 UInt64 Sorting (Sorting for ORDER BY) Header: number_4 UInt64 - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) Header: number_4 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -94,11 +94,11 @@ Header: t1.number UInt64 Join (JOIN FillRightFirst) Header: t1.number_0 UInt64 t2.number_1 UInt64 - Expression ((Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Change column names to column identifiers + Project names)) Header: t1.number_0 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 - Expression ((Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Change column names to column identifiers + Project names)) Header: t2.number_1 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -108,7 +108,7 @@ Header: sum(number) UInt64 Aggregating Header: number_0 UInt64 sum(number_0) UInt64 - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))))) + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) Header: number_0 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -122,7 +122,7 @@ Header: any(number) UInt64 Header: number_0 UInt64 Sorting (Sorting for ORDER BY) Header: number_1 UInt64 - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) Header: number_1 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -137,7 +137,7 @@ Header: a UInt64 Aggregating Header: number_1 UInt64 sum(number_1) UInt64 - Expression ((Before GROUP BY + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) Header: number_1 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 @@ -171,7 +171,30 @@ Header: a UInt64 Header: number_1 UInt64 Sorting (Sorting for ORDER BY) Header: number_2 UInt64 - Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + Project names)))) Header: number_2 UInt64 ReadFromStorage (SystemNumbers) Header: number UInt64 +-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function +-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) +-- non-stateful function does _not_ prevent removing inner ORDER BY +Expression (Project names) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + Expression (Project names) + Expression (Projection) + Expression (Change column names to column identifiers) + ReadFromStorage (SystemNumbers) diff --git a/tests/queries/0_stateless/02496_remove_redundant_order_by.sql b/tests/queries/0_stateless/02496_remove_redundant_order_by.sql index 9fb64b8b6ca..04718ab5eeb 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_order_by.sql +++ b/tests/queries/0_stateless/02496_remove_redundant_order_by.sql @@ -209,3 +209,32 @@ FROM GROUP BY number ) WHERE a > 0; + +SELECT '-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function'; +SET query_plan_enable_optimizations = 0; + +SELECT '-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order'; +EXPLAIN +SELECT + number, + neighbor(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +ORDER BY number ASC; + +SELECT '-- non-stateful function does _not_ prevent removing inner ORDER BY'; +EXPLAIN +SELECT + number, + plus(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +ORDER BY number ASC;