diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h
index 973304b366b..ee0c0f56c0b 100644
--- a/src/Processors/QueryPlan/Optimizations/Optimizations.h
+++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h
@@ -58,6 +58,9 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
 /// Reading in order from MergeTree table if DISTINCT columns match or form a prefix of MergeTree sorting key
 size_t tryDistinctReadInOrder(QueryPlan::Node * node);
 
+/// Remove redundant ORDER BYs: a sorting whose result is overwritten by another sorting above it
+void tryRemoveRedundantOrderBy(QueryPlan::Node * root);
+
 /// Put some steps under union, so that plan optimisation could be applied to union parts separately.
 /// For example, the plan can be rewritten like:
 /// - Something - - Expression - Something -
diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp
new file mode 100644
index 00000000000..5c195bb7730
--- /dev/null
+++ b/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp
@@ -0,0 +1,122 @@
+/// NOTE(review): the include targets were lost when this patch was extracted; the list below is
+/// restored from the names used in this file and has to be verified against the original commit.
+#include <Processors/QueryPlan/AggregatingStep.h>
+#include <Processors/QueryPlan/FillingStep.h>
+#include <Processors/QueryPlan/LimitByStep.h>
+#include <Processors/QueryPlan/LimitStep.h>
+#include <Processors/QueryPlan/Optimizations/Optimizations.h>
+#include <Processors/QueryPlan/SortingStep.h>
+#include <Common/typeid_cast.h>
+
+#include <vector>
+
+namespace DB::QueryPlanOptimizations
+{
+
+namespace
+{
+
+/// Steps that either define the order of their output or depend on the order of their input.
+bool affectsOrder(const IQueryPlanStep * step)
+{
+    return typeid_cast<const LimitStep *>(step) /// LIMIT on top of ORDER BY makes the ORDER BY non-removable
+        || typeid_cast<const LimitByStep *>(step) /// same for LIMIT BY
+        || typeid_cast<const FillingStep *>(step) /// ORDER BY ... WITH FILL is non-removable
+        || typeid_cast<const SortingStep *>(step) /// ORDER BY changes the order of any sorting below it
+        || typeid_cast<const AggregatingStep *>(step); /// aggregation changes the order
+}
+
+/// Returns true if a sorting located below `step_above` cannot influence the query result.
+bool isSortingRedundantBelow(const IQueryPlanStep * step_above)
+{
+    /// The rows are sorted again above, so the order produced below is overwritten.
+    if (typeid_cast<const SortingStep *>(step_above))
+        return true;
+
+    /// LIMIT, LIMIT BY and WITH FILL depend on the order of their input.
+    /// TODO: a sorting below an aggregation is removable only when no aggregate function depends on
+    /// the order of its input. That check is not implemented yet, so such a sorting is kept.
+    return false;
+}
+
+}
+
+/// Removes sorting steps whose result is overwritten by another sorting step located above them,
+/// e.g. the inner ORDER BY in `SELECT * FROM (SELECT * FROM t ORDER BY a) ORDER BY b`.
+/// A sorting is kept when the nearest order-affecting step above it is LIMIT, LIMIT BY, WITH FILL or an aggregation.
+/// The plan is modified in place. A removed node is only unlinked from its parent.
+/// NOTE(review): steps above a removed sorting may keep cached properties of their input stream -
+/// check whether those have to be refreshed after the removal.
+void tryRemoveRedundantOrderBy(QueryPlan::Node * root)
+{
+    if (!root)
+        return;
+
+    /// Depth-first traversal from the root with an explicit stack.
+    Stack stack;
+    stack.push_back({.node = root});
+
+    /// Order-affecting steps on the path from the root to the current node, the nearest one is at the back.
+    std::vector<IQueryPlanStep *> steps_affect_order;
+
+    while (!stack.empty())
+    {
+        auto & frame = stack.back();
+
+        /// A frame is on top of the stack on entry and once more after each of its children is finished.
+        /// The node is examined only on entry, otherwise its step would be registered several times.
+        if (frame.next_child == 0)
+        {
+            QueryPlan::Node * current_node = frame.node;
+            IQueryPlanStep * current_step = current_node->step.get();
+
+            const bool is_redundant_sorting = stack.size() > 1
+                && !steps_affect_order.empty()
+                && !current_node->children.empty()
+                && typeid_cast<SortingStep *>(current_step)
+                && isSortingRedundantBelow(steps_affect_order.back());
+
+            if (is_redundant_sorting)
+            {
+                /// The parent is the frame right below the top one. Its next_child is already advanced
+                /// past the current node, so the current node is children[next_child - 1].
+                const Frame & parent_frame = stack[stack.size() - 2];
+                QueryPlan::Node * parent_node = parent_frame.node;
+                const size_t child_idx = parent_frame.next_child - 1;
+                chassert(parent_node->children[child_idx] == current_node);
+
+                /// Only the sorting step is unlinked. An expression step below it is kept:
+                /// presumably its columns can be used by the steps above - TODO confirm.
+                QueryPlan::Node * replacement = current_node->children.front();
+                parent_node->children[child_idx] = replacement;
+
+                /// Continue with the node that took the place of the removed one,
+                /// it can be a redundant sorting as well.
+                frame.node = replacement;
+                continue;
+            }
+
+            if (affectsOrder(current_step))
+                steps_affect_order.push_back(current_step);
+        }
+
+        /// visit children if there are non-visited
+        if (frame.next_child < frame.node->children.size())
+        {
+            QueryPlan::Node * child = frame.node->children[frame.next_child];
+            ++frame.next_child;
+            /// `frame` is invalidated by the push and must not be used after it
+            stack.push_back({.node = child});
+        }
+        /// all children are visited
+        else
+        {
+            if (!steps_affect_order.empty() && frame.node->step.get() == steps_affect_order.back())
+                steps_affect_order.pop_back();
+
+            stack.pop_back();
+        }
+    }
+}
+
+}
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index e1662d43015..d53d26ffc6a 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -447,6 +447,7 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
 
 void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings)
 {
+    QueryPlanOptimizations::tryRemoveRedundantOrderBy(root);
     QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
     QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes);
 }