From e25053dec0c87b275ae520ad7a35016ce709f171 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Dec 2022 22:46:39 +0000 Subject: [PATCH] Query plan visitor with debug logs --- .../Optimizations/removeRedundantOrderBy.cpp | 343 ++++++++++-------- 1 file changed, 200 insertions(+), 143 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp index c26c337d6a5..82f697819ab 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantOrderBy.cpp @@ -14,185 +14,242 @@ namespace DB::QueryPlanOptimizations { -const char * stepName(const QueryPlan::Node * node) +template +class QueryPlanVisitor { - IQueryPlanStep * current_step = node->step.get(); - return typeid(*current_step).name(); -} - -void printStepName(const char * prefix, const QueryPlan::Node * node) -{ - LOG_DEBUG(&Poco::Logger::get("RedundantOrderBy"), "{}: {}: {}", prefix, stepName(node), reinterpret_cast(node->step.get())); -} - -struct FrameWithParent -{ - QueryPlan::Node * node = nullptr; - QueryPlan::Node * parent_node = nullptr; - size_t next_child = 0; -}; - -using StackWithParent = std::vector; - -bool checkIfCanDeleteSorting(const StackWithParent & stack, const QueryPlan::Node * node_affect_order) -{ - chassert(!stack.empty()); - chassert(typeid_cast(stack.back().node->step.get())); - - /// skip element on top of stack since it's sorting - for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) +protected: + struct FrameWithParent { - const auto * node = it->node; - /// walking though stack until reach node which affects order - if (node == node_affect_order) - break; + QueryPlan::Node * node = nullptr; + QueryPlan::Node * parent_node = nullptr; + size_t next_child = 0; + }; - const auto * step = node->step.get(); + using StackWithParent = std::vector; - const auto * expr = typeid_cast(step); - if (expr) + QueryPlan::Node * root = nullptr; + StackWithParent stack; + +public: + explicit QueryPlanVisitor(QueryPlan::Node * root_) : root(root_) { } + + void visit() + { + stack.push_back({.node = root}); + + while (!stack.empty()) { - if (expr->getExpression()->hasStatefulFunctions()) - return true; - } - else - { - const auto * window = typeid_cast(step); - if (window) - return true; + auto & frame = stack.back(); - const auto * trans = typeid_cast(step); - if (!trans) - break; + QueryPlan::Node * current_node = frame.node; + QueryPlan::Node * parent_node = frame.parent_node; - if (!trans->getDataStreamTraits().preserves_sorting) - break; + logStep("back", current_node); + + /// top-down visit + if (0 == frame.next_child) + { + logStep("top-down", current_node); + if (! visitTopDown(current_node, parent_node)) + continue; + } + /// Traverse all children + if (frame.next_child < frame.node->children.size()) + { + auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node}; + ++frame.next_child; + logStep("push", next_frame.node); + stack.push_back(next_frame); + continue; + } + + /// bottom-up visit + logStep("bottom-up", current_node); + visitBottomUp(current_node, parent_node); + + logStep("pop", current_node); + stack.pop_back(); } } - return false; -} -void tryRemoveRedundantOrderBy(QueryPlan::Node * root) + bool visitTopDown(QueryPlan::Node * current_node, QueryPlan::Node * parent_node) + { + return getDerived().visitTopDown(current_node, parent_node); + } + void visitBottomUp(QueryPlan::Node * current_node, QueryPlan::Node * parent_node) + { + getDerived().visitBottomUp(current_node, parent_node); + } + +private: + Derived & getDerived() { return *static_cast(this); } + + const Derived & getDerived() const { return *static_cast(this); } + +protected: + void logStep(const char * prefix, const QueryPlan::Node * node) + { + if constexpr (debug_logging) + { + IQueryPlanStep * current_step = node->step.get(); + LOG_DEBUG( + &Poco::Logger::get("QueryPlanVisitor"), + "{}: {}: {}", + prefix, + current_step->getName(), + reinterpret_cast(current_step)); + } + } +}; + +class RemoveRedundantOrderBy : public QueryPlanVisitor { - StackWithParent stack; - stack.push_back({.node = root}); - std::vector nodes_affect_order; - while (!stack.empty()) +public: + explicit RemoveRedundantOrderBy(QueryPlan::Node * root_) : QueryPlanVisitor(root_) { } + + bool visitTopDown(QueryPlan::Node * current_node, QueryPlan::Node * parent_node) { - auto & frame = stack.back(); - - QueryPlan::Node * current_node = frame.node; - QueryPlan::Node * parent_node = frame.parent_node; IQueryPlanStep * current_step = current_node->step.get(); - printStepName("back", current_node); - /// top-down visit - if (0 == frame.next_child) + /// if there is parent node which can affect order and current step is sorting + /// then check if we can remove the sorting step (and corresponding expression step) + if (!nodes_affect_order.empty() && typeid_cast(current_step)) { - printStepName("visit", current_node); - /// if there is parent node which can affect order and current step is sorting - /// then check if we can remove the sorting step (and corresponding expression step) - if (!nodes_affect_order.empty() && typeid_cast(current_step)) + auto try_to_remove_sorting_step = [&]() -> bool { - auto try_to_remove_sorting_step = [&]() -> bool + QueryPlan::Node * node_affect_order = nodes_affect_order.back(); + IQueryPlanStep * step_affect_order = node_affect_order->step.get(); + /// if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable + /// if ORDER BY is with FILL WITH, it is non-removable + if (typeid_cast(step_affect_order) || typeid_cast(step_affect_order) + || typeid_cast(step_affect_order)) + return false; + + bool consider_to_remove_sorting = false; + + /// (1) aggregation + if (const AggregatingStep * parent_aggr = typeid_cast(step_affect_order); parent_aggr) { - QueryPlan::Node * node_affect_order = nodes_affect_order.back(); - IQueryPlanStep * step_affect_order = node_affect_order->step.get(); - /// if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable - /// if ORDER BY is with FILL WITH, it is non-removable - if (typeid_cast(step_affect_order) || typeid_cast(step_affect_order) - || typeid_cast(step_affect_order)) + auto const & aggregates = parent_aggr->getParams().aggregates; + for (const auto & aggregate : aggregates) + { + auto aggregate_function_properties + = AggregateFunctionFactory::instance().tryGetProperties(aggregate.function->getName()); + if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) + return false; + } + consider_to_remove_sorting = true; + } + /// (2) sorting + else if (typeid_cast(step_affect_order)) + { + consider_to_remove_sorting = true; + } + + if (consider_to_remove_sorting) + { + /// (1) if there is expression with stateful function between current step + /// and step which affects order, then we need to keep sorting since + /// stateful function output can depend on order + /// (2) for window function we do ORDER BY in 2 Sorting steps, so do not delete Sorting + /// if window function step is on top + if (checkIfCanDeleteSorting(node_affect_order)) return false; - bool consider_to_remove_sorting = false; + chassert(typeid_cast(current_node->children.front()->step.get())); + chassert(!current_node->children.front()->children.empty()); - /// (1) aggregation - if (const AggregatingStep * parent_aggr = typeid_cast(step_affect_order); parent_aggr) - { - auto const & aggregates = parent_aggr->getParams().aggregates; - for (const auto & aggregate : aggregates) - { - auto aggregate_function_properties - = AggregateFunctionFactory::instance().tryGetProperties(aggregate.function->getName()); - if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) - return false; - } - consider_to_remove_sorting = true; - } - /// (2) sorting - else if (typeid_cast(step_affect_order)) - { - consider_to_remove_sorting = true; - } - - if (consider_to_remove_sorting) - { - /// (1) if there is expression with stateful function between current step - /// and step which affects order, then we need to keep sorting since - /// stateful function output can depend on order - /// (2) for window function we do ORDER BY in 2 Sorting steps, so do not delete Sorting - /// if window function step is on top - if (checkIfCanDeleteSorting(stack, node_affect_order)) - return false; - - chassert(typeid_cast(current_node->children.front()->step.get())); - chassert(!current_node->children.front()->children.empty()); - - /// need to remove sorting and its expression from plan - parent_node->children.front() = current_node->children.front()->children.front(); - } - return true; - }; - if (try_to_remove_sorting_step()) - { - LOG_DEBUG(&Poco::Logger::get("RedundantOrderBy"), "Sorting removed"); - - /// mark removed node as visited - frame.next_child = frame.node->children.size(); - - /// current sorting step has been removed from plan, its parent has new children, need to visit them - auto next_frame = FrameWithParent{.node = parent_node->children[0], .parent_node = parent_node}; - ++frame.next_child; - printStepName("push", next_frame.node); - stack.push_back(next_frame); - continue; + /// need to remove sorting and its expression from plan + parent_node->children.front() = current_node->children.front()->children.front(); } - } - - if (typeid_cast(current_step) - || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable - || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable - || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting - || typeid_cast(current_step)) /// (4) aggregation change order + return true; + }; + if (try_to_remove_sorting_step()) { - printStepName("steps_affect_order/push", current_node); - nodes_affect_order.push_back(current_node); + logStep("removed from plan", current_node); + + auto & frame = stack.back(); + /// mark removed node as visited + frame.next_child = frame.node->children.size(); + + /// current sorting step has been removed from plan, its parent has new children, need to visit them + auto next_frame = FrameWithParent{.node = parent_node->children[0], .parent_node = parent_node}; + stack.push_back(next_frame); + logStep("push", next_frame.node); + return false; } } - /// Traverse all children - if (frame.next_child < frame.node->children.size()) + if (typeid_cast(current_step) + || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting + || typeid_cast(current_step)) /// (4) aggregation change order { - auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node}; - ++frame.next_child; - printStepName("push", next_frame.node); - stack.push_back(next_frame); - continue; + logStep("steps_affect_order/push", current_node); + nodes_affect_order.push_back(current_node); } - /// bottom-up visit + return true; + } + + void visitBottomUp(QueryPlan::Node * current_node, QueryPlan::Node *) + { /// we come here when all children of current_node are visited, /// so it's a node which affect order, remove it from the corresponding stack if (!nodes_affect_order.empty() && nodes_affect_order.back() == current_node) { - printStepName("node_affect_order/pop", current_node); + logStep("node_affect_order/pop", current_node); nodes_affect_order.pop_back(); } - - printStepName("pop", current_node); - stack.pop_back(); } + +private: + bool checkIfCanDeleteSorting(const QueryPlan::Node * node_affect_order) + { + chassert(!stack.empty()); + chassert(typeid_cast(stack.back().node->step.get())); + + /// skip element on top of stack since it's sorting + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const auto * node = it->node; + /// walking though stack until reach node which affects order + if (node == node_affect_order) + break; + + const auto * step = node->step.get(); + + const auto * expr = typeid_cast(step); + if (expr) + { + if (expr->getExpression()->hasStatefulFunctions()) + return true; + } + else + { + const auto * window = typeid_cast(step); + if (window) + return true; + + const auto * trans = typeid_cast(step); + if (!trans) + break; + + if (!trans->getDataStreamTraits().preserves_sorting) + break; + } + } + return false; + } + +}; + +void tryRemoveRedundantOrderBy(QueryPlan::Node * root) +{ + RemoveRedundantOrderBy(root).visit(); } + }