diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4207d371c09..dba9385fe27 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -190,6 +190,7 @@ add_object_library(clickhouse_processors_sources Processors/Sources) add_object_library(clickhouse_processors_merges Processors/Merges) add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms) add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) +add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations) set (DBMS_COMMON_LIBRARIES) # libgcc_s does not provide an implementation of an atomics library. Instead, diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index e71c60bb540..d8c40ffda2f 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -609,10 +609,10 @@ bool ActionsDAG::hasStatefulFunctions() const return false; } -bool ActionsDAG::empty() const +bool ActionsDAG::trivial() const { for (const auto & node : nodes) - if (node.type != ActionType::INPUT) + if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN) return false; return true; diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index c82496b2a8a..b12da30e24f 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -223,7 +223,7 @@ public: bool hasArrayJoin() const; bool hasStatefulFunctions() const; - bool empty() const; /// If actions only contain inputs. + bool trivial() const; /// If actions has no functions or array join. const ActionsSettings & getSettings() const { return settings; } diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h new file mode 100644 index 00000000000..454eab9649a --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -0,0 +1,56 @@ +#pragma once +#include +#include + +namespace DB +{ + +namespace QueryPlanOptimizations +{ + +/// This is the main function which optimizes the whole QueryPlan tree. +void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes); + +/// Optimization is a function applied to QueryPlan::Node. +/// It can read and update subtree of specified node. +/// It return the number of updated layers of subtree if some change happened. +/// It must guarantee that the structure of tree is correct. +/// +/// New nodes should be added to QueryPlan::Nodes list. +/// It is not needed to remove old nodes from the list. +struct Optimization +{ + using Function = size_t (*)(QueryPlan::Node *, QueryPlan::Nodes &); + const Function apply = nullptr; + const char * name; +}; + +/// Move ARRAY JOIN up if possible. +size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); + +/// Move LimitStep down if possible. +size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &); + +/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes. +size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes); + +/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep +/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep +size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); + +inline const auto & getOptimizations() +{ + static const std::array optimizations = + {{ + {tryLiftUpArrayJoin, "liftUpArrayJoin"}, + {tryPushDownLimit, "pushDownLimit"}, + {trySplitFilter, "splitFilter"}, + {tryMergeExpressions, "mergeExpressions"}, + }}; + + return optimizations; +} + +} + +} diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp new file mode 100644 index 00000000000..e20c5f93d6e --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include + +namespace DB::QueryPlanOptimizations +{ + +size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) +{ + if (parent_node->children.size() != 1) + return 0; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + auto * expression_step = typeid_cast(parent.get()); + auto * filter_step = typeid_cast(parent.get()); + auto * array_join_step = typeid_cast(child.get()); + + if (!(expression_step || filter_step) || !array_join_step) + return 0; + + const auto & array_join = array_join_step->arrayJoin(); + const auto & expression = expression_step ? expression_step->getExpression() + : filter_step->getExpression(); + + auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns); + + /// No actions can be moved before ARRAY JOIN. + if (split_actions.first->trivial()) + return 0; + + auto description = parent->getStepDescription(); + + /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin. + if (split_actions.second->trivial()) + { + auto expected_header = parent->getOutputStream().header; + + /// Expression/Filter -> ArrayJoin + std::swap(parent, child); + /// ArrayJoin -> Expression/Filter + + if (expression_step) + child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), + std::move(split_actions.first)); + else + child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), + std::move(split_actions.first), + filter_step->getFilterColumnName(), + filter_step->removesFilterColumn()); + + child->setStepDescription(std::move(description)); + + array_join_step->updateInputStream(child->getOutputStream(), expected_header); + return 2; + } + + /// Add new expression step before ARRAY JOIN. + /// Expression/Filter -> ArrayJoin -> Something + auto & node = nodes.emplace_back(); + node.children.swap(child_node->children); + child_node->children.emplace_back(&node); + /// Expression/Filter -> ArrayJoin -> node -> Something + + node.step = std::make_unique(node.children.at(0)->step->getOutputStream(), + std::move(split_actions.first)); + node.step->setStepDescription(description); + array_join_step->updateInputStream(node.step->getOutputStream(), {}); + + if (expression_step) + parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second); + else + parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second, + filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); + + parent->setStepDescription(description + " [split]"); + return 3; +} + +} diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp new file mode 100644 index 00000000000..01af6a2bbde --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB::QueryPlanOptimizations +{ + +/// If plan looks like Limit -> Sorting, update limit for Sorting +static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit) +{ + if (limit == 0) + return false; + + QueryPlanStepPtr & step = node->step; + QueryPlan::Node * child = nullptr; + bool updated = false; + + if (auto * merging_sorted = typeid_cast(step.get())) + { + /// TODO: remove LimitStep here. + merging_sorted->updateLimit(limit); + updated = true; + child = node->children.front(); + } + else if (auto * finish_sorting = typeid_cast(step.get())) + { + /// TODO: remove LimitStep here. + finish_sorting->updateLimit(limit); + updated = true; + } + else if (auto * merge_sorting = typeid_cast(step.get())) + { + merge_sorting->updateLimit(limit); + updated = true; + child = node->children.front(); + } + else if (auto * partial_sorting = typeid_cast(step.get())) + { + partial_sorting->updateLimit(limit); + updated = true; + } + + /// We often have chain PartialSorting -> MergeSorting -> MergingSorted + /// Try update limit for them also if possible. + if (child) + tryUpdateLimitForSortingSteps(child, limit); + + return updated; +} + +size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &) +{ + if (parent_node->children.size() != 1) + return 0; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + auto * limit = typeid_cast(parent.get()); + + if (!limit) + return 0; + + /// Skip LIMIT WITH TIES by now. + if (limit->withTies()) + return 0; + + const auto * transforming = dynamic_cast(child.get()); + + /// Skip everything which is not transform. + if (!transforming) + return 0; + + /// Special cases for sorting steps. + if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting())) + return 0; + + /// Special case for TotalsHaving. Totals may be incorrect if we push down limit. + if (typeid_cast(child.get())) + return 0; + + /// Now we should decide if pushing down limit possible for this step. + + const auto & transform_traits = transforming->getTransformTraits(); + const auto & data_stream_traits = transforming->getDataStreamTraits(); + + /// Cannot push down if child changes the number of rows. + if (!transform_traits.preserves_number_of_rows) + return 0; + + /// Cannot push down if data was sorted exactly by child stream. + if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting) + return 0; + + /// Now we push down limit only if it doesn't change any stream properties. + /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info. + if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream())) + return 0; + + /// Input stream for Limit have changed. + limit->updateInputStream(transforming->getInputStreams().front()); + + parent.swap(child); + return 2; +} + +} diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp new file mode 100644 index 00000000000..dfd15a2a929 --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include + +namespace DB::QueryPlanOptimizations +{ + +size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) +{ + if (parent_node->children.size() != 1) + return false; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + + auto * parent_expr = typeid_cast(parent.get()); + auto * parent_filter = typeid_cast(parent.get()); + auto * child_expr = typeid_cast(child.get()); + + if (parent_expr && child_expr) + { + const auto & child_actions = child_expr->getExpression(); + const auto & parent_actions = parent_expr->getExpression(); + + /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them. + /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2])) + /// Such a query will return two zeroes if we combine actions together. + if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) + return 0; + + auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); + + auto expr = std::make_unique(child_expr->getInputStreams().front(), merged); + expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); + + parent_node->step = std::move(expr); + parent_node->children.swap(child_node->children); + return 1; + } + else if (parent_filter && child_expr) + { + const auto & child_actions = child_expr->getExpression(); + const auto & parent_actions = parent_filter->getExpression(); + + if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) + return 0; + + auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); + + auto filter = std::make_unique(child_expr->getInputStreams().front(), merged, + parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn()); + filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); + + parent_node->step = std::move(filter); + parent_node->children.swap(child_node->children); + return 1; + } + + return 0; +} + +} diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp new file mode 100644 index 00000000000..e5ccc173ed8 --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -0,0 +1,75 @@ +#include +#include + +namespace DB::QueryPlanOptimizations +{ + +void optimizeTree(QueryPlan::Node & root, QueryPlan::Nodes & nodes) +{ + const auto & optimizations = getOptimizations(); + + struct Frame + { + QueryPlan::Node * node; + + /// If not zero, traverse only depth_limit layers of tree (if no other optimizations happen). + /// Otherwise, traverse all children. + size_t depth_limit = 0; + + /// Next child to process. + size_t next_child = 0; + }; + + std::stack stack; + stack.push(Frame{.node = &root}); + + while (!stack.empty()) + { + auto & frame = stack.top(); + + /// If traverse_depth_limit == 0, then traverse without limit (first entrance) + /// If traverse_depth_limit > 1, then traverse with (limit - 1) + if (frame.depth_limit != 1) + { + /// Traverse all children first. + if (frame.next_child < frame.node->children.size()) + { + stack.push(Frame + { + .node = frame.node->children[frame.next_child], + .depth_limit = frame.depth_limit ? (frame.depth_limit - 1) : 0, + }); + + ++frame.next_child; + continue; + } + } + + size_t max_update_depth = 0; + + /// Apply all optimizations. + for (const auto & optimization : optimizations) + { + /// Just in case, skip optimization if it is not initialized. + if (!optimization.apply) + continue; + + /// Try to apply optimization. + auto update_depth = optimization.apply(frame.node, nodes); + max_update_depth = std::max(max_update_depth, update_depth); + } + + /// Traverse `max_update_depth` layers of tree again. + if (max_update_depth) + { + frame.depth_limit = max_update_depth; + frame.next_child = 0; + continue; + } + + /// Nothing was applied. + stack.pop(); + } +} + +} diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp new file mode 100644 index 00000000000..38ba8f25b24 --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include + +namespace DB::QueryPlanOptimizations +{ + +/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes. +size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) +{ + auto * filter_step = typeid_cast(node->step.get()); + if (!filter_step) + return 0; + + const auto & expr = filter_step->getExpression(); + + /// Do not split if there are function like runningDifference. + if (expr->hasStatefulFunctions()) + return 0; + + auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName()); + + if (split.second->trivial()) + return 0; + + if (filter_step->removesFilterColumn()) + split.second->removeUnusedInput(filter_step->getFilterColumnName()); + + auto description = filter_step->getStepDescription(); + + auto & filter_node = nodes.emplace_back(); + node->children.swap(filter_node.children); + node->children.push_back(&filter_node); + + filter_node.step = std::make_unique( + filter_node.children.at(0)->step->getOutputStream(), + std::move(split.first), + filter_step->getFilterColumnName(), + filter_step->removesFilterColumn()); + + node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); + + filter_node.step->setStepDescription("(" + description + ")[split]"); + node->step->setStepDescription(description); + + return 2; +} + +} diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 6fba991a56c..755944fdf9f 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -6,15 +6,7 @@ #include #include #include -#include -#include "MergingSortedStep.h" -#include "FinishSortingStep.h" -#include "MergeSortingStep.h" -#include "PartialSortingStep.h" -#include "TotalsHavingStep.h" -#include "ExpressionStep.h" -#include "ArrayJoinStep.h" -#include "FilterStep.h" +#include namespace DB { @@ -341,318 +333,9 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio } } -/// If plan looks like Limit -> Sorting, update limit for Sorting -bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit) -{ - if (limit == 0) - return false; - - QueryPlanStepPtr & step = node->step; - QueryPlan::Node * child = nullptr; - bool updated = false; - - if (auto * merging_sorted = typeid_cast(step.get())) - { - /// TODO: remove LimitStep here. - merging_sorted->updateLimit(limit); - updated = true; - child = node->children.front(); - } - else if (auto * finish_sorting = typeid_cast(step.get())) - { - /// TODO: remove LimitStep here. - finish_sorting->updateLimit(limit); - updated = true; - } - else if (auto * merge_sorting = typeid_cast(step.get())) - { - merge_sorting->updateLimit(limit); - updated = true; - child = node->children.front(); - } - else if (auto * partial_sorting = typeid_cast(step.get())) - { - partial_sorting->updateLimit(limit); - updated = true; - } - - /// We often have chain PartialSorting -> MergeSorting -> MergingSorted - /// Try update limit for them also if possible. - if (child) - tryUpdateLimitForSortingSteps(child, limit); - - return updated; -} - -/// Move LimitStep down if possible. -static void tryPushDownLimit(QueryPlanStepPtr & parent, QueryPlan::Node * child_node) -{ - auto & child = child_node->step; - auto * limit = typeid_cast(parent.get()); - - if (!limit) - return; - - /// Skip LIMIT WITH TIES by now. - if (limit->withTies()) - return; - - const auto * transforming = dynamic_cast(child.get()); - - /// Skip everything which is not transform. - if (!transforming) - return; - - /// Special cases for sorting steps. - if (tryUpdateLimitForSortingSteps(child_node, limit->getLimitForSorting())) - return; - - /// Special case for TotalsHaving. Totals may be incorrect if we push down limit. - if (typeid_cast(child.get())) - return; - - /// Now we should decide if pushing down limit possible for this step. - - const auto & transform_traits = transforming->getTransformTraits(); - const auto & data_stream_traits = transforming->getDataStreamTraits(); - - /// Cannot push down if child changes the number of rows. - if (!transform_traits.preserves_number_of_rows) - return; - - /// Cannot push down if data was sorted exactly by child stream. - if (!child->getOutputStream().sort_description.empty() && !data_stream_traits.preserves_sorting) - return; - - /// Now we push down limit only if it doesn't change any stream properties. - /// TODO: some of them may be changed and, probably, not important for following streams. We may add such info. - if (!limit->getOutputStream().hasEqualPropertiesWith(transforming->getOutputStream())) - return; - - /// Input stream for Limit have changed. - limit->updateInputStream(transforming->getInputStreams().front()); - - parent.swap(child); -} - -/// Move ARRAY JOIN up if possible. -static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * child_node, QueryPlan::Nodes & nodes) -{ - auto & parent = parent_node->step; - auto & child = child_node->step; - auto * expression_step = typeid_cast(parent.get()); - auto * filter_step = typeid_cast(parent.get()); - auto * array_join_step = typeid_cast(child.get()); - - if (!(expression_step || filter_step) || !array_join_step) - return; - - const auto & array_join = array_join_step->arrayJoin(); - const auto & expression = expression_step ? expression_step->getExpression() - : filter_step->getExpression(); - - auto split_actions = expression->splitActionsBeforeArrayJoin(array_join->columns); - - /// No actions can be moved before ARRAY JOIN. - if (split_actions.first->empty()) - return; - - auto description = parent->getStepDescription(); - - /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin. - if (split_actions.second->empty()) - { - auto expected_header = parent->getOutputStream().header; - - /// Expression/Filter -> ArrayJoin - std::swap(parent, child); - /// ArrayJoin -> Expression/Filter - - if (expression_step) - child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), - std::move(split_actions.first)); - else - child = std::make_unique(child_node->children.at(0)->step->getOutputStream(), - std::move(split_actions.first), - filter_step->getFilterColumnName(), - filter_step->removesFilterColumn()); - - child->setStepDescription(std::move(description)); - - array_join_step->updateInputStream(child->getOutputStream(), expected_header); - return; - } - - /// Add new expression step before ARRAY JOIN. - /// Expression/Filter -> ArrayJoin -> Something - auto & node = nodes.emplace_back(); - node.children.swap(child_node->children); - child_node->children.emplace_back(&node); - /// Expression/Filter -> ArrayJoin -> node -> Something - - node.step = std::make_unique(node.children.at(0)->step->getOutputStream(), - std::move(split_actions.first)); - node.step->setStepDescription(description); - array_join_step->updateInputStream(node.step->getOutputStream(), {}); - - if (expression_step) - parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second); - else - parent = std::make_unique(array_join_step->getOutputStream(), split_actions.second, - filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); - - parent->setStepDescription(description + " [split]"); -} - -/// Replace chain `ExpressionStep -> ExpressionStep` to single ExpressionStep -/// Replace chain `FilterStep -> ExpressionStep` to single FilterStep -static bool tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Node * child_node) -{ - auto & parent = parent_node->step; - auto & child = child_node->step; - - auto * parent_expr = typeid_cast(parent.get()); - auto * parent_filter = typeid_cast(parent.get()); - auto * child_expr = typeid_cast(child.get()); - - if (parent_expr && child_expr) - { - const auto & child_actions = child_expr->getExpression(); - const auto & parent_actions = parent_expr->getExpression(); - - /// We cannot combine actions with arrayJoin and stateful function because we not always can reorder them. - /// Example: select rowNumberInBlock() from (select arrayJoin([1, 2])) - /// Such a query will return two zeroes if we combine actions together. - if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) - return false; - - auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); - - auto expr = std::make_unique(child_expr->getInputStreams().front(), merged); - expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); - - parent_node->step = std::move(expr); - parent_node->children.swap(child_node->children); - return true; - } - else if (parent_filter && child_expr) - { - const auto & child_actions = child_expr->getExpression(); - const auto & parent_actions = parent_filter->getExpression(); - - if (child_actions->hasArrayJoin() && parent_actions->hasStatefulFunctions()) - return false; - - auto merged = ActionsDAG::merge(std::move(*child_actions), std::move(*parent_actions)); - - auto filter = std::make_unique(child_expr->getInputStreams().front(), merged, - parent_filter->getFilterColumnName(), parent_filter->removesFilterColumn()); - filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_expr->getStepDescription() + ")"); - - parent_node->step = std::move(filter); - parent_node->children.swap(child_node->children); - return true; - } - - return false; -} - -/// Split FilterStep into chain `ExpressionStep -> FilterStep`, where FilterStep contains minimal number of nodes. -static bool trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) -{ - auto * filter_step = typeid_cast(node->step.get()); - if (!filter_step) - return false; - - const auto & expr = filter_step->getExpression(); - - /// Do not split if there are function like runningDifference. - if (expr->hasStatefulFunctions()) - return false; - - auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName()); - - if (split.second->empty()) - return false; - - if (filter_step->removesFilterColumn()) - split.second->removeUnusedInput(filter_step->getFilterColumnName()); - - auto description = filter_step->getStepDescription(); - - auto & filter_node = nodes.emplace_back(); - node->children.swap(filter_node.children); - node->children.push_back(&filter_node); - - filter_node.step = std::make_unique( - filter_node.children.at(0)->step->getOutputStream(), - std::move(split.first), - filter_step->getFilterColumnName(), - filter_step->removesFilterColumn()); - - node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); - - filter_node.step->setStepDescription("(" + description + ")[split]"); - node->step->setStepDescription(description); - - return true; -} - void QueryPlan::optimize() { - /* Stack contains info for every nodes in the path from tree root to the current node. - * Every optimization changes only current node and it's children. - * Optimization may change QueryPlanStep, but not QueryPlan::Node (only add a new one). - * So, QueryPlan::Node::children will be always valid. - */ - - struct Frame - { - Node * node; - size_t next_child = 0; - }; - - std::stack stack; - stack.push(Frame{.node = root}); - - while (!stack.empty()) - { - auto & frame = stack.top(); - - if (frame.next_child == 0) - { - if (frame.node->children.size() == 1) - { - tryPushDownLimit(frame.node->step, frame.node->children.front()); - - while (tryMergeExpressions(frame.node, frame.node->children.front())); - - if (frame.node->children.size() == 1) - tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes); - - trySplitFilter(frame.node, nodes); - } - } - - if (frame.next_child < frame.node->children.size()) - { - stack.push(Frame{frame.node->children[frame.next_child]}); - ++frame.next_child; - } - else - { - if (frame.node->children.size() == 1) - { - while (tryMergeExpressions(frame.node, frame.node->children.front())); - - trySplitFilter(frame.node, nodes); - - tryLiftUpArrayJoin(frame.node, frame.node->children.front(), nodes); - } - - stack.pop(); - } - } + QueryPlanOptimizations::optimizeTree(*root, nodes); } } diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 2eb27be8899..1c6f0cf1f7c 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -116,6 +116,11 @@ SRCS( QueryPlan/MergingFinal.cpp QueryPlan/MergingSortedStep.cpp QueryPlan/OffsetStep.cpp + QueryPlan/Optimizations/liftUpArrayJoin.cpp + QueryPlan/Optimizations/limitPushDown.cpp + QueryPlan/Optimizations/mergeExpressions.cpp + QueryPlan/Optimizations/optimizeTree.cpp + QueryPlan/Optimizations/splitFilter.cpp QueryPlan/PartialSortingStep.cpp QueryPlan/QueryPlan.cpp QueryPlan/ReadFromPreparedSource.cpp