NOTE(review): the copy of this patch under review had lost every template argument list (e.g. `typeid_cast(...)`, `std::vector dag_stack`).
They are reconstructed below from the surrounding declarations; the step types in the union/join check and the constness on removed lines are presumed - confirm against the repository.

diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
index 6e44067411f..6d14a8c3d9c 100644
--- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
+++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
@@ -36,97 +36,122 @@ namespace
         }
         return non_const_columns;
     }
+
+    /// Checks whether the DISTINCT in `distinct_node` repeats a DISTINCT executed deeper in the plan.
+    /// Descends through first children only, collecting expression/filter DAGs on the way,
+    /// and gives up at union/join-like steps. Returns true only if the nearest inner DISTINCT
+    /// is not preliminary and has the same set of columns as the outer one
+    /// (outer column names are mapped back to inner ones through the collected DAGs).
+    bool canRemoveDistinct(const QueryPlan::Node * distinct_node)
+    {
+        const DistinctStep * distinct_step = typeid_cast<const DistinctStep *>(distinct_node->step.get());
+        chassert(distinct_step);
+
+        std::vector<ActionsDAGPtr> dag_stack;
+        const DistinctStep * inner_distinct_step = nullptr;
+        const QueryPlan::Node * node = distinct_node;
+        while (!node->children.empty())
+        {
+            const IQueryPlanStep * current_step = node->step.get();
+
+            /// don't try to remove DISTINCT after union or join
+            if (typeid_cast<const UnionStep *>(current_step) || typeid_cast<const JoinStep *>(current_step)
+                || typeid_cast<const IntersectOrExceptStep *>(current_step))
+                break;
+
+            if (const auto * const expr = typeid_cast<const ExpressionStep *>(current_step); expr)
+                dag_stack.push_back(expr->getExpression());
+            if (const auto * const filter = typeid_cast<const FilterStep *>(current_step); filter)
+                dag_stack.push_back(filter->getExpression());
+
+            node = node->children.front();
+            inner_distinct_step = typeid_cast<const DistinctStep *>(node->step.get());
+            if (inner_distinct_step)
+                break;
+        }
+        if (!inner_distinct_step)
+            return false;
+
+        /// possible cases (outer distinct -> inner distinct):
+        /// final -> preliminary => do nothing
+        /// preliminary -> final => try remove preliminary
+        /// final -> final => try remove final
+        /// preliminary -> preliminary => logical error?
+        if (inner_distinct_step->isPreliminary())
+            return false;
+
+        const auto distinct_columns = getDistinctColumns(distinct_step);
+        auto inner_distinct_columns = getDistinctColumns(inner_distinct_step);
+        if (distinct_columns.size() != inner_distinct_columns.size())
+            return false;
+
+        ActionsDAGPtr path_actions;
+        if (!dag_stack.empty())
+        {
+            /// build actions DAG to find original column names
+            /// clone the first DAG as well: mergeInplace() below must not modify the DAG that still belongs to a plan step
+            path_actions = dag_stack.back()->clone();
+            dag_stack.pop_back();
+            while (!dag_stack.empty())
+            {
+                ActionsDAGPtr clone = dag_stack.back()->clone();
+                dag_stack.pop_back();
+                path_actions->mergeInplace(std::move(*clone));
+            }
+
+            logActionsDAG("merged DAG:\n{}", path_actions);
+
+            /// compare columns of two DISTINCTs
+            for (const auto & column : distinct_columns)
+            {
+                const auto * alias_node = path_actions->getOriginalNodeForOutputAlias(String(column));
+                if (!alias_node)
+                    return false;
+
+                auto it = inner_distinct_columns.find(alias_node->result_name);
+                if (it == inner_distinct_columns.end())
+                    return false;
+
+                inner_distinct_columns.erase(it);
+            }
+        }
+        else
+        {
+            if (distinct_columns != inner_distinct_columns)
+                return false;
+        }
+
+        return true;
+    }
 }
 
+///
+/// DISTINCT is redundant if DISTINCT on the same columns was executed before
+/// Trivial example: SELECT DISTINCT * FROM (SELECT DISTINCT * FROM numbers(3))
+/// Every child of parent_node is checked; a redundant DISTINCT child is replaced in place by its own first child.
+/// Returns non-zero if at least one DISTINCT was removed, 0 otherwise.
+///
 size_t tryRemoveRedundantDistinct(QueryPlan::Node * parent_node, QueryPlan::Nodes & /* nodes*/)
 {
-    if (parent_node->children.empty())
-        return 0;
-
-    /// check if it is preliminary distinct node
-    QueryPlan::Node * distinct_node = parent_node->children.front();
-    DistinctStep * distinct_step = typeid_cast<DistinctStep *>(distinct_node->step.get());
-    if (!distinct_step)
-        return 0;
-
-    std::vector<ActionsDAGPtr> dag_stack;
-    const DistinctStep * inner_distinct_step = nullptr;
-    QueryPlan::Node * node = distinct_node;
-    while (!node->children.empty())
+    bool applied = false;
+    /// iterate by reference: the slot of the visited child (not always children[0]) is overwritten below
+    for (auto & node : parent_node->children)
     {
-        const IQueryPlanStep * current_step = node->step.get();
+        /// check if it is distinct node
+        const DistinctStep * distinct_step = typeid_cast<const DistinctStep *>(node->step.get());
+        if (!distinct_step)
+            continue;
 
-        /// don't try to remove DISTINCT after union or join
-        if (typeid_cast<const UnionStep *>(current_step) || typeid_cast<const JoinStep *>(current_step)
-            || typeid_cast<const IntersectOrExceptStep *>(current_step))
-            break;
-
-        if (const auto * const expr = typeid_cast<const ExpressionStep *>(current_step); expr)
-            dag_stack.push_back(expr->getExpression());
-        if (const auto * const filter = typeid_cast<const FilterStep *>(current_step); filter)
-            dag_stack.push_back(filter->getExpression());
-
-        node = node->children.front();
-        inner_distinct_step = typeid_cast<DistinctStep *>(node->step.get());
-        if (inner_distinct_step)
-            break;
-    }
-    if (!inner_distinct_step)
-        return 0;
-
-    /// possible cases (outer distinct -> inner distinct):
-    /// final -> preliminary => do nothing
-    /// preliminary -> final => try remove preliminary
-    /// final -> final => try remove final
-    /// preliminary -> preliminary => logical error?
-    if (inner_distinct_step->isPreliminary())
-        return 0;
-
-    const auto distinct_columns = getDistinctColumns(distinct_step);
-    auto inner_distinct_columns = getDistinctColumns(inner_distinct_step);
-    if (distinct_columns.size() != inner_distinct_columns.size())
-        return 0;
-
-    ActionsDAGPtr path_actions;
-    if (!dag_stack.empty())
-    {
-        /// build actions DAG to find original column names
-        path_actions = dag_stack.back();
-        dag_stack.pop_back();
-        while (!dag_stack.empty())
+        if (canRemoveDistinct(node))
         {
-            ActionsDAGPtr clone = dag_stack.back()->clone();
-            dag_stack.pop_back();
-            path_actions->mergeInplace(std::move(*clone));
-        }
-
-        logActionsDAG("merged DAG:\n{}", path_actions);
-
-        /// compare columns of two DISTINCTs
-        for (const auto & column : distinct_columns)
-        {
-            const auto * alias_node = path_actions->getOriginalNodeForOutputAlias(String(column));
-            if (!alias_node)
-                return 0;
-
-            auto it = inner_distinct_columns.find(alias_node->result_name);
-            if (it == inner_distinct_columns.end())
-                return 0;
-
-            inner_distinct_columns.erase(it);
+            /// remove current distinct: put its child into the slot this distinct node occupied
+            chassert(!node->children.empty());
+            node = node->children.front();
+            applied = true;
         }
     }
-    else
-    {
-        if (distinct_columns != inner_distinct_columns)
-            return 0;
-    }
 
-    /// remove current distinct
-    chassert(!distinct_node->children.empty());
-    parent_node->children[0] = distinct_node->children.front();
-
-    return 1;
+    return applied;
 }
 
 }