From 06d112135ec0156155510653abdef429d7cb6283 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Jan 2024 16:31:10 +0000 Subject: [PATCH 01/19] Simplify prewhere push down from query plan. Try to always use it. --- src/Interpreters/ActionsDAG.cpp | 11 +- src/Interpreters/ActionsDAG.h | 9 +- src/Interpreters/InterpreterSelectQuery.cpp | 56 +- .../Optimizations/liftUpFunctions.cpp | 2 +- .../Optimizations/optimizePrewhere.cpp | 509 ++++++++++-------- .../MergeTree/MergeTreeWhereOptimizer.cpp | 27 +- .../MergeTree/MergeTreeWhereOptimizer.h | 9 +- 7 files changed, 334 insertions(+), 289 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 6512def9202..03d7e620541 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1631,7 +1631,7 @@ void ActionsDAG::mergeNodes(ActionsDAG && second) } } -ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split_nodes) const +ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split_nodes, bool create_split_nodes_mapping) const { /// Split DAG into two parts. /// (first_nodes, first_outputs) is a part which will have split_list in result. @@ -1830,7 +1830,14 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split second_actions->outputs.swap(second_outputs); second_actions->inputs.swap(second_inputs); - return {std::move(first_actions), std::move(second_actions)}; + std::unordered_map split_nodes_mapping; + if (create_split_nodes_mapping) + { + for (const auto * node : split_nodes) + split_nodes_mapping[node] = data[node].to_first; + } + + return {std::move(first_actions), std::move(second_actions), std::move(split_nodes_mapping)}; } ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 45f6e5cc717..04683832c6d 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -326,13 +326,18 @@ public: /// Merge current nodes with specified dag nodes void mergeNodes(ActionsDAG && second); - using SplitResult = std::pair; + struct SplitResult + { + ActionsDAGPtr first; + ActionsDAGPtr second; + std::unordered_map split_nodes_mapping; + }; /// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children. /// Execution of first then second parts on block is equivalent to execution of initial DAG. /// First DAG and initial DAG have equal inputs, second DAG and initial DAG has equal outputs. /// Second DAG inputs may contain less inputs then first DAG (but also include other columns). - SplitResult split(std::unordered_set split_nodes) const; + SplitResult split(std::unordered_set split_nodes, bool create_split_nodes_mapping = false) const; /// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN. SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d0cf9f1160c..187518b9f6c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -600,7 +600,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query.setFinal(); } - auto analyze = [&] (bool try_move_to_prewhere) + auto analyze = [&] (bool) { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; @@ -632,37 +632,37 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere - && storage && storage->canMoveConditionsToPrewhere() - && query.where() && !query.prewhere() - && !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not. - { - /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable - if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) - { - /// Extract column compressed sizes. - std::unordered_map column_compressed_sizes; - for (const auto & [name, sizes] : column_sizes) - column_compressed_sizes[name] = sizes.data_compressed; + // if (try_move_to_prewhere + // && storage && storage->canMoveConditionsToPrewhere() + // && query.where() && !query.prewhere() + // && !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not. + // { + // /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable + // if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) + // { + // /// Extract column compressed sizes. + // std::unordered_map column_compressed_sizes; + // for (const auto & [name, sizes] : column_sizes) + // column_compressed_sizes[name] = sizes.data_compressed; - SelectQueryInfo current_info; - current_info.query = query_ptr; - current_info.syntax_analyzer_result = syntax_analyzer_result; + // SelectQueryInfo current_info; + // current_info.query = query_ptr; + // current_info.syntax_analyzer_result = syntax_analyzer_result; - Names queried_columns = syntax_analyzer_result->requiredSourceColumns(); - const auto & supported_prewhere_columns = storage->supportedPrewhereColumns(); + // Names queried_columns = syntax_analyzer_result->requiredSourceColumns(); + // const auto & supported_prewhere_columns = storage->supportedPrewhereColumns(); - MergeTreeWhereOptimizer where_optimizer{ - std::move(column_compressed_sizes), - metadata_snapshot, - storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context), - queried_columns, - supported_prewhere_columns, - log}; + // MergeTreeWhereOptimizer where_optimizer{ + // std::move(column_compressed_sizes), + // metadata_snapshot, + // storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context), + // queried_columns, + // supported_prewhere_columns, + // log}; - where_optimizer.optimize(current_info, context); - } - } + // where_optimizer.optimize(current_info, context); + // } + // } if (query.prewhere() && query.where()) { diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp index 34a1fc2bb88..3fc2d64b11f 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -66,7 +66,7 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: NameSet sort_columns; for (const auto & col : sorting_step->getSortDescription()) sort_columns.insert(col.column_name); - auto [needed_for_sorting, unneeded_for_sorting] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns); + auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns); // No calculations can be postponed. if (unneeded_for_sorting->trivial()) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 7902b36f80e..b2ac34b4b24 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -5,7 +5,8 @@ #include #include #include -#include +#include "Functions/FunctionsLogical.h" +#include "Functions/IFunctionAdaptors.h" namespace DB { @@ -15,58 +16,58 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -namespace -{ +// namespace +// { -void matchDAGOutputNodesOrderWithHeader(ActionsDAGPtr & actions_dag, const Block & expected_header) -{ - std::unordered_map output_name_to_node; - for (const auto * output_node : actions_dag->getOutputs()) - output_name_to_node.emplace(output_node->result_name, output_node); +// void matchDAGOutputNodesOrderWithHeader(ActionsDAGPtr & actions_dag, const Block & expected_header) +// { +// std::unordered_map output_name_to_node; +// for (const auto * output_node : actions_dag->getOutputs()) +// output_name_to_node.emplace(output_node->result_name, output_node); - std::unordered_set used_output_nodes; +// std::unordered_set used_output_nodes; - ActionsDAG::NodeRawConstPtrs updated_outputs; - updated_outputs.reserve(expected_header.columns()); +// ActionsDAG::NodeRawConstPtrs updated_outputs; +// updated_outputs.reserve(expected_header.columns()); - for (const auto & column : expected_header) - { - auto output_node_it = output_name_to_node.find(column.name); - if (output_node_it == output_name_to_node.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid move to PREWHERE optimization. Cannot find column {} in output", - column.name); +// for (const auto & column : expected_header) +// { +// auto output_node_it = output_name_to_node.find(column.name); +// if (output_node_it == output_name_to_node.end()) +// throw Exception(ErrorCodes::LOGICAL_ERROR, +// "Invalid move to PREWHERE optimization. Cannot find column {} in output", +// column.name); - updated_outputs.push_back(output_node_it->second); - used_output_nodes.insert(output_node_it->second); - } +// updated_outputs.push_back(output_node_it->second); +// used_output_nodes.insert(output_node_it->second); +// } - ActionsDAG::NodeRawConstPtrs unused_outputs; - for (const auto * output_node : actions_dag->getOutputs()) - { - if (used_output_nodes.contains(output_node)) - continue; +// ActionsDAG::NodeRawConstPtrs unused_outputs; +// for (const auto * output_node : actions_dag->getOutputs()) +// { +// if (used_output_nodes.contains(output_node)) +// continue; - unused_outputs.push_back(output_node); - } +// unused_outputs.push_back(output_node); +// } - auto & actions_dag_outputs = actions_dag->getOutputs(); - actions_dag_outputs = std::move(updated_outputs); - actions_dag_outputs.insert(actions_dag_outputs.end(), unused_outputs.begin(), unused_outputs.end()); -} +// auto & actions_dag_outputs = actions_dag->getOutputs(); +// actions_dag_outputs = std::move(updated_outputs); +// actions_dag_outputs.insert(actions_dag_outputs.end(), unused_outputs.begin(), unused_outputs.end()); +// } -} +// } namespace QueryPlanOptimizations { -void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) +void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) { if (stack.size() < 3) return; - const auto & frame = stack.back(); + auto & frame = stack.back(); /** Assume that on stack there are at least 3 nodes: * @@ -82,7 +83,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) if (storage_prewhere_info && storage_prewhere_info->prewhere_actions) return; - const QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node; + QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node; const auto * filter_step = typeid_cast(filter_node->step.get()); if (!filter_step) return; @@ -92,40 +93,40 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) * Collect input node to output nodes mapping. */ ColumnsWithTypeAndName required_columns_after_filter; - std::unordered_set output_nodes_mapped_to_input; - std::unordered_map> input_node_to_output_names; + // std::unordered_set output_nodes_mapped_to_input; + // std::unordered_map> input_node_to_output_names; - for (const auto * output_node : filter_step->getExpression()->getOutputs()) - { - const auto * node_without_alias = output_node; - while (node_without_alias->type == ActionsDAG::ActionType::ALIAS) - node_without_alias = node_without_alias->children[0]; + // for (const auto * output_node : filter_step->getExpression()->getOutputs()) + // { + // const auto * node_without_alias = output_node; + // while (node_without_alias->type == ActionsDAG::ActionType::ALIAS) + // node_without_alias = node_without_alias->children[0]; - if (node_without_alias->type == ActionsDAG::ActionType::INPUT) - { - output_nodes_mapped_to_input.emplace(output_node->result_name); + // if (node_without_alias->type == ActionsDAG::ActionType::INPUT) + // { + // output_nodes_mapped_to_input.emplace(output_node->result_name); - auto output_names_it = input_node_to_output_names.find(node_without_alias->result_name); - if (output_names_it == input_node_to_output_names.end()) - { - auto [insert_it, _] = input_node_to_output_names.emplace(node_without_alias->result_name, std::vector()); - output_names_it = insert_it; - } + // auto output_names_it = input_node_to_output_names.find(node_without_alias->result_name); + // if (output_names_it == input_node_to_output_names.end()) + // { + // auto [insert_it, _] = input_node_to_output_names.emplace(node_without_alias->result_name, std::vector()); + // output_names_it = insert_it; + // } - output_names_it->second.push_back(output_node->result_name); - } + // output_names_it->second.push_back(output_node->result_name); + // } - if (output_node->result_name == filter_step->getFilterColumnName() && filter_step->removesFilterColumn()) - continue; + // if (output_node->result_name == filter_step->getFilterColumnName() && filter_step->removesFilterColumn()) + // continue; - required_columns_after_filter.push_back(ColumnWithTypeAndName(output_node->result_type, output_node->result_name)); - } + // required_columns_after_filter.push_back(ColumnWithTypeAndName(output_node->result_type, output_node->result_name)); + // } const auto & context = read_from_merge_tree->getContext(); const auto & settings = context->getSettingsRef(); - if (!settings.allow_experimental_analyzer) - return; + // if (!settings.allow_experimental_analyzer) + // return; const auto & table_expression_modifiers = read_from_merge_tree->getQueryInfo().table_expression_modifiers; bool is_final = table_expression_modifiers && table_expression_modifiers->hasFinal(); @@ -170,7 +171,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) filter_step->getFilterColumnName(), read_from_merge_tree->getContext(), is_final); - if (!optimize_result.has_value()) + + if (!optimize_result.fully_moved_to_prewhere && optimize_result.prewhere_nodes.empty()) return; PrewhereInfoPtr prewhere_info; @@ -181,198 +183,243 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) prewhere_info->need_filter = true; - auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions; + // QueryPlan::Node * replace_old_filter_node = nullptr; + // bool remove_filter_node = false; - ActionsChain actions_chain; - - std::string prewere_filter_node_name = prewhere_filter_actions->getOutputs().at(0)->result_name; - actions_chain.addStep(std::make_unique(prewhere_filter_actions)); - - auto & filter_actions = optimize_result->filter_actions; - - /** Merge tree where optimizer splits conjunctions in filter expression into 2 parts: - * 1. Filter expressions. - * 2. Prewhere filter expressions. - * - * There can be cases when all expressions are moved to PREWHERE, but it is not - * enough to produce required filter output columns. - * - * Example: SELECT (a AND b) AS cond FROM test_table WHERE cond AND c; - * In this example condition expressions `a`, `b`, `c` can move to PREWHERE, but PREWHERE will not contain expression `and(a, b)`. - * It will contain only `a`, `b`, `c`, `and(a, b, c)` expressions. - * - * In such scenario we need to create additional step to calculate `and(a, b)` expression after PREWHERE. - */ - bool need_additional_filter_after_prewhere = false; - - if (!filter_actions) + if (!optimize_result.fully_moved_to_prewhere) { - /// Any node from PREWHERE filter actions can be used as possible output node - std::unordered_set possible_prewhere_output_nodes; - for (const auto & node : prewhere_filter_actions->getNodes()) - possible_prewhere_output_nodes.insert(node.result_name); + auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true); + ActionsDAG::NodeRawConstPtrs conditions; + conditions.reserve(split_result.split_nodes_mapping.size()); + for (const auto * condition : optimize_result.prewhere_nodes) + conditions.push_back(split_result.split_nodes_mapping.at(condition)); - for (auto & required_column : required_columns_after_filter) + prewhere_info->prewhere_actions = std::move(split_result.first); + prewhere_info->remove_prewhere_column = true; + + if (conditions.size() == 1) { - if (!possible_prewhere_output_nodes.contains(required_column.name) && - !output_nodes_mapped_to_input.contains(required_column.name)) + for (const auto * output : prewhere_info->prewhere_actions->getOutputs()) { - need_additional_filter_after_prewhere = true; - break; - } - } - } - - /** If there are additional filter actions after PREWHERE filter actions, we create filter actions dag using PREWHERE filter - * actions output columns as filter actions dag input columns. - * Then we merge this filter actions dag nodes with old filter step actions dag nodes, to reuse some expressions from - * PREWHERE filter actions. - */ - if (need_additional_filter_after_prewhere || filter_actions) - { - auto merged_filter_actions = std::make_shared(actions_chain.getLastStepAvailableOutputColumns()); - merged_filter_actions->getOutputs().clear(); - merged_filter_actions->mergeNodes(std::move(*filter_step->getExpression()->clone())); - - /// Add old filter step filter column to outputs - for (const auto & node : merged_filter_actions->getNodes()) - { - if (node.result_name == filter_step->getFilterColumnName()) - { - merged_filter_actions->getOutputs().push_back(&node); - break; - } - } - - filter_actions = std::move(merged_filter_actions); - - /// If there is filter after PREWHERE, we can ignore filtering during PREWHERE stage - prewhere_info->need_filter = false; - - actions_chain.addStep(std::make_unique(filter_actions)); - } - - auto required_output_actions = std::make_shared(required_columns_after_filter); - actions_chain.addStep(std::make_unique(required_output_actions)); - - actions_chain.finalize(); - - prewhere_filter_actions->projectInput(false); - - auto & prewhere_actions_chain_node = actions_chain[0]; - prewhere_info->prewhere_actions = std::move(prewhere_filter_actions); - prewhere_info->prewhere_column_name = prewere_filter_node_name; - prewhere_info->remove_prewhere_column = !prewhere_actions_chain_node->getChildRequiredOutputColumnsNames().contains(prewere_filter_node_name); - - read_from_merge_tree->updatePrewhereInfo(prewhere_info); - - QueryPlan::Node * replace_old_filter_node = nullptr; - bool remove_filter_node = false; - - if (filter_actions) - { - filter_actions->projectInput(false); - - /// Match dag output nodes with old filter step header - matchDAGOutputNodesOrderWithHeader(filter_actions, filter_step->getOutputStream().header); - - auto & filter_actions_chain_node = actions_chain[1]; - bool remove_filter_column = !filter_actions_chain_node->getChildRequiredOutputColumnsNames().contains(filter_step->getFilterColumnName()); - auto after_prewhere_filter_step = std::make_unique(read_from_merge_tree->getOutputStream(), - filter_actions, - filter_step->getFilterColumnName(), - remove_filter_column); - - auto & node = nodes.emplace_back(); - node.children.emplace_back(frame.node); - node.step = std::move(after_prewhere_filter_step); - - replace_old_filter_node = &node; - } - else - { - auto rename_actions_dag = std::make_shared(read_from_merge_tree->getOutputStream().header.getColumnsWithTypeAndName()); - bool apply_rename_step = false; - - ActionsDAG::NodeRawConstPtrs updated_outputs; - - /** If in output after read from merge tree there are column names without aliases, - * apply old filter step aliases to them. - */ - for (const auto * output_node : rename_actions_dag->getOutputs()) - { - const auto alias_it = input_node_to_output_names.find(output_node->result_name); - if (alias_it == input_node_to_output_names.end()) - { - updated_outputs.push_back(output_node); - continue; + if (output == conditions.front()) + prewhere_info->remove_prewhere_column = false; } - for (auto & output_name : alias_it->second) - { - if (output_name == output_node->result_name) - { - updated_outputs.push_back(output_node); - continue; - } - - updated_outputs.push_back(&rename_actions_dag->addAlias(*output_node, output_name)); - apply_rename_step = true; - } - } - - rename_actions_dag->getOutputs() = std::move(updated_outputs); - - bool apply_match_step = false; - - /// If column order does not match old filter step column order, match dag output nodes with header - if (!blocksHaveEqualStructure(read_from_merge_tree->getOutputStream().header, filter_step->getOutputStream().header)) - { - apply_match_step = true; - matchDAGOutputNodesOrderWithHeader(rename_actions_dag, filter_step->getOutputStream().header); - } - - if (apply_rename_step || apply_match_step) - { - auto rename_step = std::make_unique(read_from_merge_tree->getOutputStream(), rename_actions_dag); - if (apply_rename_step) - rename_step->setStepDescription("Change column names to column identifiers"); - - auto & node = nodes.emplace_back(); - node.children.emplace_back(frame.node); - node.step = std::move(rename_step); - - replace_old_filter_node = &node; + prewhere_info->prewhere_column_name = conditions.front()->result_name; } else { - replace_old_filter_node = frame.node; - remove_filter_node = true; + + FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); + const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {}); + prewhere_info->prewhere_column_name = node->result_name; + prewhere_info->prewhere_actions->getOutputs().push_back(node); } + + read_from_merge_tree->updatePrewhereInfo(prewhere_info); + filter_node->step = std::make_unique( + read_from_merge_tree->getOutputStream(), + std::move(split_result.second), + filter_step->getFilterColumnName(), + filter_step->removesFilterColumn()); + + return; } + prewhere_info->prewhere_actions = filter_step->getExpression(); + prewhere_info->prewhere_column_name = filter_step->getFilterColumnName(); + prewhere_info->remove_prewhere_column = filter_step->removesFilterColumn(); + + read_from_merge_tree->updatePrewhereInfo(prewhere_info); + + // replace_old_filter_node = frame.node; + // remove_filter_node = true; + + + + // auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions; + + // ActionsChain actions_chain; + + // std::string prewere_filter_node_name = prewhere_filter_actions->getOutputs().at(0)->result_name; + // actions_chain.addStep(std::make_unique(prewhere_filter_actions)); + + // auto & filter_actions = optimize_result->filter_actions; + + // /** Merge tree where optimizer splits conjunctions in filter expression into 2 parts: + // * 1. Filter expressions. + // * 2. Prewhere filter expressions. + // * + // * There can be cases when all expressions are moved to PREWHERE, but it is not + // * enough to produce required filter output columns. + // * + // * Example: SELECT (a AND b) AS cond FROM test_table WHERE cond AND c; + // * In this example condition expressions `a`, `b`, `c` can move to PREWHERE, but PREWHERE will not contain expression `and(a, b)`. + // * It will contain only `a`, `b`, `c`, `and(a, b, c)` expressions. + // * + // * In such scenario we need to create additional step to calculate `and(a, b)` expression after PREWHERE. + // */ + // bool need_additional_filter_after_prewhere = false; + + // if (!filter_actions) + // { + // /// Any node from PREWHERE filter actions can be used as possible output node + // std::unordered_set possible_prewhere_output_nodes; + // for (const auto & node : prewhere_filter_actions->getNodes()) + // possible_prewhere_output_nodes.insert(node.result_name); + + // for (auto & required_column : required_columns_after_filter) + // { + // if (!possible_prewhere_output_nodes.contains(required_column.name) && + // !output_nodes_mapped_to_input.contains(required_column.name)) + // { + // need_additional_filter_after_prewhere = true; + // break; + // } + // } + // } + + // /** If there are additional filter actions after PREWHERE filter actions, we create filter actions dag using PREWHERE filter + // * actions output columns as filter actions dag input columns. + // * Then we merge this filter actions dag nodes with old filter step actions dag nodes, to reuse some expressions from + // * PREWHERE filter actions. + // */ + // if (need_additional_filter_after_prewhere || filter_actions) + // { + // auto merged_filter_actions = std::make_shared(actions_chain.getLastStepAvailableOutputColumns()); + // merged_filter_actions->getOutputs().clear(); + // merged_filter_actions->mergeNodes(std::move(*filter_step->getExpression()->clone())); + + // /// Add old filter step filter column to outputs + // for (const auto & node : merged_filter_actions->getNodes()) + // { + // if (node.result_name == filter_step->getFilterColumnName()) + // { + // merged_filter_actions->getOutputs().push_back(&node); + // break; + // } + // } + + // filter_actions = std::move(merged_filter_actions); + + // /// If there is filter after PREWHERE, we can ignore filtering during PREWHERE stage + // prewhere_info->need_filter = false; + + // actions_chain.addStep(std::make_unique(filter_actions)); + // } + + // auto required_output_actions = std::make_shared(required_columns_after_filter); + // actions_chain.addStep(std::make_unique(required_output_actions)); + + // actions_chain.finalize(); + + // prewhere_filter_actions->projectInput(false); + + // auto & prewhere_actions_chain_node = actions_chain[0]; + // prewhere_info->prewhere_actions = std::move(prewhere_filter_actions); + // prewhere_info->prewhere_column_name = prewere_filter_node_name; + // prewhere_info->remove_prewhere_column = !prewhere_actions_chain_node->getChildRequiredOutputColumnsNames().contains(prewere_filter_node_name); + + // read_from_merge_tree->updatePrewhereInfo(prewhere_info); + + // QueryPlan::Node * replace_old_filter_node = nullptr; + // bool remove_filter_node = false; + + // if (filter_actions) + // { + // filter_actions->projectInput(false); + + // /// Match dag output nodes with old filter step header + // matchDAGOutputNodesOrderWithHeader(filter_actions, filter_step->getOutputStream().header); + + // auto & filter_actions_chain_node = actions_chain[1]; + // bool remove_filter_column = !filter_actions_chain_node->getChildRequiredOutputColumnsNames().contains(filter_step->getFilterColumnName()); + // auto after_prewhere_filter_step = std::make_unique(read_from_merge_tree->getOutputStream(), + // filter_actions, + // filter_step->getFilterColumnName(), + // remove_filter_column); + + // auto & node = nodes.emplace_back(); + // node.children.emplace_back(frame.node); + // node.step = std::move(after_prewhere_filter_step); + + // replace_old_filter_node = &node; + // } + // else + // { + // auto rename_actions_dag = std::make_shared(read_from_merge_tree->getOutputStream().header.getColumnsWithTypeAndName()); + // bool apply_rename_step = false; + + // ActionsDAG::NodeRawConstPtrs updated_outputs; + + // /** If in output after read from merge tree there are column names without aliases, + // * apply old filter step aliases to them. + // */ + // for (const auto * output_node : rename_actions_dag->getOutputs()) + // { + // const auto alias_it = input_node_to_output_names.find(output_node->result_name); + // if (alias_it == input_node_to_output_names.end()) + // { + // updated_outputs.push_back(output_node); + // continue; + // } + + // for (auto & output_name : alias_it->second) + // { + // if (output_name == output_node->result_name) + // { + // updated_outputs.push_back(output_node); + // continue; + // } + + // updated_outputs.push_back(&rename_actions_dag->addAlias(*output_node, output_name)); + // apply_rename_step = true; + // } + // } + + // rename_actions_dag->getOutputs() = std::move(updated_outputs); + + // bool apply_match_step = false; + + // /// If column order does not match old filter step column order, match dag output nodes with header + // if (!blocksHaveEqualStructure(read_from_merge_tree->getOutputStream().header, filter_step->getOutputStream().header)) + // { + // apply_match_step = true; + // matchDAGOutputNodesOrderWithHeader(rename_actions_dag, filter_step->getOutputStream().header); + // } + + // if (apply_rename_step || apply_match_step) + // { + // auto rename_step = std::make_unique(read_from_merge_tree->getOutputStream(), rename_actions_dag); + // if (apply_rename_step) + // rename_step->setStepDescription("Change column names to column identifiers"); + + // auto & node = nodes.emplace_back(); + // node.children.emplace_back(frame.node); + // node.step = std::move(rename_step); + + // replace_old_filter_node = &node; + // } + // else + // { + // replace_old_filter_node = frame.node; + // remove_filter_node = true; + // } + // } + QueryPlan::Node * filter_parent_node = (stack.rbegin() + 2)->node; for (auto & filter_parent_child : filter_parent_node->children) { if (filter_parent_child == filter_node) { - filter_parent_child = replace_old_filter_node; + filter_parent_child = frame.node; size_t stack_size = stack.size(); - /** If filter step is completely replaced with PREWHERE filter actions, remove it from stack. - * Otherwise replace old filter step with new filter step after PREWHERE. - */ - if (remove_filter_node) - { - std::swap(stack[stack_size - 1], stack[stack_size - 2]); - stack.pop_back(); - } - else - { - stack[stack_size - 2] = Frame{.node = replace_old_filter_node, .next_child = 1}; - } + /// Step is completely replaced with PREWHERE filter actions, remove it from stack. + std::swap(stack[stack_size - 1], stack[stack_size - 2]); + stack.pop_back(); break; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 4aecf85ac2a..151ce7635b2 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -112,7 +112,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition \"{}\" moved to PREWHERE", select.prewhere()->formatForLogging(log_queries_cut_to_length)); } -std::optional MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag, +MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag, const std::string & filter_column_name, const ContextPtr & context, bool is_final) @@ -132,11 +132,14 @@ std::optional MergeTreeWhe if (!optimize_result) return {}; - auto filter_actions = reconstructDAG(optimize_result->where_conditions); - auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions); + if (optimize_result->where_conditions.empty()) + return {.prewhere_nodes = {}, .fully_moved_to_prewhere = true}; - FilterActionsOptimizeResult result = { std::move(filter_actions), std::move(prewhere_filter_actions) }; - return result; + std::unordered_set prewhere_conditions; + for (const auto & condition : optimize_result->prewhere_conditions) + prewhere_conditions.insert(condition.node.getDAGNode()); + + return {.prewhere_nodes = std::move(prewhere_conditions), .fully_moved_to_prewhere = false}; } static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & columns_names, NameSet & result_set, bool & has_invalid_column) @@ -343,20 +346,6 @@ ASTPtr MergeTreeWhereOptimizer::reconstructAST(const Conditions & conditions) return function; } -ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions) -{ - if (conditions.empty()) - return {}; - - ActionsDAG::NodeRawConstPtrs filter_nodes; - filter_nodes.reserve(conditions.size()); - - for (const auto & condition : conditions) - filter_nodes.push_back(condition.node.getDAGNode()); - - return ActionsDAG::buildFilterActionsDAG(filter_nodes); -} - std::optional MergeTreeWhereOptimizer::optimizeImpl(const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const { diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index b56219e3c59..84afa4cda17 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -47,11 +47,11 @@ public: struct FilterActionsOptimizeResult { - ActionsDAGPtr filter_actions; - ActionsDAGPtr prewhere_filter_actions; + std::unordered_set prewhere_nodes; + bool fully_moved_to_prewhere = false; }; - std::optional optimize(const ActionsDAGPtr & filter_dag, + FilterActionsOptimizeResult optimize(const ActionsDAGPtr & filter_dag, const std::string & filter_column_name, const ContextPtr & context, bool is_final); @@ -122,9 +122,6 @@ private: /// Reconstruct AST from conditions static ASTPtr reconstructAST(const Conditions & conditions); - /// Reconstruct DAG from conditions - static ActionsDAGPtr reconstructDAG(const Conditions & conditions); - void optimizeArbitrary(ASTSelectQuery & select) const; UInt64 getColumnsSize(const NameSet & columns) const; From fbd71ee15e22f68d4155437201a83d5c8133c203 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Jan 2024 20:46:13 +0000 Subject: [PATCH 02/19] Fixing style. --- src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index b2ac34b4b24..4cea74b9b12 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -235,8 +235,6 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) // replace_old_filter_node = frame.node; // remove_filter_node = true; - - // auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions; // ActionsChain actions_chain; From d1902cdba0b9fce3c621e1266e9e004fe2a21daf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Jan 2024 16:31:16 +0000 Subject: [PATCH 03/19] Fix some tests. --- src/Interpreters/ActionsDAG.cpp | 42 +++-- .../Optimizations/optimizePrewhere.cpp | 151 ++++++++++++------ .../QueryPlan/ReadFromMergeTree.cpp | 79 +++++++-- .../MergeTree/MergeTreeSelectProcessor.cpp | 2 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 6 +- .../02235_add_part_offset_virtual_column.sql | 4 +- 6 files changed, 208 insertions(+), 76 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 03d7e620541..5a1f9a87974 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1765,13 +1765,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split } /// Input from second DAG should also be in the first. - if (copy.type == ActionType::INPUT) - { - auto & input_copy = first_nodes.emplace_back(*cur.node); - assert(cur_data.to_first == nullptr); - cur_data.to_first = &input_copy; - new_inputs.push_back(cur.node); - } + // if (copy.type == ActionType::INPUT) + // { + // auto & input_copy = first_nodes.emplace_back(*cur.node); + // assert(cur_data.to_first == nullptr); + // cur_data.to_first = &input_copy; + // new_inputs.push_back(cur.node); + // } } else { @@ -1790,11 +1790,12 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split /// If this node is needed in result, add it as input. Node input_node; input_node.type = ActionType::INPUT; - input_node.result_type = node.result_type; - input_node.result_name = node.result_name; + input_node.result_type = cur.node->result_type; + input_node.result_name = cur.node->result_name; cur_data.to_second = &second_nodes.emplace_back(std::move(input_node)); - new_inputs.push_back(cur.node); + if (cur.node->type != ActionType::INPUT) + new_inputs.push_back(cur.node); } } } @@ -1810,14 +1811,29 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split for (const auto * input_node : inputs) { const auto & cur = data[input_node]; - first_inputs.push_back(cur.to_first); + if (cur.to_first) + { + first_inputs.push_back(cur.to_first); + + if (cur.to_second) + first_outputs.push_back(cur.to_first); + } } for (const auto * input : new_inputs) { const auto & cur = data[input]; - second_inputs.push_back(cur.to_second); - first_outputs.push_back(cur.to_first); + if (cur.to_second) + second_inputs.push_back(cur.to_second); + if (cur.to_first) + first_outputs.push_back(cur.to_first); + } + + for (const auto * input_node : inputs) + { + const auto & cur = data[input_node]; + if (cur.to_second) + second_inputs.push_back(cur.to_second); } auto first_actions = std::make_shared(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 4cea74b9b12..a9405d0cbdb 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -62,6 +62,20 @@ namespace ErrorCodes namespace QueryPlanOptimizations { +static void removeFromOutput(ActionsDAG & dag, const std::string name) +{ + const auto * node = &dag.findInOutputs(name); + auto & outputs = dag.getOutputs(); + for (size_t i = 0; i < outputs.size(); ++i) + { + if (node == outputs[i]) + { + outputs.erase(outputs.begin() + i); + return; + } + } +} + void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) { if (stack.size() < 3) @@ -172,7 +186,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) read_from_merge_tree->getContext(), is_final); - if (!optimize_result.fully_moved_to_prewhere && optimize_result.prewhere_nodes.empty()) + if (optimize_result.prewhere_nodes.empty()) return; PrewhereInfoPtr prewhere_info; @@ -182,55 +196,102 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info = std::make_shared(); prewhere_info->need_filter = true; + // std::cerr << filter_step->getExpression()->dumpDAG() << std::endl; // QueryPlan::Node * replace_old_filter_node = nullptr; // bool remove_filter_node = false; - if (!optimize_result.fully_moved_to_prewhere) - { - auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true); - ActionsDAG::NodeRawConstPtrs conditions; - conditions.reserve(split_result.split_nodes_mapping.size()); - for (const auto * condition : optimize_result.prewhere_nodes) - conditions.push_back(split_result.split_nodes_mapping.at(condition)); + auto filter_expression = filter_step->getExpression(); + const auto & filter_column_name = filter_step->getFilterColumnName(); - prewhere_info->prewhere_actions = std::move(split_result.first); + if (optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn()) + { + removeFromOutput(*filter_expression, filter_column_name); + auto & outputs = filter_expression->getOutputs(); + size_t size = outputs.size(); + outputs.insert(outputs.end(), optimize_result.prewhere_nodes.begin(), optimize_result.prewhere_nodes.end()); + filter_expression->removeUnusedActions(false); + outputs.resize(size); + } + + // std::cerr << "!!!!!!!!!!!!!!!!\n"; + + // if (!optimize_result.fully_moved_to_prewhere) + // { + auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true); + + // std::cerr << split_result.first->dumpDAG() << std::endl; + // std::cerr << split_result.second->dumpDAG() << std::endl; + + // for (const auto * input : split_result.first->getInputs()) + // std::cerr << "in 1" << input->result_name << std::endl; + // for (const auto * input : split_result.second->getInputs()) + // std::cerr << "in 2" << input->result_name << std::endl; + + ActionsDAG::NodeRawConstPtrs conditions; + conditions.reserve(split_result.split_nodes_mapping.size()); + for (const auto * condition : optimize_result.prewhere_nodes) + { + // std::cerr << ".. " << condition->result_name << std::endl; + conditions.push_back(split_result.split_nodes_mapping.at(condition)); + } + + prewhere_info->prewhere_actions = std::move(split_result.first); + prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn(); + + if (conditions.size() == 1) + { + prewhere_info->prewhere_column_name = conditions.front()->result_name; + prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front()); + } + else + { prewhere_info->remove_prewhere_column = true; - if (conditions.size() == 1) - { - for (const auto * output : prewhere_info->prewhere_actions->getOutputs()) - { - if (output == conditions.front()) - prewhere_info->remove_prewhere_column = false; - } + FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); + const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {}); + prewhere_info->prewhere_column_name = node->result_name; + prewhere_info->prewhere_actions->getOutputs().push_back(node); + } - prewhere_info->prewhere_column_name = conditions.front()->result_name; - } - else - { + // std::cerr << read_from_merge_tree->getOutputStream().header.dumpStructure() << std::endl; + // std::cerr << read_from_merge_tree->getOutputStream().header.dumpIndex() << std::endl; - FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {}); - prewhere_info->prewhere_column_name = node->result_name; - prewhere_info->prewhere_actions->getOutputs().push_back(node); - } + read_from_merge_tree->updatePrewhereInfo(prewhere_info); - read_from_merge_tree->updatePrewhereInfo(prewhere_info); + // std::cerr << read_from_merge_tree->getOutputStream().header.dumpStructure() << std::endl; + // std::cerr << read_from_merge_tree->getOutputStream().header.dumpIndex() << std::endl; + + if (!optimize_result.fully_moved_to_prewhere) + { filter_node->step = std::make_unique( read_from_merge_tree->getOutputStream(), std::move(split_result.second), filter_step->getFilterColumnName(), filter_step->removesFilterColumn()); - - return; } + else + { + // std::cerr << split_result.second->dumpDAG() << std::endl; + // std::cerr << read_from_merge_tree->getOutputStream().header.dumpStructure() << std::endl; + // std::cerr << read_from_merge_tree->getOutputStream().header.dumpIndex() << std::endl; - prewhere_info->prewhere_actions = filter_step->getExpression(); - prewhere_info->prewhere_column_name = filter_step->getFilterColumnName(); - prewhere_info->remove_prewhere_column = filter_step->removesFilterColumn(); + filter_node->step = std::make_unique( + read_from_merge_tree->getOutputStream(), + std::move(split_result.second)); + } + // return; + // } - read_from_merge_tree->updatePrewhereInfo(prewhere_info); + // std::cerr << "!!!!!!!!!!!!!!!!\n"; + + // prewhere_info->prewhere_actions = filter_step->getExpression(); + // prewhere_info->prewhere_actions->projectInput(false); + // std::cerr << prewhere_info->prewhere_actions->dumpDAG() << std::endl; + // prewhere_info->prewhere_column_name = filter_step->getFilterColumnName(); + // prewhere_info->remove_prewhere_column = filter_step->removesFilterColumn(); + + // read_from_merge_tree->updatePrewhereInfo(prewhere_info); // replace_old_filter_node = frame.node; // remove_filter_node = true; @@ -405,23 +466,23 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) // } // } - QueryPlan::Node * filter_parent_node = (stack.rbegin() + 2)->node; + // QueryPlan::Node * filter_parent_node = (stack.rbegin() + 2)->node; - for (auto & filter_parent_child : filter_parent_node->children) - { - if (filter_parent_child == filter_node) - { - filter_parent_child = frame.node; + // for (auto & filter_parent_child : filter_parent_node->children) + // { + // if (filter_parent_child == filter_node) + // { + // filter_parent_child = frame.node; - size_t stack_size = stack.size(); + // size_t stack_size = stack.size(); - /// Step is completely replaced with PREWHERE filter actions, remove it from stack. - std::swap(stack[stack_size - 1], stack[stack_size - 2]); - stack.pop_back(); + // /// Step is completely replaced with PREWHERE filter actions, remove it from stack. + // std::swap(stack[stack_size - 1], stack[stack_size - 2]); + // stack.pop_back(); - break; - } - } + // break; + // } + // } } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 5ed56f59fc1..6adc48d87d2 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -89,6 +89,34 @@ size_t countPartitions(const MergeTreeData::DataPartsVector & prepared_parts) return countPartitions(prepared_parts, get_partition_id); } +bool restoreDAGInputs(ActionsDAG & dag, const NameSet & inputs) +{ + std::unordered_set outputs(dag.getOutputs().begin(), dag.getOutputs().end()); + bool added = false; + for (const auto * input : dag.getInputs()) + { + if (inputs.contains(input->result_name) && !outputs.contains(input)) + { + dag.getOutputs().push_back(input); + added = true; + } + } + + return added; +} + +bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs) +{ + bool added = false; + if (info.row_level_filter) + added = added || restoreDAGInputs(*info.row_level_filter, inputs); + + if (info.prewhere_actions) + added = added || restoreDAGInputs(*info.prewhere_actions, inputs); + + return added; +} + } namespace ProfileEvents @@ -786,18 +814,13 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( /// To fix this, we prohibit removing any input in prewhere actions. Instead, projection actions will be added after sorting. /// See 02354_read_in_order_prewhere.sql as an example. bool have_input_columns_removed_after_prewhere = false; - if (prewhere_info && prewhere_info->prewhere_actions) + if (prewhere_info) { - auto & outputs = prewhere_info->prewhere_actions->getOutputs(); - std::unordered_set outputs_set(outputs.begin(), outputs.end()); - for (const auto * input : prewhere_info->prewhere_actions->getInputs()) - { - if (!outputs_set.contains(input)) - { - outputs.push_back(input); - have_input_columns_removed_after_prewhere = true; - } - } + NameSet sorting_columns; + for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes()) + sorting_columns.insert(column.name); + + have_input_columns_removed_after_prewhere = restorePrewhereInputs(*prewhere_info, sorting_columns); } /// Let's split ranges to avoid reading much data. @@ -984,7 +1007,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( /// Thus we need to merge all partition parts into a single sorted stream. Pipe pipe = Pipe::unitePipes(std::move(pipes)); merge_streams(pipe); - out_projection = createProjection(pipe_header); return pipe; } @@ -1133,6 +1155,14 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + if (prewhere_info) + { + NameSet sorting_columns; + for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes()) + sorting_columns.insert(column.name); + restorePrewhereInputs(*prewhere_info, sorting_columns); + } + for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index) { /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition @@ -1802,13 +1832,20 @@ Pipe ReadFromMergeTree::spreadMarkRanges( if (!final && result.sampling.use_sampling) { + NameSet sampling_columns; + /// Add columns needed for `sample_by_ast` to `column_names_to_read`. /// Skip this if final was used, because such columns were already added from PK. for (const auto & column : result.sampling.filter_expression->getRequiredColumns().getNames()) { if (!names.contains(column)) column_names_to_read.push_back(column); + + sampling_columns.insert(column); } + + if (prewhere_info) + restorePrewhereInputs(*prewhere_info, sampling_columns); } if (final) @@ -2002,6 +2039,24 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons }); } + /// Some extra columns could be added by sample/final/in-order/etc + /// Remove them from header if not needed. + if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputStream().header)) + { + auto convert_actions_dag = ActionsDAG::makeConvertingActions( + pipe.getHeader().getColumnsWithTypeAndName(), + getOutputStream().header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name, + true); + + auto converting_dag_expr = std::make_shared(convert_actions_dag); + + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, converting_dag_expr); + }); + } + for (const auto & processor : pipe.getProcessors()) processors.emplace_back(processor); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index aeff438f509..4e93bd267ec 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -449,8 +449,8 @@ Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const Prewhere Block MergeTreeSelectProcessor::transformHeader( Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns) { + injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns); auto transformed = applyPrewhereActions(std::move(block), prewhere_info); - injectVirtualColumns(transformed, 0, nullptr, partition_value_type, virtual_columns); return transformed; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 151ce7635b2..c52a2fee051 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -132,14 +132,14 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op if (!optimize_result) return {}; - if (optimize_result->where_conditions.empty()) - return {.prewhere_nodes = {}, .fully_moved_to_prewhere = true}; + // if (optimize_result->where_conditions.empty()) + // return {.prewhere_nodes = {}, .fully_moved_to_prewhere = true}; std::unordered_set prewhere_conditions; for (const auto & condition : optimize_result->prewhere_conditions) prewhere_conditions.insert(condition.node.getDAGNode()); - return {.prewhere_nodes = std::move(prewhere_conditions), .fully_moved_to_prewhere = false}; + return {.prewhere_nodes = std::move(prewhere_conditions), .fully_moved_to_prewhere = optimize_result->where_conditions.empty()}; } static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & columns_names, NameSet & result_set, bool & has_invalid_column) diff --git a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql index dc8fceddc52..73ae6eb499f 100644 --- a/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql +++ b/tests/queries/0_stateless/02235_add_part_offset_virtual_column.sql @@ -52,7 +52,7 @@ SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000 SELECT 'PREWHERE'; SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000; -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 } -SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10 } +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10, 16 } +SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10, 16 } SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3; From df0c30878eaca90c3eb4426e93727481b9ed4f0b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 19 Jan 2024 10:44:58 +0000 Subject: [PATCH 04/19] Fixing style --- src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index a9405d0cbdb..6872141951e 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -214,8 +214,6 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) outputs.resize(size); } - // std::cerr << "!!!!!!!!!!!!!!!!\n"; - // if (!optimize_result.fully_moved_to_prewhere) // { auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true); @@ -283,8 +281,6 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) // return; // } - // std::cerr << "!!!!!!!!!!!!!!!!\n"; - // prewhere_info->prewhere_actions = filter_step->getExpression(); // prewhere_info->prewhere_actions->projectInput(false); // std::cerr << prewhere_info->prewhere_actions->dumpDAG() << std::endl; From 39da54cd508adb7a8d071d5ba4a0f118a8a1af23 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 7 Feb 2024 19:15:59 +0000 Subject: [PATCH 05/19] Fixing more tests. --- .../Optimizations/optimizePrewhere.cpp | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 6872141951e..9d997584a28 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -142,15 +142,17 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) // if (!settings.allow_experimental_analyzer) // return; - const auto & table_expression_modifiers = read_from_merge_tree->getQueryInfo().table_expression_modifiers; - bool is_final = table_expression_modifiers && table_expression_modifiers->hasFinal(); + //const auto & table_expression_modifiers = read_from_merge_tree->getQueryInfo().table_expression_modifiers; + bool is_final = read_from_merge_tree->isQueryWithFinal(); //table_expression_modifiers && table_expression_modifiers->hasFinal(); bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); + // std::cerr << "============ !!! << " << is_final << ' ' << settings.optimize_move_to_prewhere_if_final << std::endl; if (!optimize_move_to_prewhere) return; const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot(); - if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio()) + //if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio()) + if (read_from_merge_tree->isQueryWithSampling()) { const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey(); const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes(); @@ -226,6 +228,46 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) // for (const auto * input : split_result.second->getInputs()) // std::cerr << "in 2" << input->result_name << std::endl; + + /// This is the leak of abstraction. + /// Splited actions may have inputs which are needed only for PREWHERE. + /// This is fine for ActionsDAG to have such a split, but it breakes defaults calculation. + /// + /// See 00950_default_prewhere for example. + /// Table has structure `APIKey UInt8, SessionType UInt8` and default `OperatingSystem = SessionType+1` + /// For a query with `SELECT OperatingSystem WHERE APIKey = 42 AND SessionType = 42` we push everything to PREWHERE + /// and columns APIKey, SessionType are removed from inputs (cause only OperatingSystem is needed). + /// However, column OperatingSystem is calculated after PREWHERE stage, based on SessionType value. + /// If column SessionType is removed by PREWHERE actions, we use zero as defaut, and get a wrong result. + /// + /// So, here we restore removed inputs for PREWHERE actions + { + // const auto & virtuals = read_from_merge_tree->getVirtualColumnNames(); + // NameSet virtual_names(virtuals.begin(), virtuals.end()); + + //std::unordered_set first_inputs(split_result.first->getInputs().begin(), split_result.first->getInputs().end()); + std::unordered_set first_outputs(split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); + ///std::unordered_set second_inputs(split_result.second->getInputs().begin(), split_result.second->getInputs().end()); + + for (const auto * input : split_result.first->getInputs()) + { + if (!first_outputs.contains(input)) + { + split_result.first->getOutputs().push_back(input); + /// Add column to second actions as input. + /// Do not add it to result, so it would be removed. + split_result.second->addInput(input->result_name, input->result_type); + } + } + + // NameSet input_columns; + // for (const auto * input : split_result.first->getInputs()) + // input_columns.insert(input->result_name); + + // auto header = read_from_merge_tree->getStorageSnapshot()->getSampleBlockForColumns(read_from_merge_tree->getRealColumnNames()); + // header = MergeTreeSelectProcessor::transformHeader(std::move(header), prewhere_info, {}, {}); + } + ActionsDAG::NodeRawConstPtrs conditions; conditions.reserve(split_result.split_nodes_mapping.size()); for (const auto * condition : optimize_result.prewhere_nodes) From 1c71a27527262db3034f067aef0e159d85306c6b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 7 Feb 2024 20:05:39 +0000 Subject: [PATCH 06/19] Remove some comments --- src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 9d997584a28..eb5f7a42819 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -142,16 +142,13 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) // if (!settings.allow_experimental_analyzer) // return; - //const auto & table_expression_modifiers = read_from_merge_tree->getQueryInfo().table_expression_modifiers; - bool is_final = read_from_merge_tree->isQueryWithFinal(); //table_expression_modifiers && table_expression_modifiers->hasFinal(); + bool is_final = read_from_merge_tree->isQueryWithFinal(); bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); - // std::cerr << "============ !!! << " << is_final << ' ' << settings.optimize_move_to_prewhere_if_final << std::endl; if (!optimize_move_to_prewhere) return; const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot(); - //if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio()) if (read_from_merge_tree->isQueryWithSampling()) { const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey(); From fafd8005a05412db754a6ea595472ba59fda6f29 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 8 Feb 2024 13:51:29 +0000 Subject: [PATCH 07/19] Fixing style. --- src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index eb5f7a42819..2f790d9892f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -228,14 +228,14 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) /// This is the leak of abstraction. /// Splited actions may have inputs which are needed only for PREWHERE. - /// This is fine for ActionsDAG to have such a split, but it breakes defaults calculation. + /// This is fine for ActionsDAG to have such a split, but it breaks defaults calculation. /// /// See 00950_default_prewhere for example. /// Table has structure `APIKey UInt8, SessionType UInt8` and default `OperatingSystem = SessionType+1` /// For a query with `SELECT OperatingSystem WHERE APIKey = 42 AND SessionType = 42` we push everything to PREWHERE /// and columns APIKey, SessionType are removed from inputs (cause only OperatingSystem is needed). /// However, column OperatingSystem is calculated after PREWHERE stage, based on SessionType value. - /// If column SessionType is removed by PREWHERE actions, we use zero as defaut, and get a wrong result. + /// If column SessionType is removed by PREWHERE actions, we use zero as default, and get a wrong result. /// /// So, here we restore removed inputs for PREWHERE actions { From 3cca665f667bbda8b8c1a25b35e2684813ea61a3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Feb 2024 14:06:41 +0000 Subject: [PATCH 08/19] Attempt to fix more tests. --- .../QueryPlan/Optimizations/optimizeTree.cpp | 16 +++++++++ .../QueryPlan/Optimizations/splitFilter.cpp | 35 +++++++++++++++++-- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index c8c95e7443f..93430e072bb 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -118,6 +118,22 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s optimizePrewhere(stack, nodes); optimizePrimaryKeyCondition(stack); + auto & frame = stack.back(); + + /// Traverse all children first. + if (frame.next_child < frame.node->children.size()) + { + auto next_frame = Frame{.node = frame.node->children[frame.next_child]}; + ++frame.next_child; + stack.push_back(next_frame); + continue; + } + + stack.pop_back(); + } + + while (!stack.empty()) + { { /// NOTE: frame cannot be safely used after stack was modified. auto & frame = stack.back(); diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp index 8c212936195..561ad7302c6 100644 --- a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp +++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp @@ -14,19 +14,33 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) return 0; const auto & expr = filter_step->getExpression(); + const std::string & filter_column_name = filter_step->getFilterColumnName(); /// Do not split if there are function like runningDifference. if (expr->hasStatefulFunctions()) return 0; - auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName()); + bool filter_name_clashs_with_input = false; + if (filter_step->removesFilterColumn()) + { + for (const auto * input : expr->getInputs()) + { + if (input->result_name == filter_column_name) + { + filter_name_clashs_with_input = true; + break; + } + } + } + + auto split = expr->splitActionsForFilter(filter_column_name); if (split.second->trivial()) return 0; bool remove_filter = false; if (filter_step->removesFilterColumn()) - remove_filter = split.second->removeUnusedResult(filter_step->getFilterColumnName()); + remove_filter = split.second->removeUnusedResult(filter_column_name); auto description = filter_step->getStepDescription(); @@ -34,10 +48,25 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes) node->children.swap(filter_node.children); node->children.push_back(&filter_node); + std::string split_filter_name = filter_column_name; + if (filter_name_clashs_with_input) + { + split_filter_name = "__split_filter"; + + for (auto & filter_output : split.first->getOutputs()) + { + if (filter_output->result_name == filter_column_name) + { + filter_output = &split.first->addAlias(*filter_output, split_filter_name); + break; + } + } + } + filter_node.step = std::make_unique( filter_node.children.at(0)->step->getOutputStream(), std::move(split.first), - filter_step->getFilterColumnName(), + std::move(split_filter_name), remove_filter); node->step = std::make_unique(filter_node.step->getOutputStream(), std::move(split.second)); From 7575f0db84dc01fb1a6e5b5404d0f3d298e220f1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Feb 2024 14:36:56 +0000 Subject: [PATCH 09/19] Fix some tests. --- ...771_ignore_data_skipping_indices.reference | 111 +++++++++----- .../02771_ignore_data_skipping_indices.sql | 8 ++ ...ndex_in_function_different_types.reference | 136 +++++++----------- ..._key_index_in_function_different_types.sql | 17 +-- ...f_indexes_support_match_function.reference | 12 -- ...ngrambf_indexes_support_match_function.sql | 89 +----------- 6 files changed, 140 insertions(+), 233 deletions(-) diff --git a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference index fcede2caf2a..e23e3094ca3 100644 --- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference +++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference @@ -1,40 +1,77 @@ 1 2 3 1 2 3 1 2 3 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - Skip - Name: xy_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + Skip + Name: xy_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + Skip + Name: xy_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 diff --git a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql index a49239e9de2..716421b7342 100644 --- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql +++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql @@ -24,6 +24,14 @@ SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='na_idx'; SELECT * FROM data_02771 WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- { serverError 277 } SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx'; + +SET allow_experimental_analyzer = 0; + +SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%'; +SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%'; + +SET allow_experimental_analyzer = 1; + SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%'; SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%'; diff --git a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference index f34aad737d4..6338d048186 100644 --- a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference +++ b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference @@ -1,88 +1,48 @@ -CreatingSets (Create sets before main query execution) - Expression ((Projection + Before ORDER BY)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 1-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets (Create sets before main query execution) - Expression ((Projection + Before ORDER BY)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 1-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets (Create sets before main query execution) - Expression ((Projection + Before ORDER BY)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 5-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets (Create sets before main query execution) - Expression ((Projection + Before ORDER BY)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 5-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets (Create sets before main query execution) - Expression ((Project names + Projection)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 1-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets (Create sets before main query execution) - Expression ((Project names + Projection)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 1-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets (Create sets before main query execution) - Expression ((Project names + Projection)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 5-element set)) - Parts: 1/1 - Granules: 1/1 -CreatingSets (Create sets before main query execution) - Expression ((Project names + Projection)) - ReadFromMergeTree (default.test_table) - Indexes: - PrimaryKey - Keys: - id - value - Condition: and((id in (-Inf, 10]), (value in 5-element set)) - Parts: 1/1 - Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 1-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 1-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 5-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((value in 5-element set), (id in (-Inf, 10])) + Parts: 1/1 + Granules: 1/1 diff --git a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql index 077c49fb22e..585c2635970 100644 --- a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql +++ b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql @@ -7,18 +7,9 @@ CREATE TABLE test_table INSERT INTO test_table SELECT number, number FROM numbers(10); -SET allow_experimental_analyzer = 0; - -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5); -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5'); -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5)); -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5)); - -SET allow_experimental_analyzer = 1; - -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5); -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5'); -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5)); -EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5)); +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5); +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5'); +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5)); +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5)); DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference index 1cf1644fe0a..5c6a213a03f 100644 --- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference +++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference @@ -2,12 +2,8 @@ 2 Hello World 1 Hello ClickHouse 2 Hello World - Granules: 6/6 - Granules: 2/6 Granules: 6/6 Granules: 2/6 - Granules: 6/6 - Granules: 2/6 Granules: 6/6 Granules: 2/6 --- @@ -17,22 +13,14 @@ 1 Hello ClickHouse 2 Hello World 6 World Champion - Granules: 6/6 - Granules: 3/6 Granules: 6/6 Granules: 3/6 - Granules: 6/6 - Granules: 3/6 Granules: 6/6 Granules: 3/6 --- 5 OLAP Database 5 OLAP Database - Granules: 6/6 - Granules: 1/6 Granules: 6/6 Granules: 1/6 - Granules: 6/6 - Granules: 1/6 Granules: 6/6 Granules: 1/6 diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql index 49d39c601ef..5db9697d018 100644 --- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql +++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql @@ -38,20 +38,7 @@ FROM SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 0; - -SELECT * -FROM -( - EXPLAIN PLAN indexes=1 - SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id -) -WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 1; + explain LIKE '%Granules: %'; SELECT * FROM @@ -60,20 +47,7 @@ FROM SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 0; - -SELECT * -FROM -( - EXPLAIN PLAN indexes=1 - SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id -) -WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 1; + explain LIKE '%Granules: %'; SELECT '---'; @@ -92,20 +66,7 @@ FROM SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 0; - -SELECT * -FROM -( - EXPLAIN PLAN indexes = 1 - SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id -) -WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 1; + explain LIKE '%Granules: %'; SELECT * FROM @@ -114,20 +75,7 @@ FROM SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 0; - -SELECT * -FROM -( - EXPLAIN PLAN indexes = 1 - SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id -) -WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 1; + explain LIKE '%Granules: %'; SELECT '---'; @@ -145,19 +93,7 @@ FROM SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id ) WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 0; -SELECT * -FROM -( - EXPLAIN PLAN indexes = 1 - SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id -) -WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 1; + explain LIKE '%Granules: %'; SELECT * FROM @@ -166,20 +102,7 @@ FROM SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id ) WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 0; - -SELECT * -FROM -( - EXPLAIN PLAN indexes = 1 - SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id -) -WHERE - explain LIKE '%Granules: %' -SETTINGS - allow_experimental_analyzer = 1; + explain LIKE '%Granules: %'; DROP TABLE tokenbf_tab; DROP TABLE ngrambf_tab; From 1063d9ca5005c317cc6ce5dfaac6cbcfd271e9b7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Feb 2024 14:47:24 +0000 Subject: [PATCH 10/19] Use opd prewhere optimizaer for old analyzer. --- src/Interpreters/InterpreterSelectQuery.cpp | 56 ++++++++++----------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 187518b9f6c..d0cf9f1160c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -600,7 +600,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query.setFinal(); } - auto analyze = [&] (bool) + auto analyze = [&] (bool try_move_to_prewhere) { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; @@ -632,37 +632,37 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - // if (try_move_to_prewhere - // && storage && storage->canMoveConditionsToPrewhere() - // && query.where() && !query.prewhere() - // && !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not. - // { - // /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable - // if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) - // { - // /// Extract column compressed sizes. - // std::unordered_map column_compressed_sizes; - // for (const auto & [name, sizes] : column_sizes) - // column_compressed_sizes[name] = sizes.data_compressed; + if (try_move_to_prewhere + && storage && storage->canMoveConditionsToPrewhere() + && query.where() && !query.prewhere() + && !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not. + { + /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable + if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) + { + /// Extract column compressed sizes. + std::unordered_map column_compressed_sizes; + for (const auto & [name, sizes] : column_sizes) + column_compressed_sizes[name] = sizes.data_compressed; - // SelectQueryInfo current_info; - // current_info.query = query_ptr; - // current_info.syntax_analyzer_result = syntax_analyzer_result; + SelectQueryInfo current_info; + current_info.query = query_ptr; + current_info.syntax_analyzer_result = syntax_analyzer_result; - // Names queried_columns = syntax_analyzer_result->requiredSourceColumns(); - // const auto & supported_prewhere_columns = storage->supportedPrewhereColumns(); + Names queried_columns = syntax_analyzer_result->requiredSourceColumns(); + const auto & supported_prewhere_columns = storage->supportedPrewhereColumns(); - // MergeTreeWhereOptimizer where_optimizer{ - // std::move(column_compressed_sizes), - // metadata_snapshot, - // storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context), - // queried_columns, - // supported_prewhere_columns, - // log}; + MergeTreeWhereOptimizer where_optimizer{ + std::move(column_compressed_sizes), + metadata_snapshot, + storage->getConditionEstimatorByPredicate(query_info, storage_snapshot, context), + queried_columns, + supported_prewhere_columns, + log}; - // where_optimizer.optimize(current_info, context); - // } - // } + where_optimizer.optimize(current_info, context); + } + } if (query.prewhere() && query.where()) { From d3cccf856199d6cbd5c7eb1caff3ee0d9501351c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Feb 2024 15:44:51 +0000 Subject: [PATCH 11/19] Fixing tests. --- .../QueryPlan/Optimizations/optimizeTree.cpp | 2 + ...f_indexes_support_match_function.reference | 12 +++ ...ngrambf_indexes_support_match_function.sql | 89 +++++++++++++++++-- 3 files changed, 97 insertions(+), 6 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 93430e072bb..55f7e7cb85b 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -132,6 +132,8 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s stack.pop_back(); } + stack.push_back({.node = &root}); + while (!stack.empty()) { { diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference index 5c6a213a03f..1cf1644fe0a 100644 --- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference +++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference @@ -2,8 +2,12 @@ 2 Hello World 1 Hello ClickHouse 2 Hello World + Granules: 6/6 + Granules: 2/6 Granules: 6/6 Granules: 2/6 + Granules: 6/6 + Granules: 2/6 Granules: 6/6 Granules: 2/6 --- @@ -13,14 +17,22 @@ 1 Hello ClickHouse 2 Hello World 6 World Champion + Granules: 6/6 + Granules: 3/6 Granules: 6/6 Granules: 3/6 + Granules: 6/6 + Granules: 3/6 Granules: 6/6 Granules: 3/6 --- 5 OLAP Database 5 OLAP Database + Granules: 6/6 + Granules: 1/6 Granules: 6/6 Granules: 1/6 + Granules: 6/6 + Granules: 1/6 Granules: 6/6 Granules: 1/6 diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql index 5db9697d018..49d39c601ef 100644 --- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql +++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql @@ -38,7 +38,20 @@ FROM SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %'; + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; SELECT * FROM @@ -47,7 +60,20 @@ FROM SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %'; + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; SELECT '---'; @@ -66,7 +92,20 @@ FROM SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %'; + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; SELECT * FROM @@ -75,7 +114,20 @@ FROM SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id ) WHERE - explain LIKE '%Granules: %'; + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; SELECT '---'; @@ -93,7 +145,19 @@ FROM SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id ) WHERE - explain LIKE '%Granules: %'; + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; SELECT * FROM @@ -102,7 +166,20 @@ FROM SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id ) WHERE - explain LIKE '%Granules: %'; + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; DROP TABLE tokenbf_tab; DROP TABLE ngrambf_tab; From 16f3dbea713840ef9c79faa358330986acdc81a7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Feb 2024 16:43:48 +0000 Subject: [PATCH 12/19] Update optimizePrewjere --- .../QueryPlan/Optimizations/optimizePrewhere.cpp | 4 ++-- .../02149_read_in_order_fixed_prefix.reference | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 2f790d9892f..ee5ad8d1a8a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -139,8 +139,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) const auto & context = read_from_merge_tree->getContext(); const auto & settings = context->getSettingsRef(); - // if (!settings.allow_experimental_analyzer) - // return; + if (!settings.allow_experimental_analyzer) + return; bool is_final = read_from_merge_tree->isQueryWithFinal(); bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); diff --git a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference index f2a4ef1f634..f3415a34823 100644 --- a/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference +++ b/tests/queries/0_stateless/02149_read_in_order_fixed_prefix.reference @@ -76,8 +76,7 @@ ExpressionTransform (Expression) ExpressionTransform (ReadFromMergeTree) - ExpressionTransform - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 2020-10-11 0 0 2020-10-11 0 10 2020-10-11 0 20 @@ -106,8 +105,7 @@ ExpressionTransform (Expression) ExpressionTransform (ReadFromMergeTree) - ExpressionTransform - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 2020-10-12 0 2020-10-12 1 2020-10-12 2 @@ -140,9 +138,8 @@ ExpressionTransform (Expression) ExpressionTransform (ReadFromMergeTree) - ExpressionTransform - ReverseTransform - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 + ReverseTransform + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1 2020-10-12 99999 2020-10-12 99998 2020-10-12 99997 From 1d0a86ccdfb94863b09125a554a329b971b16587 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Feb 2024 18:31:51 +0000 Subject: [PATCH 13/19] Try to fix tests. --- .../QueryPlan/Optimizations/optimizeTree.cpp | 9 ++- .../01763_filter_push_down_bugs.reference | 2 +- .../01786_explain_merge_tree.reference | 12 --- .../0_stateless/01786_explain_merge_tree.sh | 4 +- ...771_ignore_data_skipping_indices.reference | 74 +++++++++---------- .../02771_ignore_data_skipping_indices.sql | 2 - ...ndex_in_function_different_types.reference | 44 +++++++++++ ..._key_index_in_function_different_types.sql | 9 +++ 8 files changed, 99 insertions(+), 57 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 55f7e7cb85b..c64bc308246 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -120,6 +120,12 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s auto & frame = stack.back(); + if (frame.next_child == 0) + { + if (optimization_settings.distinct_in_order) + tryDistinctReadInOrder(frame.node); + } + /// Traverse all children first. if (frame.next_child < frame.node->children.size()) { @@ -154,9 +160,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.aggregation_in_order) optimizeAggregationInOrder(*frame.node, nodes); - - if (optimization_settings.distinct_in_order) - tryDistinctReadInOrder(frame.node); } /// Traverse all children first. diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference index c8045dd26f5..80bd7dfd8c0 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference @@ -28,7 +28,7 @@ Expression ((Projection + Before ORDER BY)) Expression ((Project names + Projection)) Filter ((WHERE + DROP unused columns after JOIN)) Join (JOIN FillRightFirst) - Expression (Change column names to column identifiers) + Expression ReadFromMergeTree (default.t1) Indexes: PrimaryKey diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 096090f8fa1..fd1bc713b08 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -3,21 +3,18 @@ MinMax Keys: y - Condition: (y in [1, +Inf)) Parts: 4/5 Granules: 11/12 Partition Keys: y bitAnd(z, 3) - Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1])) Parts: 3/4 Granules: 10/11 PrimaryKey Keys: x y - Condition: and((x in [11, +Inf)), (y in [1, +Inf))) Parts: 2/3 Granules: 6/10 Skip @@ -37,7 +34,6 @@ { "Type": "MinMax", "Keys": ["y"], - "Condition": "(y in [1, +Inf))", "Initial Parts": 5, "Selected Parts": 4, "Initial Granules": 12, @@ -46,7 +42,6 @@ { "Type": "Partition", "Keys": ["y", "bitAnd(z, 3)"], - "Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))", "Initial Parts": 4, "Selected Parts": 3, "Initial Granules": 11, @@ -55,7 +50,6 @@ { "Type": "PrimaryKey", "Keys": ["x", "y"], - "Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))", "Initial Parts": 3, "Selected Parts": 2, "Initial Granules": 10, @@ -109,21 +103,18 @@ MinMax Keys: y - Condition: (y in [1, +Inf)) Parts: 4/5 Granules: 11/12 Partition Keys: y bitAnd(z, 3) - Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1])) Parts: 3/4 Granules: 10/11 PrimaryKey Keys: x y - Condition: and((x in [11, +Inf)), (y in [1, +Inf))) Parts: 2/3 Granules: 6/10 Skip @@ -138,7 +129,6 @@ { "Type": "MinMax", "Keys": ["y"], - "Condition": "(y in [1, +Inf))", "Initial Parts": 5, "Selected Parts": 4, "Initial Granules": 12, @@ -147,7 +137,6 @@ { "Type": "Partition", "Keys": ["y", "bitAnd(z, 3)"], - "Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))", "Initial Parts": 4, "Selected Parts": 3, "Initial Granules": 11, @@ -156,7 +145,6 @@ { "Type": "PrimaryKey", "Keys": ["x", "y"], - "Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))", "Initial Parts": 3, "Selected Parts": 2, "Initial Granules": 10, diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 23537013204..e3b28acdc41 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -17,13 +17,13 @@ do $CH_CLIENT -q " explain indexes = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14; - " | grep -A 100 "ReadFromMergeTree" # | grep -v "Description" + " | grep -A 100 "ReadFromMergeTree" | grep -v "Condition" echo "-----------------" $CH_CLIENT -q " explain indexes = 1, json = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14 format TSVRaw; - " | grep -A 100 "ReadFromMergeTree" # | grep -v "Description" + " | grep -A 100 "ReadFromMergeTree" | grep -v "Condition" echo "-----------------" diff --git a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference index e23e3094ca3..33df18c8801 100644 --- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference +++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference @@ -1,43 +1,43 @@ 1 2 3 1 2 3 1 2 3 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - Skip - Name: xy_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 - ReadFromMergeTree (default.data_02771) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 - Skip - Name: x_idx - Description: minmax GRANULARITY 1 - Parts: 0/1 - Granules: 0/1 - Skip - Name: y_idx - Description: minmax GRANULARITY 1 - Parts: 0/0 - Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + Skip + Name: xy_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 + ReadFromMergeTree (default.data_02771) + Indexes: + PrimaryKey + Condition: true + Parts: 1/1 + Granules: 1/1 + Skip + Name: x_idx + Description: minmax GRANULARITY 1 + Parts: 0/1 + Granules: 0/1 + Skip + Name: y_idx + Description: minmax GRANULARITY 1 + Parts: 0/0 + Granules: 0/0 ReadFromMergeTree (default.data_02771) Indexes: PrimaryKey diff --git a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql index 716421b7342..951d87fd2c0 100644 --- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql +++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql @@ -1,5 +1,3 @@ -SET allow_experimental_analyzer = 0; - DROP TABLE IF EXISTS data_02771; diff --git a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference index 6338d048186..7a5e798359b 100644 --- a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference +++ b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference @@ -1,3 +1,47 @@ +CreatingSets + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((id in (-Inf, 10]), (value in 1-element set)) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((id in (-Inf, 10]), (value in 1-element set)) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((id in (-Inf, 10]), (value in 5-element set)) + Parts: 1/1 + Granules: 1/1 +CreatingSets + Expression + ReadFromMergeTree + Indexes: + PrimaryKey + Keys: + id + value + Condition: and((id in (-Inf, 10]), (value in 5-element set)) + Parts: 1/1 + Granules: 1/1 CreatingSets Expression Expression diff --git a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql index 585c2635970..1b1a7607344 100644 --- a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql +++ b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql @@ -7,6 +7,15 @@ CREATE TABLE test_table INSERT INTO test_table SELECT number, number FROM numbers(10); +set allow_experimental_analyzer = 0; + +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5); +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5'); +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5)); +EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5)); + +set allow_experimental_analyzer = 1; + EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5); EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5'); EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5)); From 769078e2d81cf8090c5b96bac24f80b5d3cae495 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Feb 2024 19:32:53 +0000 Subject: [PATCH 14/19] Another attempt. --- src/Processors/QueryPlan/Optimizations/optimizeTree.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index c64bc308246..daf0a1b959b 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -122,6 +122,10 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (frame.next_child == 0) { + + if (optimization_settings.read_in_order) + optimizeReadInOrder(*frame.node, nodes); + if (optimization_settings.distinct_in_order) tryDistinctReadInOrder(frame.node); } @@ -150,9 +154,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s { has_reading_from_mt |= typeid_cast(frame.node->step.get()) != nullptr; - if (optimization_settings.read_in_order) - optimizeReadInOrder(*frame.node, nodes); - /// Projection optimization relies on PK optimization if (optimization_settings.optimize_projection) num_applied_projection From 43d8a879fefbca12bd03328d8cb9d483223b6520 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 10 Feb 2024 11:38:21 +0000 Subject: [PATCH 15/19] Fixing tests. --- .../QueryPlan/Optimizations/optimizeTree.cpp | 10 +- .../02521_aggregation_by_partitions.reference | 141 +++++++++--------- 2 files changed, 74 insertions(+), 77 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index daf0a1b959b..816850cc82c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -126,6 +126,9 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.read_in_order) optimizeReadInOrder(*frame.node, nodes); + if (optimization_settings.aggregation_in_order) + optimizeAggregationInOrder(*frame.node, nodes); + if (optimization_settings.distinct_in_order) tryDistinctReadInOrder(frame.node); } @@ -139,6 +142,8 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s continue; } + enableMemoryBoundMerging(*stack.back().node, nodes); + stack.pop_back(); } @@ -158,9 +163,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.optimize_projection) num_applied_projection += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); - - if (optimization_settings.aggregation_in_order) - optimizeAggregationInOrder(*frame.node, nodes); } /// Traverse all children first. @@ -192,8 +194,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } } - enableMemoryBoundMerging(*stack.back().node, nodes); - stack.pop_back(); } diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.reference b/tests/queries/0_stateless/02521_aggregation_by_partitions.reference index 67a131ff853..d32e6c7d416 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.reference +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.reference @@ -91,19 +91,18 @@ ExpressionTransform × 16 (Expression) ExpressionTransform × 4 (ReadFromMergeTree) - ExpressionTransform × 4 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 1000000 (Expression) ExpressionTransform × 16 @@ -114,41 +113,6 @@ ExpressionTransform × 16 (Expression) ExpressionTransform × 8 (ReadFromMergeTree) - ExpressionTransform × 8 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 -1000000 -(Expression) -ExpressionTransform × 16 - (Aggregating) - FinalizeAggregatedTransform × 16 - AggregatingInOrderTransform × 16 - (Expression) - ExpressionTransform × 16 - (ReadFromMergeTree) - ExpressionTransform × 16 MergingSortedTransform 2 → 1 ExpressionTransform × 2 MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 @@ -173,30 +137,63 @@ ExpressionTransform × 16 MergingSortedTransform 2 → 1 ExpressionTransform × 2 MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 - MergingSortedTransform 2 → 1 - ExpressionTransform × 2 - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 +1000000 +(Expression) +ExpressionTransform × 16 + (Aggregating) + FinalizeAggregatedTransform × 16 + AggregatingInOrderTransform × 16 + (Expression) + ExpressionTransform × 16 + (ReadFromMergeTree) + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 + MergingSortedTransform 2 → 1 + ExpressionTransform × 2 + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 1000000 Skip merging: 1 Skip merging: 1 From 74caa8e44e66f0ce2803b088d8bd0bb796de5bef Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 10 Feb 2024 11:43:35 +0000 Subject: [PATCH 16/19] Updaye test. --- .../02521_aggregation_by_partitions.reference | 12 ++++++++++++ .../0_stateless/02521_aggregation_by_partitions.sql | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.reference b/tests/queries/0_stateless/02521_aggregation_by_partitions.reference index d32e6c7d416..87b2d5c3430 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.reference +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.reference @@ -1,3 +1,5 @@ +-- { echoOn } +explain pipeline select a from t1 group by a; (Expression) ExpressionTransform × 16 (Aggregating) @@ -15,6 +17,8 @@ ExpressionTransform × 16 Resize 3 → 1 MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 3 0 → 1 1000000 +-- { echoOn } +explain pipeline select a from t2 group by a; (Expression) ExpressionTransform × 16 (Aggregating) @@ -40,6 +44,8 @@ ExpressionTransform × 16 Resize 2 → 1 MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 2 0 → 1 1000000 +-- { echoOn } +explain pipeline select a from t3 group by a; (Expression) ExpressionTransform × 16 (Aggregating) @@ -82,6 +88,8 @@ ExpressionTransform × 16 MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 1000000 1000000 +-- { echoOn } +explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12; (Expression) ExpressionTransform × 16 (Aggregating) @@ -104,6 +112,8 @@ ExpressionTransform × 16 ExpressionTransform × 2 MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 1000000 +-- { echoOn } +explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12; (Expression) ExpressionTransform × 16 (Aggregating) @@ -138,6 +148,8 @@ ExpressionTransform × 16 ExpressionTransform × 2 MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1 1000000 +-- { echoOn } +explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12; (Expression) ExpressionTransform × 16 (Aggregating) diff --git a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql index 87317e5fba4..5b013ca5aef 100644 --- a/tests/queries/0_stateless/02521_aggregation_by_partitions.sql +++ b/tests/queries/0_stateless/02521_aggregation_by_partitions.sql @@ -15,7 +15,9 @@ system stop merges t1; insert into t1 select number from numbers_mt(1e6); insert into t1 select number from numbers_mt(1e6); +-- { echoOn } explain pipeline select a from t1 group by a; +-- { echoOff } select count() from (select throwIf(count() != 2) from t1 group by a); @@ -28,7 +30,9 @@ system stop merges t2; insert into t2 select number from numbers_mt(1e6); insert into t2 select number from numbers_mt(1e6); +-- { echoOn } explain pipeline select a from t2 group by a; +-- { echoOff } select count() from (select throwIf(count() != 2) from t2 group by a); @@ -41,7 +45,9 @@ system stop merges t3; insert into t3 select number from numbers_mt(1e6); insert into t3 select number from numbers_mt(1e6); +-- { echoOn } explain pipeline select a from t3 group by a; +-- { echoOff } select count() from (select throwIf(count() != 2) from t3 group by a); @@ -63,7 +69,9 @@ system stop merges t4; insert into t4 select number from numbers_mt(1e6); insert into t4 select number from numbers_mt(1e6); +-- { echoOn } explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12; +-- { echoOff } select count() from (select throwIf(count() != 2) from t4 group by a); @@ -76,7 +84,9 @@ system stop merges t5; insert into t5 select number from numbers_mt(1e6); insert into t5 select number from numbers_mt(1e6); +-- { echoOn } explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12; +-- { echoOff } select count() from (select throwIf(count() != 2) from t5 group by a); @@ -89,7 +99,9 @@ system stop merges t6; insert into t6 select number from numbers_mt(1e6); insert into t6 select number from numbers_mt(1e6); +-- { echoOn } explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12; +-- { echoOff } select count() from (select throwIf(count() != 2) from t6 group by a); From bcae537810a221af9e83551bf3b083190e82f70e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 10 Feb 2024 11:50:06 +0000 Subject: [PATCH 17/19] Remove commented code. --- .../Optimizations/optimizePrewhere.cpp | 320 +----------------- 1 file changed, 1 insertion(+), 319 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index ee5ad8d1a8a..6676f935b67 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -16,49 +16,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -// namespace -// { - -// void matchDAGOutputNodesOrderWithHeader(ActionsDAGPtr & actions_dag, const Block & expected_header) -// { -// std::unordered_map output_name_to_node; -// for (const auto * output_node : actions_dag->getOutputs()) -// output_name_to_node.emplace(output_node->result_name, output_node); - -// std::unordered_set used_output_nodes; - -// ActionsDAG::NodeRawConstPtrs updated_outputs; -// updated_outputs.reserve(expected_header.columns()); - -// for (const auto & column : expected_header) -// { -// auto output_node_it = output_name_to_node.find(column.name); -// if (output_node_it == output_name_to_node.end()) -// throw Exception(ErrorCodes::LOGICAL_ERROR, -// "Invalid move to PREWHERE optimization. Cannot find column {} in output", -// column.name); - -// updated_outputs.push_back(output_node_it->second); -// used_output_nodes.insert(output_node_it->second); -// } - -// ActionsDAG::NodeRawConstPtrs unused_outputs; -// for (const auto * output_node : actions_dag->getOutputs()) -// { -// if (used_output_nodes.contains(output_node)) -// continue; - -// unused_outputs.push_back(output_node); -// } - -// auto & actions_dag_outputs = actions_dag->getOutputs(); -// actions_dag_outputs = std::move(updated_outputs); -// actions_dag_outputs.insert(actions_dag_outputs.end(), unused_outputs.begin(), unused_outputs.end()); -// } - -// } - - namespace QueryPlanOptimizations { @@ -102,40 +59,6 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) if (!filter_step) return; - /** Collect required filter output columns. - * Collect output nodes that are mapped to input nodes. - * Collect input node to output nodes mapping. - */ - ColumnsWithTypeAndName required_columns_after_filter; - // std::unordered_set output_nodes_mapped_to_input; - // std::unordered_map> input_node_to_output_names; - - // for (const auto * output_node : filter_step->getExpression()->getOutputs()) - // { - // const auto * node_without_alias = output_node; - // while (node_without_alias->type == ActionsDAG::ActionType::ALIAS) - // node_without_alias = node_without_alias->children[0]; - - // if (node_without_alias->type == ActionsDAG::ActionType::INPUT) - // { - // output_nodes_mapped_to_input.emplace(output_node->result_name); - - // auto output_names_it = input_node_to_output_names.find(node_without_alias->result_name); - // if (output_names_it == input_node_to_output_names.end()) - // { - // auto [insert_it, _] = input_node_to_output_names.emplace(node_without_alias->result_name, std::vector()); - // output_names_it = insert_it; - // } - - // output_names_it->second.push_back(output_node->result_name); - // } - - // if (output_node->result_name == filter_step->getFilterColumnName() && filter_step->removesFilterColumn()) - // continue; - - // required_columns_after_filter.push_back(ColumnWithTypeAndName(output_node->result_type, output_node->result_name)); - // } - const auto & context = read_from_merge_tree->getContext(); const auto & settings = context->getSettingsRef(); @@ -149,6 +72,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot(); + ColumnsWithTypeAndName required_columns_after_filter; if (read_from_merge_tree->isQueryWithSampling()) { const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey(); @@ -195,10 +119,6 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info = std::make_shared(); prewhere_info->need_filter = true; - // std::cerr << filter_step->getExpression()->dumpDAG() << std::endl; - - // QueryPlan::Node * replace_old_filter_node = nullptr; - // bool remove_filter_node = false; auto filter_expression = filter_step->getExpression(); const auto & filter_column_name = filter_step->getFilterColumnName(); @@ -213,19 +133,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) outputs.resize(size); } - // if (!optimize_result.fully_moved_to_prewhere) - // { auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true); - // std::cerr << split_result.first->dumpDAG() << std::endl; - // std::cerr << split_result.second->dumpDAG() << std::endl; - - // for (const auto * input : split_result.first->getInputs()) - // std::cerr << "in 1" << input->result_name << std::endl; - // for (const auto * input : split_result.second->getInputs()) - // std::cerr << "in 2" << input->result_name << std::endl; - - /// This is the leak of abstraction. /// Splited actions may have inputs which are needed only for PREWHERE. /// This is fine for ActionsDAG to have such a split, but it breaks defaults calculation. @@ -239,13 +148,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) /// /// So, here we restore removed inputs for PREWHERE actions { - // const auto & virtuals = read_from_merge_tree->getVirtualColumnNames(); - // NameSet virtual_names(virtuals.begin(), virtuals.end()); - - //std::unordered_set first_inputs(split_result.first->getInputs().begin(), split_result.first->getInputs().end()); std::unordered_set first_outputs(split_result.first->getOutputs().begin(), split_result.first->getOutputs().end()); - ///std::unordered_set second_inputs(split_result.second->getInputs().begin(), split_result.second->getInputs().end()); - for (const auto * input : split_result.first->getInputs()) { if (!first_outputs.contains(input)) @@ -256,22 +159,12 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) split_result.second->addInput(input->result_name, input->result_type); } } - - // NameSet input_columns; - // for (const auto * input : split_result.first->getInputs()) - // input_columns.insert(input->result_name); - - // auto header = read_from_merge_tree->getStorageSnapshot()->getSampleBlockForColumns(read_from_merge_tree->getRealColumnNames()); - // header = MergeTreeSelectProcessor::transformHeader(std::move(header), prewhere_info, {}, {}); } ActionsDAG::NodeRawConstPtrs conditions; conditions.reserve(split_result.split_nodes_mapping.size()); for (const auto * condition : optimize_result.prewhere_nodes) - { - // std::cerr << ".. " << condition->result_name << std::endl; conditions.push_back(split_result.split_nodes_mapping.at(condition)); - } prewhere_info->prewhere_actions = std::move(split_result.first); prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn(); @@ -291,14 +184,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) prewhere_info->prewhere_actions->getOutputs().push_back(node); } - // std::cerr << read_from_merge_tree->getOutputStream().header.dumpStructure() << std::endl; - // std::cerr << read_from_merge_tree->getOutputStream().header.dumpIndex() << std::endl; - read_from_merge_tree->updatePrewhereInfo(prewhere_info); - // std::cerr << read_from_merge_tree->getOutputStream().header.dumpStructure() << std::endl; - // std::cerr << read_from_merge_tree->getOutputStream().header.dumpIndex() << std::endl; - if (!optimize_result.fully_moved_to_prewhere) { filter_node->step = std::make_unique( @@ -309,215 +196,10 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &) } else { - // std::cerr << split_result.second->dumpDAG() << std::endl; - // std::cerr << read_from_merge_tree->getOutputStream().header.dumpStructure() << std::endl; - // std::cerr << read_from_merge_tree->getOutputStream().header.dumpIndex() << std::endl; - filter_node->step = std::make_unique( read_from_merge_tree->getOutputStream(), std::move(split_result.second)); } - // return; - // } - - // prewhere_info->prewhere_actions = filter_step->getExpression(); - // prewhere_info->prewhere_actions->projectInput(false); - // std::cerr << prewhere_info->prewhere_actions->dumpDAG() << std::endl; - // prewhere_info->prewhere_column_name = filter_step->getFilterColumnName(); - // prewhere_info->remove_prewhere_column = filter_step->removesFilterColumn(); - - // read_from_merge_tree->updatePrewhereInfo(prewhere_info); - - // replace_old_filter_node = frame.node; - // remove_filter_node = true; - - // auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions; - - // ActionsChain actions_chain; - - // std::string prewere_filter_node_name = prewhere_filter_actions->getOutputs().at(0)->result_name; - // actions_chain.addStep(std::make_unique(prewhere_filter_actions)); - - // auto & filter_actions = optimize_result->filter_actions; - - // /** Merge tree where optimizer splits conjunctions in filter expression into 2 parts: - // * 1. Filter expressions. - // * 2. Prewhere filter expressions. - // * - // * There can be cases when all expressions are moved to PREWHERE, but it is not - // * enough to produce required filter output columns. - // * - // * Example: SELECT (a AND b) AS cond FROM test_table WHERE cond AND c; - // * In this example condition expressions `a`, `b`, `c` can move to PREWHERE, but PREWHERE will not contain expression `and(a, b)`. - // * It will contain only `a`, `b`, `c`, `and(a, b, c)` expressions. - // * - // * In such scenario we need to create additional step to calculate `and(a, b)` expression after PREWHERE. - // */ - // bool need_additional_filter_after_prewhere = false; - - // if (!filter_actions) - // { - // /// Any node from PREWHERE filter actions can be used as possible output node - // std::unordered_set possible_prewhere_output_nodes; - // for (const auto & node : prewhere_filter_actions->getNodes()) - // possible_prewhere_output_nodes.insert(node.result_name); - - // for (auto & required_column : required_columns_after_filter) - // { - // if (!possible_prewhere_output_nodes.contains(required_column.name) && - // !output_nodes_mapped_to_input.contains(required_column.name)) - // { - // need_additional_filter_after_prewhere = true; - // break; - // } - // } - // } - - // /** If there are additional filter actions after PREWHERE filter actions, we create filter actions dag using PREWHERE filter - // * actions output columns as filter actions dag input columns. - // * Then we merge this filter actions dag nodes with old filter step actions dag nodes, to reuse some expressions from - // * PREWHERE filter actions. - // */ - // if (need_additional_filter_after_prewhere || filter_actions) - // { - // auto merged_filter_actions = std::make_shared(actions_chain.getLastStepAvailableOutputColumns()); - // merged_filter_actions->getOutputs().clear(); - // merged_filter_actions->mergeNodes(std::move(*filter_step->getExpression()->clone())); - - // /// Add old filter step filter column to outputs - // for (const auto & node : merged_filter_actions->getNodes()) - // { - // if (node.result_name == filter_step->getFilterColumnName()) - // { - // merged_filter_actions->getOutputs().push_back(&node); - // break; - // } - // } - - // filter_actions = std::move(merged_filter_actions); - - // /// If there is filter after PREWHERE, we can ignore filtering during PREWHERE stage - // prewhere_info->need_filter = false; - - // actions_chain.addStep(std::make_unique(filter_actions)); - // } - - // auto required_output_actions = std::make_shared(required_columns_after_filter); - // actions_chain.addStep(std::make_unique(required_output_actions)); - - // actions_chain.finalize(); - - // prewhere_filter_actions->projectInput(false); - - // auto & prewhere_actions_chain_node = actions_chain[0]; - // prewhere_info->prewhere_actions = std::move(prewhere_filter_actions); - // prewhere_info->prewhere_column_name = prewere_filter_node_name; - // prewhere_info->remove_prewhere_column = !prewhere_actions_chain_node->getChildRequiredOutputColumnsNames().contains(prewere_filter_node_name); - - // read_from_merge_tree->updatePrewhereInfo(prewhere_info); - - // QueryPlan::Node * replace_old_filter_node = nullptr; - // bool remove_filter_node = false; - - // if (filter_actions) - // { - // filter_actions->projectInput(false); - - // /// Match dag output nodes with old filter step header - // matchDAGOutputNodesOrderWithHeader(filter_actions, filter_step->getOutputStream().header); - - // auto & filter_actions_chain_node = actions_chain[1]; - // bool remove_filter_column = !filter_actions_chain_node->getChildRequiredOutputColumnsNames().contains(filter_step->getFilterColumnName()); - // auto after_prewhere_filter_step = std::make_unique(read_from_merge_tree->getOutputStream(), - // filter_actions, - // filter_step->getFilterColumnName(), - // remove_filter_column); - - // auto & node = nodes.emplace_back(); - // node.children.emplace_back(frame.node); - // node.step = std::move(after_prewhere_filter_step); - - // replace_old_filter_node = &node; - // } - // else - // { - // auto rename_actions_dag = std::make_shared(read_from_merge_tree->getOutputStream().header.getColumnsWithTypeAndName()); - // bool apply_rename_step = false; - - // ActionsDAG::NodeRawConstPtrs updated_outputs; - - // /** If in output after read from merge tree there are column names without aliases, - // * apply old filter step aliases to them. - // */ - // for (const auto * output_node : rename_actions_dag->getOutputs()) - // { - // const auto alias_it = input_node_to_output_names.find(output_node->result_name); - // if (alias_it == input_node_to_output_names.end()) - // { - // updated_outputs.push_back(output_node); - // continue; - // } - - // for (auto & output_name : alias_it->second) - // { - // if (output_name == output_node->result_name) - // { - // updated_outputs.push_back(output_node); - // continue; - // } - - // updated_outputs.push_back(&rename_actions_dag->addAlias(*output_node, output_name)); - // apply_rename_step = true; - // } - // } - - // rename_actions_dag->getOutputs() = std::move(updated_outputs); - - // bool apply_match_step = false; - - // /// If column order does not match old filter step column order, match dag output nodes with header - // if (!blocksHaveEqualStructure(read_from_merge_tree->getOutputStream().header, filter_step->getOutputStream().header)) - // { - // apply_match_step = true; - // matchDAGOutputNodesOrderWithHeader(rename_actions_dag, filter_step->getOutputStream().header); - // } - - // if (apply_rename_step || apply_match_step) - // { - // auto rename_step = std::make_unique(read_from_merge_tree->getOutputStream(), rename_actions_dag); - // if (apply_rename_step) - // rename_step->setStepDescription("Change column names to column identifiers"); - - // auto & node = nodes.emplace_back(); - // node.children.emplace_back(frame.node); - // node.step = std::move(rename_step); - - // replace_old_filter_node = &node; - // } - // else - // { - // replace_old_filter_node = frame.node; - // remove_filter_node = true; - // } - // } - - // QueryPlan::Node * filter_parent_node = (stack.rbegin() + 2)->node; - - // for (auto & filter_parent_child : filter_parent_node->children) - // { - // if (filter_parent_child == filter_node) - // { - // filter_parent_child = frame.node; - - // size_t stack_size = stack.size(); - - // /// Step is completely replaced with PREWHERE filter actions, remove it from stack. - // std::swap(stack[stack_size - 1], stack[stack_size - 2]); - // stack.pop_back(); - - // break; - // } - // } } } From 8b25d868e269de6407377657b406114c62a0f2be Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 10 Feb 2024 17:04:27 +0100 Subject: [PATCH 18/19] Update optimizePrewhere.cpp --- src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index 6676f935b67..ec07f028f20 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -11,11 +11,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace QueryPlanOptimizations { From 306f5047818ea3dc13ec5575c5d6d96f5b25373f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 10 Feb 2024 19:53:03 +0000 Subject: [PATCH 19/19] Fixing test. --- .../QueryPlan/Optimizations/optimizeTree.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 816850cc82c..fafd6d1dc00 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -126,9 +126,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.read_in_order) optimizeReadInOrder(*frame.node, nodes); - if (optimization_settings.aggregation_in_order) - optimizeAggregationInOrder(*frame.node, nodes); - if (optimization_settings.distinct_in_order) tryDistinctReadInOrder(frame.node); } @@ -142,8 +139,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s continue; } - enableMemoryBoundMerging(*stack.back().node, nodes); - stack.pop_back(); } @@ -163,6 +158,10 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.optimize_projection) num_applied_projection += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); + + + if (optimization_settings.aggregation_in_order) + optimizeAggregationInOrder(*frame.node, nodes); } /// Traverse all children first. @@ -194,6 +193,8 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } } + enableMemoryBoundMerging(*stack.back().node, nodes); + stack.pop_back(); }