From ffaf9da2dbe8a24db922146606842420c1083c58 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 2 Mar 2021 20:08:59 +0300 Subject: [PATCH] Remove index by name from ActionsDAG --- src/Interpreters/ActionsDAG.cpp | 377 ++++++++++++++------------------ src/Interpreters/ActionsDAG.h | 208 +++++++++--------- 2 files changed, 264 insertions(+), 321 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index e994a6a0ef6..6c7d223dd18 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_) { for (const auto & input : inputs_) - addInput(input.name, input.type, true); + addInput(input.name, input.type); } ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_) @@ -41,7 +41,7 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_) { if (input.column && isColumnConst(*input.column)) { - addInput(input, true); + addInput(input); /// Here we also add column. /// It will allow to remove input which is actually constant (after projection). @@ -49,49 +49,49 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_) /// without any respect to header structure. So, it is a way to drop materialized column and use /// constant value from header. /// We cannot remove such input right now cause inputs positions are important in some cases. - addColumn(input, true); + addColumn(input); } else - addInput(input.name, input.type, true); + addInput(input.name, input.type); } } -ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace, bool add_to_index) +ActionsDAG::Node & ActionsDAG::addNode(Node node) { - auto it = index.find(node.result_name); - if (it != index.end() && !can_replace && add_to_index) - throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); + // auto it = index.find(node.result_name); + // if (it != index.end() && !can_replace && add_to_index) + // throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); auto & res = nodes.emplace_back(std::move(node)); if (res.type == ActionType::INPUT) inputs.emplace_back(&res); - if (add_to_index) - index.replace(&res); + // if (add_to_index) + // index.replace(&res); return res; } -ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) -{ - auto it = index.find(name); - if (it == index.end()) - throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); +// ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) +// { +// auto it = index.find(name); +// if (it == index.end()) +// throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); - return **it; -} +// return **it; +// } -const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type, bool can_replace) +const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type) { Node node; node.type = ActionType::INPUT; node.result_type = std::move(type); node.result_name = std::move(name); - return addNode(std::move(node), can_replace); + return addNode(std::move(node)); } -const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool can_replace) +const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) { Node node; node.type = ActionType::INPUT; @@ -99,10 +99,10 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool node.result_name = std::move(column.name); node.column = std::move(column.column); - return addNode(std::move(node), can_replace); + return addNode(std::move(node)); } -const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace, bool materialize) +const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) { if (!column.column) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name); @@ -113,30 +113,25 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo node.result_name = std::move(column.name); node.column = std::move(column.column); - auto * res = &addNode(std::move(node), can_replace, !materialize); + auto * res = &addNode(std::move(node)); - if (materialize) - { - auto & name = res->result_name; + // if (materialize) + // { + // auto & name = res->result_name; - FunctionOverloadResolverPtr func_builder_materialize = - std::make_shared( - std::make_unique( - std::make_shared())); + // FunctionOverloadResolverPtr func_builder_materialize = + // std::make_shared( + // std::make_unique( + // std::make_shared())); - res = &addFunction(func_builder_materialize, {res}, {}, true, false); - res = &addAlias(*res, name, true); - } + // res = &addFunction(func_builder_materialize, {res}, {}, true, false); + // res = &addAlias(*res, name, true); + // } return *res; } -const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace) -{ - return addAlias(getNode(name), alias, can_replace); -} - -ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool can_replace) +const ActionsDAG::Node & ActionsDAG::addAlias(const Node & child, std::string alias) { Node node; node.type = ActionType::ALIAS; @@ -145,13 +140,11 @@ ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool ca node.column = child.column; node.children.emplace_back(&child); - return addNode(std::move(node), can_replace); + return addNode(std::move(node)); } -const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) +const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::string result_name) { - auto & child = getNode(source_name); - const DataTypeArray * array_type = typeid_cast(child.result_type.get()); if (!array_type) throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); @@ -167,37 +160,8 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_nam const ActionsDAG::Node & ActionsDAG::addFunction( const FunctionOverloadResolverPtr & function, - const Names & argument_names, - std::string result_name, - const Context & context [[maybe_unused]], - bool can_replace) -{ - const auto & all_settings = context.getSettingsRef(); - settings.max_temporary_columns = all_settings.max_temporary_columns; - settings.max_temporary_non_const_columns = all_settings.max_temporary_non_const_columns; - -#if USE_EMBEDDED_COMPILER - settings.compile_expressions = all_settings.compile_expressions; - settings.min_count_to_compile_expression = all_settings.min_count_to_compile_expression; - - if (!compilation_cache) - compilation_cache = context.getCompiledExpressionCache(); -#endif - - Inputs children; - children.reserve(argument_names.size()); - for (const auto & name : argument_names) - children.push_back(&getNode(name)); - - return addFunction(function, children, std::move(result_name), can_replace); -} - -ActionsDAG::Node & ActionsDAG::addFunction( - const FunctionOverloadResolverPtr & function, - Inputs children, - std::string result_name, - bool can_replace, - bool add_to_index) + NodeRawConstPtrs children, + std::string result_name) { size_t num_arguments = children.size(); @@ -229,10 +193,7 @@ ActionsDAG::Node & ActionsDAG::addFunction( node.function = node.function_base->prepare(arguments); /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. - /// But if we compile expressions compiled version of this function maybe placed in cache, - /// so we don't want to unfold non deterministic functions - if (all_const && node.function_base->isSuitableForConstantFolding() - && (!settings.compile_expressions || node.function_base->isDeterministic())) + if (all_const && node.function_base->isSuitableForConstantFolding()) { size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); auto col = node.function->execute(arguments, node.result_type, num_rows, true); @@ -277,7 +238,7 @@ ActionsDAG::Node & ActionsDAG::addFunction( node.result_name = std::move(result_name); - return addNode(std::move(node), can_replace, add_to_index); + return addNode(std::move(node)); } @@ -333,35 +294,26 @@ std::string ActionsDAG::dumpNames() const void ActionsDAG::removeUnusedActions(const Names & required_names) { - std::unordered_set nodes_set; - std::vector required_nodes; + NodeRawConstPtrs required_nodes; required_nodes.reserve(required_names.size()); + std::unordered_map> names_map; + for (const auto * node : index) + names_map[node->result_name].push_back(node); + for (const auto & name : required_names) { - auto it = index.find(name); - if (it == index.end()) + auto & nodes_list = names_map[name]; + if (nodes_list.empty()) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown column: {}, there are only columns {}", name, dumpNames()); - if (nodes_set.insert(*it).second) - required_nodes.push_back(*it); + required_nodes.push_back(nodes_list.front()); + nodes_list.pop_back(); } removeUnusedActions(required_nodes); -} - -void ActionsDAG::removeUnusedActions(const std::vector & required_nodes) -{ - { - Index new_index; - - for (auto * node : required_nodes) - new_index.insert(node); - - index.swap(new_index); - } - + index.swap(required_nodes); removeUnusedActions(); } @@ -373,7 +325,7 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs) for (auto * node : index) { visited_nodes.insert(node); - stack.push(node); + stack.push(const_cast(node)); } for (auto & node : nodes) @@ -421,22 +373,35 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs) inputs.erase(it, inputs.end()); } -void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector & result_nodes) +void ActionsDAG::addAliases(const NamesWithAliases & aliases, bool project) { - std::vector required_nodes; + std::unordered_map> names_map; + for (const auto * node : index) + names_map[node->result_name].push_back(node); + + NodeRawConstPtrs required_nodes; for (const auto & item : aliases) { - auto & child = getNode(item.first); - required_nodes.push_back(&child); + auto & nodes_list = names_map[item.first]; + if (nodes_list.empty()) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown column: {}, there are only columns {}", name, dumpNames()); + + const auto * child = nodes_list.front(); + nodes_list.pop_front(); + required_nodes.push_back(child); } - result_nodes.reserve(aliases.size()); + if (project) + index.clear(); + + index.reserve(index.size() + aliases.size()); for (size_t i = 0; i < aliases.size(); ++i) { const auto & item = aliases[i]; - auto * child = required_nodes[i]; + const auto * child = required_nodes[i]; if (!item.second.empty() && item.first != item.second) { @@ -447,40 +412,39 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vectorcolumn; node.children.emplace_back(child); - auto & alias = addNode(std::move(node), true); - result_nodes.push_back(&alias); + auto & alias = addNode(std::move(node)); + index.push_back(&alias); } - else - result_nodes.push_back(child); + else if (project) + index.push_back(child); } } void ActionsDAG::addAliases(const NamesWithAliases & aliases) { - std::vector result_nodes; - addAliases(aliases, result_nodes); + addAliases(aliases, false); } void ActionsDAG::project(const NamesWithAliases & projection) { - std::vector result_nodes; - addAliases(projection, result_nodes); - removeUnusedActions(result_nodes); + addAliases(projection, true); + removeUnusedActions(); projectInput(); - settings.projected_output = true; + projected_output = true; } bool ActionsDAG::tryRestoreColumn(const std::string & column_name) { - if (index.contains(column_name)) - return true; + for (const auto * node : index) + if (node->result_name == column_name) + return true; for (auto it = nodes.rbegin(); it != nodes.rend(); ++it) { auto & node = *it; if (node.result_name == column_name) { - index.replace(&node); + index.push_back(&node); return true; } } @@ -502,7 +466,7 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG()); col = *it; - index.remove(it); + index.erase(it); } /// Check if column is in input. @@ -541,7 +505,9 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) ActionsDAGPtr ActionsDAG::clone() const { - auto actions = cloneEmpty(); + auto actions = std::make_shared(); + actions.project_input = project_input; + actions.projected_output = projected_output; std::unordered_map copy_map; @@ -556,7 +522,7 @@ ActionsDAGPtr ActionsDAG::clone() const child = copy_map[child]; for (const auto & node : index) - actions->index.insert(copy_map[node]); + actions->index.push_back(copy_map[node]); for (const auto & node : inputs) actions->inputs.push_back(copy_map[node]); @@ -671,17 +637,12 @@ void ActionsDAG::addMaterializingOutputActions() std::make_unique( std::make_shared())); - Index new_index; - std::vector index_nodes(index.begin(), index.end()); - for (auto * node : index_nodes) + for (auto & node : index) { auto & name = node->result_name; - node = &addFunction(func_builder_materialize, {node}, {}, true, false); - node = &addAlias(*node, name, true); - new_index.insert(node); + node = &addFunction(func_builder_materialize, {node}, {}); + node = &addAlias(*node, name); } - - index.swap(new_index); } ActionsDAGPtr ActionsDAG::makeConvertingActions( @@ -697,7 +658,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); auto actions_dag = std::make_shared(source); - std::vector projection(num_result_columns); + NodeRawConstPtrs projection(num_result_columns); FunctionOverloadResolverPtr func_builder_materialize = std::make_shared( @@ -714,7 +675,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num) { const auto & res_elem = result[result_col_num]; - Node * src_node = nullptr; + const Node * src_node = nullptr; switch (mode) { @@ -743,7 +704,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( if (const auto * src_const = typeid_cast(src_node->column.get())) { if (ignore_constant_values) - src_node = const_cast(&actions_dag->addColumn(res_elem, true)); + src_node = &actions_dag->addColumn(res_elem); else if (res_const->getField() != src_const->getField()) throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because " "it is constant but values of constants are different in source and result", @@ -763,31 +724,32 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( column.column = DataTypeString().createColumnConst(0, column.name); column.type = std::make_shared(); - auto * right_arg = const_cast(&actions_dag->addColumn(std::move(column), true)); - auto * left_arg = src_node; + const auto * right_arg = &actions_dag->addColumn(std::move(column)); + const auto * left_arg = src_node; FunctionCast::Diagnostic diagnostic = {src_node->result_name, res_elem.name}; FunctionOverloadResolverPtr func_builder_cast = std::make_shared( CastOverloadResolver::createImpl(false, std::move(diagnostic))); - Inputs children = { left_arg, right_arg }; - src_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}, true); + NodeRawConstPtrs children = { left_arg, right_arg }; + src_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}); } if (src_node->column && isColumnConst(*src_node->column) && !(res_elem.column && isColumnConst(*res_elem.column))) { - Inputs children = {src_node}; - src_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}, true); + NodeRawConstPtrs children = {src_node}; + src_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}); } if (src_node->result_name != res_elem.name) - src_node = &actions_dag->addAlias(*src_node, res_elem.name, true); + src_node = &actions_dag->addAlias(*src_node, res_elem.name); projection[result_col_num] = src_node; } - actions_dag->removeUnusedActions(projection); + actions_dag->index.swap(projection); + actions_dag->removeUnusedActions(); actions_dag->projectInput(); return actions_dag; @@ -802,11 +764,12 @@ ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column) std::make_shared())); auto column_name = column.name; - const auto & column_node = adding_column_action->addColumn(std::move(column)); - Inputs inputs = {const_cast(&column_node)}; - auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true); - adding_column_action->addAlias(function_node, std::move(column_name), true); + const auto * column_node = &adding_column_action->addColumn(std::move(column)); + NodeRawConstPtrs inputs = {column_node}; + auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}); + auto & alias_node = adding_column_action->addAlias(function_node, std::move(column_name)); + adding_column_action.index->push_back(&alias_node); return adding_column_action; } @@ -820,18 +783,18 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) /// This map contains nodes which should be removed from `first` index, cause they are used as inputs for `second`. /// The second element is the number of removes (cause one node may be repeated several times in result). - std::unordered_map removed_first_result; + std::unordered_map removed_first_result; /// Map inputs of `second` to nodes of `first`. - std::unordered_map inputs_map; + std::unordered_map inputs_map; /// Update inputs list. { /// Index may have multiple columns with same name. They also may be used by `second`. Order is important. - std::unordered_map> first_result; - for (auto & node : first.index) + std::unordered_map> first_result; + for (const auto & node : first.index) first_result[node->result_name].push_back(node); - for (auto & node : second.inputs) + for (const auto & node : second.inputs) { auto it = first_result.find(node->result_name); if (it == first_result.end() || it->second.empty()) @@ -852,9 +815,9 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) } /// Replace inputs from `second` to nodes from `first` result. - for (auto & node : second.nodes) + for (const auto & node : second.nodes) { - for (auto & child : node.children) + for (const auto & child : node.children) { if (child->type == ActionType::INPUT) { @@ -865,7 +828,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) } } - for (auto & node : second.index) + for (const auto & node : second.index) { if (node->type == ActionType::INPUT) { @@ -876,49 +839,28 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) } /// Update index. - if (second.settings.project_input) + if (second.project_input) { first.index.swap(second.index); - first.settings.project_input = true; + first.project_input = true; } else { - /// Remove `second` inputs from index. - for (auto it = first.index.begin(); it != first.index.end();) + /// Add not removed result from first actions. + for (const auto * node : first.index) { - auto cur = it; - ++it; - - auto jt = removed_first_result.find(*cur); - if (jt != removed_first_result.end() && jt->second > 0) - { - first.index.remove(cur); - --jt->second; - } + auto it = removed_first_result.find(node); + if (it != removed_first_result.end() && it->second > 0) + --it->second; + else + second.index.push_back(node); } - for (auto it = second.index.rbegin(); it != second.index.rend(); ++it) - first.index.prepend(*it); + first.index.swap(second.index); } - first.nodes.splice(first.nodes.end(), std::move(second.nodes)); - /// Here we rebuild index because some string_view from the first map now may point to string from second. - ActionsDAG::Index first_index; - for (auto * node : first.index) - first_index.insert(node); - - first.index.swap(first_index); - -#if USE_EMBEDDED_COMPILER - if (first.compilation_cache == nullptr) - first.compilation_cache = second.compilation_cache; -#endif - - first.settings.max_temporary_columns = std::max(first.settings.max_temporary_columns, second.settings.max_temporary_columns); - first.settings.max_temporary_non_const_columns = std::max(first.settings.max_temporary_non_const_columns, second.settings.max_temporary_non_const_columns); - first.settings.min_count_to_compile_expression = std::max(first.settings.min_count_to_compile_expression, second.settings.min_count_to_compile_expression); first.settings.projected_output = second.settings.projected_output; /// Drop unused inputs and, probably, some actions. @@ -932,13 +874,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split /// Split DAG into two parts. /// (first_nodes, first_index) is a part which will have split_list in result. /// (second_nodes, second_index) is a part which will have same index as current actions. - std::list second_nodes; - std::list first_nodes; - Index second_index; - Index first_index; + Nodes second_nodes; + Nodes first_nodes; + NodeRawConstPtrs second_index; + NodeRawConstPtrs first_index; /// List of nodes from current actions which are not inputs, but will be in second part. - std::vector new_inputs; + NodeRawConstPtrs new_inputs; struct Frame { @@ -1096,13 +1038,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split } } - for (auto * node : index) - second_index.insert(data[node].to_second); + for (const auto * node : index) + second_index.push_back(data[node].to_second); - Inputs second_inputs; - Inputs first_inputs; + NodeRawConstPtrs second_inputs; + NodeRawConstPtrs first_inputs; - for (auto * input : inputs) + for (const auto * input : inputs) { const auto & cur = data[input]; first_inputs.push_back(cur.to_first); @@ -1112,15 +1054,15 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set split { const auto & cur = data[input]; second_inputs.push_back(cur.to_second); - first_index.insert(cur.to_first); + first_index.push_back(cur.to_first); } - auto first_actions = cloneEmpty(); + auto first_actions = std::make_shared(); first_actions->nodes.swap(first_nodes); first_actions->index.swap(first_index); first_actions->inputs.swap(first_inputs); - auto second_actions = cloneEmpty(); + auto second_actions = std::make_shared(); second_actions->nodes.swap(second_nodes); second_actions->index.swap(second_index); second_actions->inputs.swap(second_inputs); @@ -1158,7 +1100,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & /// At first, visit all children. We depend on ARRAY JOIN if any child does. while (cur.next_child_to_visit < cur.node->children.size()) { - auto * child = cur.node->children[cur.next_child_to_visit]; + const auto * child = cur.node->children[cur.next_child_to_visit]; if (visited_nodes.count(child) == 0) { @@ -1192,7 +1134,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & auto res = split(split_nodes); /// Do not remove array joined columns if they are not used. - res.first->settings.project_input = false; + res.first->project_input = false; return res; } @@ -1217,8 +1159,8 @@ namespace struct ConjunctionNodes { - std::vector allowed; - std::vector rejected; + NodeRawConstPtrs allowed; + NodeRawConstPtrs rejected; }; /// Take a node which result is predicate. @@ -1233,14 +1175,14 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere struct Frame { - ActionsDAG::Node * node; + const ActionsDAG::Node * node; bool is_predicate = false; size_t next_child_to_visit = 0; size_t num_allowed_children = 0; }; std::stack stack; - std::unordered_set visited_nodes; + std::unordered_set visited_nodes; stack.push(Frame{.node = predicate, .is_predicate = true}); visited_nodes.insert(predicate); @@ -1254,7 +1196,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere /// At first, visit all children. while (cur.next_child_to_visit < cur.node->children.size()) { - auto * child = cur.node->children[cur.next_child_to_visit]; + const auto * child = cur.node->children[cur.next_child_to_visit]; if (visited_nodes.count(child) == 0) { @@ -1277,7 +1219,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere } else if (is_conjunction) { - for (auto * child : cur.node->children) + for (const auto * child : cur.node->children) { if (allowed_nodes.count(child)) { @@ -1307,7 +1249,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere return conjunction; } -ColumnsWithTypeAndName prepareFunctionArguments(const std::vector nodes) +ColumnsWithTypeAndName prepareFunctionArguments(const NodeRawConstPtrs & nodes) { ColumnsWithTypeAndName arguments; arguments.reserve(nodes.size()); @@ -1332,20 +1274,19 @@ ColumnsWithTypeAndName prepareFunctionArguments(const std::vector conjunction) +ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction) { if (conjunction.empty()) return nullptr; - auto actions = cloneEmpty(); - actions->settings.project_input = false; + auto actions = std::make_shared(); FunctionOverloadResolverPtr func_builder_and = std::make_shared( std::make_unique( std::make_shared())); - std::unordered_map nodes_mapping; + std::unordered_map nodes_mapping; struct Frame { @@ -1368,7 +1309,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector conjunc /// At first, visit all children. while (cur.next_child_to_visit < cur.node->children.size()) { - auto * child = cur.node->children[cur.next_child_to_visit]; + const auto * child = cur.node->children[cur.next_child_to_visit]; if (nodes_mapping.count(child) == 0) { @@ -1390,7 +1331,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector conjunc if (node.type == ActionType::INPUT) { actions->inputs.emplace_back(&node); - actions->index.insert(&node); + actions->index.push_back(&node); } stack.pop(); @@ -1398,7 +1339,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector conjunc } } - Node * result_predicate = nodes_mapping[*conjunction.begin()]; + const Node * result_predicate = nodes_mapping[*conjunction.begin()]; if (conjunction.size() > 1) { @@ -1407,7 +1348,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector conjunc for (const auto * predicate : conjunction) args.emplace_back(nodes_mapping[predicate]); - result_predicate = &actions->addFunction(func_builder_and, args, {}, true, false); + result_predicate = &actions->addFunction(func_builder_and, args, {}); } actions->index.insert(result_predicate); @@ -1429,7 +1370,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", filter_name, dumpDAG()); - predicate = *it; + predicate = const_cast(*it); } std::unordered_set allowed_nodes; @@ -1468,7 +1409,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, { if (*i == predicate) { - index.remove(i); + index.erase(i); break; } } @@ -1491,7 +1432,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, /// Predicate is conjunction, where both allowed and rejected sets are not empty. /// Replace this node to conjunction of rejected predicates. - std::vector new_children(conjunction.rejected.begin(), conjunction.rejected.end()); + NodeRawConstPtrs new_children = std::move(conjunction.rejected); if (new_children.size() == 1) { @@ -1516,8 +1457,8 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, node.column = DataTypeString().createColumnConst(0, node.result_name); node.result_type = std::make_shared(); - auto * right_arg = &nodes.emplace_back(std::move(node)); - auto * left_arg = new_children.front(); + const auto * right_arg = &nodes.emplace_back(std::move(node)); + const auto * left_arg = new_children.front(); predicate->children = {left_arg, right_arg}; auto arguments = prepareFunctionArguments(predicate->children); diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 14ed5df949b..22a18b69af9 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -55,9 +55,13 @@ public: FUNCTION, }; + struct Node; + using NodeRawPtrs = std::vector; + using NodeRawConstPtrs = std::vector; + struct Node { - std::vector children; + NodeRawConstPtrs children; ActionType type; @@ -90,94 +94,95 @@ public: /// /// Index is a list of nodes + [map: name -> list::iterator]. /// List is ordered, may contain nodes with same names, or one node several times. - class Index - { - private: - std::list list; - /// Map key is a string_view to Node::result_name for node from value. - /// Map always point to existing node, so key always valid (nodes live longer then index). - std::unordered_map::iterator> map; + // class Index + // { + // private: + // std::list list; + // /// Map key is a string_view to Node::result_name for node from value. + // /// Map always point to existing node, so key always valid (nodes live longer then index). + // std::unordered_map::iterator> map; - public: - auto size() const { return list.size(); } - bool contains(std::string_view key) const { return map.count(key) != 0; } + // public: + // auto size() const { return list.size(); } + // bool contains(std::string_view key) const { return map.count(key) != 0; } - std::list::iterator begin() { return list.begin(); } - std::list::iterator end() { return list.end(); } - std::list::const_iterator begin() const { return list.begin(); } - std::list::const_iterator end() const { return list.end(); } - std::list::const_reverse_iterator rbegin() const { return list.rbegin(); } - std::list::const_reverse_iterator rend() const { return list.rend(); } - std::list::const_iterator find(std::string_view key) const - { - auto it = map.find(key); - if (it == map.end()) - return list.end(); + // std::list::iterator begin() { return list.begin(); } + // std::list::iterator end() { return list.end(); } + // std::list::const_iterator begin() const { return list.begin(); } + // std::list::const_iterator end() const { return list.end(); } + // std::list::const_reverse_iterator rbegin() const { return list.rbegin(); } + // std::list::const_reverse_iterator rend() const { return list.rend(); } + // std::list::const_iterator find(std::string_view key) const + // { + // auto it = map.find(key); + // if (it == map.end()) + // return list.end(); - return it->second; - } + // return it->second; + // } - /// Insert method doesn't check if map already have node with the same name. - /// If node with the same name exists, it is removed from map, but not list. - /// It is expected and used for project(), when result may have several columns with the same name. - void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); } - void prepend(Node * node) { map[node->result_name] = list.emplace(list.begin(), node); } + // /// Insert method doesn't check if map already have node with the same name. + // /// If node with the same name exists, it is removed from map, but not list. + // /// It is expected and used for project(), when result may have several columns with the same name. + // void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); } + // void prepend(Node * node) { map[node->result_name] = list.emplace(list.begin(), node); } - /// If node with same name exists in index, replace it. Otherwise insert new node to index. - void replace(Node * node) - { - if (auto handle = map.extract(node->result_name)) - { - handle.key() = node->result_name; /// Change string_view - *handle.mapped() = node; - map.insert(std::move(handle)); - } - else - insert(node); - } + // /// If node with same name exists in index, replace it. Otherwise insert new node to index. + // void replace(Node * node) + // { + // if (auto handle = map.extract(node->result_name)) + // { + // handle.key() = node->result_name; /// Change string_view + // *handle.mapped() = node; + // map.insert(std::move(handle)); + // } + // else + // insert(node); + // } - void remove(std::list::iterator it) - { - auto map_it = map.find((*it)->result_name); - if (map_it != map.end() && map_it->second == it) - map.erase(map_it); + // void remove(std::list::iterator it) + // { + // auto map_it = map.find((*it)->result_name); + // if (map_it != map.end() && map_it->second == it) + // map.erase(map_it); - list.erase(it); - } + // list.erase(it); + // } - void swap(Index & other) - { - list.swap(other.list); - map.swap(other.map); - } - }; + // void swap(Index & other) + // { + // list.swap(other.list); + // map.swap(other.map); + // } + // }; /// NOTE: std::list is an implementation detail. /// It allows to add and remove new nodes inplace without reallocation. /// Raw pointers to nodes remain valid. using Nodes = std::list; - using Inputs = std::vector; + //using Inputs = std::vector; - struct ActionsSettings - { - size_t max_temporary_columns = 0; - size_t max_temporary_non_const_columns = 0; - size_t min_count_to_compile_expression = 0; - bool compile_expressions = false; - bool project_input = false; - bool projected_output = false; - }; + // struct ActionsSettings + // { + // size_t max_temporary_columns = 0; + // size_t max_temporary_non_const_columns = 0; + // size_t min_count_to_compile_expression = 0; + // bool compile_expressions = false; + // bool project_input = false; + // bool projected_output = false; + // }; private: Nodes nodes; - Index index; - Inputs inputs; + NodeRawConstPtrs index; + NodeRawConstPtrs inputs; - ActionsSettings settings; + bool project_input = false; + bool projected_output = false; -#if USE_EMBEDDED_COMPILER - std::shared_ptr compilation_cache; -#endif +// #if USE_EMBEDDED_COMPILER +// std::shared_ptr compilation_cache; +// #endif public: ActionsDAG() = default; @@ -188,8 +193,8 @@ public: explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_); const Nodes & getNodes() const { return nodes; } - const Index & getIndex() const { return index; } - const Inputs & getInputs() const { return inputs; } + const NodeRawConstPtrs & getIndex() const { return index; } + const NodeRawConstPtrs & getInputs() const { return inputs; } NamesAndTypesList getRequiredColumns() const; ColumnsWithTypeAndName getResultColumns() const; @@ -199,17 +204,15 @@ public: std::string dumpNames() const; std::string dumpDAG() const; - const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false); - const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false); - const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false); - const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); - const Node & addArrayJoin(const std::string & source_name, std::string result_name); + const Node & addInput(std::string name, DataTypePtr type); + const Node & addInput(ColumnWithTypeAndName column); + const Node & addColumn(ColumnWithTypeAndName column /*, bool materialize = false*/); + const Node & addAlias(const Node & child, std::string alias); + const Node & addArrayJoin(const Node & child, std::string result_name); const Node & addFunction( const FunctionOverloadResolverPtr & function, - const Names & argument_names, - std::string result_name, - const Context & context, - bool can_replace = false); + NodeRawConstPtrs children, + std::string result_name); /// Call addAlias several times. void addAliases(const NamesWithAliases & aliases); @@ -223,14 +226,14 @@ public: /// Return true if column was removed from inputs. bool removeUnusedResult(const std::string & column_name); - void projectInput() { settings.project_input = true; } + void projectInput() { project_input = true; } void removeUnusedActions(const Names & required_names); bool hasArrayJoin() const; bool hasStatefulFunctions() const; bool trivial() const; /// If actions has no functions or array join. - const ActionsSettings & getSettings() const { return settings; } + //const ActionsSettings & getSettings() const { return settings; } void compileExpressions(); @@ -289,31 +292,30 @@ public: ActionsDAGPtr splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs); private: - Node & addNode(Node node, bool can_replace = false, bool add_to_index = true); - Node & getNode(const std::string & name); + Node & addNode(Node node); + // Node & getNode(const std::string & name); - Node & addAlias(Node & child, std::string alias, bool can_replace); - Node & addFunction( - const FunctionOverloadResolverPtr & function, - Inputs children, - std::string result_name, - bool can_replace, - bool add_to_index = true); +// Node & addAlias(Node & child, std::string alias, bool can_replace); +// Node & addFunction( +// const FunctionOverloadResolverPtr & function, +// Inputs children, +// std::string result_name, +// bool can_replace, +// bool add_to_index = true); - ActionsDAGPtr cloneEmpty() const - { - auto actions = std::make_shared(); - actions->settings = settings; +// ActionsDAGPtr cloneEmpty() const +// { +// auto actions = std::make_shared(); +// // actions->settings = settings; -#if USE_EMBEDDED_COMPILER - actions->compilation_cache = compilation_cache; -#endif - return actions; - } +// // #if USE_EMBEDDED_COMPILER +// // actions->compilation_cache = compilation_cache; +// // #endif +// return actions; +// } - void removeUnusedActions(const std::vector & required_nodes); void removeUnusedActions(bool allow_remove_inputs = true); - void addAliases(const NamesWithAliases & aliases, std::vector & result_nodes); + void addAliases(const NamesWithAliases & aliases, bool project); void compileFunctions();