From a5ed6409adaade65bab8ab2b51b019fb056c1751 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Sep 2020 10:30:03 +0300 Subject: [PATCH 1/8] Add ExpressionDAG [Part 1]. --- src/Interpreters/ActionsVisitor.cpp | 70 +++++++------ src/Interpreters/ActionsVisitor.h | 76 ++++++++------ src/Interpreters/ExpressionActions.cpp | 139 +++++++++++++++++++++++++ src/Interpreters/ExpressionActions.h | 54 ++++++++++ 4 files changed, 273 insertions(+), 66 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 9d6d5f783ff..0742479d34f 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -381,52 +381,56 @@ SetPtr makeExplicitSet( return set; } -ScopeStack::ScopeStack(const ExpressionActionsPtr & actions, const Context & context_) +ScopeStack::ScopeStack(ActionsDAGPtr actions, const Context & context_) : context(context_) { - stack.emplace_back(); - stack.back().actions = actions; - - const Block & sample_block = actions->getSampleBlock(); - for (size_t i = 0, size = sample_block.columns(); i < size; ++i) - stack.back().new_columns.insert(sample_block.getByPosition(i).name); + stack.emplace_back(std::move(actions)); } void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) { - stack.emplace_back(); - Level & prev = stack[stack.size() - 2]; - - ColumnsWithTypeAndName all_columns; - NameSet new_names; + auto & actions = stack.emplace_back(std::make_shared()); + const auto & prev = stack[stack.size() - 2]; for (const auto & input_column : input_columns) - { - all_columns.emplace_back(nullptr, input_column.type, input_column.name); - new_names.insert(input_column.name); - stack.back().new_columns.insert(input_column.name); - } + actions->addInput(input_column.name, input_column.type); - const Block & prev_sample_block = prev.actions->getSampleBlock(); - for (size_t i = 0, size = prev_sample_block.columns(); i < size; ++i) - { - const ColumnWithTypeAndName & col = 
prev_sample_block.getByPosition(i); - if (!new_names.count(col.name)) - all_columns.push_back(col); - } + const auto & index = actions->getIndex(); - stack.back().actions = std::make_shared(all_columns, context); + for (const auto & [name, node] : prev->getIndex()) + { + if (index.count(name) == 0) + actions->addInput(node->result_name, node->result_type); + } } size_t ScopeStack::getColumnLevel(const std::string & name) { for (int i = static_cast(stack.size()) - 1; i >= 0; --i) - if (stack[i].new_columns.count(name)) + if (stack[i]->getIndex().count(name)) return i; throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); } +void ScopeStack::addAlias(const std::string & name, std::string alias) +{ + auto level = getColumnLevel(name); + const auto & node = stack[level]->addAlias(name, std::move(alias)); + + for (size_t j = level + 1; j < stack.size(); ++j) + stack[j]->addInput(node.result_name, node.result_type); +} + +void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name) +{ + auto level = getColumnLevel(source_name); + const auto & node = stack[level]->addAlias(source_name, std::move(result_name)); + + for (size_t j = level + 1; j < stack.size(); ++j) + stack[j]->addInput(node.result_name, node.result_type); +} + void ScopeStack::addAction(const ExpressionAction & action) { size_t level = 0; @@ -460,16 +464,16 @@ void ScopeStack::addActionNoInput(const ExpressionAction & action) stack[level].new_columns.insert(added.begin(), added.end()); } -ExpressionActionsPtr ScopeStack::popLevel() +ActionsDAGPtr ScopeStack::popLevel() { - ExpressionActionsPtr res = stack.back().actions; + auto res = std::move(stack.back()); stack.pop_back(); return res; } -const Block & ScopeStack::getSampleBlock() const +const ActionsDAG::Index & ScopeStack::getIndex() const { - return stack.back().actions->getSampleBlock(); + return stack.back()->getIndex(); } struct CachedColumnName @@ -532,7 +536,7 @@ void ActionsMatcher::visit(const 
ASTIdentifier & identifier, const ASTPtr & ast, /// Special check for WITH statement alias. Add alias action to be able to use this alias. if (identifier.prefer_alias_to_column_name && !identifier.alias.empty()) - data.addAction(ExpressionAction::addAliases({{identifier.name, identifier.alias}})); + data.addAlias(identifier.name, identifier.alias); } } @@ -562,8 +566,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & /// It could have been possible to implement arrayJoin which keeps source column, /// but in this case it will always be replicated (as many arrays), which is expensive. String tmp_name = data.getUniqueName("_array_join_" + arg->getColumnName()); - data.addActionNoInput(ExpressionAction::copyColumn(arg->getColumnName(), tmp_name)); - data.addAction(ExpressionAction::arrayJoin(tmp_name, result_name)); + data.addAlias(arg->getColumnName(), tmp_name); + data.addArrayJoin(tmp_name, result_name); } return; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index d8d85f1c0bf..2fc98d6399d 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -16,6 +16,9 @@ struct ExpressionAction; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; + /// The case of an explicit enumeration of values. 
SetPtr makeExplicitSet( const ASTFunction * node, const Block & sample_block, bool create_ordered_set, @@ -31,31 +34,28 @@ SetPtr makeExplicitSet( */ struct ScopeStack { - struct Level - { - ExpressionActionsPtr actions; - NameSet new_columns; - }; - - using Levels = std::vector; + using Levels = std::vector; Levels stack; const Context & context; - ScopeStack(const ExpressionActionsPtr & actions, const Context & context_); + ScopeStack(ActionsDAGPtr actions, const Context & context_); void pushLevel(const NamesAndTypesList & input_columns); size_t getColumnLevel(const std::string & name); + void addAlias(const std::string & name, std::string alias); + void addArrayJoin(const std::string & source_name, std::string result_name); + void addAction(const ExpressionAction & action); /// For arrayJoin() to avoid double columns in the input. void addActionNoInput(const ExpressionAction & action); - ExpressionActionsPtr popLevel(); + ActionsDAGPtr popLevel(); - const Block & getSampleBlock() const; + const ActionsDAG::Index & getIndex() const; }; class ASTIdentifier; @@ -91,7 +91,7 @@ public: int next_unique_suffix; Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions, + const NamesAndTypesList & source_columns_, ActionsDAGPtr actions, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_) : context(context_), @@ -105,33 +105,43 @@ public: only_consts(only_consts_), no_storage_or_local(no_storage_or_local_), visit_depth(0), - actions_stack(actions, context), + actions_stack(std::move(actions), context), next_unique_suffix(actions_stack.getSampleBlock().columns() + 1) {} - void updateActions(ExpressionActionsPtr & actions) - { - actions = actions_stack.popLevel(); - } - - void addAction(const ExpressionAction & action) - { - actions_stack.addAction(action); - } - 
void addActionNoInput(const ExpressionAction & action) - { - actions_stack.addActionNoInput(action); - } - - const Block & getSampleBlock() const - { - return actions_stack.getSampleBlock(); - } - +// void updateActions(ExpressionActionsPtr & actions) +// { +// actions = actions_stack.popLevel(); +// } +// +// void addAction(const ExpressionAction & action) +// { +// actions_stack.addAction(action); +// } +// void addActionNoInput(const ExpressionAction & action) +// { +// actions_stack.addActionNoInput(action); +// } +// +// const Block & getSampleBlock() const +// { +// return actions_stack.getSampleBlock(); +// } +// /// Does result of the calculation already exists in the block. - bool hasColumn(const String & columnName) const + bool hasColumn(const String & column_name) const { - return actions_stack.getSampleBlock().has(columnName); + return actions_stack.getIndex().count(column_name) != 0; + } + + void addAlias(const std::string & name, std::string alias) + { + actions_stack.addAlias(name, std::move(alias)); + } + + void addArrayJoin(const std::string & source_name, std::string result_name) + { + actions_stack.addArrayJoin(source_name, std::move(result_name)); } /* diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 33fa6215160..221bf9b1835 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1406,4 +1406,143 @@ const ExpressionActionsPtr & ExpressionActionsChain::Step::actions() const return typeid_cast(this)->actions; } +ActionsDAG::Node & ActionsDAG::addNode(Node node) +{ + if (index.count(node.result_name) != 0) + throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); + + auto & res = nodes.emplace_back(std::move(node)); + index[res.result_name] = &res; + return res; +} + +ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) +{ + auto it = index.find(name); + if (it == index.end()) + throw 
Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + + return *it->second; +} + +const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type) +{ + Node node; + node.type = Type::INPUT; + node.result_type = std::move(type); + node.result_name = std::move(name); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias) +{ + auto & child = getNode(name); + + Node node; + node.type = Type::ALIAS; + node.result_type = child.result_type; + node.result_name = std::move(alias); + node.column = child.column; + node.children.emplace_back(&child); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) +{ + auto & child = getNode(source_name); + + const DataTypeArray * array_type = typeid_cast(child.result_type.get()); + if (!array_type) + throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); + + Node node; + node.type = Type::ARRAY_JOIN; + node.result_type = array_type->getNestedType(); + node.result_name = std::move(result_name); + node.children.emplace_back(&child); + + return addNode(std::move(node)); +} + +const ActionsDAG::Node & ActionsDAG::addFunction(const FunctionOverloadResolverPtr & function, const Names & arguments) +{ + Node node; + node.type = Type::FUNCTION; + + bool all_const = true; + bool all_suitable_for_constant_folding = true; + + ColumnNumbers arguments(argument_names.size()); + for (size_t i = 0; i < argument_names.size(); ++i) + { + arguments[i] = sample_block.getPositionByName(argument_names[i]); + ColumnPtr col = sample_block.safeGetByPosition(arguments[i]).column; + if (!col || !isColumnConst(*col)) + all_const = false; + + if (names_not_for_constant_folding.count(argument_names[i])) + all_suitable_for_constant_folding = false; + } + + size_t result_position = sample_block.columns(); + 
sample_block.insert({nullptr, result_type, result_name}); + function = function_base->prepare(sample_block, arguments, result_position); + function->createLowCardinalityResultCache(settings.max_threads); + + bool compile_expressions = false; +#if USE_EMBEDDED_COMPILER + compile_expressions = settings.compile_expressions; +#endif + /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. + /// But if we compile expressions compiled version of this function maybe placed in cache, + /// so we don't want to unfold non deterministic functions + if (all_const && function_base->isSuitableForConstantFolding() && (!compile_expressions || function_base->isDeterministic())) + { + function->execute(sample_block, arguments, result_position, sample_block.rows(), true); + + /// If the result is not a constant, just in case, we will consider the result as unknown. + ColumnWithTypeAndName & col = sample_block.safeGetByPosition(result_position); + if (!isColumnConst(*col.column)) + { + col.column = nullptr; + } + else + { + /// All constant (literal) columns in block are added with size 1. + /// But if there was no columns in block before executing a function, the result has size 0. + /// Change the size to 1. + + if (col.column->empty()) + col.column = col.column->cloneResized(1); + + if (!all_suitable_for_constant_folding) + names_not_for_constant_folding.insert(result_name); + } + } + + /// Some functions like ignore() or getTypeName() always return constant result even if arguments are not constant. + /// We can't do constant folding, but can specify in sample block that function result is constant to avoid + /// unnecessary materialization. 
+ auto & res = sample_block.getByPosition(result_position); + if (!res.column && function_base->isSuitableForConstantFolding()) + { + if (auto col = function_base->getResultIfAlwaysReturnsConstantAndHasArguments(sample_block, arguments)) + { + res.column = std::move(col); + names_not_for_constant_folding.insert(result_name); + } + } + + node.result_name = function->getName() + "("; + for (size_t i = 0 ; i < arguments.size(); ++i) + { + if (i) + node.result_name += ", "; + node.result_name += arguments[i]; + } + node.result_name += ")"; +} + } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 0607bc1e055..e742a84719d 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -140,6 +140,60 @@ private: class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +class ActionsDAG +{ +public: + + enum class Type + { + /// Column which must be in input. + INPUT, + /// Constant column with known value. + COLUMN, + /// Another one name for column. + ALIAS, + FUNCTION, + /// Function arrayJoin. Specially separated because it changes the number of rows. + ARRAY_JOIN, + }; + + struct Node + { + std::vector children; + + Type type; + + std::string result_name; + DataTypePtr result_type; + + /// For COLUMN node and propagated constants. 
+ ColumnPtr column; + }; + + using Index = std::unordered_map; + +private: + std::list nodes; + Index index; + +public: + ActionsDAG() = default; + ActionsDAG(const ActionsDAG &) = delete; + ActionsDAG & operator=(const ActionsDAG &) = delete; + + const std::list & getNodes() const; + const Index & getIndex() const { return index; } + + const Node & addInput(std::string name, DataTypePtr type); + const Node & addAlias(const std::string & name, std::string alias); + const Node & addArrayJoin(const std::string & source_name, std::string result_name); + const Node & addFunction(const FunctionOverloadResolverPtr & function, const Names & arguments); + +private: + Node & addNode(Node node); + Node & getNode(const std::string & name); +}; + /** Contains a sequence of actions on the block. */ class ExpressionActions From c1469aff938d10b512290437a8c9b1ab1c428019 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Sep 2020 19:01:41 +0300 Subject: [PATCH 2/8] Add ExpressionDAG [Part 2]. 
--- src/Interpreters/ActionsVisitor.cpp | 97 ++++++----- src/Interpreters/ActionsVisitor.h | 55 ++++--- src/Interpreters/ExpressionActions.cpp | 208 ++++++++++++++++++++---- src/Interpreters/ExpressionActions.h | 24 ++- src/Interpreters/ExpressionAnalyzer.cpp | 25 ++- src/Interpreters/ExpressionAnalyzer.h | 9 +- 6 files changed, 292 insertions(+), 126 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 0742479d34f..75b29f61ae7 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -339,7 +339,7 @@ static Block createBlockForSet( } SetPtr makeExplicitSet( - const ASTFunction * node, const Block & sample_block, bool create_ordered_set, + const ASTFunction * node, const ActionsDAG::Index & index, bool create_ordered_set, const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets) { const IAST & args = *node->arguments; @@ -350,7 +350,10 @@ SetPtr makeExplicitSet( const ASTPtr & left_arg = args.children.at(0); const ASTPtr & right_arg = args.children.at(1); - const DataTypePtr & left_arg_type = sample_block.getByName(left_arg->getColumnName()).type; + auto it = index.find(left_arg->getColumnName()); + if (it == index.end()) + throw Exception("Unknown identifier: '" + left_arg->getColumnName() + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + const DataTypePtr & left_arg_type = it->second->result_type; DataTypes set_element_types = {left_arg_type}; const auto * left_tuple_type = typeid_cast(left_arg_type.get()); @@ -413,6 +416,15 @@ size_t ScopeStack::getColumnLevel(const std::string & name) throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); } +void ScopeStack::addColumn(ColumnWithTypeAndName column) +{ + auto level = getColumnLevel(column.name); + const auto & node = stack[level]->addColumn(std::move(column)); + + for (size_t j = level + 1; j < stack.size(); ++j) + stack[j]->addInput(node.result_name, node.result_type); +} + void 
ScopeStack::addAlias(const std::string & name, std::string alias) { auto level = getColumnLevel(name); @@ -431,37 +443,20 @@ void ScopeStack::addArrayJoin(const std::string & source_name, std::string resul stack[j]->addInput(node.result_name, node.result_type); } -void ScopeStack::addAction(const ExpressionAction & action) +void ScopeStack::addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions) { size_t level = 0; - Names required = action.getNeededColumns(); - for (const auto & elem : required) - level = std::max(level, getColumnLevel(elem)); + for (const auto & argument : argument_names) + level = std::max(level, getColumnLevel(argument)); - Names added; - stack[level].actions->add(action, added); + const auto & node = stack[level]->addFunction(function, argument_names, std::move(result_name), compile_expressions); - stack[level].new_columns.insert(added.begin(), added.end()); - - for (const auto & elem : added) - { - const ColumnWithTypeAndName & col = stack[level].actions->getSampleBlock().getByName(elem); - for (size_t j = level + 1; j < stack.size(); ++j) - stack[j].actions->addInput(col); - } -} - -void ScopeStack::addActionNoInput(const ExpressionAction & action) -{ - size_t level = 0; - Names required = action.getNeededColumns(); - for (const auto & elem : required) - level = std::max(level, getColumnLevel(elem)); - - Names added; - stack[level].actions->add(action, added); - - stack[level].new_columns.insert(added.begin(), added.end()); + for (size_t j = level + 1; j < stack.size(); ++j) + stack[j]->addInput(node.result_name, node.result_type); } ActionsDAGPtr ScopeStack::popLevel() @@ -471,6 +466,11 @@ ActionsDAGPtr ScopeStack::popLevel() return res; } +std::string ScopeStack::dumpNames() const +{ + return stack.back()->dumpNames(); +} + const ActionsDAG::Index & ScopeStack::getIndex() const { return stack.back()->getIndex(); @@ -592,10 +592,10 @@ void 
ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto argument_name = node.arguments->children.at(0)->getColumnName(); - data.addAction(ExpressionAction::applyFunction( + data.addFunction( FunctionFactory::instance().get(node.name + "IgnoreSet", data.context), { argument_name, argument_name }, - column_name.get(ast))); + column_name.get(ast)); } return; } @@ -667,7 +667,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & column.column = ColumnConst::create(std::move(column_set), 1); else column.column = std::move(column_set); - data.addAction(ExpressionAction::addColumn(column)); + data.addColumn(column); } argument_types.push_back(column.type); @@ -683,7 +683,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & ColumnConst::create(std::move(column_string), 1), std::make_shared(), data.getUniqueName("__" + node.name)); - data.addAction(ExpressionAction::addColumn(column)); + data.addColumn(column); argument_types.push_back(column.type); argument_names.push_back(column.name); } @@ -703,9 +703,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & child_column_name = as_literal->unique_column_name; } - if (data.hasColumn(child_column_name)) + const auto & index = data.actions_stack.getIndex(); + auto it = index.find(child_column_name); + if (it != index.end()) { - argument_types.push_back(data.getSampleBlock().getByName(child_column_name).type); + argument_types.push_back(it->second->result_type); argument_names.push_back(child_column_name); } else @@ -713,7 +715,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (data.only_consts) arguments_present = false; else - throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.getSampleBlock().dumpNames(), + throw Exception("Unknown identifier: " + child_column_name + " there are columns: " + data.actions_stack.dumpNames(), 
ErrorCodes::UNKNOWN_IDENTIFIER); } } @@ -750,7 +752,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.actions_stack.pushLevel(lambda_arguments); visit(lambda->arguments->children.at(1), data); - ExpressionActionsPtr lambda_actions = data.actions_stack.popLevel(); + auto lambda_dag = data.actions_stack.popLevel(); + auto lambda_actions = lambda_dag->buildExpressions(data.context); String result_name = lambda->arguments->children.at(1)->getColumnName(); lambda_actions->finalize(Names(1, result_name)); @@ -769,7 +772,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto function_capture = std::make_unique( lambda_actions, captured, lambda_arguments, result_type, result_name); auto function_capture_adapter = std::make_shared(std::move(function_capture)); - data.addAction(ExpressionAction::applyFunction(function_capture_adapter, captured, lambda_name)); + data.addFunction(function_capture_adapter, captured, lambda_name); argument_types[i] = std::make_shared(lambda_type->getArgumentTypes(), result_type); argument_names[i] = lambda_name; @@ -791,7 +794,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (arguments_present) { - data.addAction(ExpressionAction::applyFunction(function_builder, argument_names, column_name.get(ast))); + data.addFunction(function_builder, argument_names, column_name.get(ast)); } } @@ -806,8 +809,12 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, if (literal.unique_column_name.empty()) { const auto default_name = literal.getColumnName(); - const auto & block = data.getSampleBlock(); - const auto * existing_column = block.findByName(default_name); + const auto & index = data.actions_stack.getIndex(); + const ActionsDAG::Node * existing_column = nullptr; + + auto it = index.find(default_name); + if (it != index.end()) + existing_column = it->second; /* * To approximate CSE, bind all identical literals 
to a single temporary @@ -843,7 +850,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, column.column = type->createColumnConst(1, value); column.type = type; - data.addAction(ExpressionAction::addColumn(column)); + data.addColumn(std::move(column)); } SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_subqueries) @@ -855,7 +862,6 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su const IAST & args = *node.arguments; const ASTPtr & left_in_operand = args.children.at(0); const ASTPtr & right_in_operand = args.children.at(1); - const Block & sample_block = data.getSampleBlock(); /// If the subquery or table name for SELECT. const auto * identifier = right_in_operand->as(); @@ -916,9 +922,10 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su } else { - if (sample_block.has(left_in_operand->getColumnName())) + const auto & index = data.actions_stack.getIndex(); + if (index.count(left_in_operand->getColumnName()) != 0) /// An explicit enumeration of values in parentheses. - return makeExplicitSet(&node, sample_block, false, data.context, data.set_size_limit, data.prepared_sets); + return makeExplicitSet(&node, index, false, data.context, data.set_size_limit, data.prepared_sets); else return {}; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 2fc98d6399d..109d547ed55 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -21,7 +21,7 @@ using ActionsDAGPtr = std::shared_ptr; /// The case of an explicit enumeration of values. 
SetPtr makeExplicitSet( - const ASTFunction * node, const Block & sample_block, bool create_ordered_set, + const ASTFunction * node, const ActionsDAG::Index & index, bool create_ordered_set, const Context & context, const SizeLimits & limits, PreparedSets & prepared_sets); @@ -46,16 +46,19 @@ struct ScopeStack size_t getColumnLevel(const std::string & name); + void addColumn(ColumnWithTypeAndName column); void addAlias(const std::string & name, std::string alias); void addArrayJoin(const std::string & source_name, std::string result_name); - - void addAction(const ExpressionAction & action); - /// For arrayJoin() to avoid double columns in the input. - void addActionNoInput(const ExpressionAction & action); + void addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions); ActionsDAGPtr popLevel(); const ActionsDAG::Index & getIndex() const; + std::string dumpNames() const; }; class ASTIdentifier; @@ -106,34 +109,20 @@ public: no_storage_or_local(no_storage_or_local_), visit_depth(0), actions_stack(std::move(actions), context), - next_unique_suffix(actions_stack.getSampleBlock().columns() + 1) + next_unique_suffix(actions_stack.getIndex().size() + 1) {} -// void updateActions(ExpressionActionsPtr & actions) -// { -// actions = actions_stack.popLevel(); -// } -// -// void addAction(const ExpressionAction & action) -// { -// actions_stack.addAction(action); -// } -// void addActionNoInput(const ExpressionAction & action) -// { -// actions_stack.addActionNoInput(action); -// } -// -// const Block & getSampleBlock() const -// { -// return actions_stack.getSampleBlock(); -// } -// /// Does result of the calculation already exists in the block. 
bool hasColumn(const String & column_name) const { return actions_stack.getIndex().count(column_name) != 0; } + void addColumn(ColumnWithTypeAndName column) + { + actions_stack.addColumn(std::move(column)); + } + void addAlias(const std::string & name, std::string alias) { actions_stack.addAlias(name, std::move(alias)); @@ -144,18 +133,30 @@ public: actions_stack.addArrayJoin(source_name, std::move(result_name)); } + void addFunction(const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name) + { + actions_stack.addFunction(function, argument_names, std::move(result_name), + context.getSettingsRef().compile_expressions); + } + + ActionsDAGPtr getActions() + { + return actions_stack.popLevel(); + } + /* * Generate a column name that is not present in the sample block, using * the given prefix and an optional numeric suffix. */ String getUniqueName(const String & prefix) { - const auto & block = getSampleBlock(); auto result = prefix; // First, try the name without any suffix, because it is currently // used both as a display name and a column id. 
- while (block.has(result)) + while (hasColumn(result)) { result = prefix + "_" + toString(next_unique_suffix); ++next_unique_suffix; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 221bf9b1835..44b46e579f0 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -13,8 +13,10 @@ #include #include #include +#include #include #include +#include #if !defined(ARCADIA_BUILD) # include "config_core.h" @@ -1406,6 +1408,12 @@ const ExpressionActionsPtr & ExpressionActionsChain::Step::actions() const return typeid_cast(this)->actions; } +ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) +{ + for (const auto & input : inputs) + addInput(input.name, input.type); +} + ActionsDAG::Node & ActionsDAG::addNode(Node node) { if (index.count(node.result_name) != 0) @@ -1435,6 +1443,17 @@ const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type return addNode(std::move(node)); } +const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) +{ + Node node; + node.type = Type::COLUMN; + node.result_type = std::move(column.type); + node.result_name = std::move(column.name); + node.column = std::move(column.column); + + return addNode(std::move(node)); +} + const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias) { auto & child = getNode(name); @@ -1444,6 +1463,7 @@ const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::str node.result_type = child.result_type; node.result_name = std::move(alias); node.column = child.column; + node.allow_constant_folding = child.allow_constant_folding; node.children.emplace_back(&child); return addNode(std::move(node)); @@ -1466,49 +1486,61 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_nam return addNode(std::move(node)); } -const ActionsDAG::Node & ActionsDAG::addFunction(const FunctionOverloadResolverPtr & function, const 
Names & arguments) +const ActionsDAG::Node & ActionsDAG::addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions) { + size_t num_arguments = argument_names.size(); + Node node; node.type = Type::FUNCTION; + node.function_builder = function; + node.children.reserve(num_arguments); bool all_const = true; - bool all_suitable_for_constant_folding = true; + ColumnsWithTypeAndName arguments(num_arguments); + ColumnNumbers argument_numbers(num_arguments); - ColumnNumbers arguments(argument_names.size()); - for (size_t i = 0; i < argument_names.size(); ++i) + for (size_t i = 0; i < num_arguments; ++i) { - arguments[i] = sample_block.getPositionByName(argument_names[i]); - ColumnPtr col = sample_block.safeGetByPosition(arguments[i]).column; - if (!col || !isColumnConst(*col)) + auto & child = getNode(argument_names[i]); + node.children.emplace_back(&child); + node.allow_constant_folding = node.allow_constant_folding && child.allow_constant_folding; + + ColumnWithTypeAndName argument; + argument.column = child.column; + argument.type = child.result_type; + + if (!argument.column || !isColumnConst(*argument.column)) all_const = false; - if (names_not_for_constant_folding.count(argument_names[i])) - all_suitable_for_constant_folding = false; + arguments[i] = std::move(argument); + argument_numbers[i] = i; } - size_t result_position = sample_block.columns(); - sample_block.insert({nullptr, result_type, result_name}); - function = function_base->prepare(sample_block, arguments, result_position); - function->createLowCardinalityResultCache(settings.max_threads); + node.function_base = function->build(arguments); + node.result_type = node.function_base->getReturnType(); - bool compile_expressions = false; + Block sample_block(std::move(arguments)); + sample_block.insert({nullptr, node.result_type, node.result_name}); + node.function = node.function_base->prepare(sample_block, 
argument_numbers, num_arguments); + + bool do_compile_expressions = false; #if USE_EMBEDDED_COMPILER - compile_expressions = settings.compile_expressions; + do_compile_expressions = compile_expressions; #endif /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. /// But if we compile expressions compiled version of this function maybe placed in cache, /// so we don't want to unfold non deterministic functions - if (all_const && function_base->isSuitableForConstantFolding() && (!compile_expressions || function_base->isDeterministic())) + if (all_const && node.function_base->isSuitableForConstantFolding() && (!do_compile_expressions || node.function_base->isDeterministic())) { - function->execute(sample_block, arguments, result_position, sample_block.rows(), true); + node.function->execute(sample_block, argument_numbers, num_arguments, sample_block.rows(), true); /// If the result is not a constant, just in case, we will consider the result as unknown. - ColumnWithTypeAndName & col = sample_block.safeGetByPosition(result_position); - if (!isColumnConst(*col.column)) - { - col.column = nullptr; - } - else + ColumnWithTypeAndName & col = sample_block.safeGetByPosition(num_arguments); + if (isColumnConst(*col.column)) { /// All constant (literal) columns in block are added with size 1. /// But if there was no columns in block before executing a function, the result has size 0. @@ -1517,32 +1549,136 @@ const ActionsDAG::Node & ActionsDAG::addFunction(const FunctionOverloadResolverP if (col.column->empty()) col.column = col.column->cloneResized(1); - if (!all_suitable_for_constant_folding) - names_not_for_constant_folding.insert(result_name); + node.column = std::move(col.column); } } /// Some functions like ignore() or getTypeName() always return constant result even if arguments are not constant. 
/// We can't do constant folding, but can specify in sample block that function result is constant to avoid /// unnecessary materialization. - auto & res = sample_block.getByPosition(result_position); - if (!res.column && function_base->isSuitableForConstantFolding()) + if (!node.column && node.function_base->isSuitableForConstantFolding()) { - if (auto col = function_base->getResultIfAlwaysReturnsConstantAndHasArguments(sample_block, arguments)) + if (auto col = node.function_base->getResultIfAlwaysReturnsConstantAndHasArguments(sample_block, argument_numbers)) { - res.column = std::move(col); - names_not_for_constant_folding.insert(result_name); + node.column = std::move(col); + node.allow_constant_folding = false; } } - node.result_name = function->getName() + "("; - for (size_t i = 0 ; i < arguments.size(); ++i) + if (result_name.empty()) { - if (i) - node.result_name += ", "; - node.result_name += arguments[i]; + result_name = function->getName() + "("; + for (size_t i = 0; i < argument_names.size(); ++i) + { + if (i) + result_name += ", "; + result_name += argument_names[i]; + } + result_name += ")"; } - node.result_name += ")"; + + node.result_name = std::move(result_name); + + return addNode(std::move(node)); +} + +NamesAndTypesList ActionsDAG::getNamesAndTypesList() const +{ + NamesAndTypesList result; + for (const auto & node : nodes) + result.emplace_back(node.result_name, node.result_type); + + return result; +} + +std::string ActionsDAG::dumpNames() const +{ + WriteBufferFromOwnString out; + for (auto it = nodes.begin(); it != nodes.end(); ++it) + { + if (it != nodes.begin()) + out << ", "; + out << it->result_name; + } + return out.str(); +} + +ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) +{ + struct Data + { + Node * node = nullptr; + size_t num_created_children = 0; + std::vector parents; + }; + + std::vector data(nodes.size()); + std::unordered_map reverse_index; + + for (auto & node : nodes) + { + size_t id = 
reverse_index.size(); + data[id].node = &node; + reverse_index[&node] = id; + } + + std::stack ready_nodes; + std::stack ready_array_joins; + + for (auto & node : nodes) + { + for (const auto & child : node.children) + data[reverse_index[child]].parents.emplace_back(&node); + + if (node.children.empty()) + ready_nodes.emplace(&node); + } + + auto expressions = std::make_shared(NamesAndTypesList(), context); + + while (!ready_nodes.empty() || !ready_array_joins.empty()) + { + auto & stack = ready_nodes.empty() ? ready_array_joins : ready_nodes; + Node * node = stack.top(); + stack.pop(); + + Names argument_names; + for (const auto & child : node->children) + argument_names.emplace_back(child->result_name); + + switch (node->type) + { + case Type::INPUT: + expressions->addInput({node->column, node->result_type, node->result_name}); + break; + case Type::COLUMN: + expressions->add(ExpressionAction::addColumn({node->column, node->result_type, node->result_name})); + break; + case Type::ALIAS: + expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->result_name)); + break; + case Type::ARRAY_JOIN: + expressions->add(ExpressionAction::arrayJoin(argument_names.at(0), node->result_name)); + break; + case Type::FUNCTION: + expressions->add(ExpressionAction::applyFunction(node->function_builder, argument_names, node->result_name)); + break; + } + + for (const auto & parent : data[reverse_index[node]].parents) + { + auto & cur = data[reverse_index[parent]]; + ++cur.num_created_children; + + if (parent->children.size() == cur.num_created_children) + { + auto & push_stack = parent->type == Type::ARRAY_JOIN ? 
ready_array_joins : ready_nodes; + push_stack.push(parent); + } + } + } + + return expressions; } } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index e742a84719d..4af80fc504d 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -152,9 +152,9 @@ public: COLUMN, /// Another one name for column. ALIAS, - FUNCTION, /// Function arrayJoin. Specially separated because it changes the number of rows. ARRAY_JOIN, + FUNCTION, }; struct Node @@ -166,8 +166,17 @@ public: std::string result_name; DataTypePtr result_type; + FunctionOverloadResolverPtr function_builder; + /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. + FunctionBasePtr function_base; + /// Prepared function which is used in function execution. + ExecutableFunctionPtr function; + /// For COLUMN node and propagated constants. ColumnPtr column; + /// Some functions like `ignore()` always return constant but can't be replaced by constant it. + /// We calculate such constants in order to avoid unnecessary materialization, but prohibit it's folding. 
+ bool allow_constant_folding = true; }; using Index = std::unordered_map; @@ -180,14 +189,25 @@ public: ActionsDAG() = default; ActionsDAG(const ActionsDAG &) = delete; ActionsDAG & operator=(const ActionsDAG &) = delete; + ActionsDAG(const NamesAndTypesList & inputs); const std::list & getNodes() const; const Index & getIndex() const { return index; } + NamesAndTypesList getNamesAndTypesList() const; + std::string dumpNames() const; + const Node & addInput(std::string name, DataTypePtr type); + const Node & addColumn(ColumnWithTypeAndName column); const Node & addAlias(const std::string & name, std::string alias); const Node & addArrayJoin(const std::string & source_name, std::string result_name); - const Node & addFunction(const FunctionOverloadResolverPtr & function, const Names & arguments); + const Node & addFunction( + const FunctionOverloadResolverPtr & function, + const Names & argument_names, + std::string result_name, + bool compile_expressions); + + ExpressionActionsPtr buildExpressions(const Context & context); private: Node & addNode(Node node); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d9fc44d9b8f..0b7be0b1ea4 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -153,7 +153,7 @@ void ExpressionAnalyzer::analyzeAggregation() auto * select_query = query->as(); - ExpressionActionsPtr temp_actions = std::make_shared(sourceColumns(), context); + auto temp_actions = std::make_shared(sourceColumns()); if (select_query) { @@ -362,12 +362,11 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } else { - ExpressionActionsPtr temp_actions = std::make_shared(columns_after_join, context); + auto temp_actions = std::make_shared(columns_after_join); getRootActions(left_in_operand, true, temp_actions); - Block sample_block_with_calculated_columns = temp_actions->getSampleBlock(); - if 
(sample_block_with_calculated_columns.has(left_in_operand->getColumnName())) - makeExplicitSet(func, sample_block_with_calculated_columns, true, context, + if (temp_actions->getIndex().count(left_in_operand->getColumnName()) != 0) + makeExplicitSet(func, temp_actions->getIndex(), true, context, settings.size_limits_for_set, prepared_sets); } } @@ -375,25 +374,25 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth, - sourceColumns(), actions, prepared_sets, subqueries_for_sets, + sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, no_subqueries, false, only_consts, !isRemoteStorage()); ActionsVisitor(visitor_data, log.stream()).visit(ast); - visitor_data.updateActions(actions); + actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth, - sourceColumns(), actions, prepared_sets, subqueries_for_sets, + sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, no_subqueries, true, only_consts, !isRemoteStorage()); ActionsVisitor(visitor_data, log.stream()).visit(ast); - visitor_data.updateActions(actions); + visitor_data.getActions(); } @@ -443,14 +442,14 @@ const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() cons } /// "Big" ARRAY JOIN. 
-ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const +ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool array_join_is_left) const { NameSet result_columns; for (const auto & result_source : syntax->array_join_result_to_source) { /// Assign new names to columns, if needed. if (result_source.first != result_source.second) - actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first)); + actions->addAlias(result_source.second, result_source.first); /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names. result_columns.insert(result_source.first); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 1cc9d75b19f..458ba9b6770 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -37,6 +37,9 @@ using StorageMetadataPtr = std::shared_ptr; class ArrayJoinAction; using ArrayJoinActionPtr = std::shared_ptr; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; + /// Create columns in block or return false if not possible bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false); @@ -137,15 +140,15 @@ protected: /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. void initGlobalSubqueriesAndExternalTables(bool do_global); - ArrayJoinActionPtr addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const; + ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. 
It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. */ - void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, From 1cae264eae3a18a1c8ed06b0a3fd4ed200e59caf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Sep 2020 21:36:51 +0300 Subject: [PATCH 3/8] Add ExpressionDAG [Part 3]. --- src/Interpreters/ArrayJoinAction.cpp | 8 +++--- src/Interpreters/ArrayJoinAction.h | 2 +- src/Interpreters/ExpressionActions.cpp | 16 ++++++++++++ src/Interpreters/ExpressionActions.h | 2 ++ src/Interpreters/ExpressionAnalyzer.cpp | 33 +++++++++++++------------ src/Interpreters/ExpressionAnalyzer.h | 2 +- 6 files changed, 42 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp index 176dc8258ce..62064bf3292 100644 --- a/src/Interpreters/ArrayJoinAction.cpp +++ b/src/Interpreters/ArrayJoinAction.cpp @@ -35,11 +35,13 @@ ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool arr } -void ArrayJoinAction::prepare(Block & sample_block) +void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const { - for (const auto & name : columns) + for (auto & current : sample) { - ColumnWithTypeAndName & current = sample_block.getByName(name); + if (columns.count(current.name) == 0) + continue; + const DataTypeArray * array_type = typeid_cast(&*current.type); if (!array_type) throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); diff --git a/src/Interpreters/ArrayJoinAction.h 
b/src/Interpreters/ArrayJoinAction.h index 9467e579e62..81f720f00ef 100644 --- a/src/Interpreters/ArrayJoinAction.h +++ b/src/Interpreters/ArrayJoinAction.h @@ -28,7 +28,7 @@ public: FunctionOverloadResolverPtr function_builder; ArrayJoinAction(const NameSet & array_joined_columns_, bool array_join_is_left, const Context & context); - void prepare(Block & sample_block); + void prepare(ColumnsWithTypeAndName & sample) const; void execute(Block & block); }; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 44b46e579f0..3772ec4fcf7 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1414,6 +1414,12 @@ ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) addInput(input.name, input.type); } +ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) +{ + for (const auto & input : inputs) + addInput(input.name, input.type); +} + ActionsDAG::Node & ActionsDAG::addNode(Node node) { if (index.count(node.result_name) != 0) @@ -1582,6 +1588,16 @@ const ActionsDAG::Node & ActionsDAG::addFunction( return addNode(std::move(node)); } +ColumnsWithTypeAndName ActionsDAG::getResultColumns() const +{ + ColumnsWithTypeAndName result; + result.reserve(nodes.size()); + for (const auto & node : nodes) + result.emplace_back(node.column, node.result_type, node.result_name); + + return result; +} + NamesAndTypesList ActionsDAG::getNamesAndTypesList() const { NamesAndTypesList result; diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 4af80fc504d..79107d3baa9 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -190,10 +190,12 @@ public: ActionsDAG(const ActionsDAG &) = delete; ActionsDAG & operator=(const ActionsDAG &) = delete; ActionsDAG(const NamesAndTypesList & inputs); + ActionsDAG(const ColumnsWithTypeAndName & inputs); const std::list & getNodes() const; const Index & getIndex() const { return 
index; } + ColumnsWithTypeAndName getResultColumns() const; NamesAndTypesList getNamesAndTypesList() const; std::string dumpNames() const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 0b7be0b1ea4..053d353bdfb 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -165,14 +165,14 @@ void ExpressionAnalyzer::analyzeAggregation() getRootActionsNoMakeSet(array_join_expression_list, true, temp_actions, false); if (auto array_join = addMultipleArrayJoinAction(temp_actions, is_array_join_left)) { - auto sample_block = temp_actions->getSampleBlock(); - array_join->prepare(sample_block); - temp_actions = std::make_shared(sample_block.getColumnsWithTypeAndName(), context); + auto sample_columns = temp_actions->getResultColumns(); + array_join->prepare(sample_columns); + temp_actions = std::make_shared(sample_columns); } - for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList()) + for (auto & column : temp_actions->getResultColumns()) if (syntax->array_join_result_to_source.count(column.name)) - array_join_columns.emplace_back(column); + array_join_columns.emplace_back(column.name, column.type); } columns_after_array_join = sourceColumns(); @@ -182,9 +182,9 @@ void ExpressionAnalyzer::analyzeAggregation() if (join) { getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, temp_actions, false); - auto sample_columns = temp_actions->getSampleBlock().getColumnsWithTypeAndName(); + auto sample_columns = temp_actions->getResultColumns(); analyzedJoin().addJoinedColumnsAndCorrectNullability(sample_columns); - temp_actions = std::make_shared(sample_columns, context); + temp_actions = std::make_shared(sample_columns); } columns_after_join = columns_after_array_join; @@ -212,15 +212,16 @@ void ExpressionAnalyzer::analyzeAggregation() getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); - 
const auto & block = temp_actions->getSampleBlock(); + const auto & index = temp_actions->getIndex(); - if (!block.has(column_name)) + auto it = index.find(column_name); + if (it == index.end()) throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); - const auto & col = block.getByName(column_name); + const auto & node = it->second; /// Constant expressions have non-null column pointer at this stage. - if (col.column && isColumnConst(*col.column)) + if (node->column && isColumnConst(*node->column)) { /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. if (!aggregate_descriptions.empty() || size > 1) @@ -235,7 +236,7 @@ void ExpressionAnalyzer::analyzeAggregation() } } - NameAndTypePair key{column_name, col.type}; + NameAndTypePair key{column_name, node->result_type}; /// Aggregation keys are uniqued. if (!unique_keys.count(key.name)) @@ -256,14 +257,14 @@ void ExpressionAnalyzer::analyzeAggregation() } } else - aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); + aggregated_columns = temp_actions->getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType()); } else { - aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); + aggregated_columns = temp_actions->getNamesAndTypesList(); } } @@ -396,7 +397,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_sub } -bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions) +bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) { for (const ASTFunction * node : aggregates()) { @@ -411,7 +412,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & action { getRootActionsNoMakeSet(arguments[i], true, actions); const std::string & name = arguments[i]->getColumnName(); - types[i] = 
actions->getSampleBlock().getByName(name).type; + types[i] = actions->getIndex().find(name)->second->result_type; aggregate.argument_names[i] = name; } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 458ba9b6770..bf4a4f564a4 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -156,7 +156,7 @@ protected: * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. */ void analyzeAggregation(); - bool makeAggregateDescriptions(ExpressionActionsPtr & actions); + bool makeAggregateDescriptions(ActionsDAGPtr & actions); const ASTSelectQuery * getSelectQuery() const; From b461f97e4a1ce54be2669a519c8fb9b2a75786a9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 11 Sep 2020 15:24:41 +0300 Subject: [PATCH 4/8] Add ExpressionDAG [Part 4]. --- src/Interpreters/ExpressionActions.cpp | 92 +++++++++++++++++------ src/Interpreters/ExpressionActions.h | 26 ++++--- src/Interpreters/ExpressionAnalyzer.cpp | 62 ++++++++------- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Interpreters/MutationsInterpreter.cpp | 10 +-- 5 files changed, 127 insertions(+), 65 deletions(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 3772ec4fcf7..16c01b8747a 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -1251,7 +1251,7 @@ void ExpressionActionsChain::addStep() throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); ColumnsWithTypeAndName columns = steps.back()->getResultColumns(); - steps.push_back(std::make_unique(std::make_shared(columns, context))); + steps.push_back(std::make_unique(std::make_shared(columns))); } void ExpressionActionsChain::finalize() @@ -1398,12 +1398,17 @@ void ExpressionActionsChain::JoinStep::finalize(const Names & required_output_) std::swap(result_columns, new_result_columns); } -ExpressionActionsPtr 
& ExpressionActionsChain::Step::actions() +ActionsDAGPtr & ExpressionActionsChain::Step::actions() { - return typeid_cast(this)->actions; + return typeid_cast(this)->actions_dag; } -const ExpressionActionsPtr & ExpressionActionsChain::Step::actions() const +const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const +{ + return typeid_cast(this)->actions_dag; +} + +ExpressionActionsPtr ExpressionActionsChain::Step::getExpression() const { return typeid_cast(this)->actions; } @@ -1420,13 +1425,18 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) addInput(input.name, input.type); } -ActionsDAG::Node & ActionsDAG::addNode(Node node) +ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) { - if (index.count(node.result_name) != 0) + auto it = index.find(node.result_name); + if (it != index.end() && !can_replace) throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); auto & res = nodes.emplace_back(std::move(node)); index[res.result_name] = &res; + + if (it != index.end()) + it->second->renaming_parent = &res; + return res; } @@ -1460,7 +1470,7 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) return addNode(std::move(node)); } -const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias) +const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace) { auto & child = getNode(name); @@ -1472,7 +1482,7 @@ const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::str node.allow_constant_folding = child.allow_constant_folding; node.children.emplace_back(&child); - return addNode(std::move(node)); + return addNode(std::move(node), can_replace); } const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) @@ -1591,9 +1601,10 @@ const ActionsDAG::Node & ActionsDAG::addFunction( ColumnsWithTypeAndName 
ActionsDAG::getResultColumns() const { ColumnsWithTypeAndName result; - result.reserve(nodes.size()); + result.reserve(index.size()); for (const auto & node : nodes) - result.emplace_back(node.column, node.result_type, node.result_name); + if (!node.renaming_parent) + result.emplace_back(node.column, node.result_type, node.result_name); return result; } @@ -1602,11 +1613,23 @@ NamesAndTypesList ActionsDAG::getNamesAndTypesList() const { NamesAndTypesList result; for (const auto & node : nodes) - result.emplace_back(node.result_name, node.result_type); + if (!node.renaming_parent) + result.emplace_back(node.result_name, node.result_type); return result; } +Names ActionsDAG::getNames() const +{ + Names names; + names.reserve(index.size()); + for (const auto & node : nodes) + if (!node.renaming_parent) + names.emplace_back(node.result_name); + + return names; +} + std::string ActionsDAG::dumpNames() const { WriteBufferFromOwnString out; @@ -1625,7 +1648,9 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) { Node * node = nullptr; size_t num_created_children = 0; + size_t num_expected_children = 0; std::vector parents; + Node * renamed_child = nullptr; }; std::vector data(nodes.size()); @@ -1643,13 +1668,38 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) for (auto & node : nodes) { + data[reverse_index[&node]].num_expected_children += node.children.size(); + for (const auto & child : node.children) data[reverse_index[child]].parents.emplace_back(&node); - if (node.children.empty()) + if (node.renaming_parent) + { + + auto & cur = data[reverse_index[node.renaming_parent]]; + cur.renamed_child = &node; + cur.num_expected_children += 1; + } + } + + for (auto & node : nodes) + { + if (node.children.empty() && data[reverse_index[&node]].renamed_child == nullptr) ready_nodes.emplace(&node); } + auto update_parent = [&](Node * parent) + { + auto & cur = data[reverse_index[parent]]; + ++cur.num_created_children; + + if 
(cur.num_created_children == cur.num_expected_children) + { + auto & push_stack = parent->type == Type::ARRAY_JOIN ? ready_array_joins : ready_nodes; + push_stack.push(parent); + } + }; + auto expressions = std::make_shared(NamesAndTypesList(), context); while (!ready_nodes.empty() || !ready_array_joins.empty()) @@ -1662,6 +1712,8 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) for (const auto & child : node->children) argument_names.emplace_back(child->result_name); + auto & cur = data[reverse_index[node]]; + switch (node->type) { case Type::INPUT: @@ -1671,7 +1723,7 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) expressions->add(ExpressionAction::addColumn({node->column, node->result_type, node->result_name})); break; case Type::ALIAS: - expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->result_name)); + expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->result_name, cur.renamed_child != nullptr)); break; case Type::ARRAY_JOIN: expressions->add(ExpressionAction::arrayJoin(argument_names.at(0), node->result_name)); @@ -1681,17 +1733,11 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) break; } - for (const auto & parent : data[reverse_index[node]].parents) - { - auto & cur = data[reverse_index[parent]]; - ++cur.num_created_children; + for (const auto & parent : cur.parents) + update_parent(parent); - if (parent->children.size() == cur.num_created_children) - { - auto & push_stack = parent->type == Type::ARRAY_JOIN ? 
ready_array_joins : ready_nodes; - push_stack.push(parent); - } - } + if (node->renaming_parent) + update_parent(node->renaming_parent); } return expressions; diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 79107d3baa9..e6e5c038ac3 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -160,6 +160,8 @@ public: struct Node { std::vector children; + /// This field is filled if current node is replaced by existing node with the same name. + Node * renaming_parent = nullptr; Type type; @@ -192,16 +194,16 @@ public: ActionsDAG(const NamesAndTypesList & inputs); ActionsDAG(const ColumnsWithTypeAndName & inputs); - const std::list & getNodes() const; const Index & getIndex() const { return index; } ColumnsWithTypeAndName getResultColumns() const; NamesAndTypesList getNamesAndTypesList() const; + Names getNames() const; std::string dumpNames() const; const Node & addInput(std::string name, DataTypePtr type); const Node & addColumn(ColumnWithTypeAndName column); - const Node & addAlias(const std::string & name, std::string alias); + const Node & addAlias(const std::string & name, std::string alias, bool can_replace); const Node & addArrayJoin(const std::string & source_name, std::string result_name); const Node & addFunction( const FunctionOverloadResolverPtr & function, @@ -212,10 +214,12 @@ public: ExpressionActionsPtr buildExpressions(const Context & context); private: - Node & addNode(Node node); + Node & addNode(Node node, bool can_replace = false); Node & getNode(const std::string & name); }; +using ActionsDAGPtr = std::shared_ptr; + /** Contains a sequence of actions on the block. 
*/ class ExpressionActions @@ -363,17 +367,19 @@ struct ExpressionActionsChain virtual std::string dump() const = 0; /// Only for ExpressionActionsStep - ExpressionActionsPtr & actions(); - const ExpressionActionsPtr & actions() const; + ActionsDAGPtr & actions(); + const ActionsDAGPtr & actions() const; + ExpressionActionsPtr getExpression() const; }; struct ExpressionActionsStep : public Step { + ActionsDAGPtr actions_dag; ExpressionActionsPtr actions; - explicit ExpressionActionsStep(ExpressionActionsPtr actions_, Names required_output_ = Names()) + explicit ExpressionActionsStep(ActionsDAGPtr actions_, Names required_output_ = Names()) : Step(std::move(required_output_)) - , actions(std::move(actions_)) + , actions_dag(std::move(actions_)) { } @@ -458,7 +464,9 @@ struct ExpressionActionsChain throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); } - return steps.back()->actions(); + auto * step = typeid_cast(&steps.back()); + step->actions = step->actions_dag->buildExpressions(context); + return step->actions; } Step & getLastStep() @@ -472,7 +480,7 @@ struct ExpressionActionsChain Step & lastStep(const NamesAndTypesList & columns) { if (steps.empty()) - steps.emplace_back(std::make_unique(std::make_shared(columns, context))); + steps.emplace_back(std::make_unique(std::make_shared(columns))); return *steps.back(); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 053d353bdfb..779c9ee7bf7 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -472,8 +472,8 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi getRootActions(array_join_expression_list, only_types, step.actions()); - before_array_join = chain.getLastActions(); auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); + before_array_join = chain.getLastActions(); chain.steps.push_back(std::make_unique( array_join, 
step.getResultColumns())); @@ -615,13 +615,14 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer return subquery_for_join.join; } -bool SelectQueryExpressionAnalyzer::appendPrewhere( +ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) { const auto * select_query = getSelectQuery(); + ExpressionActionsPtr prewhere_actions; if (!select_query->prewhere()) - return false; + return prewhere_actions; auto & step = chain.lastStep(sourceColumns()); getRootActions(select_query->prewhere(), only_types, step.actions()); @@ -629,15 +630,16 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( step.required_output.push_back(prewhere_column_name); step.can_remove_required_output.push_back(true); - auto filter_type = step.actions()->getSampleBlock().getByName(prewhere_column_name).type; + auto filter_type = step.actions()->getIndex().find(prewhere_column_name)->second->result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); { /// Remove unused source_columns from prewhere actions. 
- auto tmp_actions = std::make_shared(sourceColumns(), context); - getRootActions(select_query->prewhere(), only_types, tmp_actions); + auto tmp_actions_dag = std::make_shared(sourceColumns()); + getRootActions(select_query->prewhere(), only_types, tmp_actions_dag); + auto tmp_actions = tmp_actions_dag->buildExpressions(context); tmp_actions->finalize({prewhere_column_name}); auto required_columns = tmp_actions->getRequiredColumns(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); @@ -653,7 +655,7 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( } } - auto names = step.actions()->getSampleBlock().getNames(); + auto names = step.actions()->getNames(); NameSet name_set(names.begin(), names.end()); for (const auto & column : sourceColumns()) @@ -661,7 +663,8 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( name_set.erase(column.name); Names required_output(name_set.begin(), name_set.end()); - step.actions()->finalize(required_output); + prewhere_actions = chain.getLastActions(); + prewhere_actions->finalize(required_output); } { @@ -672,8 +675,8 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( /// 2. Store side columns which were calculated during prewhere actions execution if they are used. /// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step. /// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN. 
- ColumnsWithTypeAndName columns = step.actions()->getSampleBlock().getColumnsWithTypeAndName(); - auto required_columns = step.actions()->getRequiredColumns(); + ColumnsWithTypeAndName columns = prewhere_actions->getSampleBlock().getColumnsWithTypeAndName(); + auto required_columns = prewhere_actions->getRequiredColumns(); NameSet prewhere_input_names(required_columns.begin(), required_columns.end()); NameSet unused_source_columns; @@ -687,11 +690,13 @@ bool SelectQueryExpressionAnalyzer::appendPrewhere( } chain.steps.emplace_back(std::make_unique( - std::make_shared(std::move(columns), context))); + std::make_shared(std::move(columns)))); chain.steps.back()->additional_input = std::move(unused_source_columns); + chain.getLastActions(); + chain.addStep(); } - return true; + return prewhere_actions; } void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name) @@ -699,7 +704,8 @@ void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsCha ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); // FIXME: assert(filter_info); - step.actions() = std::move(actions); + auto * expression_step = typeid_cast(&step); + expression_step->actions = std::move(actions); step.required_output.push_back(std::move(column_name)); step.can_remove_required_output = {true}; @@ -721,7 +727,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); - auto filter_type = step.actions()->getSampleBlock().getByName(where_column_name).type; + auto filter_type = step.actions()->getIndex().find(where_column_name)->second->result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); @@ -750,8 +756,9 @@ bool 
SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { for (auto & child : asts) { - group_by_elements_actions.emplace_back(std::make_shared(columns_after_join, context)); - getRootActions(child, only_types, group_by_elements_actions.back()); + auto actions_dag = std::make_shared(columns_after_join); + getRootActions(child, only_types, actions_dag); + group_by_elements_actions.emplace_back(actions_dag->buildExpressions(context)); } } @@ -838,8 +845,9 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain { for (auto & child : select_query->orderBy()->children) { - order_by_elements_actions.emplace_back(std::make_shared(columns_after_join, context)); - getRootActions(child, only_types, order_by_elements_actions.back()); + auto actions_dag = std::make_shared(columns_after_join); + getRootActions(child, only_types, actions_dag); + order_by_elements_actions.emplace_back(actions_dag->buildExpressions(context)); } } return true; @@ -919,7 +927,7 @@ void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & } } - step.actions()->add(ExpressionAction::project(result_columns)); + chain.getLastActions()->add(ExpressionAction::project(result_columns)); } @@ -933,7 +941,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) { - ExpressionActionsPtr actions = std::make_shared(aggregated_columns, context); + auto actions_dag = std::make_shared(aggregated_columns); NamesWithAliases result_columns; Names result_names; @@ -954,9 +962,11 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje alias = name; result_columns.emplace_back(name, alias); result_names.push_back(alias); - getRootActions(ast, false, actions); + getRootActions(ast, false, actions_dag); } + auto actions = actions_dag->buildExpressions(context); + if (add_aliases) { if (project_result) @@ 
-980,10 +990,10 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje ExpressionActionsPtr ExpressionAnalyzer::getConstActions() { - ExpressionActionsPtr actions = std::make_shared(NamesAndTypesList(), context); + auto actions = std::make_shared(NamesAndTypesList()); getRootActions(query, true, actions, true); - return actions; + return actions->buildExpressions(context); } ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions() @@ -1064,10 +1074,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( query_analyzer.appendPreliminaryFilter(chain, filter_info->actions, filter_info->column_name); } - if (query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere)) + if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere)) { - prewhere_info = std::make_shared( - chain.steps.front()->actions(), query.prewhere()->getColumnName()); + prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) { @@ -1081,7 +1090,6 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column); } } - chain.addStep(); } array_join = query_analyzer.appendArrayJoin(chain, before_array_join, only_types || !first_stage); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index bf4a4f564a4..7728cd9e6ea 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -319,7 +319,7 @@ private: void appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). 
- bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); + ExpressionActionsPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 089e3d1c23f..2639c94a9ca 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -612,8 +612,8 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & for (const auto & kv : stage.column_to_updated) { - actions_chain.getLastActions()->add(ExpressionAction::copyColumn( - kv.second->getColumnName(), kv.first, /* can_replace = */ true)); + actions_chain.getLastStep().actions()->addAlias( + kv.second->getColumnName(), kv.first, /* can_replace = */ true); } } @@ -624,7 +624,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & actions_chain.finalize(); /// Propagate information about columns needed as input. - for (const auto & column : actions_chain.steps.front()->actions()->getRequiredColumnsWithTypes()) + for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) prepared_stages[i - 1].output_columns.insert(column.name); } @@ -670,7 +670,7 @@ void MutationsInterpreter::addStreamsForLaterStages(const std::vector & p /// Execute DELETEs. 
pipeline.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, step->actions(), stage.filter_column_names[i], false); + return std::make_shared(header, step->getExpression(), stage.filter_column_names[i], false); }); } else @@ -678,7 +678,7 @@ void MutationsInterpreter::addStreamsForLaterStages(const std::vector & p /// Execute UPDATE or final projection. pipeline.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, step->actions()); + return std::make_shared(header, step->getExpression()); }); } } From 4aed515e194f5df3b1104f8d7a2efc56a0055f9b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 13 Sep 2020 16:51:31 +0300 Subject: [PATCH 5/8] Add ExpressionDAG [Part 5]. --- src/Interpreters/ActionsVisitor.cpp | 79 ++++++++++++++--------- src/Interpreters/ActionsVisitor.h | 12 +++- src/Interpreters/ExpressionActions.cpp | 75 +++++++++++++++++---- src/Interpreters/ExpressionActions.h | 9 ++- src/Interpreters/ExpressionAnalyzer.cpp | 40 ++++++++---- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Interpreters/MutationsInterpreter.cpp | 1 + 7 files changed, 152 insertions(+), 66 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 75b29f61ae7..4b761225291 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -53,6 +53,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_ELEMENT_OF_SET; + extern const int BAD_ARGUMENTS; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -387,60 +388,81 @@ SetPtr makeExplicitSet( ScopeStack::ScopeStack(ActionsDAGPtr actions, const Context & context_) : context(context_) { - stack.emplace_back(std::move(actions)); + auto & level = stack.emplace_back(); + level.actions = std::move(actions); + + for (const auto & [name, node] : level.actions->getIndex()) + if 
(node->type == ActionsDAG::Type::INPUT) + level.inputs.emplace(name); } void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) { - auto & actions = stack.emplace_back(std::make_shared()); + auto & level = stack.emplace_back(); + level.actions = std::make_shared(); const auto & prev = stack[stack.size() - 2]; for (const auto & input_column : input_columns) - actions->addInput(input_column.name, input_column.type); + { + level.actions->addInput(input_column.name, input_column.type); + level.inputs.emplace(input_column.name); + } - const auto & index = actions->getIndex(); + const auto & index = level.actions->getIndex(); - for (const auto & [name, node] : prev->getIndex()) + for (const auto & [name, node] : prev.actions->getIndex()) { if (index.count(name) == 0) - actions->addInput(node->result_name, node->result_type); + level.actions->addInput({node->column, node->result_type, node->result_name}); } } size_t ScopeStack::getColumnLevel(const std::string & name) { for (int i = static_cast(stack.size()) - 1; i >= 0; --i) - if (stack[i]->getIndex().count(name)) + { + if (stack[i].inputs.count(name)) return i; + const auto & index = stack[i].actions->getIndex(); + auto it = index.find(name); + + if (it != index.end() && it->second->type != ActionsDAG::Type::INPUT) + return i; + } + throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); } void ScopeStack::addColumn(ColumnWithTypeAndName column) { - auto level = getColumnLevel(column.name); - const auto & node = stack[level]->addColumn(std::move(column)); + const auto & node = stack[0].actions->addColumn(std::move(column)); - for (size_t j = level + 1; j < stack.size(); ++j) - stack[j]->addInput(node.result_name, node.result_type); + for (size_t j = 1; j < stack.size(); ++j) + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); } void ScopeStack::addAlias(const std::string & name, std::string alias) { - auto level = getColumnLevel(name); - const auto & node 
= stack[level]->addAlias(name, std::move(alias)); + auto level = getColumnLevel(name); + const auto & node = stack[level].actions->addAlias(name, std::move(alias)); for (size_t j = level + 1; j < stack.size(); ++j) - stack[j]->addInput(node.result_name, node.result_type); + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); } -void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name) +void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name) { - auto level = getColumnLevel(source_name); - const auto & node = stack[level]->addAlias(source_name, std::move(result_name)); + getColumnLevel(source_name); - for (size_t j = level + 1; j < stack.size(); ++j) - stack[j]->addInput(node.result_name, node.result_type); + if (stack.front().actions->getIndex().count(source_name) == 0) + throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name, + ErrorCodes::BAD_ARGUMENTS); + + const auto & node = stack.front().actions->addArrayJoin(source_name, std::move(result_name), std::move(unique_column_name)); + + for (size_t j = 1; j < stack.size(); ++j) + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); } void ScopeStack::addFunction( @@ -453,27 +475,27 @@ void ScopeStack::addFunction( for (const auto & argument : argument_names) level = std::max(level, getColumnLevel(argument)); - const auto & node = stack[level]->addFunction(function, argument_names, std::move(result_name), compile_expressions); + const auto & node = stack[level].actions->addFunction(function, argument_names, std::move(result_name), compile_expressions); for (size_t j = level + 1; j < stack.size(); ++j) - stack[j]->addInput(node.result_name, node.result_type); + stack[j].actions->addInput({node.column, node.result_type, node.result_name}); } ActionsDAGPtr ScopeStack::popLevel() { auto res = std::move(stack.back()); stack.pop_back(); - 
return res; + return res.actions; } std::string ScopeStack::dumpNames() const { - return stack.back()->dumpNames(); + return stack.back().actions->dumpNames(); } const ActionsDAG::Index & ScopeStack::getIndex() const { - return stack.back()->getIndex(); + return stack.back().actions->getIndex(); } struct CachedColumnName @@ -560,14 +582,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (!data.only_consts) { String result_name = column_name.get(ast); - /// Here we copy argument because arrayJoin removes source column. - /// It makes possible to remove source column before arrayJoin if it won't be needed anymore. - - /// It could have been possible to implement arrayJoin which keeps source column, - /// but in this case it will always be replicated (as many arrays), which is expensive. - String tmp_name = data.getUniqueName("_array_join_" + arg->getColumnName()); - data.addAlias(arg->getColumnName(), tmp_name); - data.addArrayJoin(tmp_name, result_name); + data.addArrayJoin(arg->getColumnName(), result_name); } return; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 109d547ed55..da8ee25a2f4 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -34,7 +34,13 @@ SetPtr makeExplicitSet( */ struct ScopeStack { - using Levels = std::vector; + struct Level + { + ActionsDAGPtr actions; + NameSet inputs; + }; + + using Levels = std::vector; Levels stack; @@ -48,7 +54,7 @@ struct ScopeStack void addColumn(ColumnWithTypeAndName column); void addAlias(const std::string & name, std::string alias); - void addArrayJoin(const std::string & source_name, std::string result_name); + void addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name); void addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, @@ -130,7 +136,7 @@ public: void addArrayJoin(const std::string & source_name, std::string 
result_name) { - actions_stack.addArrayJoin(source_name, std::move(result_name)); + actions_stack.addArrayJoin(source_name, std::move(result_name), getUniqueName("_array_join_" + source_name)); } void addFunction(const FunctionOverloadResolverPtr & function, diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 16c01b8747a..9319f6db854 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #if !defined(ARCADIA_BUILD) # include "config_core.h" @@ -188,7 +188,8 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings, size_t result_position = sample_block.columns(); sample_block.insert({nullptr, result_type, result_name}); - function = function_base->prepare(sample_block, arguments, result_position); + if (!function) + function = function_base->prepare(sample_block, arguments, result_position); function->createLowCardinalityResultCache(settings.max_threads); bool compile_expressions = false; @@ -200,7 +201,10 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings, /// so we don't want to unfold non deterministic functions if (all_const && function_base->isSuitableForConstantFolding() && (!compile_expressions || function_base->isDeterministic())) { - function->execute(sample_block, arguments, result_position, sample_block.rows(), true); + if (added_column) + sample_block.getByPosition(result_position).column = added_column; + else + function->execute(sample_block, arguments, result_position, sample_block.rows(), true); /// If the result is not a constant, just in case, we will consider the result as unknown. 
ColumnWithTypeAndName & col = sample_block.safeGetByPosition(result_position); @@ -588,8 +592,11 @@ void ExpressionActions::addImpl(ExpressionAction action, Names & new_names) arguments[i] = sample_block.getByName(action.argument_names[i]); } - action.function_base = action.function_builder->build(arguments); - action.result_type = action.function_base->getReturnType(); + if (!action.function_base) + { + action.function_base = action.function_builder->build(arguments); + action.result_type = action.function_base->getReturnType(); + } } if (action.type == ExpressionAction::ADD_ALIASES) @@ -1250,6 +1257,12 @@ void ExpressionActionsChain::addStep() if (steps.empty()) throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + if (auto * step = typeid_cast(steps.back().get())) + { + if (!step->actions) + step->actions = step->actions_dag->buildExpressions(context); + } + ColumnsWithTypeAndName columns = steps.back()->getResultColumns(); steps.push_back(std::make_unique(std::make_shared(columns))); } @@ -1422,7 +1435,7 @@ ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) { for (const auto & input : inputs) - addInput(input.name, input.type); + addInput(input); } ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) @@ -1432,11 +1445,11 @@ ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); auto & res = nodes.emplace_back(std::move(node)); - index[res.result_name] = &res; if (it != index.end()) it->second->renaming_parent = &res; + index[res.result_name] = &res; return res; } @@ -1459,8 +1472,22 @@ const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type return addNode(std::move(node)); } +const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) +{ + Node node; + node.type = Type::INPUT; + 
node.result_type = std::move(column.type); + node.result_name = std::move(column.name); + node.column = std::move(column.column); + + return addNode(std::move(node)); +} + const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) { + if (!column.column) + throw Exception("Cannot add column " + column.name + " because it is nullptr", ErrorCodes::LOGICAL_ERROR); + Node node; node.type = Type::COLUMN; node.result_type = std::move(column.type); @@ -1485,7 +1512,8 @@ const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::str return addNode(std::move(node), can_replace); } -const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) +const ActionsDAG::Node & ActionsDAG::addArrayJoin( + const std::string & source_name, std::string result_name, std::string unique_column_name) { auto & child = getNode(source_name); @@ -1497,6 +1525,7 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_nam node.type = Type::ARRAY_JOIN; node.result_type = array_type->getNestedType(); node.result_name = std::move(result_name); + node.unique_column_name_for_array_join = std::move(unique_column_name); node.children.emplace_back(&child); return addNode(std::move(node)); @@ -1506,7 +1535,7 @@ const ActionsDAG::Node & ActionsDAG::addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name, - bool compile_expressions) + bool compile_expressions [[maybe_unused]]) { size_t num_arguments = argument_names.size(); @@ -1663,8 +1692,8 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) reverse_index[&node] = id; } - std::stack ready_nodes; - std::stack ready_array_joins; + std::queue ready_nodes; + std::queue ready_array_joins; for (auto & node : nodes) { @@ -1705,7 +1734,7 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) while (!ready_nodes.empty() || !ready_array_joins.empty()) { 
auto & stack = ready_nodes.empty() ? ready_array_joins : ready_nodes; - Node * node = stack.top(); + Node * node = stack.front(); stack.pop(); Names argument_names; @@ -1726,11 +1755,29 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->result_name, cur.renamed_child != nullptr)); break; case Type::ARRAY_JOIN: - expressions->add(ExpressionAction::arrayJoin(argument_names.at(0), node->result_name)); + /// Here we copy argument because arrayJoin removes source column. + /// It makes possible to remove source column before arrayJoin if it won't be needed anymore. + + /// It could have been possible to implement arrayJoin which keeps source column, + /// but in this case it will always be replicated (as many arrays), which is expensive. + expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->unique_column_name_for_array_join)); + expressions->add(ExpressionAction::arrayJoin(node->unique_column_name_for_array_join, node->result_name)); break; case Type::FUNCTION: - expressions->add(ExpressionAction::applyFunction(node->function_builder, argument_names, node->result_name)); + { + ExpressionAction action; + action.type = ExpressionAction::APPLY_FUNCTION; + action.result_name = node->result_name; + action.result_type = node->result_type; + action.function_builder = node->function_builder; + action.function_base = node->function_base; + action.function = node->function; + action.argument_names = std::move(argument_names); + action.added_column = node->column; + + expressions->add(action); break; + } } for (const auto & parent : cur.parents) diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index e6e5c038ac3..b35f8972c97 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -168,6 +168,8 @@ public: std::string result_name; DataTypePtr result_type; + std::string 
unique_column_name_for_array_join; + FunctionOverloadResolverPtr function_builder; /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. FunctionBasePtr function_base; @@ -202,9 +204,10 @@ public: std::string dumpNames() const; const Node & addInput(std::string name, DataTypePtr type); + const Node & addInput(ColumnWithTypeAndName column); const Node & addColumn(ColumnWithTypeAndName column); - const Node & addAlias(const std::string & name, std::string alias, bool can_replace); - const Node & addArrayJoin(const std::string & source_name, std::string result_name); + const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); + const Node & addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name); const Node & addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, @@ -464,7 +467,7 @@ struct ExpressionActionsChain throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); } - auto * step = typeid_cast(&steps.back()); + auto * step = typeid_cast(steps.back().get()); step->actions = step->actions_dag->buildExpressions(context); return step->actions; } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 779c9ee7bf7..a4337f7b12c 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -158,24 +158,37 @@ void ExpressionAnalyzer::analyzeAggregation() if (select_query) { NamesAndTypesList array_join_columns; + columns_after_array_join = sourceColumns(); bool is_array_join_left; if (ASTPtr array_join_expression_list = select_query->arrayJoinExpressionList(is_array_join_left)) { getRootActionsNoMakeSet(array_join_expression_list, true, temp_actions, false); - if (auto array_join = addMultipleArrayJoinAction(temp_actions, is_array_join_left)) - { - auto sample_columns = 
temp_actions->getResultColumns(); - array_join->prepare(sample_columns); - temp_actions = std::make_shared(sample_columns); - } + + auto array_join = addMultipleArrayJoinAction(temp_actions, is_array_join_left); + auto sample_columns = temp_actions->getResultColumns(); + array_join->prepare(sample_columns); + temp_actions = std::make_shared(sample_columns); + + NamesAndTypesList new_columns_after_array_join; + NameSet added_columns; for (auto & column : temp_actions->getResultColumns()) + { if (syntax->array_join_result_to_source.count(column.name)) - array_join_columns.emplace_back(column.name, column.type); + { + new_columns_after_array_join.emplace_back(column.name, column.type); + added_columns.emplace(column.name); + } + } + + for (auto & column : columns_after_array_join) + if (added_columns.count(column.name) == 0) + new_columns_after_array_join.emplace_back(column.name, column.type); + + columns_after_array_join.swap(new_columns_after_array_join); } - columns_after_array_join = sourceColumns(); columns_after_array_join.insert(columns_after_array_join.end(), array_join_columns.begin(), array_join_columns.end()); const ASTTablesInSelectQueryElement * join = select_query->join(); @@ -393,7 +406,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_sub sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, no_subqueries, true, only_consts, !isRemoteStorage()); ActionsVisitor(visitor_data, log.stream()).visit(ast); - visitor_data.getActions(); + actions = visitor_data.getActions(); } @@ -881,7 +894,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain return true; } -void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const +ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const { const auto * select_query = getSelectQuery(); @@ -927,7 +940,9 @@ void 
SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & } } - chain.getLastActions()->add(ExpressionAction::project(result_columns)); + auto actions = chain.getLastActions(); + actions->add(ExpressionAction::project(result_columns)); + return actions; } @@ -1175,8 +1190,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.addStep(); } - query_analyzer.appendProjectResult(chain); - final_projection = chain.getLastActions(); + final_projection = query_analyzer.appendProjectResult(chain); finalize_chain(chain); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 7728cd9e6ea..825bf858713 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -269,7 +269,7 @@ public: /// These appends are public only for tests void appendSelect(ExpressionActionsChain & chain, bool only_types); /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. - void appendProjectResult(ExpressionActionsChain & chain) const; + ExpressionActionsPtr appendProjectResult(ExpressionActionsChain & chain) const; private: StorageMetadataPtr metadata_snapshot; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 2639c94a9ca..06738ce6ff6 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -620,6 +620,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// Remove all intermediate columns. actions_chain.addStep(); actions_chain.getLastStep().required_output.assign(stage.output_columns.begin(), stage.output_columns.end()); + actions_chain.getLastActions(); actions_chain.finalize(); From 782823de4d7b30cb2529a6fcd8f6eb8ffc3e7675 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 30 Sep 2020 14:45:47 +0300 Subject: [PATCH 6/8] Fix build. 
--- src/Interpreters/ActionsVisitor.cpp | 20 ++++++++++++-------- src/Interpreters/ActionsVisitor.h | 4 ++-- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 741a380e441..4af17658b6d 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -329,7 +329,7 @@ Block createBlockForSet( } SetPtr makeExplicitSet( - const ASTFunction * node, const ActionsDAG::Index & index, bool create_ordered_set, + const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, const Context & context, const SizeLimits & size_limits, PreparedSets & prepared_sets) { const IAST & args = *node->arguments; @@ -340,6 +340,7 @@ SetPtr makeExplicitSet( const ASTPtr & left_arg = args.children.at(0); const ASTPtr & right_arg = args.children.at(1); + const auto & index = actions.getIndex(); auto it = index.find(left_arg->getColumnName()); if (it == index.end()) throw Exception("Unknown identifier: '" + left_arg->getColumnName() + "'", ErrorCodes::UNKNOWN_IDENTIFIER); @@ -408,8 +409,10 @@ void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) size_t ScopeStack::getColumnLevel(const std::string & name) { - for (int i = static_cast(stack.size()) - 1; i >= 0; --i) + for (size_t i = stack.size(); i > 0;) { + --i; + if (stack[i].inputs.count(name)) return i; @@ -482,9 +485,9 @@ std::string ScopeStack::dumpNames() const return stack.back().actions->dumpNames(); } -const ActionsDAG::Index & ScopeStack::getIndex() const +const ActionsDAG & ScopeStack::getLastActions() const { - return stack.back().actions->getIndex(); + return *stack.back().actions; } struct CachedColumnName @@ -707,7 +710,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & child_column_name = as_literal->unique_column_name; } - const auto & index = data.actions_stack.getIndex(); + const auto & index = 
data.actions_stack.getLastActions().getIndex(); auto it = index.find(child_column_name); if (it != index.end()) { @@ -813,7 +816,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, if (literal.unique_column_name.empty()) { const auto default_name = literal.getColumnName(); - const auto & index = data.actions_stack.getIndex(); + const auto & index = data.actions_stack.getLastActions().getIndex(); const ActionsDAG::Node * existing_column = nullptr; auto it = index.find(default_name); @@ -927,10 +930,11 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su } else { - const auto & index = data.actions_stack.getIndex(); + const auto & last_actions = data.actions_stack.getLastActions(); + const auto & index = last_actions.getIndex(); if (index.count(left_in_operand->getColumnName()) != 0) /// An explicit enumeration of values in parentheses. - return makeExplicitSet(&node, index, false, data.context, data.set_size_limit, data.prepared_sets); + return makeExplicitSet(&node, last_actions, false, data.context, data.set_size_limit, data.prepared_sets); else return {}; } diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index b6fc7039a48..4d91f9de01b 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -21,7 +21,7 @@ using ActionsDAGPtr = std::shared_ptr; /// The case of an explicit enumeration of values. SetPtr makeExplicitSet( - const ASTFunction * node, const ActionsDAG::Index & index, bool create_ordered_set, + const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set, const Context & context, const SizeLimits & limits, PreparedSets & prepared_sets); /** Create a block for set from expression. 
@@ -89,7 +89,7 @@ struct ScopeStack ActionsDAGPtr popLevel(); - const ActionsDAG::Index & getIndex() const; + const ActionsDAG & getLastActions() const; std::string dumpNames() const; }; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2778117c4eb..2f0dee58141 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -380,7 +380,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) getRootActions(left_in_operand, true, temp_actions); if (temp_actions->getIndex().count(left_in_operand->getColumnName()) != 0) - makeExplicitSet(func, temp_actions->getIndex(), true, context, + makeExplicitSet(func, *temp_actions, true, context, settings.size_limits_for_set, prepared_sets); } } From a1aca0471102f7537bcc2857dbb9377fe2e1a5dd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 30 Sep 2020 15:32:37 +0300 Subject: [PATCH 7/8] Fix build. --- src/Interpreters/ActionsVisitor.cpp | 26 ++++++++++++++++++++++++++ src/Interpreters/ActionsVisitor.h | 22 ++-------------------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 4af17658b6d..d41f1c0499f 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -375,6 +375,32 @@ SetPtr makeExplicitSet( return set; } +ActionsMatcher::Data::Data( + const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, + const NamesAndTypesList & source_columns_, ActionsDAGPtr actions, + PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, + bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_) + : context(context_) + , set_size_limit(set_size_limit_) + , subquery_depth(subquery_depth_) + , source_columns(source_columns_) + , prepared_sets(prepared_sets_) + , subqueries_for_sets(subqueries_for_sets_) + , 
no_subqueries(no_subqueries_) + , no_makeset(no_makeset_) + , only_consts(only_consts_) + , no_storage_or_local(no_storage_or_local_) + , visit_depth(0) + , actions_stack(std::move(actions), context) + , next_unique_suffix(actions_stack.getLastActions().getIndex().size() + 1) +{ +} + +bool ActionsMatcher::Data::hasColumn(const String & column_name) const +{ + return actions_stack.getLastActions().getIndex().count(column_name) != 0; +} + ScopeStack::ScopeStack(ActionsDAGPtr actions, const Context & context_) : context(context_) { diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 4d91f9de01b..ac53669c20f 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -128,28 +128,10 @@ public: Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, const NamesAndTypesList & source_columns_, ActionsDAGPtr actions, PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, - bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_) - : context(context_), - set_size_limit(set_size_limit_), - subquery_depth(subquery_depth_), - source_columns(source_columns_), - prepared_sets(prepared_sets_), - subqueries_for_sets(subqueries_for_sets_), - no_subqueries(no_subqueries_), - no_makeset(no_makeset_), - only_consts(only_consts_), - no_storage_or_local(no_storage_or_local_), - visit_depth(0), - actions_stack(std::move(actions), context), - next_unique_suffix(actions_stack.getIndex().size() + 1) - {} + bool no_subqueries_, bool no_makeset_, bool only_consts_, bool no_storage_or_local_); /// Does result of the calculation already exists in the block. 
- bool hasColumn(const String & column_name) const - { - return actions_stack.getIndex().count(column_name) != 0; - } - + bool hasColumn(const String & column_name) const; void addColumn(ColumnWithTypeAndName column) { actions_stack.addColumn(std::move(column)); From 0b1fe69ad779531cb0f799e577a630056a7b1f90 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 30 Sep 2020 16:31:06 +0300 Subject: [PATCH 8/8] Fix build. --- src/Interpreters/ActionsVisitor.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index ac53669c20f..f4da9932163 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -19,6 +19,9 @@ using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; class ActionsDAG; using ActionsDAGPtr = std::shared_ptr<ActionsDAG>; +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>; + /// The case of an explicit enumeration of values. SetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, bool create_ordered_set,