From 07a7c46b8979265cc0c240fa159586f2f80c0ca0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 3 Nov 2020 14:28:28 +0300 Subject: [PATCH] Refactor ExpressionActions [Part 3] --- src/Core/Block.cpp | 16 +- src/Core/Block.h | 3 + src/Core/iostream_debug_helpers.cpp | 10 +- src/Core/iostream_debug_helpers.h | 4 +- .../CheckConstraintsBlockOutputStream.cpp | 2 +- src/Functions/FunctionsMiscellaneous.h | 2 +- src/Interpreters/ActionsVisitor.cpp | 37 +- src/Interpreters/ActionsVisitor.h | 11 +- src/Interpreters/Aggregator.cpp | 32 +- src/Interpreters/ExpressionActions.cpp | 1847 +++++++---------- src/Interpreters/ExpressionActions.h | 312 +-- src/Interpreters/ExpressionAnalyzer.cpp | 144 +- src/Interpreters/ExpressionAnalyzer.h | 33 +- src/Interpreters/ExpressionJIT.cpp | 312 +-- src/Interpreters/ExpressionJIT.h | 2 +- src/Interpreters/HashJoin.cpp | 8 + src/Interpreters/InterpreterSelectQuery.cpp | 66 +- src/Interpreters/InterpreterSelectQuery.h | 16 +- src/Interpreters/MutationsInterpreter.cpp | 8 +- .../getHeaderForProcessingStage.cpp | 2 +- src/Processors/QueryPlan/ExpressionStep.cpp | 27 +- src/Processors/QueryPlan/ExpressionStep.h | 10 +- src/Processors/QueryPlan/FillingStep.cpp | 2 +- src/Processors/QueryPlan/FilterStep.cpp | 14 +- src/Processors/QueryPlan/FilterStep.h | 10 +- src/Processors/QueryPlan/QueryPlan.cpp | 2 +- .../QueryPlan/ReadFromStorageStep.cpp | 10 +- src/Processors/QueryPlan/TotalsHavingStep.cpp | 9 +- src/Processors/QueryPlan/TotalsHavingStep.h | 8 +- .../Transforms/ExpressionTransform.cpp | 9 +- .../Transforms/FillingTransform.cpp | 15 +- src/Processors/Transforms/FillingTransform.h | 2 + src/Processors/Transforms/FilterTransform.cpp | 8 +- .../Transforms/TotalsHavingTransform.cpp | 18 +- src/Storages/ColumnsDescription.cpp | 2 +- src/Storages/IndicesDescription.cpp | 4 +- src/Storages/KeyDescription.cpp | 4 +- src/Storages/MergeTree/KeyCondition.cpp | 18 +- .../MergeTreeBaseSelectProcessor.cpp | 35 +- 
.../MergeTree/MergeTreeBaseSelectProcessor.h | 2 + .../MergeTree/MergeTreeBlockReadUtils.cpp | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 22 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 27 +- .../MergeTree/MergeTreeDataSelectExecutor.h | 4 +- .../MergeTree/MergeTreeRangeReader.cpp | 45 +- src/Storages/MergeTree/MergeTreeRangeReader.h | 13 +- src/Storages/ReadInOrderOptimizer.cpp | 6 +- src/Storages/SelectQueryInfo.h | 13 +- src/Storages/StorageBuffer.cpp | 4 +- src/Storages/StorageDistributed.cpp | 4 +- src/Storages/TTLDescription.cpp | 16 +- ...ray_join_with_arrays_of_nullable.reference | 10 +- ...526_array_join_with_arrays_of_nullable.sql | 2 +- .../01508_explain_header.reference | 2 +- 54 files changed, 1570 insertions(+), 1678 deletions(-) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 07ec0810f96..cd2855739e2 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -40,7 +40,7 @@ Block::Block(const ColumnsWithTypeAndName & data_) : data{data_} void Block::initializeIndexByName() { for (size_t i = 0, size = data.size(); i < size; ++i) - index_by_name[data[i].name] = i; + index_by_name.emplace(data[i].name, i); } @@ -295,6 +295,20 @@ std::string Block::dumpStructure() const return out.str(); } +std::string Block::dumpIndex() const +{ + WriteBufferFromOwnString out; + bool first = true; + for (const auto & [name, pos] : index_by_name) + { + if (!first) + out << ", "; + first = false; + + out << name << ' ' << pos; + } + return out.str(); +} Block Block::cloneEmpty() const { diff --git a/src/Core/Block.h b/src/Core/Block.h index f55a75162c6..eef3c27363b 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -119,6 +119,9 @@ public: /** List of names, types and lengths of columns. Designed for debugging. */ std::string dumpStructure() const; + /** List of column names and positions from index */ + std::string dumpIndex() const; + /** Get the same block, but empty. 
*/ Block cloneEmpty() const; diff --git a/src/Core/iostream_debug_helpers.cpp b/src/Core/iostream_debug_helpers.cpp index a6fc329c8eb..f64d1a5ae2d 100644 --- a/src/Core/iostream_debug_helpers.cpp +++ b/src/Core/iostream_debug_helpers.cpp @@ -106,11 +106,11 @@ std::ostream & operator<<(std::ostream & stream, const Packet & what) return stream; } -std::ostream & operator<<(std::ostream & stream, const ExpressionAction & what) -{ - stream << "ExpressionAction(" << what.toString() << ")"; - return stream; -} +//std::ostream & operator<<(std::ostream & stream, const ExpressionAction & what) +//{ +// stream << "ExpressionAction(" << what.toString() << ")"; +// return stream; +//} std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what) { diff --git a/src/Core/iostream_debug_helpers.h b/src/Core/iostream_debug_helpers.h index ef195ed4abf..4005508e4b8 100644 --- a/src/Core/iostream_debug_helpers.h +++ b/src/Core/iostream_debug_helpers.h @@ -40,8 +40,8 @@ std::ostream & operator<<(std::ostream & stream, const IColumn & what); struct Packet; std::ostream & operator<<(std::ostream & stream, const Packet & what); -struct ExpressionAction; -std::ostream & operator<<(std::ostream & stream, const ExpressionAction & what); +//struct ExpressionAction; +//std::ostream & operator<<(std::ostream & stream, const ExpressionAction & what); class ExpressionActions; std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what); diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 7a67074dbdf..7c0253c6731 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -46,7 +46,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) auto * constraint_ptr = constraints.constraints[i]->as(); - ColumnWithTypeAndName res_column = block_to_calculate.getByPosition(block_to_calculate.columns() - 1); 
+ ColumnWithTypeAndName res_column = block_to_calculate.getByName(constraint_ptr->expr->getColumnName()); if (!isUInt8(res_column.type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Constraint {} does not return a value of type UInt8", diff --git a/src/Functions/FunctionsMiscellaneous.h b/src/Functions/FunctionsMiscellaneous.h index ce8a827b4cd..096f640b553 100644 --- a/src/Functions/FunctionsMiscellaneous.h +++ b/src/Functions/FunctionsMiscellaneous.h @@ -201,7 +201,7 @@ public: { /// Check that expression does not contain unusual actions that will break columnss structure. for (const auto & action : expression_actions->getActions()) - if (action.type == ExpressionAction::Type::ARRAY_JOIN) + if (action.node->type == ActionsDAG::Type::ARRAY_JOIN) throw Exception("Expression with arrayJoin or other unusual action cannot be captured", ErrorCodes::BAD_ARGUMENTS); std::unordered_map arguments_map; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index e0e921b003b..fe7ccc2c6ea 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -350,7 +350,7 @@ SetPtr makeExplicitSet( auto it = index.find(left_arg->getColumnName()); if (it == index.end()) throw Exception("Unknown identifier: '" + left_arg->getColumnName() + "'", ErrorCodes::UNKNOWN_IDENTIFIER); - const DataTypePtr & left_arg_type = it->second->result_type; + const DataTypePtr & left_arg_type = (*it)->result_type; DataTypes set_element_types = {left_arg_type}; const auto * left_tuple_type = typeid_cast(left_arg_type.get()); @@ -404,7 +404,7 @@ ActionsMatcher::Data::Data( bool ActionsMatcher::Data::hasColumn(const String & column_name) const { - return actions_stack.getLastActions().getIndex().count(column_name) != 0; + return actions_stack.getLastActions().getIndex().contains(column_name); } ScopeStack::ScopeStack(ActionsDAGPtr actions, const Context & context_) @@ -413,9 +413,9 @@ ScopeStack::ScopeStack(ActionsDAGPtr actions, const Context & 
context_) auto & level = stack.emplace_back(); level.actions = std::move(actions); - for (const auto & [name, node] : level.actions->getIndex()) + for (const auto & node : level.actions->getIndex()) if (node->type == ActionsDAG::Type::INPUT) - level.inputs.emplace(name); + level.inputs.emplace(node->result_name); } void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) @@ -432,9 +432,9 @@ void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) const auto & index = level.actions->getIndex(); - for (const auto & [name, node] : prev.actions->getIndex()) + for (const auto & node : prev.actions->getIndex()) { - if (index.count(name) == 0) + if (!index.contains(node->result_name)) level.actions->addInput({node->column, node->result_type, node->result_name}); } } @@ -451,7 +451,7 @@ size_t ScopeStack::getColumnLevel(const std::string & name) const auto & index = stack[i].actions->getIndex(); auto it = index.find(name); - if (it != index.end() && it->second->type != ActionsDAG::Type::INPUT) + if (it != index.end() && (*it)->type != ActionsDAG::Type::INPUT) return i; } @@ -475,15 +475,15 @@ void ScopeStack::addAlias(const std::string & name, std::string alias) stack[j].actions->addInput({node.column, node.result_type, node.result_name}); } -void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name) +void ScopeStack::addArrayJoin(const std::string & source_name, std::string result_name) { getColumnLevel(source_name); - if (stack.front().actions->getIndex().count(source_name) == 0) + if (!stack.front().actions->getIndex().contains(source_name)) throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name, ErrorCodes::BAD_ARGUMENTS); - const auto & node = stack.front().actions->addArrayJoin(source_name, std::move(result_name), std::move(unique_column_name)); + const auto & node = stack.front().actions->addArrayJoin(source_name, std::move(result_name)); for 
(size_t j = 1; j < stack.size(); ++j) stack[j].actions->addInput({node.column, node.result_type, node.result_name}); @@ -492,14 +492,13 @@ void ScopeStack::addArrayJoin(const std::string & source_name, std::string resul void ScopeStack::addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, - std::string result_name, - bool compile_expressions) + std::string result_name) { size_t level = 0; for (const auto & argument : argument_names) level = std::max(level, getColumnLevel(argument)); - const auto & node = stack[level].actions->addFunction(function, argument_names, std::move(result_name), compile_expressions); + const auto & node = stack[level].actions->addFunction(function, argument_names, std::move(result_name), context); for (size_t j = level + 1; j < stack.size(); ++j) stack[j].actions->addInput({node.column, node.result_type, node.result_name}); @@ -746,7 +745,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto it = index.find(child_column_name); if (it != index.end()) { - argument_types.push_back(it->second->result_type); + argument_types.push_back((*it)->result_type); argument_names.push_back(child_column_name); } else @@ -792,10 +791,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.actions_stack.pushLevel(lambda_arguments); visit(lambda->arguments->children.at(1), data); auto lambda_dag = data.actions_stack.popLevel(); - auto lambda_actions = lambda_dag->buildExpressions(data.context); String result_name = lambda->arguments->children.at(1)->getColumnName(); - lambda_actions->finalize(Names(1, result_name)); + lambda_dag->removeUnusedActions(Names(1, result_name)); + + auto lambda_actions = lambda_dag->buildExpressions(); + DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; Names captured; @@ -853,7 +854,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */, auto it = 
index.find(default_name); if (it != index.end()) - existing_column = it->second; + existing_column = *it; /* * To approximate CSE, bind all identical literals to a single temporary @@ -964,7 +965,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su { const auto & last_actions = data.actions_stack.getLastActions(); const auto & index = last_actions.getIndex(); - if (index.count(left_in_operand->getColumnName()) != 0) + if (index.contains(left_in_operand->getColumnName())) /// An explicit enumeration of values in parentheses. return makeExplicitSet(&node, last_actions, false, data.context, data.set_size_limit, data.prepared_sets); else diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index f4da9932163..2d53bfa6185 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -12,7 +12,6 @@ namespace DB class Context; class ASTFunction; -struct ExpressionAction; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; @@ -83,12 +82,11 @@ struct ScopeStack void addColumn(ColumnWithTypeAndName column); void addAlias(const std::string & name, std::string alias); - void addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name); + void addArrayJoin(const std::string & source_name, std::string result_name); void addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, - std::string result_name, - bool compile_expressions); + std::string result_name); ActionsDAGPtr popLevel(); @@ -147,15 +145,14 @@ public: void addArrayJoin(const std::string & source_name, std::string result_name) { - actions_stack.addArrayJoin(source_name, std::move(result_name), getUniqueName("_array_join_" + source_name)); + actions_stack.addArrayJoin(source_name, std::move(result_name)); } void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name) { - 
actions_stack.addFunction(function, argument_names, std::move(result_name), - context.getSettingsRef().compile_expressions); + actions_stack.addFunction(function, argument_names, std::move(result_name)); } ActionsDAGPtr getActions() diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 5b9169a878b..87abca4d7cd 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -112,7 +112,22 @@ Block Aggregator::Params::getHeader( { Block res; - if (src_header) + if (intermediate_header) + { + res = intermediate_header.cloneEmpty(); + + if (final) + { + for (const auto & aggregate : aggregates) + { + auto & elem = res.getByName(aggregate.column_name); + + elem.type = aggregate.function->getReturnType(); + elem.column = elem.type->createColumn(); + } + } + } + else { for (const auto & key : keys) res.insert(src_header.safeGetByPosition(key).cloneEmpty()); @@ -133,21 +148,6 @@ Block Aggregator::Params::getHeader( res.insert({ type, aggregate.column_name }); } } - else if (intermediate_header) - { - res = intermediate_header.cloneEmpty(); - - if (final) - { - for (const auto & aggregate : aggregates) - { - auto & elem = res.getByName(aggregate.column_name); - - elem.type = aggregate.function->getReturnType(); - elem.column = elem.type->createColumn(); - } - } - } return materializeBlock(res); } diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index ae6a355f31e..bb30e5b3cda 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) # include "config_core.h" @@ -44,537 +45,94 @@ namespace ErrorCodes } /// Read comment near usage -static constexpr auto DUMMY_COLUMN_NAME = "_dummy"; +/// static constexpr auto DUMMY_COLUMN_NAME = "_dummy"; - -Names ExpressionAction::getNeededColumns() const +static std::ostream & operator << (std::ostream & out, const 
ExpressionActions::Argument & argument) { - Names res = argument_names; - - if (table_join) - res.insert(res.end(), table_join->keyNamesLeft().begin(), table_join->keyNamesLeft().end()); - - for (const auto & column : projection) - res.push_back(column.first); - - if (!source_name.empty()) - res.push_back(source_name); - - return res; + return out << (argument.remove ? "*" : "") << argument.pos; } - -ExpressionAction ExpressionAction::applyFunction( - const FunctionOverloadResolverPtr & function_, - const std::vector & argument_names_, - std::string result_name_) +std::string ExpressionActions::Action::toString() const { - if (result_name_.empty()) + std::stringstream out; + switch (node->type) { - result_name_ = function_->getName() + "("; - for (size_t i = 0 ; i < argument_names_.size(); ++i) - { - if (i) - result_name_ += ", "; - result_name_ += argument_names_[i]; - } - result_name_ += ")"; - } - - ExpressionAction a; - a.type = APPLY_FUNCTION; - a.result_name = result_name_; - a.function_builder = function_; - a.argument_names = argument_names_; - return a; -} - -ExpressionAction ExpressionAction::addColumn( - const ColumnWithTypeAndName & added_column_) -{ - ExpressionAction a; - a.type = ADD_COLUMN; - a.result_name = added_column_.name; - a.result_type = added_column_.type; - a.added_column = added_column_.column; - return a; -} - -ExpressionAction ExpressionAction::removeColumn(const std::string & removed_name) -{ - ExpressionAction a; - a.type = REMOVE_COLUMN; - a.source_name = removed_name; - return a; -} - -ExpressionAction ExpressionAction::copyColumn(const std::string & from_name, const std::string & to_name, bool can_replace) -{ - ExpressionAction a; - a.type = COPY_COLUMN; - a.source_name = from_name; - a.result_name = to_name; - a.can_replace = can_replace; - return a; -} - -ExpressionAction ExpressionAction::project(const NamesWithAliases & projected_columns_) -{ - ExpressionAction a; - a.type = PROJECT; - a.projection = projected_columns_; - 
return a; -} - -ExpressionAction ExpressionAction::project(const Names & projected_columns_) -{ - ExpressionAction a; - a.type = PROJECT; - a.projection.resize(projected_columns_.size()); - for (size_t i = 0; i < projected_columns_.size(); ++i) - a.projection[i] = NameWithAlias(projected_columns_[i], ""); - return a; -} - -ExpressionAction ExpressionAction::addAliases(const NamesWithAliases & aliased_columns_) -{ - ExpressionAction a; - a.type = ADD_ALIASES; - a.projection = aliased_columns_; - return a; -} - -ExpressionAction ExpressionAction::arrayJoin(std::string source_name, std::string result_name) -{ - if (source_name == result_name) - throw Exception("ARRAY JOIN action should have different source and result names", ErrorCodes::LOGICAL_ERROR); - - ExpressionAction a; - a.type = ARRAY_JOIN; - a.source_name = std::move(source_name); - a.result_name = std::move(result_name); - return a; -} - - -void ExpressionAction::prepare(Block & sample_block, const Settings & settings, NameSet & names_not_for_constant_folding) -{ - // std::cerr << "preparing: " << toString() << std::endl; - - /** Constant expressions should be evaluated, and put the result in sample_block. 
- */ - - switch (type) - { - case APPLY_FUNCTION: - { - if (sample_block.has(result_name)) - throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - - bool all_const = true; - bool all_suitable_for_constant_folding = true; - - ColumnsWithTypeAndName arguments(argument_names.size()); - for (size_t i = 0; i < argument_names.size(); ++i) - { - arguments[i] = sample_block.getByName(argument_names[i]); - ColumnPtr col = arguments[i].column; - if (!col || !isColumnConst(*col)) - all_const = false; - - if (names_not_for_constant_folding.count(argument_names[i])) - all_suitable_for_constant_folding = false; - } - - size_t result_position = sample_block.columns(); - sample_block.insert({nullptr, result_type, result_name}); - if (!function) - function = function_base->prepare(arguments); - function->createLowCardinalityResultCache(settings.max_threads); - - bool compile_expressions = false; -#if USE_EMBEDDED_COMPILER - compile_expressions = settings.compile_expressions; -#endif - /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. - /// But if we compile expressions compiled version of this function maybe placed in cache, - /// so we don't want to unfold non deterministic functions - if (all_const && function_base->isSuitableForConstantFolding() && (!compile_expressions || function_base->isDeterministic())) - { - if (added_column) - sample_block.getByPosition(result_position).column = added_column; - else - sample_block.getByPosition(result_position).column = function->execute(arguments, result_type, sample_block.rows(), true); - - /// If the result is not a constant, just in case, we will consider the result as unknown. - ColumnWithTypeAndName & col = sample_block.safeGetByPosition(result_position); - if (!isColumnConst(*col.column)) - { - col.column = nullptr; - } - else - { - /// All constant (literal) columns in block are added with size 1. 
- /// But if there was no columns in block before executing a function, the result has size 0. - /// Change the size to 1. - - if (col.column->empty()) - col.column = col.column->cloneResized(1); - - if (!all_suitable_for_constant_folding) - names_not_for_constant_folding.insert(result_name); - } - } - - /// Some functions like ignore() or getTypeName() always return constant result even if arguments are not constant. - /// We can't do constant folding, but can specify in sample block that function result is constant to avoid - /// unnecessary materialization. - auto & res = sample_block.getByPosition(result_position); - if (!res.column && function_base->isSuitableForConstantFolding()) - { - if (auto col = function_base->getResultIfAlwaysReturnsConstantAndHasArguments(arguments)) - { - res.column = std::move(col); - names_not_for_constant_folding.insert(result_name); - } - } - - break; - } - - case ARRAY_JOIN: - { - ColumnWithTypeAndName current = sample_block.getByName(source_name); - sample_block.erase(source_name); - - const DataTypeArray * array_type = typeid_cast(&*current.type); - if (!array_type) - throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); - - current.name = result_name; - current.type = array_type->getNestedType(); - current.column = nullptr; /// Result is never const - sample_block.insert(std::move(current)); - - break; - } - - case PROJECT: - { - Block new_block; - - for (const auto & elem : projection) - { - const std::string & name = elem.first; - const std::string & alias = elem.second; - ColumnWithTypeAndName column = sample_block.getByName(name); - if (!alias.empty()) - column.name = alias; - new_block.insert(std::move(column)); - } - - sample_block.swap(new_block); - break; - } - - case ADD_ALIASES: - { - for (const auto & elem : projection) - { - const std::string & name = elem.first; - const std::string & alias = elem.second; - const ColumnWithTypeAndName & column = sample_block.getByName(name); - if 
(!alias.empty() && !sample_block.has(alias)) - sample_block.insert({column.column, column.type, alias}); - } - break; - } - - case REMOVE_COLUMN: - { - sample_block.erase(source_name); - break; - } - - case ADD_COLUMN: - { - if (sample_block.has(result_name)) - throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - - sample_block.insert(ColumnWithTypeAndName(added_column, result_type, result_name)); - break; - } - - case COPY_COLUMN: - { - const auto & source = sample_block.getByName(source_name); - result_type = source.type; - - if (sample_block.has(result_name)) - { - if (can_replace) - { - auto & result = sample_block.getByName(result_name); - result.type = result_type; - result.column = source.column; - } - else - throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - } - else - sample_block.insert(ColumnWithTypeAndName(source.column, result_type, result_name)); - - break; - } - } -} - -void ExpressionAction::execute(Block & block, bool dry_run) const -{ - size_t input_rows_count = block.rows(); - - if (type == REMOVE_COLUMN || type == COPY_COLUMN) - if (!block.has(source_name)) - throw Exception("Not found column '" + source_name + "'. 
There are columns: " + block.dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - - if (type == ADD_COLUMN || (type == COPY_COLUMN && !can_replace) || type == APPLY_FUNCTION) - if (block.has(result_name)) - throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - - switch (type) - { - case APPLY_FUNCTION: - { - ColumnsWithTypeAndName arguments(argument_names.size()); - for (size_t i = 0; i < argument_names.size(); ++i) - arguments[i] = block.getByName(argument_names[i]); - - size_t num_columns_without_result = block.columns(); - block.insert({ nullptr, result_type, result_name}); - - ProfileEvents::increment(ProfileEvents::FunctionExecute); - if (is_function_compiled) - ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute); - block.getByPosition(num_columns_without_result).column = function->execute(arguments, result_type, input_rows_count, dry_run); - - break; - } - - case ARRAY_JOIN: - { - auto source = block.getByName(source_name); - block.erase(source_name); - source.column = source.column->convertToFullColumnIfConst(); - - const ColumnArray * array = typeid_cast(source.column.get()); - if (!array) - throw Exception("ARRAY JOIN of not array: " + source_name, ErrorCodes::TYPE_MISMATCH); - - for (auto & column : block) - column.column = column.column->replicate(array->getOffsets()); - - source.column = array->getDataPtr(); - source.type = assert_cast(*source.type).getNestedType(); - source.name = result_name; - - block.insert(std::move(source)); - - break; - } - - case PROJECT: - { - Block new_block; - - for (const auto & elem : projection) - { - const std::string & name = elem.first; - const std::string & alias = elem.second; - ColumnWithTypeAndName column = block.getByName(name); - if (!alias.empty()) - column.name = alias; - new_block.insert(std::move(column)); - } - - block.swap(new_block); - - break; - } - - case ADD_ALIASES: - { - for (const auto & elem : projection) - { - const std::string & name = 
elem.first; - const std::string & alias = elem.second; - const ColumnWithTypeAndName & column = block.getByName(name); - if (!alias.empty() && !block.has(alias)) - block.insert({column.column, column.type, alias}); - } - break; - } - - case REMOVE_COLUMN: - block.erase(source_name); + case ActionsDAG::Type::COLUMN: + out << "COLUMN " + << (node->column ? node->column->getName() : "(no column)"); break; - case ADD_COLUMN: - block.insert({ added_column->cloneResized(input_rows_count), result_type, result_name }); + case ActionsDAG::Type::ALIAS: + out << "ALIAS " << node->children.front()->result_name << " " << arguments.front(); break; - case COPY_COLUMN: - if (can_replace && block.has(result_name)) - { - auto & result = block.getByName(result_name); - const auto & source = block.getByName(source_name); - result.type = source.type; - result.column = source.column; - } - else - { - const auto & source_column = block.getByName(source_name); - block.insert({source_column.column, source_column.type, result_name}); - } - - break; - } -} - - -std::string ExpressionAction::toString() const -{ - std::stringstream ss; - switch (type) - { - case ADD_COLUMN: - ss << "ADD " << result_name << " " - << (result_type ? result_type->getName() : "(no type)") << " " - << (added_column ? added_column->getName() : "(no column)"); - break; - - case REMOVE_COLUMN: - ss << "REMOVE " << source_name; - break; - - case COPY_COLUMN: - ss << "COPY " << result_name << " = " << source_name; - if (can_replace) - ss << " (can replace)"; - break; - - case APPLY_FUNCTION: - ss << "FUNCTION " << result_name << " " << (is_function_compiled ? "[compiled] " : "") - << (result_type ? result_type->getName() : "(no type)") << " = " - << (function_base ? function_base->getName() : "(no function)") << "("; - for (size_t i = 0; i < argument_names.size(); ++i) + case ActionsDAG::Type::FUNCTION: + out << "FUNCTION " << (node->is_function_compiled ? "[compiled] " : "") + << (node->function_base ? 
node->function_base->getName() : "(no function)") << "("; + for (size_t i = 0; i < node->children.size(); ++i) { if (i) - ss << ", "; - ss << argument_names[i]; + out << ", "; + out << node->children[i]->result_name << " " << arguments[i]; } - ss << ")"; + out << ")"; break; - case ARRAY_JOIN: - ss << "ARRAY JOIN " << source_name << " -> " << result_name; + case ActionsDAG::Type::ARRAY_JOIN: + out << "ARRAY JOIN " << node->children.front()->result_name << " " << arguments.front(); break; - case PROJECT: [[fallthrough]]; - case ADD_ALIASES: - ss << (type == PROJECT ? "PROJECT " : "ADD_ALIASES "); - for (size_t i = 0; i < projection.size(); ++i) - { - if (i) - ss << ", "; - ss << projection[i].first; - if (!projection[i].second.empty() && projection[i].second != projection[i].first) - ss << " AS " << projection[i].second; - } + case ActionsDAG::Type::INPUT: + out << "INPUT " << arguments.front(); break; } - return ss.str(); -} - -ExpressionActions::ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_) - : input_columns(input_columns_), settings(context_.getSettingsRef()) -{ - for (const auto & input_elem : input_columns) - sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name)); - -#if USE_EMBEDDED_COMPILER -compilation_cache = context_.getCompiledExpressionCache(); -#endif -} - -/// For constant columns the columns themselves can be contained in `input_columns_`. -ExpressionActions::ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_) - : settings(context_.getSettingsRef()) -{ - for (const auto & input_elem : input_columns_) - { - input_columns.emplace_back(input_elem.name, input_elem.type); - sample_block.insert(input_elem); - } -#if USE_EMBEDDED_COMPILER - compilation_cache = context_.getCompiledExpressionCache(); -#endif + out << " -> " << node->result_name + << " " << (node->result_type ? 
node->result_type->getName() : "(no type)") << " " << result_position; + return out.str(); } ExpressionActions::~ExpressionActions() = default; void ExpressionActions::checkLimits(ExecutionContext & execution_context) const { - if (settings.max_temporary_columns && block.columns() > settings.max_temporary_columns) - throw Exception("Too many temporary columns: " + block.dumpNames() - + ". Maximum: " + settings.max_temporary_columns.toString(), - ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS); - - if (settings.max_temporary_non_const_columns) + if (max_temporary_non_const_columns) { size_t non_const_columns = 0; - for (size_t i = 0, size = block.columns(); i < size; ++i) - if (block.safeGetByPosition(i).column && !isColumnConst(*block.safeGetByPosition(i).column)) + for (const auto & column : execution_context.columns) + if (column.column && !isColumnConst(*column.column)) ++non_const_columns; - if (non_const_columns > settings.max_temporary_non_const_columns) + if (non_const_columns > max_temporary_non_const_columns) { std::stringstream list_of_non_const_columns; - for (size_t i = 0, size = block.columns(); i < size; ++i) - if (block.safeGetByPosition(i).column && !isColumnConst(*block.safeGetByPosition(i).column)) - list_of_non_const_columns << "\n" << block.safeGetByPosition(i).name; + for (const auto & column : execution_context.columns) + if (column.column && !isColumnConst(*column.column)) + list_of_non_const_columns << "\n" << column.name; throw Exception("Too many temporary non-const columns:" + list_of_non_const_columns.str() - + ". Maximum: " + settings.max_temporary_non_const_columns.toString(), + + ". 
Maximum: " + std::to_string(max_temporary_non_const_columns), ErrorCodes::TOO_MANY_TEMPORARY_NON_CONST_COLUMNS); } } } -void ExpressionActions::prependProjectInput() -{ - actions.insert(actions.begin(), ExpressionAction::project(getRequiredColumns())); -} - -void ExpressionActions::execute(Block & block, bool dry_run) const +void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) const { ExecutionContext execution_context { - .input_columns = block.data, - .num_rows = block.rows(), + .inputs = block.data, + .num_rows = num_rows, }; - execution_context.columns.reserve(num_columns); + execution_context.inputs_pos.reserve(required_columns.size()); - ColumnNumbers inputs_to_remove; - inputs_to_remove.reserve(required_columns.size()); for (const auto & column : required_columns) { - size_t pos = block.getPositionByName(column.name); - execution_context.columns.emplace_back(std::move(block.getByPosition(pos))); - - if (!sample_block.has(column.name)) - inputs_to_remove.emplace_back(pos); + ssize_t pos = -1; + if (block.has(column.name)) + pos = block.getPositionByName(column.name); + execution_context.inputs_pos.push_back(pos); } execution_context.columns.resize(num_columns); @@ -585,6 +143,10 @@ void ExpressionActions::execute(Block & block, bool dry_run) const { executeAction(action, execution_context, dry_run); checkLimits(execution_context); + + //std::cerr << "Action: " << action.toString() << std::endl; + //for (const auto & col : execution_context.columns) + // std::cerr << col.dumpStructure() << std::endl; } catch (Exception & e) { @@ -593,27 +155,38 @@ void ExpressionActions::execute(Block & block, bool dry_run) const } } - std::sort(inputs_to_remove.rbegin(), inputs_to_remove.rend()); - for (auto input : inputs_to_remove) - block.erase(input); - - for (const auto & action : actions) + if (project_input) { - if (!action.is_used_in_result) - continue; - - auto & column = execution_context.columns[action.result_position]; - column.name = 
action.node->result_name; - - if (block.has(action.node->result_name)) - block.getByName(action.node->result_name) = std::move(column); - else - block.insert(std::move(column)); + block.clear(); } + else + { + std::sort(execution_context.inputs_pos.rbegin(), execution_context.inputs_pos.rend()); + for (auto input : execution_context.inputs_pos) + if (input >= 0) + block.erase(input); + } + + for (auto pos : result_positions) + if (execution_context.columns[pos].column) + block.insert(execution_context.columns[pos]); + + num_rows = execution_context.num_rows; +} + +void ExpressionActions::execute(Block & block, bool dry_run) const +{ + size_t num_rows = block.rows(); + + execute(block, num_rows, dry_run); + + if (!block) + block.insert({DataTypeUInt8().createColumnConst(num_rows, 0), std::make_shared(), "_dummy"}); } void ExpressionActions::executeAction(const Action & action, ExecutionContext & execution_context, bool dry_run) { + auto & inputs = execution_context.inputs; auto & columns = execution_context.columns; auto & num_rows = execution_context.num_rows; @@ -626,24 +199,32 @@ void ExpressionActions::executeAction(const Action & action, ExecutionContext & throw Exception("Result column is not empty", ErrorCodes::LOGICAL_ERROR); res_column.type = action.node->result_type; - /// Columns names are not used, avoid extra copy. 
- /// res_column.name = action.node->result_name; + res_column.name = action.node->result_name; + + ColumnsWithTypeAndName arguments(action.arguments.size()); + for (size_t i = 0; i < arguments.size(); ++i) + { + if (action.arguments[i].remove) + arguments[i] = std::move(columns[action.arguments[i].pos]); + else + arguments[i] = columns[action.arguments[i].pos]; + } ProfileEvents::increment(ProfileEvents::FunctionExecute); if (action.node->is_function_compiled) ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute); - action.node->function->execute(columns, action.arguments, action.result_position, num_rows, dry_run); + res_column.column = action.node->function->execute(arguments, res_column.type, num_rows, dry_run); break; } case ActionsDAG::Type::ARRAY_JOIN: { - size_t array_join_key_pos = action.arguments.front(); + size_t array_join_key_pos = action.arguments.front().pos; auto array_join_key = columns[array_join_key_pos]; /// Remove array join argument in advance if it is not needed. 
- if (!action.to_remove.empty()) + if (action.arguments.front().remove) columns[array_join_key_pos] = {}; array_join_key.column = array_join_key.column->convertToFullColumnIfConst(); @@ -656,7 +237,7 @@ void ExpressionActions::executeAction(const Action & action, ExecutionContext & if (column.column) column.column = column.column->replicate(array->getOffsets()); - for (auto & column : execution_context.input_columns) + for (auto & column : inputs) if (column.column) column.column = column.column->replicate(array->getOffsets()); @@ -664,6 +245,7 @@ void ExpressionActions::executeAction(const Action & action, ExecutionContext & res_column.column = array->getDataPtr(); res_column.type = assert_cast(*array_join_key.type).getNestedType(); + res_column.name = action.node->result_name; num_rows = res_column.column->size(); break; @@ -674,24 +256,51 @@ void ExpressionActions::executeAction(const Action & action, ExecutionContext & auto & res_column = columns[action.result_position]; res_column.column = action.node->column->cloneResized(num_rows); res_column.type = action.node->result_type; + res_column.name = action.node->result_name; break; } case ActionsDAG::Type::ALIAS: { - /// Do not care about names, they are empty. 
- columns[action.result_position] = columns[action.arguments.front()]; + const auto & arg = action.arguments.front(); + if (action.result_position != arg.pos) + { + columns[action.result_position].column = columns[arg.pos].column; + columns[action.result_position].type = columns[arg.pos].type; + + if (arg.remove) + columns[arg.pos] = {}; + } + + columns[action.result_position].name = action.node->result_name; + break; } case ActionsDAG::Type::INPUT: { - throw Exception("Cannot execute INPUT action", ErrorCodes::LOGICAL_ERROR); + auto pos = execution_context.inputs_pos[action.arguments.front().pos]; + if (pos < 0) + { + if (!action.arguments.front().remove) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, + "Not found column {} in block", + action.node->result_name); + } + else + columns[action.result_position] = std::move(inputs[pos]); + + break; } } +} - for (auto to_remove : action.to_remove) - columns[to_remove] = {}; +Names ExpressionActions::getRequiredColumns() const +{ + Names names; + for (const auto & input : required_columns) + names.push_back(input.name); + return names; } bool ExpressionActions::hasArrayJoin() const @@ -727,242 +336,12 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum return res; } -void ExpressionActions::finalize(const Names & output_columns) -{ - NameSet final_columns; - for (const auto & name : output_columns) - { - if (!sample_block.has(name)) - throw Exception("Unknown column: " + name + ", there are only columns " - + sample_block.dumpNames(), ErrorCodes::UNKNOWN_IDENTIFIER); - final_columns.insert(name); - } - -#if USE_EMBEDDED_COMPILER - /// This has to be done before removing redundant actions and inserting REMOVE_COLUMNs - /// because inlining may change dependency sets. 
- if (settings.compile_expressions) - compileFunctions(actions, output_columns, sample_block, compilation_cache, settings.min_count_to_compile_expression); -#endif - - /// Which columns are needed to perform actions from the current to the last. - NameSet needed_columns = final_columns; - /// Which columns nobody will touch from the current action to the last. - NameSet unmodified_columns; - - { - NamesAndTypesList sample_columns = sample_block.getNamesAndTypesList(); - for (const auto & sample_column : sample_columns) - unmodified_columns.insert(sample_column.name); - } - - /// Let's go from the end and maintain set of required columns at this stage. - /// We will throw out unnecessary actions, although usually they are absent by construction. - for (int i = static_cast(actions.size()) - 1; i >= 0; --i) - { - ExpressionAction & action = actions[i]; - Names in = action.getNeededColumns(); - - if (action.type == ExpressionAction::PROJECT) - { - needed_columns = NameSet(in.begin(), in.end()); - unmodified_columns.clear(); - } - else if (action.type == ExpressionAction::ADD_ALIASES) - { - needed_columns.insert(in.begin(), in.end()); - for (auto & name_wit_alias : action.projection) - { - auto it = unmodified_columns.find(name_wit_alias.second); - if (it != unmodified_columns.end()) - unmodified_columns.erase(it); - } - } - else if (action.type == ExpressionAction::ARRAY_JOIN) - { - /// We need source anyway, in order to calculate number of rows correctly. - needed_columns.insert(action.source_name); - unmodified_columns.erase(action.result_name); - needed_columns.erase(action.result_name); - - /// Note: technically, if result of arrayJoin is not needed, - /// we may remove all the columns and loose the number of rows here. - /// However, I cannot imagine how it is possible. 
- /// For "big" ARRAY JOIN it could have happened in query like - /// SELECT count() FROM table ARRAY JOIN x - /// Now, "big" ARRAY JOIN is moved to separate pipeline step, - /// and arrayJoin(x) is an expression which result can't be lost. - } - else - { - std::string out = action.result_name; - if (!out.empty()) - { - /// If the result is not used and there are no side effects, throw out the action. - if (!needed_columns.count(out) && - (action.type == ExpressionAction::APPLY_FUNCTION - || action.type == ExpressionAction::ADD_COLUMN - || action.type == ExpressionAction::COPY_COLUMN)) - { - actions.erase(actions.begin() + i); - - if (unmodified_columns.count(out)) - { - sample_block.erase(out); - unmodified_columns.erase(out); - } - - continue; - } - - unmodified_columns.erase(out); - needed_columns.erase(out); - - /** If the function is a constant expression, then replace the action by adding a column-constant - result. - * That is, we perform constant folding. - */ - if (action.type == ExpressionAction::APPLY_FUNCTION && sample_block.has(out)) - { - auto & result = sample_block.getByName(out); - if (result.column && names_not_for_constant_folding.count(result.name) == 0) - { - action.type = ExpressionAction::ADD_COLUMN; - action.result_type = result.type; - action.added_column = result.column; - action.function_builder = nullptr; - action.function_base = nullptr; - action.function = nullptr; - action.argument_names.clear(); - in.clear(); - } - } - } - - needed_columns.insert(in.begin(), in.end()); - } - } - - - /// 1) Sometimes we don't need any columns to perform actions and sometimes actions doesn't produce any columns as result. - /// But Block class doesn't store any information about structure itself, it uses information from column. - /// If we remove all columns from input or output block we will lose information about amount of rows in it. 
- /// To avoid this situation we always leaving one of the columns in required columns (input) - /// and output column. We choose that "redundant" column by size with help of getSmallestColumn. - /// - /// 2) Sometimes we have to read data from different Storages to execute query. - /// For example in 'remote' function which requires to read data from local table (for example MergeTree) and - /// remote table (doesn't know anything about it). - /// - /// If we have combination of two previous cases, our heuristic from (1) can choose absolutely different columns, - /// so generated streams with these actions will have different headers. To avoid this we additionally rename our "redundant" column - /// to DUMMY_COLUMN_NAME with help of COPY_COLUMN action and consequent remove of original column. - /// It doesn't affect any logic, but all streams will have same "redundant" column in header called "_dummy". - - /// Also, it seems like we will always have same type (UInt8) of "redundant" column, but it's not obvious. - - bool dummy_column_copied = false; - - - /// We will not throw out all the input columns, so as not to lose the number of rows in the block. - if (needed_columns.empty() && !input_columns.empty()) - { - auto colname = getSmallestColumn(input_columns); - needed_columns.insert(colname); - actions.insert(actions.begin(), ExpressionAction::copyColumn(colname, DUMMY_COLUMN_NAME, true)); - dummy_column_copied = true; - } - - /// We will not leave the block empty so as not to lose the number of rows in it. 
- if (final_columns.empty() && !input_columns.empty()) - { - auto colname = getSmallestColumn(input_columns); - final_columns.insert(DUMMY_COLUMN_NAME); - if (!dummy_column_copied) /// otherwise we already have this column - actions.insert(actions.begin(), ExpressionAction::copyColumn(colname, DUMMY_COLUMN_NAME, true)); - } - - for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();) - { - NamesAndTypesList::iterator it0 = it; - ++it; - if (!needed_columns.count(it0->name)) - { - if (unmodified_columns.count(it0->name)) - sample_block.erase(it0->name); - input_columns.erase(it0); - } - } - -/* std::cerr << "\n"; - for (const auto & action : actions) - std::cerr << action.toString() << "\n"; - std::cerr << "\n";*/ - - /// Deletes unnecessary temporary columns. - - /// If the column after performing the function `refcount = 0`, it can be deleted. - std::map columns_refcount; - - for (const auto & name : final_columns) - ++columns_refcount[name]; - - for (const auto & action : actions) - { - if (!action.source_name.empty()) - ++columns_refcount[action.source_name]; - - for (const auto & name : action.argument_names) - ++columns_refcount[name]; - - for (const auto & name_alias : action.projection) - ++columns_refcount[name_alias.first]; - } - - Actions new_actions; - new_actions.reserve(actions.size()); - - for (const auto & action : actions) - { - new_actions.push_back(action); - - auto process = [&] (const String & name) - { - auto refcount = --columns_refcount[name]; - if (refcount <= 0 && action.type != ExpressionAction::ARRAY_JOIN) - { - new_actions.push_back(ExpressionAction::removeColumn(name)); - if (sample_block.has(name)) - sample_block.erase(name); - } - }; - - if (!action.source_name.empty()) - process(action.source_name); - - for (const auto & name : action.argument_names) - process(name); - - /// For `projection`, there is no reduction in `refcount`, because the `project` action replaces the names of the columns, in effect, 
already deleting them under the old names. - } - - actions.swap(new_actions); - -/* std::cerr << "\n"; - for (const auto & action : actions) - std::cerr << action.toString() << "\n"; - std::cerr << "\n";*/ - - optimizeArrayJoin(); - checkLimits(sample_block); -} - - std::string ExpressionActions::dumpActions() const { std::stringstream ss; ss << "input:\n"; - for (const auto & input_column : input_columns) + for (const auto & input_column : required_columns) ss << input_column.name << " " << input_column.type->getName() << "\n"; ss << "\nactions:\n"; @@ -974,130 +353,197 @@ std::string ExpressionActions::dumpActions() const for (const auto & output_column : output_columns) ss << output_column.name << " " << output_column.type->getName() << "\n"; + ss << "\nproject input: " << project_input << "\noutput positions:"; + for (auto pos : result_positions) + ss << " " << pos; + ss << "\n"; + return ss.str(); } -ExpressionActionsPtr ExpressionActions::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) +//static std::string getUniqueNameForIndex(ActionsDAG::Index & index, std::string name) +//{ +// if (index.contains(name)) +// return name; +// +// size_t next_id = 0; +// std::string res; +// do +// res = name + "_" + std::to_string(next_id); +// while (index.contains(res)); +// +// return res; +//} + +bool ActionsDAG::hasArrayJoin() const { - /// Create new actions. - /// Copy from this because we don't have context. - /// TODO: remove context from constructor? - auto split_actions = std::make_shared(*this); - split_actions->actions.clear(); - split_actions->sample_block.clear(); - split_actions->input_columns.clear(); + for (const auto & node : nodes) + if (node.type == Type::ARRAY_JOIN) + return true; - /// Expected chain: - /// Expression (this) -> ArrayJoin (array_joined_columns) -> Expression (split_actions) + return false; +} - /// We are going to move as many actions as we can from this to split_actions. 
- /// We can move all inputs which are not depend on array_joined_columns - /// (with some exceptions to PROJECT and REMOVE_COLUMN +bool ActionsDAG::empty() const +{ + for (const auto & node : nodes) + if (node.type != Type::INPUT) + return false; - /// Use the same inputs for split_actions, except array_joined_columns. - for (const auto & input_column : input_columns) + return true; +} + +ActionsDAGPtr ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) +{ + /// Split DAG into two parts. + /// (this_nodes, this_index) is a part which depends on ARRAY JOIN and stays here. + /// (split_nodes, split_index) is a part which will be moved before ARRAY JOIN. + std::list this_nodes; + std::list split_nodes; + Index this_index; + Index split_index; + + struct Frame { - if (array_joined_columns.count(input_column.name) == 0) - { - split_actions->input_columns.emplace_back(input_column); - split_actions->sample_block.insert(ColumnWithTypeAndName(nullptr, input_column.type, input_column.name)); - } - } + Node * node; + size_t next_child_to_visit = 0; + }; - /// Do not split action if input depends only on array joined columns. - if (split_actions->input_columns.empty()) - return nullptr; - - /// Actions which depend on ARRAY JOIN result. - NameSet array_join_dependent_columns = array_joined_columns; - /// Arguments of actions which depend on ARRAY JOIN result. - /// This columns can't be deleted in split_actions. - NameSet array_join_dependent_columns_arguments; - - /// We create new_actions list for `this`. Current actions are moved to new_actions nor added to split_actions. - Actions new_actions; - for (const auto & action : actions) + struct Data { - /// Exception for PROJECT. - /// It removes columns, so it will remove split_actions output which may be needed for actions from `this`. - /// So, we replace it ADD_ALIASES. - /// Usually, PROJECT is added to begin of actions in order to remove unused output of prev actions. 
- /// We skip it now, but will prependProjectInput at the end. - if (action.type == ExpressionAction::PROJECT) + bool depend_on_array_join = false; + bool visited = false; + bool used_in_result = false; + + /// Copies of node in one of the DAGs. + /// For COLUMN and INPUT both copies may exist. + Node * to_this = nullptr; + Node * to_split = nullptr; + }; + + std::stack stack; + std::unordered_map data; + + for (const auto & node : index) + data[node].used_in_result = true; + + /// DFS. Decide if node depends on ARRAY JOIN and move it to one of the DAGs. + for (auto & node : nodes) + { + if (!data[&node].visited) + stack.push({.node = &node}); + + while (!stack.empty()) { - /// Each alias has separate dependencies, so we split this action into two parts. - NamesWithAliases split_aliases; - NamesWithAliases depend_aliases; - for (const auto & pair : action.projection) + auto & cur = stack.top(); + auto & cur_data = data[cur.node]; + + /// At first, visit all children. We depend on ARRAY JOIN if any child does. + while (cur.next_child_to_visit < cur.node->children.size()) { - /// Skip if is not alias. - if (pair.second.empty()) - continue; + auto * child = cur.node->children[cur.next_child_to_visit]; + auto & child_data = data[child]; - if (array_join_dependent_columns.count(pair.first)) + if (!child_data.visited) { - array_join_dependent_columns.insert(pair.second); - depend_aliases.emplace_back(std::move(pair)); + stack.push({.node = child}); + break; } - else - split_aliases.emplace_back(std::move(pair)); + + ++cur.next_child_to_visit; + if (child_data.depend_on_array_join) + cur_data.depend_on_array_join = true; } - if (!split_aliases.empty()) - split_actions->add(ExpressionAction::addAliases(split_aliases)); + /// Make a copy part. 
+ if (cur.next_child_to_visit == cur.node->children.size()) + { + if (cur.node->type == Type::INPUT && array_joined_columns.count(cur.node->result_name)) + cur_data.depend_on_array_join = true; - if (!depend_aliases.empty()) - new_actions.emplace_back(ExpressionAction::addAliases(depend_aliases)); + cur_data.visited = true; + stack.pop(); - continue; + if (cur_data.depend_on_array_join) + { + auto & copy = this_nodes.emplace_back(*cur.node); + cur_data.to_this = © + + /// Replace children to newly created nodes. + for (auto & child : copy.children) + { + auto & child_data = data[child]; + + /// If children is not created, int may be from split part. + if (!child_data.to_this) + { + if (child->type == Type::COLUMN) /// Just create new node for COLUMN action. + { + child_data.to_this = &this_nodes.emplace_back(*child); + } + else + { + /// Node from split part is added as new input. + Node input_node; + input_node.type = Type::INPUT; + input_node.result_type = child->result_type; + input_node.result_name = child->result_name; // getUniqueNameForIndex(index, child->result_name); + child_data.to_this = &this_nodes.emplace_back(std::move(input_node)); + + /// This node is needed for current action, so put it to index also. + split_index[child_data.to_split->result_name] = child_data.to_split; + } + } + + child = child_data.to_this; + } + } + else + { + auto & copy = split_nodes.emplace_back(*cur.node); + cur_data.to_split = © + + /// Replace children to newly created nodes. + for (auto & child : copy.children) + { + child = data[child].to_split; + assert(child != nullptr); + } + + if (cur_data.used_in_result) + { + split_index[copy.result_name] = © + + /// If this node is needed in result, add it as input. 
+ Node input_node; + input_node.type = Type::INPUT; + input_node.result_type = node.result_type; + input_node.result_name = node.result_name; + cur_data.to_this = &this_nodes.emplace_back(std::move(input_node)); + } + } + } } - - bool depends_on_array_join = false; - for (auto & column : action.getNeededColumns()) - if (array_join_dependent_columns.count(column) != 0) - depends_on_array_join = true; - - if (depends_on_array_join) - { - /// Add result of this action to array_join_dependent_columns too. - if (!action.result_name.empty()) - array_join_dependent_columns.insert(action.result_name); - - /// Add arguments of this action to array_join_dependent_columns_arguments. - auto needed = action.getNeededColumns(); - array_join_dependent_columns_arguments.insert(needed.begin(), needed.end()); - - new_actions.emplace_back(action); - } - else if (action.type == ExpressionAction::REMOVE_COLUMN) - { - /// Exception for REMOVE_COLUMN. - /// We cannot move it to split_actions if any argument from `this` needed that column. - if (array_join_dependent_columns_arguments.count(action.source_name)) - new_actions.emplace_back(action); - else - split_actions->add(action); - } - else - split_actions->add(action); } - /// Return empty actions if nothing was separated. Keep `this` unchanged. - if (split_actions->getActions().empty()) - return nullptr; + for (auto * node : index) + this_index.insert(data[node].to_this); - std::swap(actions, new_actions); + /// Consider actions are empty if all nodes are constants or inputs. + bool split_actions_are_empty = true; + for (const auto & node : split_nodes) + if (!node.children.empty()) + split_actions_are_empty = false; - /// Collect inputs from ARRAY JOIN. - NamesAndTypesList inputs_from_array_join; - for (auto & column : input_columns) - if (array_joined_columns.count(column.name)) - inputs_from_array_join.emplace_back(std::move(column)); + if (split_actions_are_empty) + return {}; - /// Fix inputs for `this`. 
- /// It is output of split_actions + inputs from ARRAY JOIN. - input_columns = split_actions->getSampleBlock().getNamesAndTypesList(); - input_columns.insert(input_columns.end(), inputs_from_array_join.begin(), inputs_from_array_join.end()); + index.swap(this_index); + nodes.swap(this_nodes); + + auto split_actions = cloneEmpty(); + split_actions->nodes.swap(split_nodes); + split_actions->index.swap(split_index); return split_actions; } @@ -1111,15 +557,16 @@ bool ExpressionActions::checkColumnIsAlwaysFalse(const String & column_name) con for (auto it = actions.rbegin(); it != actions.rend(); ++it) { const auto & action = *it; - if (action.type == action.APPLY_FUNCTION && action.function_base) + if (action.node->type == ActionsDAG::Type::FUNCTION && action.node->function_base) { - auto name = action.function_base->getName(); - if ((name == "in" || name == "globalIn") - && action.result_name == column_name - && action.argument_names.size() > 1) + if (action.node->result_name == column_name && action.node->children.size() > 1) { - set_to_check = action.argument_names[1]; - break; + auto name = action.node->function_base->getName(); + if ((name == "in" || name == "globalIn")) + { + set_to_check = action.node->children[1]->result_name; + break; + } } } } @@ -1128,10 +575,10 @@ bool ExpressionActions::checkColumnIsAlwaysFalse(const String & column_name) con { for (const auto & action : actions) { - if (action.type == action.ADD_COLUMN && action.result_name == set_to_check) + if (action.node->type == ActionsDAG::Type::COLUMN && action.node->result_name == set_to_check) { // Constant ColumnSet cannot be empty, so we only need to check non-constant ones. 
- if (const auto * column_set = checkAndGetColumn(action.added_column.get())) + if (const auto * column_set = checkAndGetColumn(action.node->column.get())) { if (column_set->getData()->isCreated() && column_set->getData()->getTotalRowCount() == 0) return true; @@ -1144,115 +591,113 @@ bool ExpressionActions::checkColumnIsAlwaysFalse(const String & column_name) con } -/// It is not important to calculate the hash of individual strings or their concatenation -UInt128 ExpressionAction::ActionHash::operator()(const ExpressionAction & action) const -{ - SipHash hash; - hash.update(action.type); - hash.update(action.is_function_compiled); - switch (action.type) - { - case ADD_COLUMN: - hash.update(action.result_name); - if (action.result_type) - hash.update(action.result_type->getName()); - if (action.added_column) - hash.update(action.added_column->getName()); - break; - case REMOVE_COLUMN: - hash.update(action.source_name); - break; - case COPY_COLUMN: - hash.update(action.result_name); - hash.update(action.source_name); - break; - case APPLY_FUNCTION: - hash.update(action.result_name); - if (action.result_type) - hash.update(action.result_type->getName()); - if (action.function_base) - { - hash.update(action.function_base->getName()); - for (const auto & arg_type : action.function_base->getArgumentTypes()) - hash.update(arg_type->getName()); - } - for (const auto & arg_name : action.argument_names) - hash.update(arg_name); - break; - case ARRAY_JOIN: - hash.update(action.result_name); - hash.update(action.source_name); - break; - case PROJECT: - for (const auto & pair_of_strs : action.projection) - { - hash.update(pair_of_strs.first); - hash.update(pair_of_strs.second); - } - break; - case ADD_ALIASES: - break; - } - UInt128 result; - hash.get128(result.low, result.high); - return result; -} +///// It is not important to calculate the hash of individual strings or their concatenation +//UInt128 ExpressionAction::ActionHash::operator()(const ExpressionAction & action) 
const +//{ +// SipHash hash; +// hash.update(action.type); +// hash.update(action.is_function_compiled); +// switch (action.type) +// { +// case ADD_COLUMN: +// hash.update(action.result_name); +// if (action.result_type) +// hash.update(action.result_type->getName()); +// if (action.added_column) +// hash.update(action.added_column->getName()); +// break; +// case REMOVE_COLUMN: +// hash.update(action.source_name); +// break; +// case COPY_COLUMN: +// hash.update(action.result_name); +// hash.update(action.source_name); +// break; +// case APPLY_FUNCTION: +// hash.update(action.result_name); +// if (action.result_type) +// hash.update(action.result_type->getName()); +// if (action.function_base) +// { +// hash.update(action.function_base->getName()); +// for (const auto & arg_type : action.function_base->getArgumentTypes()) +// hash.update(arg_type->getName()); +// } +// for (const auto & arg_name : action.argument_names) +// hash.update(arg_name); +// break; +// case ARRAY_JOIN: +// hash.update(action.result_name); +// hash.update(action.source_name); +// break; +// case PROJECT: +// for (const auto & pair_of_strs : action.projection) +// { +// hash.update(pair_of_strs.first); +// hash.update(pair_of_strs.second); +// } +// break; +// case ADD_ALIASES: +// break; +// } +// UInt128 result; +// hash.get128(result.low, result.high); +// return result; +//} +// +//bool ExpressionAction::operator==(const ExpressionAction & other) const +//{ +// if (result_type != other.result_type) +// { +// if (result_type == nullptr || other.result_type == nullptr) +// return false; +// else if (!result_type->equals(*other.result_type)) +// return false; +// } +// +// if (function_base != other.function_base) +// { +// if (function_base == nullptr || other.function_base == nullptr) +// return false; +// else if (function_base->getName() != other.function_base->getName()) +// return false; +// +// const auto & my_arg_types = function_base->getArgumentTypes(); +// const auto & 
other_arg_types = other.function_base->getArgumentTypes(); +// if (my_arg_types.size() != other_arg_types.size()) +// return false; +// +// for (size_t i = 0; i < my_arg_types.size(); ++i) +// if (!my_arg_types[i]->equals(*other_arg_types[i])) +// return false; +// } +// +// if (added_column != other.added_column) +// { +// if (added_column == nullptr || other.added_column == nullptr) +// return false; +// else if (added_column->getName() != other.added_column->getName()) +// return false; +// } +// +// return source_name == other.source_name +// && result_name == other.result_name +// && argument_names == other.argument_names +// && TableJoin::sameJoin(table_join.get(), other.table_join.get()) +// && projection == other.projection +// && is_function_compiled == other.is_function_compiled; +//} -bool ExpressionAction::operator==(const ExpressionAction & other) const -{ - if (result_type != other.result_type) - { - if (result_type == nullptr || other.result_type == nullptr) - return false; - else if (!result_type->equals(*other.result_type)) - return false; - } - - if (function_base != other.function_base) - { - if (function_base == nullptr || other.function_base == nullptr) - return false; - else if (function_base->getName() != other.function_base->getName()) - return false; - - const auto & my_arg_types = function_base->getArgumentTypes(); - const auto & other_arg_types = other.function_base->getArgumentTypes(); - if (my_arg_types.size() != other_arg_types.size()) - return false; - - for (size_t i = 0; i < my_arg_types.size(); ++i) - if (!my_arg_types[i]->equals(*other_arg_types[i])) - return false; - } - - if (added_column != other.added_column) - { - if (added_column == nullptr || other.added_column == nullptr) - return false; - else if (added_column->getName() != other.added_column->getName()) - return false; - } - - return source_name == other.source_name - && result_name == other.result_name - && argument_names == other.argument_names - && 
TableJoin::sameJoin(table_join.get(), other.table_join.get()) - && projection == other.projection - && is_function_compiled == other.is_function_compiled; -} - -void ExpressionActionsChain::addStep() +void ExpressionActionsChain::addStep(NameSet non_constant_inputs) { if (steps.empty()) throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); - if (auto * step = typeid_cast(steps.back().get())) - { - if (!step->actions) - step->actions = step->actions_dag->buildExpressions(context); - } - ColumnsWithTypeAndName columns = steps.back()->getResultColumns(); + for (auto & column : columns) + if (column.column && isColumnConst(*column.column) && non_constant_inputs.count(column.name)) + column.column = nullptr; + steps.push_back(std::make_unique(std::make_shared(columns))); } @@ -1402,15 +847,10 @@ void ExpressionActionsChain::JoinStep::finalize(const Names & required_output_) ActionsDAGPtr & ExpressionActionsChain::Step::actions() { - return typeid_cast(this)->actions_dag; + return typeid_cast(this)->actions; } const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const -{ - return typeid_cast(this)->actions_dag; -} - -ExpressionActionsPtr ExpressionActionsChain::Step::getExpression() const { return typeid_cast(this)->actions; } @@ -1424,7 +864,12 @@ ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs) ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs) { for (const auto & input : inputs) - addInput(input); + { + if (input.column && isColumnConst(*input.column)) + addInput(input); + else + addInput(input.name, input.type); + } } ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) @@ -1435,9 +880,6 @@ ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace) auto & res = nodes.emplace_back(std::move(node)); - if (it != index.end()) - it->second->renaming_parent = &res; - index[res.result_name] = &res; return res; } @@ -1448,7 +890,7 @@ ActionsDAG::Node & ActionsDAG::getNode(const 
std::string & name) if (it == index.end()) throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); - return *it->second; + return **it; } const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type) @@ -1475,7 +917,7 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column) const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column) { if (!column.column) - throw Exception("Cannot add column " + column.name + " because it is nullptr", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name); Node node; node.type = Type::COLUMN; @@ -1501,8 +943,7 @@ const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::str return addNode(std::move(node), can_replace); } -const ActionsDAG::Node & ActionsDAG::addArrayJoin( - const std::string & source_name, std::string result_name, std::string unique_column_name) +const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) { auto & child = getNode(source_name); @@ -1514,7 +955,6 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin( node.type = Type::ARRAY_JOIN; node.result_type = array_type->getNestedType(); node.result_name = std::move(result_name); - node.unique_column_name_for_array_join = std::move(unique_column_name); node.children.emplace_back(&child); return addNode(std::move(node)); @@ -1524,8 +964,20 @@ const ActionsDAG::Node & ActionsDAG::addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name, - bool compile_expressions [[maybe_unused]]) + const Context & context [[maybe_unused]]) { + const auto & settings = context.getSettingsRef(); + max_temporary_columns = settings.max_temporary_columns; + max_temporary_non_const_columns = settings.max_temporary_non_const_columns; + + bool do_compile_expressions = false; +#if USE_EMBEDDED_COMPILER + 
do_compile_expressions = settings.compile_expressions; + + if (!compilation_cache) + compilation_cache = context.getCompiledExpressionCache(); +#endif + size_t num_arguments = argument_names.size(); Node node; @@ -1556,10 +1008,6 @@ const ActionsDAG::Node & ActionsDAG::addFunction( node.result_type = node.function_base->getResultType(); node.function = node.function_base->prepare(arguments); - bool do_compile_expressions = false; -#if USE_EMBEDDED_COMPILER - do_compile_expressions = compile_expressions; -#endif /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. /// But if we compile expressions compiled version of this function maybe placed in cache, /// so we don't want to unfold non deterministic functions @@ -1611,13 +1059,22 @@ const ActionsDAG::Node & ActionsDAG::addFunction( return addNode(std::move(node)); } +NamesAndTypesList ActionsDAG::getRequiredColumns() const +{ + NamesAndTypesList result; + for (const auto & node : nodes) + if (node.type == Type::INPUT) + result.push_back({node.result_name, node.result_type}); + + return result; +} + ColumnsWithTypeAndName ActionsDAG::getResultColumns() const { ColumnsWithTypeAndName result; result.reserve(index.size()); - for (const auto & node : nodes) - if (!node.renaming_parent) - result.emplace_back(node.column, node.result_type, node.result_name); + for (const auto & node : index) + result.emplace_back(node->column, node->result_type, node->result_name); return result; } @@ -1625,9 +1082,8 @@ ColumnsWithTypeAndName ActionsDAG::getResultColumns() const NamesAndTypesList ActionsDAG::getNamesAndTypesList() const { NamesAndTypesList result; - for (const auto & node : nodes) - if (!node.renaming_parent) - result.emplace_back(node.result_name, node.result_type); + for (const auto & node : index) + result.emplace_back(node->result_name, node->result_type); return result; } @@ -1636,9 +1092,8 @@ Names ActionsDAG::getNames() const { Names names; 
names.reserve(index.size()); - for (const auto & node : nodes) - if (!node.renaming_parent) - names.emplace_back(node.result_name); + for (const auto & node : index) + names.emplace_back(node->result_name); return names; } @@ -1655,70 +1110,243 @@ std::string ActionsDAG::dumpNames() const return out.str(); } -ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) +void ActionsDAG::removeUnusedActions(const Names & required_names) +{ + std::unordered_set nodes_set; + std::vector required_nodes; + required_nodes.reserve(required_names.size()); + + for (const auto & name : required_names) + { + auto it = index.find(name); + if (it == index.end()) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown column: {}, there are only columns {}", name, dumpNames()); + + if (nodes_set.insert(*it).second) + required_nodes.push_back(*it); + } + + removeUnusedActions(required_nodes); +} + +void ActionsDAG::removeUnusedActions(const std::vector & required_nodes) +{ + std::unordered_set visited_nodes; + std::stack stack; + + { + Index new_index; + + for (auto * node : required_nodes) + { + new_index.insert(node); + visited_nodes.insert(node); + stack.push(node); + } + + index.swap(new_index); + } + + while (!stack.empty()) + { + auto * node = stack.top(); + stack.pop(); + + if (!node->children.empty() && node->column && isColumnConst(*node->column) && node->allow_constant_folding) + { + /// Constant folding. 
+ node->type = ActionsDAG::Type::COLUMN; + node->children.clear(); + } + + for (auto * child : node->children) + { + if (visited_nodes.count(child) == 0) + { + stack.push(child); + visited_nodes.insert(child); + } + } + } + + nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; }); +} + +void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector & result_nodes) +{ + std::vector required_nodes; + + for (const auto & item : aliases) + { + auto & child = getNode(item.first); + required_nodes.push_back(&child); + } + + result_nodes.reserve(aliases.size()); + + for (size_t i = 0; i < aliases.size(); ++i) + { + const auto & item = aliases[i]; + auto * child = required_nodes[i]; + + if (!item.second.empty() && item.first != item.second) + { + Node node; + node.type = Type::ALIAS; + node.result_type = child->result_type; + node.result_name = std::move(item.second); + node.column = child->column; + node.allow_constant_folding = child->allow_constant_folding; + node.children.emplace_back(child); + + auto & alias = addNode(std::move(node), true); + result_nodes.push_back(&alias); + } + else + result_nodes.push_back(child); + } +} + +void ActionsDAG::addAliases(const NamesWithAliases & aliases) +{ + std::vector result_nodes; + addAliases(aliases, result_nodes); +} + +void ActionsDAG::project(const NamesWithAliases & projection) +{ + std::vector result_nodes; + addAliases(projection, result_nodes); + removeUnusedActions(result_nodes); + projectInput(); + projected_output = true; +} + +void ActionsDAG::removeColumn(const std::string & column_name) +{ + auto & node = getNode(column_name); + index.remove(&node); +} + +bool ActionsDAG::tryRestoreColumn(const std::string & column_name) +{ + if (index.contains(column_name)) + return true; + + for (auto it = nodes.rbegin(); it != nodes.rend(); ++it) + { + auto & node = *it; + if (node.result_name == column_name) + { + index[node.result_name] = &node; + return true; + } + } + + return false; 
+} + +ActionsDAGPtr ActionsDAG::clone() const +{ + auto actions = cloneEmpty(); + + std::unordered_map copy_map; + + for (const auto & node : nodes) + { + auto & copy_node = actions->nodes.emplace_back(node); + copy_map[&node] = ©_node; + } + + for (auto & node : actions->nodes) + for (auto & child : node.children) + child = copy_map[child]; + + for (const auto & node : index) + actions->index.insert(copy_map[node]); + + return actions; +} + +ExpressionActionsPtr ExpressionActions::clone() const +{ + auto expressions = std::make_shared(); + + expressions->actions = actions; + expressions->num_columns = num_columns; + expressions->required_columns = required_columns; + expressions->result_positions = result_positions; + expressions->sample_block = sample_block; + expressions->project_input = project_input; + expressions->max_temporary_non_const_columns = max_temporary_non_const_columns; + + std::unordered_map copy_map; + for (const auto & node : nodes) + { + auto & copy_node = expressions->nodes.emplace_back(node); + copy_map[&node] = ©_node; + } + + for (auto & node : expressions->nodes) + for (auto & child : node.children) + child = copy_map[child]; + + for (auto & action : expressions->actions) + action.node = copy_map[action.node]; + + return expressions; +} + + +ExpressionActionsPtr ActionsDAG::linearizeActions() const { struct Data { - Node * node = nullptr; + const Node * node = nullptr; size_t num_created_children = 0; - size_t num_expected_children = 0; - std::vector parents; - Node * renamed_child = nullptr; + std::vector parents; + + ssize_t position = -1; + size_t num_created_parents = 0; + bool used_in_result = false; }; std::vector data(nodes.size()); - std::unordered_map reverse_index; + std::unordered_map reverse_index; - for (auto & node : nodes) + for (const auto & node : nodes) { size_t id = reverse_index.size(); data[id].node = &node; reverse_index[&node] = id; } - std::queue ready_nodes; - std::queue ready_array_joins; + std::queue ready_nodes; 
+ std::queue ready_array_joins; - for (auto & node : nodes) + for (const auto * node : index) + data[reverse_index[node]].used_in_result = true; + + for (const auto & node : nodes) { - data[reverse_index[&node]].num_expected_children += node.children.size(); - for (const auto & child : node.children) data[reverse_index[child]].parents.emplace_back(&node); - - if (node.renaming_parent) - { - - auto & cur = data[reverse_index[node.renaming_parent]]; - cur.renamed_child = &node; - cur.num_expected_children += 1; - } } - for (auto & node : nodes) + for (const auto & node : nodes) { - if (node.children.empty() && data[reverse_index[&node]].renamed_child == nullptr) + if (node.children.empty()) ready_nodes.emplace(&node); } - auto update_parent = [&](Node * parent) - { - auto & cur = data[reverse_index[parent]]; - ++cur.num_created_children; - - if (cur.num_created_children == cur.num_expected_children) - { - auto & push_stack = parent->type == Type::ARRAY_JOIN ? ready_array_joins : ready_nodes; - push_stack.push(parent); - } - }; - - auto expressions = std::make_shared(NamesAndTypesList(), context); + auto expressions = std::make_shared(); + std::stack free_positions; while (!ready_nodes.empty() || !ready_array_joins.empty()) { auto & stack = ready_nodes.empty() ? 
ready_array_joins : ready_nodes; - Node * node = stack.front(); + const Node * node = stack.front(); stack.pop(); Names argument_names; @@ -1727,51 +1355,160 @@ ExpressionActionsPtr ActionsDAG::buildExpressions(const Context & context) auto & cur = data[reverse_index[node]]; - switch (node->type) + size_t free_position = expressions->num_columns; + if (free_positions.empty()) + ++expressions->num_columns; + else { - case Type::INPUT: - expressions->addInput({node->column, node->result_type, node->result_name}); - break; - case Type::COLUMN: - expressions->add(ExpressionAction::addColumn({node->column, node->result_type, node->result_name})); - break; - case Type::ALIAS: - expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->result_name, cur.renamed_child != nullptr)); - break; - case Type::ARRAY_JOIN: - /// Here we copy argument because arrayJoin removes source column. - /// It makes possible to remove source column before arrayJoin if it won't be needed anymore. - - /// It could have been possible to implement arrayJoin which keeps source column, - /// but in this case it will always be replicated (as many arrays), which is expensive. 
- expressions->add(ExpressionAction::copyColumn(argument_names.at(0), node->unique_column_name_for_array_join)); - expressions->add(ExpressionAction::arrayJoin(node->unique_column_name_for_array_join, node->result_name)); - break; - case Type::FUNCTION: - { - ExpressionAction action; - action.type = ExpressionAction::APPLY_FUNCTION; - action.result_name = node->result_name; - action.result_type = node->result_type; - action.function_builder = node->function_builder; - action.function_base = node->function_base; - action.function = node->function; - action.argument_names = std::move(argument_names); - action.added_column = node->column; - - expressions->add(action); - break; - } + free_position = free_positions.top(); + free_positions.pop(); } - for (const auto & parent : cur.parents) - update_parent(parent); + cur.position = free_position; - if (node->renaming_parent) - update_parent(node->renaming_parent); + ExpressionActions::Arguments arguments; + arguments.reserve(cur.node->children.size()); + for (auto * child : cur.node->children) + { + auto & arg = data[reverse_index[child]]; + + if (arg.position < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument was not calculated for {}", child->result_name); + + ++arg.num_created_parents; + + ExpressionActions::Argument argument; + argument.pos = arg.position; + argument.remove = !arg.used_in_result && arg.num_created_parents == arg.parents.size(); + + if (argument.remove) + free_positions.push(argument.pos); + + arguments.emplace_back(argument); + } + + if (node->type == Type::INPUT) + { + /// Argument for input is special. It contains the position from required columns. 
+ ExpressionActions::Argument argument; + argument.pos = expressions->required_columns.size(); + argument.remove = cur.parents.empty(); + arguments.emplace_back(argument); + + expressions->required_columns.push_back({node->result_name, node->result_type}); + } + + expressions->actions.push_back({node, arguments, free_position}); + + for (const auto & parent : cur.parents) + { + auto & parent_data = data[reverse_index[parent]]; + ++parent_data.num_created_children; + + if (parent_data.num_created_children == parent->children.size()) + { + auto & push_stack = parent->type == Type::ARRAY_JOIN ? ready_array_joins : ready_nodes; + push_stack.push(parent); + } + } + } + + expressions->result_positions.reserve(index.size()); + + for (const auto & node : index) + { + auto pos = data[reverse_index[node]].position; + + if (pos < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Action for {} was not calculated", node->result_name); + + expressions->result_positions.push_back(pos); + + ColumnWithTypeAndName col{node->column, node->result_type, node->result_name}; + expressions->sample_block.insert(std::move(col)); } return expressions; } +ExpressionActionsPtr ActionsDAG::buildExpressions() +{ + auto cloned = clone(); + auto expressions = cloned->linearizeActions(); + + expressions->nodes.swap(cloned->nodes); + + if (max_temporary_columns && expressions->num_columns > max_temporary_columns) + throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, + "Too many temporary columns: {}. 
Maximum: {}", + dumpNames(), std::to_string(max_temporary_columns)); + + expressions->max_temporary_non_const_columns = max_temporary_non_const_columns; + expressions->project_input = project_input; + + return expressions; +} + +std::string ActionsDAG::dump() const +{ + return linearizeActions()->dumpActions(); +} + +std::string ActionsDAG::dumpDAG() const +{ + std::unordered_map map; + for (const auto & node : nodes) + { + size_t idx = map.size(); + map[&node] = idx; + } + + std::stringstream out; + for (const auto & node : nodes) + { + out << map[&node] << " : "; + switch (node.type) + { + case ActionsDAG::Type::COLUMN: + out << "COLUMN "; + break; + + case ActionsDAG::Type::ALIAS: + out << "ALIAS "; + break; + + case ActionsDAG::Type::FUNCTION: + out << "FUNCTION "; + break; + + case ActionsDAG::Type::ARRAY_JOIN: + out << "ARRAY JOIN "; + break; + + case ActionsDAG::Type::INPUT: + out << "INPUT "; + break; + } + + out << "("; + for (size_t i = 0; i < node.children.size(); ++i) + { + if (i) + out << ", "; + out << map[node.children[i]]; + } + out << ")"; + + out << " " << (node.column ? node.column->getName() : "(no column)"); + out << " " << (node.result_type ? node.result_type->getName() : "(no type)"); + out << " " << (!node.result_name.empty() ? node.result_name : "(no name)"); + if (node.function_base) + out << " [" << node.function_base->getName() << "]"; + + out << "\n"; + } + + return out.str(); +} + } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 75edd4bf7ee..c7011dd7d86 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -11,6 +12,11 @@ #include #include +#include +#include +#include +#include + #include #if !defined(ARCADIA_BUILD) @@ -49,97 +55,12 @@ class CompiledExpressionCache; class ArrayJoinAction; using ArrayJoinActionPtr = std::shared_ptr; -/** Action on the block. 
- */ -struct ExpressionAction -{ -private: - using ExpressionActionsPtr = std::shared_ptr; -public: - enum Type - { - ADD_COLUMN, - REMOVE_COLUMN, - COPY_COLUMN, - - APPLY_FUNCTION, - - /// Replaces the source column with array into column with elements. - /// Duplicates the values in the remaining columns by the number of elements in the arrays. - /// Source column is removed from block. - ARRAY_JOIN, - - /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result. - PROJECT, - /// Add columns with alias names. This columns are the same as non-aliased. PROJECT columns if you need to modify them. - ADD_ALIASES, - }; - - Type type{}; - - /// For ADD/REMOVE/ARRAY_JOIN/COPY_COLUMN. - std::string source_name; - std::string result_name; - DataTypePtr result_type; - - /// If COPY_COLUMN can replace the result column. - bool can_replace = false; - - /// For ADD_COLUMN. - ColumnPtr added_column; - - /// For APPLY_FUNCTION. - /// OverloadResolver is used before action was added to ExpressionActions (when we don't know types of arguments). - FunctionOverloadResolverPtr function_builder; - - /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. - FunctionBasePtr function_base; - /// Prepared function which is used in function execution. - ExecutableFunctionPtr function; - Names argument_names; - bool is_function_compiled = false; - - /// For JOIN - std::shared_ptr table_join; - JoinPtr join; - - /// For PROJECT. - NamesWithAliases projection; - - /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". 
- static ExpressionAction applyFunction( - const FunctionOverloadResolverPtr & function_, const std::vector & argument_names_, std::string result_name_ = ""); - - static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_); - static ExpressionAction removeColumn(const std::string & removed_name); - static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name, bool can_replace = false); - static ExpressionAction project(const NamesWithAliases & projected_columns_); - static ExpressionAction project(const Names & projected_columns_); - static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); - static ExpressionAction arrayJoin(std::string source_name, std::string result_name); - - /// Which columns necessary to perform this action. - Names getNeededColumns() const; - - std::string toString() const; - - bool operator==(const ExpressionAction & other) const; - - struct ActionHash - { - UInt128 operator()(const ExpressionAction & action) const; - }; - -private: - friend class ExpressionActions; - - void prepare(Block & sample_block, const Settings & settings, NameSet & names_not_for_constant_folding); - void execute(Block & block, bool dry_run) const; -}; - class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; + class ActionsDAG { public: @@ -160,16 +81,12 @@ public: struct Node { std::vector children; - /// This field is filled if current node is replaced by existing node with the same name. - Node * renaming_parent = nullptr; Type type; std::string result_name; DataTypePtr result_type; - std::string unique_column_name_for_array_join; - FunctionOverloadResolverPtr function_builder; /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. 
FunctionBasePtr function_base; @@ -185,12 +102,75 @@ public: bool allow_constant_folding = true; }; - using Index = std::unordered_map; + class Index + { + public: + Node *& operator[](std::string_view key) + { + auto res = map.emplace(key, list.end()); + if (res.second) + res.first->second = list.emplace(list.end(), nullptr); + + return *res.first->second; + } + + void swap(Index & other) + { + list.swap(other.list); + map.swap(other.map); + } + + auto size() const { return list.size(); } + bool contains(std::string_view key) const { return map.count(key) != 0; } + + std::list::iterator begin() { return list.begin(); } + std::list::iterator end() { return list.end(); } + std::list::const_iterator begin() const { return list.begin(); } + std::list::const_iterator end() const { return list.end(); } + std::list::const_iterator find(std::string_view key) const + { + auto it = map.find(key); + if (it == map.end()) + return list.end(); + + return it->second; + } + + /// Insert method doesn't check if map already have node with the same name. + /// If node with the same name exists, it is removed from map, but not list. + /// It is expected and used for project(), when result may have several columns with the same name. 
+ void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); } + void remove(Node * node) + { + auto it = map.find(node->result_name); + if (it == map.end()) + return; + + list.erase(it->second); + map.erase(it); + } + + private: + std::list list; + std::unordered_map::iterator> map; + }; + + using Nodes = std::list; private: - std::list nodes; + Nodes nodes; Index index; + size_t max_temporary_columns = 0; + size_t max_temporary_non_const_columns = 0; + +#if USE_EMBEDDED_COMPILER + std::shared_ptr compilation_cache; +#endif + + bool project_input = false; + bool projected_output = false; + public: ActionsDAG() = default; ActionsDAG(const ActionsDAG &) = delete; @@ -198,58 +178,108 @@ public: explicit ActionsDAG(const NamesAndTypesList & inputs); explicit ActionsDAG(const ColumnsWithTypeAndName & inputs); + const Nodes & getNodes() const { return nodes; } const Index & getIndex() const { return index; } + NamesAndTypesList getRequiredColumns() const; ColumnsWithTypeAndName getResultColumns() const; NamesAndTypesList getNamesAndTypesList() const; + Names getNames() const; std::string dumpNames() const; + std::string dump() const; + std::string dumpDAG() const; const Node & addInput(std::string name, DataTypePtr type); const Node & addInput(ColumnWithTypeAndName column); const Node & addColumn(ColumnWithTypeAndName column); const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); - const Node & addArrayJoin(const std::string & source_name, std::string result_name, std::string unique_column_name); + const Node & addArrayJoin(const std::string & source_name, std::string result_name); const Node & addFunction( const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name, - bool compile_expressions); + const Context & context); - ExpressionActionsPtr buildExpressions(const Context & context); + /// Call addAlias several times. 
+ void addAliases(const NamesWithAliases & aliases); + /// Adds alias actions and removes unused columns from index. + void project(const NamesWithAliases & projection); + + /// Removes column from index. + void removeColumn(const std::string & column_name); + /// If column is not in index, try to find it in nodes and insert back into index. + bool tryRestoreColumn(const std::string & column_name); + + void projectInput() { project_input = true; } + void removeUnusedActions(const Names & required_names); + ExpressionActionsPtr buildExpressions(); + + /// Splits actions into two parts. Returned half may be swapped with ARRAY JOIN. + /// Returns nullptr if no actions may be moved before ARRAY JOIN. + ActionsDAGPtr splitActionsBeforeArrayJoin(const NameSet & array_joined_columns); + + bool hasArrayJoin() const; + bool empty() const; + bool projectedOutput() const { return projected_output; } + + ActionsDAGPtr clone() const; private: Node & addNode(Node node, bool can_replace = false); Node & getNode(const std::string & name); + + ActionsDAGPtr cloneEmpty() const + { + auto actions = std::make_shared(); + actions->max_temporary_columns = max_temporary_columns; + actions->max_temporary_non_const_columns = max_temporary_non_const_columns; + +#if USE_EMBEDDED_COMPILER + actions->compilation_cache = compilation_cache; +#endif + return actions; + } + + ExpressionActionsPtr linearizeActions() const; + void removeUnusedActions(const std::vector & required_nodes); + void addAliases(const NamesWithAliases & aliases, std::vector & result_nodes); }; -using ActionsDAGPtr = std::shared_ptr; /** Contains a sequence of actions on the block. */ class ExpressionActions { -private: +public: using Node = ActionsDAG::Node; using Index = ActionsDAG::Index; + struct Argument + { + size_t pos; + bool remove; + }; + + using Arguments = std::vector; + struct Action { - Node * node; - ColumnNumbers arguments; - /// Columns which will be removed after actions is executed. 
- /// It is always a subset of arguments. - ColumnNumbers to_remove; + const Node * node; + Arguments arguments; size_t result_position; - bool is_used_in_result; + + std::string toString() const; }; using Actions = std::vector; +private: struct ExecutionContext { - ColumnsWithTypeAndName & input_columns; + ColumnsWithTypeAndName & inputs; ColumnsWithTypeAndName columns; + std::vector inputs_pos; size_t num_rows; }; @@ -258,19 +288,26 @@ private: size_t num_columns; NamesAndTypesList required_columns; + ColumnNumbers result_positions; Block sample_block; + /// This flag means that all columns except input will be removed from block before execution. + bool project_input = false; + + size_t max_temporary_non_const_columns = 0; + + friend class ActionsDAG; + public: ~ExpressionActions(); + ExpressionActions() = default; + ExpressionActions(const ExpressionActions &) = delete; + ExpressionActions & operator=(const ExpressionActions &) = delete; - ExpressionActions(const ExpressionActions & other) = default; + const Actions & getActions() const { return actions; } /// Adds to the beginning the removal of all extra columns. - void prependProjectInput(); - - /// Splits actions into two parts. Returned half may be swapped with ARRAY JOIN. - /// Returns nullptr if no actions may be moved before ARRAY JOIN. - ExpressionActionsPtr splitActionsBeforeArrayJoin(const NameSet & array_joined_columns); + void projectInput() { project_input = true; } /// - Adds actions to delete all but the specified columns. /// - Removes unused input columns. @@ -286,6 +323,7 @@ public: /// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns. 
void execute(Block & block, bool dry_run = false) const; + void execute(Block & block, size_t & num_rows, bool dry_run = false) const; bool hasArrayJoin() const; @@ -296,18 +334,13 @@ public: static std::string getSmallestColumn(const NamesAndTypesList & columns); - const Settings & getSettings() const { return settings; } - /// Check if column is always zero. True if it's definite, false if we can't say for sure. /// Call it only after subqueries for sets were executed. bool checkColumnIsAlwaysFalse(const String & column_name) const; -private: + ExpressionActionsPtr clone() const; - Settings settings; -#if USE_EMBEDDED_COMPILER - std::shared_ptr compilation_cache; -#endif +private: void checkLimits(ExecutionContext & execution_context) const; @@ -343,8 +376,8 @@ struct ExpressionActionsChain /// If not empty, has the same size with required_output; is filled in finalize(). std::vector can_remove_required_output; - virtual const NamesAndTypesList & getRequiredColumns() const = 0; - virtual const ColumnsWithTypeAndName & getResultColumns() const = 0; + virtual NamesAndTypesList getRequiredColumns() const = 0; + virtual ColumnsWithTypeAndName getResultColumns() const = 0; /// Remove unused result and update required columns virtual void finalize(const Names & required_output_) = 0; /// Add projections to expression @@ -354,43 +387,42 @@ struct ExpressionActionsChain /// Only for ExpressionActionsStep ActionsDAGPtr & actions(); const ActionsDAGPtr & actions() const; - ExpressionActionsPtr getExpression() const; }; struct ExpressionActionsStep : public Step { - ActionsDAGPtr actions_dag; - ExpressionActionsPtr actions; + ActionsDAGPtr actions; explicit ExpressionActionsStep(ActionsDAGPtr actions_, Names required_output_ = Names()) : Step(std::move(required_output_)) - , actions_dag(std::move(actions_)) + , actions(std::move(actions_)) { } - const NamesAndTypesList & getRequiredColumns() const override + NamesAndTypesList getRequiredColumns() const override { - return 
actions->getRequiredColumnsWithTypes(); + return actions->getRequiredColumns(); } - const ColumnsWithTypeAndName & getResultColumns() const override + ColumnsWithTypeAndName getResultColumns() const override { - return actions->getSampleBlock().getColumnsWithTypeAndName(); + return actions->getResultColumns(); } void finalize(const Names & required_output_) override { - actions->finalize(required_output_); + if (!actions->projectedOutput()) + actions->removeUnusedActions(required_output_); } void prependProjectInput() const override { - actions->prependProjectInput(); + actions->projectInput(); } std::string dump() const override { - return actions->dumpActions(); + return actions->dump(); } }; @@ -402,8 +434,8 @@ struct ExpressionActionsChain ArrayJoinStep(ArrayJoinActionPtr array_join_, ColumnsWithTypeAndName required_columns_); - const NamesAndTypesList & getRequiredColumns() const override { return required_columns; } - const ColumnsWithTypeAndName & getResultColumns() const override { return result_columns; } + NamesAndTypesList getRequiredColumns() const override { return required_columns; } + ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const Names & required_output_) override; void prependProjectInput() const override {} /// TODO: remove unused columns before ARRAY JOIN ? 
std::string dump() const override { return "ARRAY JOIN"; } @@ -418,8 +450,8 @@ struct ExpressionActionsChain ColumnsWithTypeAndName result_columns; JoinStep(std::shared_ptr analyzed_join_, JoinPtr join_, ColumnsWithTypeAndName required_columns_); - const NamesAndTypesList & getRequiredColumns() const override { return required_columns; } - const ColumnsWithTypeAndName & getResultColumns() const override { return result_columns; } + NamesAndTypesList getRequiredColumns() const override { return required_columns; } + ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const Names & required_output_) override; void prependProjectInput() const override {} /// TODO: remove unused columns before JOIN ? std::string dump() const override { return "JOIN"; } @@ -431,7 +463,7 @@ struct ExpressionActionsChain const Context & context; Steps steps; - void addStep(); + void addStep(NameSet non_constant_inputs = {}); void finalize(); @@ -440,7 +472,7 @@ struct ExpressionActionsChain steps.clear(); } - ExpressionActionsPtr getLastActions(bool allow_empty = false) + ActionsDAGPtr getLastActions(bool allow_empty = false) { if (steps.empty()) { @@ -449,9 +481,7 @@ struct ExpressionActionsChain throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); } - auto * step = typeid_cast(steps.back().get()); - step->actions = step->actions_dag->buildExpressions(context); - return step->actions; + return typeid_cast(steps.back().get())->actions; } Step & getLastStep() diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index f79bb36ec46..49eaa21cc3d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -70,16 +70,16 @@ namespace /// Check if there is an ignore function. It's used for disabling constant folding in query /// predicates because some performance tests use ignore function as a non-optimize guard. 
-bool allowEarlyConstantFolding(const ExpressionActions & actions, const Settings & settings) +bool allowEarlyConstantFolding(const ActionsDAG & actions, const Settings & settings) { if (!settings.enable_early_constant_folding) return false; - for (const auto & action : actions.getActions()) + for (const auto & node : actions.getNodes()) { - if (action.type == action.APPLY_FUNCTION && action.function_base) + if (node.type == ActionsDAG::Type::FUNCTION && node.function_base) { - auto name = action.function_base->getName(); + auto name = node.function_base->getName(); if (name == "ignore") return false; } @@ -234,7 +234,7 @@ void ExpressionAnalyzer::analyzeAggregation() if (it == index.end()) throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); - const auto & node = it->second; + const auto & node = *it; /// Constant expressions have non-null column pointer at this stage. if (node->column && isColumnConst(*node->column)) @@ -382,7 +382,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) auto temp_actions = std::make_shared(columns_after_join); getRootActions(left_in_operand, true, temp_actions); - if (temp_actions->getIndex().count(left_in_operand->getColumnName()) != 0) + if (temp_actions->getIndex().contains(left_in_operand->getColumnName())) makeExplicitSet(func, *temp_actions, true, context, settings.size_limits_for_set, prepared_sets); } @@ -434,7 +434,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) if (it == index.end()) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in aggregate function '{}'): {}", node->name, name); - types[i] = it->second->result_type; + types[i] = (*it)->result_type; aggregate.argument_names[i] = name; } @@ -481,7 +481,7 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr return std::make_shared(result_columns, array_join_is_left, context); } -ArrayJoinActionPtr 
SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ExpressionActionsPtr & before_array_join, bool only_types) +ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types) { const auto * select_query = getSelectQuery(); @@ -637,11 +637,11 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQuer return subquery_for_join.join; } -ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( +ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) { const auto * select_query = getSelectQuery(); - ExpressionActionsPtr prewhere_actions; + ActionsDAGPtr prewhere_actions; if (!select_query->prewhere()) return prewhere_actions; @@ -652,7 +652,7 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( step.required_output.push_back(prewhere_column_name); step.can_remove_required_output.push_back(true); - auto filter_type = step.actions()->getIndex().find(prewhere_column_name)->second->result_type; + auto filter_type = (*step.actions()->getIndex().find(prewhere_column_name))->result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); @@ -661,8 +661,8 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( /// Remove unused source_columns from prewhere actions. 
auto tmp_actions_dag = std::make_shared(sourceColumns()); getRootActions(select_query->prewhere(), only_types, tmp_actions_dag); - auto tmp_actions = tmp_actions_dag->buildExpressions(context); - tmp_actions->finalize({prewhere_column_name}); + tmp_actions_dag->removeUnusedActions({prewhere_column_name}); + auto tmp_actions = tmp_actions_dag->buildExpressions(); auto required_columns = tmp_actions->getRequiredColumns(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); @@ -686,7 +686,7 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( Names required_output(name_set.begin(), name_set.end()); prewhere_actions = chain.getLastActions(); - prewhere_actions->finalize(required_output); + prewhere_actions->removeUnusedActions(required_output); } { @@ -697,11 +697,14 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( /// 2. Store side columns which were calculated during prewhere actions execution if they are used. /// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step. /// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN. 
- ColumnsWithTypeAndName columns = prewhere_actions->getSampleBlock().getColumnsWithTypeAndName(); + ColumnsWithTypeAndName columns = prewhere_actions->getResultColumns(); auto required_columns = prewhere_actions->getRequiredColumns(); - NameSet prewhere_input_names(required_columns.begin(), required_columns.end()); + NameSet prewhere_input_names; NameSet unused_source_columns; + for (const auto & col : required_columns) + prewhere_input_names.insert(col.name); + for (const auto & column : sourceColumns()) { if (prewhere_input_names.count(column.name) == 0) @@ -721,7 +724,7 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendPrewhere( return prewhere_actions; } -void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name) +void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ActionsDAGPtr actions, String column_name) { ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); @@ -749,7 +752,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); - auto filter_type = step.actions()->getIndex().find(where_column_name)->second->result_type; + auto filter_type = (*step.actions()->getIndex().find(where_column_name))->result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); @@ -780,7 +783,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { auto actions_dag = std::make_shared(columns_after_join); getRootActions(child, only_types, actions_dag); - group_by_elements_actions.emplace_back(actions_dag->buildExpressions(context)); + group_by_elements_actions.emplace_back(actions_dag->buildExpressions()); } } @@ -842,18 +845,24 @@ void 
SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, step.required_output.push_back(child->getColumnName()); } -bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, +ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions & order_by_elements_actions) { const auto * select_query = getSelectQuery(); if (!select_query->orderBy()) - return false; + { + auto actions = chain.getLastActions(); + chain.addStep(); + return actions; + } ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); getRootActions(select_query->orderBy(), only_types, step.actions()); + bool with_fill = false; + NameSet order_by_keys; for (auto & child : select_query->orderBy()->children) { const auto * ast = child->as(); @@ -861,6 +870,9 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); ASTPtr order_expression = ast->children.at(0); step.required_output.push_back(order_expression->getColumnName()); + + if (ast->with_fill) + with_fill = true; } if (optimize_read_in_order) @@ -869,10 +881,21 @@ bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain { auto actions_dag = std::make_shared(columns_after_join); getRootActions(child, only_types, actions_dag); - order_by_elements_actions.emplace_back(actions_dag->buildExpressions(context)); + order_by_elements_actions.emplace_back(actions_dag->buildExpressions()); } } - return true; + + NameSet non_constant_inputs; + if (with_fill) + { + for (const auto & column : step.getResultColumns()) + if (!order_by_keys.count(column.name)) + non_constant_inputs.insert(column.name); + } + + auto actions = chain.getLastActions(); + chain.addStep(non_constant_inputs); + return actions; } bool 
SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types) @@ -903,7 +926,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain return true; } -ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const +ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const { const auto * select_query = getSelectQuery(); @@ -950,7 +973,7 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::appendProjectResult(Expressi } auto actions = chain.getLastActions(); - actions->add(ExpressionAction::project(result_columns)); + actions->project(result_columns); return actions; } @@ -963,7 +986,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const } -ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) +ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_result) { auto actions_dag = std::make_shared(aggregated_columns); NamesWithAliases result_columns; @@ -989,14 +1012,12 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje getRootActions(ast, false, actions_dag); } - auto actions = actions_dag->buildExpressions(context); - if (add_aliases) { if (project_result) - actions->add(ExpressionAction::project(result_columns)); + actions_dag->project(result_columns); else - actions->add(ExpressionAction::addAliases(result_columns)); + actions_dag->addAliases(result_columns); } if (!(add_aliases && project_result)) @@ -1006,9 +1027,13 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje result_names.push_back(column_name_type.name); } - actions->finalize(result_names); + actions_dag->removeUnusedActions(result_names); + return actions_dag; +} - return actions; +ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) +{ + return 
getActionsDAG(add_aliases, project_result)->buildExpressions(); } @@ -1017,10 +1042,10 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions() auto actions = std::make_shared(NamesAndTypesList()); getRootActions(query, true, actions, true); - return actions->buildExpressions(context); + return actions->buildExpressions(); } -ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions() +ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions() { ExpressionActionsChain new_chain(context); appendSelect(new_chain, false); @@ -1061,7 +1086,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (!finalized) { - finalize(chain, context, where_step_num); + finalize(chain, where_step_num); finalized = true; } @@ -1107,7 +1132,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { - prewhere_info->prewhere_actions->execute(before_prewhere_sample); + prewhere_info->prewhere_actions->buildExpressions()->execute(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. if (column_elem.column) @@ -1140,7 +1165,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where_sample = source_header; if (sanitizeBlock(before_where_sample)) { - before_where->execute(before_where_sample); + before_where->buildExpressions()->execute(before_where_sample); auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); /// If the filter column is a constant, record it. if (column_elem.column) @@ -1188,10 +1213,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers. query_analyzer.appendSelect(chain, only_types || (need_aggregate ? 
!second_stage : !first_stage)); selected_columns = chain.getLastStep().required_output; - has_order_by = query_analyzer.appendOrderBy(chain, only_types || (need_aggregate ? !second_stage : !first_stage), - optimize_read_in_order, order_by_elements_actions); - before_order_and_select = chain.getLastActions(); - chain.addStep(); + has_order_by = query.orderBy() != nullptr; + before_order_and_select = query_analyzer.appendOrderBy( + chain, + only_types || (need_aggregate ? !second_stage : !first_stage), + optimize_read_in_order, + order_by_elements_actions); if (query_analyzer.appendLimitBy(chain, only_types || !second_stage)) { @@ -1210,28 +1237,35 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( checkActions(); } -void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, const Context & context_, size_t where_step_num) +void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num) { if (hasPrewhere()) { const ExpressionActionsChain::Step & step = *chain.steps.at(0); prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0); - Names columns_to_remove; + NameSet columns_to_remove; for (size_t i = 1; i < step.required_output.size(); ++i) { if (step.can_remove_required_output[i]) - columns_to_remove.push_back(step.required_output[i]); + columns_to_remove.insert(step.required_output[i]); } if (!columns_to_remove.empty()) { - auto columns = prewhere_info->prewhere_actions->getSampleBlock().getNamesAndTypesList(); - ExpressionActionsPtr actions = std::make_shared(columns, context_); - for (const auto & column : columns_to_remove) - actions->add(ExpressionAction::removeColumn(column)); + auto columns = prewhere_info->prewhere_actions->getResultColumns(); - prewhere_info->remove_columns_actions = std::move(actions); + auto remove_actions = std::make_shared(); + for (const auto & column : columns) + { + if (columns_to_remove.count(column.name)) + { + remove_actions->addInput(column); + 
remove_actions->removeColumn(column.name); + } + } + + prewhere_info->remove_columns_actions = std::move(remove_actions); } columns_to_remove_after_prewhere = std::move(columns_to_remove); @@ -1248,11 +1282,11 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, co void ExpressionAnalysisResult::removeExtraColumns() const { if (hasFilter()) - filter_info->actions->prependProjectInput(); + filter_info->actions->projectInput(); if (hasWhere()) - before_where->prependProjectInput(); + before_where->projectInput(); if (hasHaving()) - before_having->prependProjectInput(); + before_having->projectInput(); } void ExpressionAnalysisResult::checkActions() const @@ -1260,11 +1294,11 @@ void ExpressionAnalysisResult::checkActions() const /// Check that PREWHERE doesn't contain unusual actions. Unusual actions are that can change number of rows. if (hasPrewhere()) { - auto check_actions = [](const ExpressionActionsPtr & actions) + auto check_actions = [](const ActionsDAGPtr & actions) { if (actions) - for (const auto & action : actions->getActions()) - if (action.type == ExpressionAction::Type::ARRAY_JOIN) + for (const auto & node : actions->getNodes()) + if (node.type == ActionsDAG::Type::ARRAY_JOIN) throw Exception("PREWHERE cannot contain ARRAY JOIN action", ErrorCodes::ILLEGAL_PREWHERE); }; diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 6389d8a142c..95a65ee2bfa 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -102,6 +102,7 @@ public: /// If add_aliases, only the calculated values in the desired order and add aliases. /// If also project_result, than only aliases remain in the output block. /// Otherwise, only temporary columns will be deleted from the block. 
+ ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true); ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true); /// Actions that can be performed on an empty block: adding constants and applying functions that depend only on constants. @@ -182,22 +183,22 @@ struct ExpressionAnalysisResult bool optimize_aggregation_in_order = false; bool join_has_delayed_stream = false; - ExpressionActionsPtr before_array_join; + ActionsDAGPtr before_array_join; ArrayJoinActionPtr array_join; - ExpressionActionsPtr before_join; + ActionsDAGPtr before_join; JoinPtr join; - ExpressionActionsPtr before_where; - ExpressionActionsPtr before_aggregation; - ExpressionActionsPtr before_having; - ExpressionActionsPtr before_order_and_select; - ExpressionActionsPtr before_limit_by; - ExpressionActionsPtr final_projection; + ActionsDAGPtr before_where; + ActionsDAGPtr before_aggregation; + ActionsDAGPtr before_having; + ActionsDAGPtr before_order_and_select; + ActionsDAGPtr before_limit_by; + ActionsDAGPtr final_projection; /// Columns from the SELECT list, before renaming them to aliases. Names selected_columns; /// Columns will be removed after prewhere actions execution. - Names columns_to_remove_after_prewhere; + NameSet columns_to_remove_after_prewhere; PrewhereInfoPtr prewhere_info; FilterInfoPtr filter_info; @@ -229,7 +230,7 @@ struct ExpressionAnalysisResult void removeExtraColumns() const; void checkActions() const; - void finalize(const ExpressionActionsChain & chain, const Context & context, size_t where_step_num); + void finalize(const ExpressionActionsChain & chain, size_t where_step_num); }; /// SelectQuery specific ExpressionAnalyzer part. @@ -267,12 +268,12 @@ public: /// Tables that will need to be sent to remote servers for distributed query processing. 
const TemporaryTablesMapping & getExternalTables() const { return external_tables; } - ExpressionActionsPtr simpleSelectActions(); + ActionsDAGPtr simpleSelectActions(); /// These appends are public only for tests void appendSelect(ExpressionActionsChain & chain, bool only_types); /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. - ExpressionActionsPtr appendProjectResult(ExpressionActionsChain & chain) const; + ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const; private: StorageMetadataPtr metadata_snapshot; @@ -315,14 +316,14 @@ private: */ /// Before aggregation: - ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ExpressionActionsPtr & before_array_join, bool only_types); + ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types); bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); JoinPtr appendJoin(ExpressionActionsChain & chain); /// Add preliminary rows filtration. Actions are created in other expression analyzer to prevent any possible alias injection. - void appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name); + void appendPreliminaryFilter(ExpressionActionsChain & chain, ActionsDAGPtr actions, String column_name); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). 
- ExpressionActionsPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); + ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); @@ -330,7 +331,7 @@ private: /// After aggregation: bool appendHaving(ExpressionActionsChain & chain, bool only_types); /// appendSelect - bool appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); + ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); /// appendProjectResult }; diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 056d23299b7..27554e8a4d8 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -442,7 +442,7 @@ struct LLVMModuleState }; LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, const DB::Block & sample_block) - : name(actions.back().result_name) + : name(actions.back().node->result_name) , module_state(std::make_unique()) { LLVMContext context; @@ -452,21 +452,21 @@ LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, const DB: subexpressions[c.name] = subexpression(c.column, c.type); for (const auto & action : actions) { - const auto & names = action.argument_names; - const auto & types = action.function_base->getArgumentTypes(); + const auto & children = action.node->children; + const auto & types = action.node->function_base->getArgumentTypes(); std::vector args; - for (size_t i = 0; i < 
names.size(); ++i) + for (size_t i = 0; i < children.size(); ++i) { - auto inserted = subexpressions.emplace(names[i], subexpression(arg_names.size())); + auto inserted = subexpressions.emplace(children[i]->result_name, subexpression(arg_names.size())); if (inserted.second) { - arg_names.push_back(names[i]); + arg_names.push_back(children[i]->result_name); arg_types.push_back(types[i]); } args.push_back(inserted.first->second); } - subexpressions[action.result_name] = subexpression(*action.function_base, std::move(args)); - originals.push_back(action.function_base); + subexpressions[action.node->result_name] = subexpression(*action.node->function_base, std::move(args)); + originals.push_back(action.node->function_base); } compileFunctionToLLVMByteCode(context, *this); context.compileAllFunctionsToNativeCode(); @@ -555,155 +555,155 @@ LLVMFunction::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataType } -static bool isCompilable(const IFunctionBase & function) -{ - if (!canBeNativeType(*function.getResultType())) - return false; - for (const auto & type : function.getArgumentTypes()) - if (!canBeNativeType(*type)) - return false; - return function.isCompilable(); -} +//static bool isCompilable(const IFunctionBase & function) +//{ +// if (!canBeNativeType(*function.getResultType())) +// return false; +// for (const auto & type : function.getArgumentTypes()) +// if (!canBeNativeType(*type)) +// return false; +// return function.isCompilable(); +//} -static std::vector>> getActionsDependents(const ExpressionActions::Actions & actions, const Names & output_columns) -{ - /// an empty optional is a poisoned value prohibiting the column's producer from being removed - /// (which it could be, if it was inlined into every dependent function). - std::unordered_map>> current_dependents; - for (const auto & name : output_columns) - current_dependents[name].emplace(); - /// a snapshot of each compilable function's dependents at the time of its execution. 
- std::vector>> dependents(actions.size()); - for (size_t i = actions.size(); i--;) - { - switch (actions[i].type) - { - case ExpressionAction::REMOVE_COLUMN: - current_dependents.erase(actions[i].source_name); - /// poison every other column used after this point so that inlining chains do not cross it. - for (auto & dep : current_dependents) - dep.second.emplace(); - break; - - case ExpressionAction::PROJECT: - current_dependents.clear(); - for (const auto & proj : actions[i].projection) - current_dependents[proj.first].emplace(); - break; - - case ExpressionAction::ADD_ALIASES: - for (const auto & proj : actions[i].projection) - current_dependents[proj.first].emplace(); - break; - - case ExpressionAction::ADD_COLUMN: - case ExpressionAction::COPY_COLUMN: - case ExpressionAction::ARRAY_JOIN: - { - Names columns = actions[i].getNeededColumns(); - for (const auto & column : columns) - current_dependents[column].emplace(); - break; - } - - case ExpressionAction::APPLY_FUNCTION: - { - dependents[i] = current_dependents[actions[i].result_name]; - const bool compilable = isCompilable(*actions[i].function_base); - for (const auto & name : actions[i].argument_names) - { - if (compilable) - current_dependents[name].emplace(i); - else - current_dependents[name].emplace(); - } - break; - } - } - } - return dependents; -} - -void compileFunctions( - ExpressionActions::Actions & actions, - const Names & output_columns, - const Block & sample_block, - std::shared_ptr compilation_cache, - size_t min_count_to_compile_expression) -{ - static std::unordered_map counter; - static std::mutex mutex; - - struct LLVMTargetInitializer - { - LLVMTargetInitializer() - { - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); - } - }; - - static LLVMTargetInitializer initializer; - - auto dependents = getActionsDependents(actions, output_columns); - std::vector fused(actions.size()); - for (size_t i = 0; i 
< actions.size(); ++i) - { - if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !isCompilable(*actions[i].function_base)) - continue; - - fused[i].push_back(actions[i]); - if (dependents[i].find({}) != dependents[i].end()) - { - /// the result of compiling one function in isolation is pretty much the same as its `execute` method. - if (fused[i].size() == 1) - continue; - - auto hash_key = ExpressionActions::ActionsHash{}(fused[i]); - { - std::lock_guard lock(mutex); - if (counter[hash_key]++ < min_count_to_compile_expression) - continue; - } - - FunctionBasePtr fn; - if (compilation_cache) - { - std::tie(fn, std::ignore) = compilation_cache->getOrSet(hash_key, [&inlined_func=std::as_const(fused[i]), &sample_block] () - { - Stopwatch watch; - FunctionBasePtr result_fn; - result_fn = std::make_shared(std::make_unique(inlined_func, sample_block)); - ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); - return result_fn; - }); - } - else - { - Stopwatch watch; - fn = std::make_shared(std::make_unique(fused[i], sample_block)); - ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); - } - - actions[i].function_base = fn; - actions[i].argument_names = typeid_cast(typeid_cast(fn.get())->getImpl())->getArgumentNames(); - actions[i].is_function_compiled = true; - - continue; - } - - /// TODO: determine whether it's profitable to inline the function if there's more than one dependent. - for (const auto & dep : dependents[i]) - fused[*dep].insert(fused[*dep].end(), fused[i].begin(), fused[i].end()); - } - - for (auto & action : actions) - { - if (action.type == ExpressionAction::APPLY_FUNCTION && action.is_function_compiled) - action.function = action.function_base->prepare({}); /// Arguments are not used for LLVMFunction. 
- } -} +//static std::vector>> getActionsDependents(const ExpressionActions::Actions & actions, const Names & output_columns) +//{ +// /// an empty optional is a poisoned value prohibiting the column's producer from being removed +// /// (which it could be, if it was inlined into every dependent function). +// std::unordered_map>> current_dependents; +// for (const auto & name : output_columns) +// current_dependents[name].emplace(); +// /// a snapshot of each compilable function's dependents at the time of its execution. +// std::vector>> dependents(actions.size()); +// for (size_t i = actions.size(); i--;) +// { +// switch (actions[i].type) +// { +// case ExpressionAction::REMOVE_COLUMN: +// current_dependents.erase(actions[i].source_name); +// /// poison every other column used after this point so that inlining chains do not cross it. +// for (auto & dep : current_dependents) +// dep.second.emplace(); +// break; +// +// case ExpressionAction::PROJECT: +// current_dependents.clear(); +// for (const auto & proj : actions[i].projection) +// current_dependents[proj.first].emplace(); +// break; +// +// case ExpressionAction::ADD_ALIASES: +// for (const auto & proj : actions[i].projection) +// current_dependents[proj.first].emplace(); +// break; +// +// case ExpressionAction::ADD_COLUMN: +// case ExpressionAction::COPY_COLUMN: +// case ExpressionAction::ARRAY_JOIN: +// { +// Names columns = actions[i].getNeededColumns(); +// for (const auto & column : columns) +// current_dependents[column].emplace(); +// break; +// } +// +// case ExpressionAction::APPLY_FUNCTION: +// { +// dependents[i] = current_dependents[actions[i].result_name]; +// const bool compilable = isCompilable(*actions[i].function_base); +// for (const auto & name : actions[i].argument_names) +// { +// if (compilable) +// current_dependents[name].emplace(i); +// else +// current_dependents[name].emplace(); +// } +// break; +// } +// } +// } +// return dependents; +//} +// +//void compileFunctions( +// 
ExpressionActions::Actions & actions, +// const Names & output_columns, +// const Block & sample_block, +// std::shared_ptr compilation_cache, +// size_t min_count_to_compile_expression) +//{ +// static std::unordered_map counter; +// static std::mutex mutex; +// +// struct LLVMTargetInitializer +// { +// LLVMTargetInitializer() +// { +// llvm::InitializeNativeTarget(); +// llvm::InitializeNativeTargetAsmPrinter(); +// llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); +// } +// }; +// +// static LLVMTargetInitializer initializer; +// +// auto dependents = getActionsDependents(actions, output_columns); +// std::vector fused(actions.size()); +// for (size_t i = 0; i < actions.size(); ++i) +// { +// if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !isCompilable(*actions[i].function_base)) +// continue; +// +// fused[i].push_back(actions[i]); +// if (dependents[i].find({}) != dependents[i].end()) +// { +// /// the result of compiling one function in isolation is pretty much the same as its `execute` method. 
+// if (fused[i].size() == 1) +// continue; +// +// auto hash_key = ExpressionActions::ActionsHash{}(fused[i]); +// { +// std::lock_guard lock(mutex); +// if (counter[hash_key]++ < min_count_to_compile_expression) +// continue; +// } +// +// FunctionBasePtr fn; +// if (compilation_cache) +// { +// std::tie(fn, std::ignore) = compilation_cache->getOrSet(hash_key, [&inlined_func=std::as_const(fused[i]), &sample_block] () +// { +// Stopwatch watch; +// FunctionBasePtr result_fn; +// result_fn = std::make_shared(std::make_unique(inlined_func, sample_block)); +// ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); +// return result_fn; +// }); +// } +// else +// { +// Stopwatch watch; +// fn = std::make_shared(std::make_unique(fused[i], sample_block)); +// ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); +// } +// +// actions[i].function_base = fn; +// actions[i].argument_names = typeid_cast(typeid_cast(fn.get())->getImpl())->getArgumentNames(); +// actions[i].is_function_compiled = true; +// +// continue; +// } +// +// /// TODO: determine whether it's profitable to inline the function if there's more than one dependent. +// for (const auto & dep : dependents[i]) +// fused[*dep].insert(fused[*dep].end(), fused[i].begin(), fused[i].end()); +// } +// +// for (auto & action : actions) +// { +// if (action.type == ExpressionAction::APPLY_FUNCTION && action.is_function_compiled) +// action.function = action.function_base->prepare({}); /// Arguments are not used for LLVMFunction. 
+// } +//} } diff --git a/src/Interpreters/ExpressionJIT.h b/src/Interpreters/ExpressionJIT.h index b2226aad638..bf015478215 100644 --- a/src/Interpreters/ExpressionJIT.h +++ b/src/Interpreters/ExpressionJIT.h @@ -74,7 +74,7 @@ public: /// For each APPLY_FUNCTION action, try to compile the function to native code; if the only uses of a compilable /// function's result are as arguments to other compilable functions, inline it and leave the now-redundant action as-is. -void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr compilation_cache, size_t min_count_to_compile_expression); +// void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr compilation_cache, size_t min_count_to_compile_expression); } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 1c3ffd4db1c..cf73581c6d8 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -988,6 +988,10 @@ void HashJoin::joinBlockImpl( const auto & right_key = required_right_keys.getByPosition(i); const auto & left_name = required_right_keys_sources[i]; + /// asof column is already in block. + if (is_asof_join && right_key.name == key_names_right.back()) + continue; + const auto & col = block.getByName(left_name); bool is_nullable = nullable_right_side || right_key.type->isNullable(); block.insert(correctNullability({col.column, col.type, right_key.name}, is_nullable)); @@ -1007,6 +1011,10 @@ void HashJoin::joinBlockImpl( const auto & right_key = required_right_keys.getByPosition(i); const auto & left_name = required_right_keys_sources[i]; + /// asof column is already in block. 
+ if (is_asof_join && right_key.name == key_names_right.back()) + continue; + const auto & col = block.getByName(left_name); bool is_nullable = nullable_right_side || right_key.type->isNullable(); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d9821be4e4e..349fd926402 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -98,7 +98,7 @@ namespace ErrorCodes /// Assumes `storage` is set and the table filter (row-level security) is not empty. String InterpreterSelectQuery::generateFilterActions( - ExpressionActionsPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns) const + ActionsDAGPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns) const { const auto & db_name = table_id.getDatabaseName(); const auto & table_name = table_id.getTableName(); @@ -393,7 +393,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( filter_info = std::make_shared(); filter_info->column_name = generateFilterActions(filter_info->actions, row_policy_filter, required_columns); source_header = metadata_snapshot->getSampleBlockForColumns( - filter_info->actions->getRequiredColumns(), storage->getVirtuals(), storage->getStorageID()); + filter_info->actions->getRequiredColumns().getNames(), storage->getVirtuals(), storage->getStorageID()); } } @@ -520,7 +520,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (analysis_result.prewhere_info) { - analysis_result.prewhere_info->prewhere_actions->execute(header); + analysis_result.prewhere_info->prewhere_actions->buildExpressions()->execute(header); header = materializeBlock(header); if (analysis_result.prewhere_info->remove_prewhere_column) header.erase(analysis_result.prewhere_info->prewhere_column_name); @@ -531,9 +531,9 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (options.to_stage == QueryProcessingStage::Enum::WithMergeableState) { if 
(!analysis_result.need_aggregate) - return analysis_result.before_order_and_select->getSampleBlock(); + return analysis_result.before_order_and_select->getResultColumns(); - auto header = analysis_result.before_aggregation->getSampleBlock(); + Block header = analysis_result.before_aggregation->getResultColumns(); Block res; @@ -557,10 +557,10 @@ Block InterpreterSelectQuery::getSampleBlockImpl() if (options.to_stage == QueryProcessingStage::Enum::WithMergeableStateAfterAggregation) { - return analysis_result.before_order_and_select->getSampleBlock(); + return analysis_result.before_order_and_select->getResultColumns(); } - return analysis_result.final_projection->getSampleBlock(); + return analysis_result.final_projection->getResultColumns(); } static Field getWithFillFieldValue(const ASTPtr & node, const Context & context) @@ -1108,7 +1108,7 @@ static StreamLocalLimits getLimitsForStorage(const Settings & settings, const Se void InterpreterSelectQuery::executeFetchColumns( QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan, - const PrewhereInfoPtr & prewhere_info, const Names & columns_to_remove_after_prewhere) + const PrewhereInfoPtr & prewhere_info, const NameSet & columns_to_remove_after_prewhere) { auto & query = getSelectQuery(); const Settings & settings = context->getSettingsRef(); @@ -1156,7 +1156,7 @@ void InterpreterSelectQuery::executeFetchColumns( auto column = ColumnAggregateFunction::create(func); column->insertFrom(place); - auto header = analysis_result.before_aggregation->getSampleBlock(); + Block header = analysis_result.before_aggregation->getResultColumns(); size_t arguments_size = desc.argument_names.size(); DataTypes argument_types(arguments_size); for (size_t j = 0; j < arguments_size; ++j) @@ -1176,7 +1176,7 @@ void InterpreterSelectQuery::executeFetchColumns( } /// Actions to calculate ALIAS if required. 
- ExpressionActionsPtr alias_actions; + ActionsDAGPtr alias_actions; if (storage) { @@ -1185,14 +1185,14 @@ void InterpreterSelectQuery::executeFetchColumns( if (row_policy_filter) { auto initial_required_columns = required_columns; - ExpressionActionsPtr actions; + ActionsDAGPtr actions; generateFilterActions(actions, row_policy_filter, initial_required_columns); auto required_columns_from_filter = actions->getRequiredColumns(); for (const auto & column : required_columns_from_filter) { - if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column)) - required_columns.push_back(column); + if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column.name)) + required_columns.push_back(column.name); } } @@ -1224,7 +1224,7 @@ void InterpreterSelectQuery::executeFetchColumns( if (prewhere_info) { /// Get some columns directly from PREWHERE expression actions - auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns(); + auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames(); required_columns_from_prewhere.insert(prewhere_required_columns.begin(), prewhere_required_columns.end()); } @@ -1270,7 +1270,7 @@ void InterpreterSelectQuery::executeFetchColumns( if (prewhere_info) { NameSet columns_to_remove(columns_to_remove_after_prewhere.begin(), columns_to_remove_after_prewhere.end()); - Block prewhere_actions_result = prewhere_info->prewhere_actions->getSampleBlock(); + Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns(); /// Populate required columns with the columns, added by PREWHERE actions and not removed afterwards. /// XXX: looks hacky that we already know which columns after PREWHERE we won't need for sure. 
@@ -1291,10 +1291,10 @@ void InterpreterSelectQuery::executeFetchColumns( } auto syntax_result = TreeRewriter(*context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, metadata_snapshot); - alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, *context).getActions(true); + alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, *context).getActionsDAG(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. - required_columns = alias_actions->getRequiredColumns(); + required_columns = alias_actions->getRequiredColumns().getNames(); /// Do not remove prewhere filter if it is a column which is used as alias. if (prewhere_info && prewhere_info->remove_prewhere_column) @@ -1311,27 +1311,21 @@ void InterpreterSelectQuery::executeFetchColumns( if (prewhere_info) { /// Don't remove columns which are needed to be aliased. - auto new_actions = std::make_shared(prewhere_info->prewhere_actions->getRequiredColumnsWithTypes(), *context); - for (const auto & action : prewhere_info->prewhere_actions->getActions()) - { - if (action.type != ExpressionAction::REMOVE_COLUMN - || required_columns.end() == std::find(required_columns.begin(), required_columns.end(), action.source_name)) - new_actions->add(action); - } - prewhere_info->prewhere_actions = std::move(new_actions); + for (const auto & name : required_columns) + prewhere_info->prewhere_actions->tryRestoreColumn(name); auto analyzed_result = TreeRewriter(*context).analyze(required_columns_from_prewhere_expr, metadata_snapshot->getColumns().getAllPhysical()); prewhere_info->alias_actions - = ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, *context).getActions(true, false); + = ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, *context).getActionsDAG(true, false); /// Add (physical?) columns required by alias actions. 
auto required_columns_from_alias = prewhere_info->alias_actions->getRequiredColumns(); - Block prewhere_actions_result = prewhere_info->prewhere_actions->getSampleBlock(); + Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns(); for (auto & column : required_columns_from_alias) - if (!prewhere_actions_result.has(column)) - if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column)) - required_columns.push_back(column); + if (!prewhere_actions_result.has(column.name)) + if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column.name)) + required_columns.push_back(column.name); /// Add physical columns required by prewhere actions. for (const auto & column : required_columns_from_prewhere) @@ -1488,7 +1482,7 @@ void InterpreterSelectQuery::executeFetchColumns( } -void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ExpressionActionsPtr & expression, bool remove_filter) +void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) { auto where_step = std::make_unique( query_plan.getCurrentDataStream(), @@ -1501,7 +1495,7 @@ void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const Expressi } -void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ExpressionActionsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) +void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) { auto expression_before_aggregation = std::make_unique(query_plan.getCurrentDataStream(), expression); expression_before_aggregation->setStepDescription("Before GROUP BY"); @@ -1598,7 +1592,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool } -void InterpreterSelectQuery::executeHaving(QueryPlan & 
query_plan, const ExpressionActionsPtr & expression) +void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression) { auto having_step = std::make_unique( query_plan.getCurrentDataStream(), @@ -1609,7 +1603,7 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Express } -void InterpreterSelectQuery::executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final) +void InterpreterSelectQuery::executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final) { const Settings & settings = context->getSettingsRef(); @@ -1651,7 +1645,7 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific } -void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ExpressionActionsPtr & expression, const std::string & description) +void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description) { auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression); @@ -1742,7 +1736,7 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const So } -void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ExpressionActionsPtr & expression) +void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ActionsDAGPtr & expression) { auto projection_step = std::make_unique(query_plan.getCurrentDataStream(), expression); projection_step->setStepDescription("Projection"); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 455b1a1e623..9623f1eb5a0 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -117,14 +117,14 @@ private: QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan, const 
PrewhereInfoPtr & prewhere_info, - const Names & columns_to_remove_after_prewhere); + const NameSet & columns_to_remove_after_prewhere); - void executeWhere(QueryPlan & query_plan, const ExpressionActionsPtr & expression, bool remove_filter); - void executeAggregation(QueryPlan & query_plan, const ExpressionActionsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); + void executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); + void executeAggregation(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final); - void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row, bool final); - void executeHaving(QueryPlan & query_plan, const ExpressionActionsPtr & expression); - static void executeExpression(QueryPlan & query_plan, const ExpressionActionsPtr & expression, const std::string & description); + void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final); + void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression); + static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); void executeOrder(QueryPlan & query_plan, InputOrderInfoPtr sorting_info); void executeOrderOptimized(QueryPlan & query_plan, InputOrderInfoPtr sorting_info, UInt64 limit, SortDescription & output_order_descr); void executeWithFill(QueryPlan & query_plan); @@ -133,14 +133,14 @@ private: void executeLimitBy(QueryPlan & query_plan); void executeLimit(QueryPlan & query_plan); void executeOffset(QueryPlan & query_plan); - static void executeProjection(QueryPlan & query_plan, const ExpressionActionsPtr & expression); + static void executeProjection(QueryPlan & 
query_plan, const ActionsDAGPtr & expression); void executeDistinct(QueryPlan & query_plan, bool before_order, Names columns, bool pre_distinct); void executeExtremes(QueryPlan & query_plan); void executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, std::unordered_map & subqueries_for_sets); void executeMergeSorted(QueryPlan & query_plan, const SortDescription & sort_description, UInt64 limit, const std::string & description); String generateFilterActions( - ExpressionActionsPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns = {}) const; + ActionsDAGPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns = {}) const; enum class Modificator { diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 3e7ebfec139..3bfa05c1d8a 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -644,6 +644,10 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & for (const auto & column_name : prepared_stages[0].output_columns) select->select()->children.push_back(std::make_shared(column_name)); + /// Don't let select list be empty. + if (select->select()->children.empty()) + select->select()->children.push_back(std::make_shared(Field(0))); + if (!prepared_stages[0].filters.empty()) { ASTPtr where_expression; @@ -676,12 +680,12 @@ QueryPipelinePtr MutationsInterpreter::addStreamsForLaterStages(const std::vecto if (i < stage.filter_column_names.size()) { /// Execute DELETEs. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->getExpression(), stage.filter_column_names[i], false)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions(), stage.filter_column_names[i], false)); } else { /// Execute UPDATE or final projection. 
- plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->getExpression())); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions())); } } diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index db7008a1779..3a98109fdd9 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -43,7 +43,7 @@ Block getHeaderForProcessingStage( Block header = metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID()); if (query_info.prewhere_info) { - query_info.prewhere_info->prewhere_actions->execute(header); + query_info.prewhere_info->prewhere_actions->buildExpressions()->execute(header); if (query_info.prewhere_info->remove_prewhere_column) header.erase(query_info.prewhere_info->prewhere_column_name); } diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index 61e4f2ffebc..293583ef550 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -9,18 +9,18 @@ namespace DB { -static ITransformingStep::Traits getTraits(const ExpressionActionsPtr & expression) +static ITransformingStep::Traits getTraits(const ActionsDAGPtr & actions) { return ITransformingStep::Traits { { - .preserves_distinct_columns = !expression->hasArrayJoin(), + .preserves_distinct_columns = !actions->hasArrayJoin(), .returns_single_stream = false, .preserves_number_of_streams = true, - .preserves_sorting = !expression->hasArrayJoin(), + .preserves_sorting = !actions->hasArrayJoin(), }, { - .preserves_number_of_rows = !expression->hasArrayJoin(), + .preserves_number_of_rows = !actions->hasArrayJoin(), } }; } @@ -41,12 +41,12 @@ static ITransformingStep::Traits getJoinTraits() }; } -ExpressionStep::ExpressionStep(const DataStream & input_stream_, ExpressionActionsPtr expression_) 
+ExpressionStep::ExpressionStep(const DataStream & input_stream_, ActionsDAGPtr actions_) : ITransformingStep( input_stream_, - Transform::transformHeader(input_stream_.header, expression_), - getTraits(expression_)) - , expression(std::move(expression_)) + Transform::transformHeader(input_stream_.header, actions_->buildExpressions()), + getTraits(actions_)) + , actions(std::move(actions_)) { /// Some columns may be removed by expression. updateDistinctColumns(output_stream->header, output_stream->distinct_columns); @@ -55,7 +55,7 @@ ExpressionStep::ExpressionStep(const DataStream & input_stream_, ExpressionActio void ExpressionStep::updateInputStream(DataStream input_stream, bool keep_header) { Block out_header = keep_header ? std::move(output_stream->header) - : Transform::transformHeader(input_stream.header, expression); + : Transform::transformHeader(input_stream.header, actions->buildExpressions()); output_stream = createOutputStream( input_stream, std::move(out_header), @@ -67,6 +67,7 @@ void ExpressionStep::updateInputStream(DataStream input_stream, bool keep_header void ExpressionStep::transformPipeline(QueryPipeline & pipeline) { + auto expression = actions->buildExpressions(); pipeline.addSimpleTransform([&](const Block & header) { return std::make_shared(header, expression); @@ -82,11 +83,12 @@ void ExpressionStep::transformPipeline(QueryPipeline & pipeline) } } -static void doDescribeActions(const ExpressionActionsPtr & expression, IQueryPlanStep::FormatSettings & settings) +void ExpressionStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, ' '); bool first = true; + auto expression = actions->buildExpressions(); for (const auto & action : expression->getActions()) { settings.out << prefix << (first ? 
"Actions: " @@ -96,11 +98,6 @@ static void doDescribeActions(const ExpressionActionsPtr & expression, IQueryPla } } -void ExpressionStep::describeActions(FormatSettings & settings) const -{ - doDescribeActions(expression, settings); -} - JoinStep::JoinStep(const DataStream & input_stream_, JoinPtr join_) : ITransformingStep( input_stream_, diff --git a/src/Processors/QueryPlan/ExpressionStep.h b/src/Processors/QueryPlan/ExpressionStep.h index 45aaa010121..c002de8deb1 100644 --- a/src/Processors/QueryPlan/ExpressionStep.h +++ b/src/Processors/QueryPlan/ExpressionStep.h @@ -4,8 +4,8 @@ namespace DB { -class ExpressionActions; -using ExpressionActionsPtr = std::shared_ptr; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; class IJoin; using JoinPtr = std::shared_ptr; @@ -19,7 +19,7 @@ class ExpressionStep : public ITransformingStep public: using Transform = ExpressionTransform; - explicit ExpressionStep(const DataStream & input_stream_, ExpressionActionsPtr expression_); + explicit ExpressionStep(const DataStream & input_stream_, ActionsDAGPtr actions_); String getName() const override { return "Expression"; } void transformPipeline(QueryPipeline & pipeline) override; @@ -28,10 +28,10 @@ public: void describeActions(FormatSettings & settings) const override; - const ExpressionActionsPtr & getExpression() const { return expression; } + const ActionsDAGPtr & getExpression() const { return actions; } private: - ExpressionActionsPtr expression; + ActionsDAGPtr actions; }; /// TODO: add separate step for join. 
diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 015b5224054..1a8fba97ee2 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -28,7 +28,7 @@ static ITransformingStep::Traits getTraits() } FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_description_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits()) + : ITransformingStep(input_stream_, FillingTransform::transformHeader(input_stream_.header, sort_description_), getTraits()) , sort_description(std::move(sort_description_)) { if (!input_stream_.has_single_port) diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 2bbbc0373da..ce6522cccc8 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -8,7 +8,7 @@ namespace DB { -static ITransformingStep::Traits getTraits(const ExpressionActionsPtr & expression) +static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression) { return ITransformingStep::Traits { @@ -26,14 +26,14 @@ static ITransformingStep::Traits getTraits(const ExpressionActionsPtr & expressi FilterStep::FilterStep( const DataStream & input_stream_, - ExpressionActionsPtr expression_, + ActionsDAGPtr actions_, String filter_column_name_, bool remove_filter_column_) : ITransformingStep( input_stream_, - FilterTransform::transformHeader(input_stream_.header, expression_, filter_column_name_, remove_filter_column_), - getTraits(expression_)) - , expression(std::move(expression_)) + FilterTransform::transformHeader(input_stream_.header, actions_->buildExpressions(), filter_column_name_, remove_filter_column_), + getTraits(actions_)) + , actions(std::move(actions_)) , filter_column_name(std::move(filter_column_name_)) , remove_filter_column(remove_filter_column_) { @@ -45,7 +45,7 @@ void FilterStep::updateInputStream(DataStream input_stream, bool 
keep_header) { Block out_header = std::move(output_stream->header); if (keep_header) - out_header = FilterTransform::transformHeader(input_stream.header, expression, filter_column_name, remove_filter_column); + out_header = FilterTransform::transformHeader(input_stream.header, actions->buildExpressions(), filter_column_name, remove_filter_column); output_stream = createOutputStream( input_stream, @@ -58,6 +58,7 @@ void FilterStep::updateInputStream(DataStream input_stream, bool keep_header) void FilterStep::transformPipeline(QueryPipeline & pipeline) { + auto expression = actions->buildExpressions(); pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) { bool on_totals = stream_type == QueryPipeline::StreamType::Totals; @@ -79,6 +80,7 @@ void FilterStep::describeActions(FormatSettings & settings) const settings.out << prefix << "Filter column: " << filter_column_name << '\n'; bool first = true; + auto expression = actions->buildExpressions(); for (const auto & action : expression->getActions()) { settings.out << prefix << (first ? "Actions: " diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index d827fe920eb..efd91637c1f 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -4,8 +4,8 @@ namespace DB { -class ExpressionActions; -using ExpressionActionsPtr = std::shared_ptr; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; /// Implements WHERE, HAVING operations. See FilterTransform. 
class FilterStep : public ITransformingStep @@ -13,7 +13,7 @@ class FilterStep : public ITransformingStep public: FilterStep( const DataStream & input_stream_, - ExpressionActionsPtr expression_, + ActionsDAGPtr actions_, String filter_column_name_, bool remove_filter_column_); @@ -24,12 +24,12 @@ public: void describeActions(FormatSettings & settings) const override; - const ExpressionActionsPtr & getExpression() const { return expression; } + const ActionsDAGPtr & getExpression() const { return actions; } const String & getFilterColumnName() const { return filter_column_name; } bool removesFilterColumn() const { return remove_filter_column; } private: - ExpressionActionsPtr expression; + ActionsDAGPtr actions; String filter_column_name; bool remove_filter_column; }; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 1ff844480a9..040dc27518e 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -438,7 +438,7 @@ static void tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Node * return; /// All actions was moved before ARRAY JOIN. Swap Expression and ArrayJoin. 
- if (expression->getActions().empty()) + if (expression->empty()) { auto expected_header = parent->getOutputStream().header; diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp index fd38dd9218b..88248c0d194 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -38,17 +38,19 @@ ReadFromStorageStep::ReadFromStorageStep( { if (query_info.prewhere_info->alias_actions) { + auto alias_actions = query_info.prewhere_info->alias_actions->buildExpressions(); pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, query_info.prewhere_info->alias_actions); + return std::make_shared(header, alias_actions); }); } + auto prewhere_actions = query_info.prewhere_info->prewhere_actions->buildExpressions(); pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( header, - query_info.prewhere_info->prewhere_actions, + prewhere_actions, query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); }); @@ -59,10 +61,10 @@ ReadFromStorageStep::ReadFromStorageStep( // This leads to mismatched header in distributed table if (query_info.prewhere_info->remove_columns_actions) { + auto remove_actions = query_info.prewhere_info->remove_columns_actions->buildExpressions(); pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared( - header, query_info.prewhere_info->remove_columns_actions); + return std::make_shared(header, remove_actions); }); } } diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index 823db356f7b..fd27f67f70e 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -27,17 +27,17 @@ static ITransformingStep::Traits getTraits(bool has_filter) TotalsHavingStep::TotalsHavingStep( const DataStream & input_stream_, bool overflow_row_, - 
const ExpressionActionsPtr & expression_, + const ActionsDAGPtr & actions_, const std::string & filter_column_, TotalsMode totals_mode_, double auto_include_threshold_, bool final_) : ITransformingStep( input_stream_, - TotalsHavingTransform::transformHeader(input_stream_.header, expression_, final_), + TotalsHavingTransform::transformHeader(input_stream_.header, (actions_ ? actions_->buildExpressions() : nullptr), final_), getTraits(!filter_column_.empty())) , overflow_row(overflow_row_) - , expression(expression_) + , actions(actions_) , filter_column_name(filter_column_) , totals_mode(totals_mode_) , auto_include_threshold(auto_include_threshold_) @@ -48,7 +48,7 @@ TotalsHavingStep::TotalsHavingStep( void TotalsHavingStep::transformPipeline(QueryPipeline & pipeline) { auto totals_having = std::make_shared( - pipeline.getHeader(), overflow_row, expression, + pipeline.getHeader(), overflow_row, (actions ? actions->buildExpressions() : nullptr), filter_column_name, totals_mode, auto_include_threshold, final); pipeline.addTotalsHavingTransform(std::move(totals_having)); @@ -78,6 +78,7 @@ void TotalsHavingStep::describeActions(FormatSettings & settings) const settings.out << prefix << "Mode: " << totalsModeToString(totals_mode, auto_include_threshold) << '\n'; bool first = true; + auto expression = actions->buildExpressions(); for (const auto & action : expression->getActions()) { settings.out << prefix << (first ? 
"Actions: " diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h index c9c73985126..792b96c9162 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.h +++ b/src/Processors/QueryPlan/TotalsHavingStep.h @@ -4,8 +4,8 @@ namespace DB { -class ExpressionActions; -using ExpressionActionsPtr = std::shared_ptr; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; enum class TotalsMode; @@ -16,7 +16,7 @@ public: TotalsHavingStep( const DataStream & input_stream_, bool overflow_row_, - const ExpressionActionsPtr & expression_, + const ActionsDAGPtr & actions_, const std::string & filter_column_, TotalsMode totals_mode_, double auto_include_threshold_, @@ -30,7 +30,7 @@ public: private: bool overflow_row; - ExpressionActionsPtr expression; + ActionsDAGPtr actions; String filter_column_name; TotalsMode totals_mode; double auto_include_threshold; diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 501a01906ff..e60fc65e96e 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,13 +1,12 @@ #include -#include #include - namespace DB { Block ExpressionTransform::transformHeader(Block header, const ExpressionActionsPtr & expression) { - expression->execute(header, true); + size_t num_rows = header.rows(); + expression->execute(header, num_rows, true); return header; } @@ -20,11 +19,11 @@ ExpressionTransform::ExpressionTransform(const Block & header_, ExpressionAction void ExpressionTransform::transform(Chunk & chunk) { + size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - expression->execute(block); + expression->execute(block, num_rows); - auto num_rows = block.rows(); chunk.setColumns(block.getColumns(), num_rows); } diff --git a/src/Processors/Transforms/FillingTransform.cpp 
b/src/Processors/Transforms/FillingTransform.cpp index 76c4d7ddc16..3f3a0e75223 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -10,10 +10,23 @@ namespace ErrorCodes extern const int INVALID_WITH_FILL_EXPRESSION; } +Block FillingTransform::transformHeader(Block header, const SortDescription & sort_description) +{ + NameSet sort_keys; + for (const auto & key : sort_description) + sort_keys.insert(key.column_name); + + /// Columns which are not from sorting key may not be constant anymore. + for (auto & column : header) + if (column.column && isColumnConst(*column.column) && !sort_keys.count(column.name)) + column.column = column.type->createColumn(); + + return header; +} FillingTransform::FillingTransform( const Block & header_, const SortDescription & sort_description_) - : ISimpleTransform(header_, header_, true) + : ISimpleTransform(header_, transformHeader(header_, sort_description_), true) , sort_description(sort_description_) , filling_row(sort_description_) , next_row(sort_description_) diff --git a/src/Processors/Transforms/FillingTransform.h b/src/Processors/Transforms/FillingTransform.h index 3df981634ca..33717b079a0 100644 --- a/src/Processors/Transforms/FillingTransform.h +++ b/src/Processors/Transforms/FillingTransform.h @@ -19,6 +19,8 @@ public: Status prepare() override; + static Block transformHeader(Block header, const SortDescription & sort_description); + protected: void transform(Chunk & Chunk) override; diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index 197e0ac7595..23b1bf8a984 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -33,7 +33,8 @@ Block FilterTransform::transformHeader( const String & filter_column_name, bool remove_filter_column) { - expression->execute(header); + size_t num_rows = header.rows(); + expression->execute(header, num_rows); if 
(remove_filter_column) header.erase(filter_column_name); @@ -96,16 +97,15 @@ void FilterTransform::removeFilterIfNeed(Chunk & chunk) const void FilterTransform::transform(Chunk & chunk) { - size_t num_rows_before_filtration; + size_t num_rows_before_filtration = chunk.getNumRows(); auto columns = chunk.detachColumns(); { Block block = getInputPort().getHeader().cloneWithColumns(columns); columns.clear(); - expression->execute(block); + expression->execute(block, num_rows_before_filtration); - num_rows_before_filtration = block.rows(); columns = block.getColumns(); } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 65043f65e1a..fefe96aaa7c 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -32,8 +32,10 @@ Block TotalsHavingTransform::transformHeader(Block block, const ExpressionAction if (final) finalizeBlock(block); + size_t num_rows = block.rows(); + if (expression) - expression->execute(block); + expression->execute(block, num_rows); return block; } @@ -64,7 +66,8 @@ TotalsHavingTransform::TotalsHavingTransform( if (expression) { auto totals_header = finalized_header; - expression->execute(totals_header); + size_t num_rows = totals_header.rows(); + expression->execute(totals_header, num_rows); outputs.emplace_back(totals_header, this); } else @@ -155,8 +158,9 @@ void TotalsHavingTransform::transform(Chunk & chunk) { /// Compute the expression in HAVING. const auto & cur_header = final ? 
finalized_header : getInputPort().getHeader(); + size_t num_rows = finalized.getNumRows(); auto finalized_block = cur_header.cloneWithColumns(finalized.detachColumns()); - expression->execute(finalized_block); + expression->execute(finalized_block, num_rows); auto columns = finalized_block.getColumns(); ColumnPtr filter_column_ptr = columns[filter_column_pos]; @@ -165,7 +169,6 @@ void TotalsHavingTransform::transform(Chunk & chunk) if (const_filter_description.always_true) { addToTotals(chunk, nullptr); - auto num_rows = columns.front()->size(); chunk.setColumns(std::move(columns), num_rows); return; } @@ -198,7 +201,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) } } - auto num_rows = columns.front()->size(); + num_rows = columns.front()->size(); chunk.setColumns(std::move(columns), num_rows); } @@ -255,10 +258,11 @@ void TotalsHavingTransform::prepareTotals() if (expression) { + size_t num_rows = totals.getNumRows(); auto block = finalized_header.cloneWithColumns(totals.detachColumns()); - expression->execute(block); + expression->execute(block, num_rows); /// Note: after expression totals may have several rows if `arrayJoin` was used in expression. 
- totals = Chunk(block.getColumns(), block.rows()); + totals = Chunk(block.getColumns(), num_rows); } } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 9bbc1653848..c55d608110a 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -502,7 +502,7 @@ Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const N auto syntax_analyzer_result = TreeRewriter(context).analyze(default_expr_list, all_columns); const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); for (const auto & action : actions->getActions()) - if (action.type == ExpressionAction::Type::ARRAY_JOIN) + if (action.node->type == ActionsDAG::Type::ARRAY_JOIN) throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); return actions->getSampleBlock(); diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 143d97cdc15..8adf2be1bd4 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -31,7 +31,7 @@ IndexDescription::IndexDescription(const IndexDescription & other) , granularity(other.granularity) { if (other.expression) - expression = std::make_shared(*other.expression); + expression = other.expression->clone(); } @@ -54,7 +54,7 @@ IndexDescription & IndexDescription::operator=(const IndexDescription & other) type = other.type; if (other.expression) - expression = std::make_shared(*other.expression); + expression = other.expression->clone(); else expression.reset(); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 533736d19ed..e6cd72e51b3 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -25,7 +25,7 @@ KeyDescription::KeyDescription(const KeyDescription & other) , additional_column(other.additional_column) { if (other.expression) - expression = 
std::make_shared(*other.expression); + expression = other.expression->clone(); } KeyDescription & KeyDescription::operator=(const KeyDescription & other) @@ -45,7 +45,7 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) if (other.expression) - expression = std::make_shared(*other.expression); + expression = other.expression->clone(); else expression.reset(); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7cb872f174a..39ee4684af9 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -569,7 +569,7 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return false; bool found_transformation = false; - for (const ExpressionAction & action : key_expr->getActions()) + for (const auto & action : key_expr->getActions()) { /** The key functional expression constraint may be inferred from a plain column in the expression. * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, @@ -581,25 +581,25 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( * Instead, we can qualify only functions that do not transform the range (for example rounding), * which while not strictly monotonic, are monotonic everywhere on the input range. */ - const auto & argument_names = action.argument_names; - if (action.type == ExpressionAction::Type::APPLY_FUNCTION - && argument_names.size() == 1 - && argument_names[0] == expr_name) + const auto & children = action.node->children; + if (action.node->type == ActionsDAG::Type::FUNCTION + && children.size() == 1 + && children[0]->result_name == expr_name) { - if (!action.function_base->hasInformationAboutMonotonicity()) + if (!action.node->function_base->hasInformationAboutMonotonicity()) return false; /// Range is irrelevant in this case. 
- IFunction::Monotonicity monotonicity = action.function_base->getMonotonicityForRange(*out_type, Field(), Field()); + IFunction::Monotonicity monotonicity = action.node->function_base->getMonotonicityForRange(*out_type, Field(), Field()); if (!monotonicity.is_always_monotonic) return false; /// Apply the next transformation step. std::tie(out_value, out_type) = applyFunctionForFieldOfUnknownType( - action.function_builder, + action.node->function_builder, out_type, out_value); - expr_name = action.result_name; + expr_name = action.node->result_name; /// Transformation results in a key expression, accept. auto it = key_columns.find(expr_name); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index c852151f27d..8f0ad643811 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -40,6 +40,13 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( , use_uncompressed_cache(use_uncompressed_cache_) , virt_column_names(virt_column_names_) { + if (prewhere_info) + { + if (prewhere_info->alias_actions) + prewhere_alias_actions = prewhere_info->alias_actions->buildExpressions(); + prewhere_actions = prewhere_info->prewhere_actions->buildExpressions(); + } + header_without_virtual_columns = getPort().getHeader(); for (auto it = virt_column_names.rbegin(); it != virt_column_names.rend(); ++it) @@ -74,23 +81,39 @@ void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & cu { if (reader->getColumns().empty()) { - current_task.range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_info, true); + current_task.range_reader = MergeTreeRangeReader( + pre_reader.get(), nullptr, + prewhere_alias_actions, + prewhere_actions, + prewhere_info->prewhere_column_name, + prewhere_info->remove_prewhere_column, + prewhere_info->need_filter, + true); } else { MergeTreeRangeReader * pre_reader_ptr 
= nullptr; if (pre_reader != nullptr) { - current_task.pre_range_reader = MergeTreeRangeReader(pre_reader.get(), nullptr, prewhere_info, false); + current_task.pre_range_reader = MergeTreeRangeReader( + pre_reader.get(), nullptr, + prewhere_alias_actions, + prewhere_actions, + prewhere_info->prewhere_column_name, + prewhere_info->remove_prewhere_column, + prewhere_info->need_filter, + false); pre_reader_ptr = &current_task.pre_range_reader; } - current_task.range_reader = MergeTreeRangeReader(reader.get(), pre_reader_ptr, nullptr, true); + current_task.range_reader = MergeTreeRangeReader( + reader.get(), pre_reader_ptr, nullptr, nullptr, {}, false, false, true); } } else { - current_task.range_reader = MergeTreeRangeReader(reader.get(), nullptr, nullptr, true); + current_task.range_reader = MergeTreeRangeReader( + reader.get(), nullptr, nullptr, nullptr, {}, false, false, true); } } @@ -314,9 +337,9 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P if (prewhere_info) { if (prewhere_info->alias_actions) - prewhere_info->alias_actions->execute(block); + prewhere_info->alias_actions->buildExpressions()->execute(block); - prewhere_info->prewhere_actions->execute(block); + prewhere_info->prewhere_actions->buildExpressions()->execute(block); auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 00ef131ae45..b44fe709f78 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -58,6 +58,8 @@ protected: StorageMetadataPtr metadata_snapshot; PrewhereInfoPtr prewhere_info; + ExpressionActionsPtr prewhere_alias_actions; + ExpressionActionsPtr prewhere_actions; UInt64 max_block_size_rows; UInt64 preferred_block_size_bytes; diff --git
a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 739dfedfde4..24f626e51df 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -260,9 +260,9 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (prewhere_info) { if (prewhere_info->alias_actions) - pre_column_names = prewhere_info->alias_actions->getRequiredColumns(); + pre_column_names = prewhere_info->alias_actions->getRequiredColumns().getNames(); else - pre_column_names = prewhere_info->prewhere_actions->getRequiredColumns(); + pre_column_names = prewhere_info->prewhere_actions->getRequiredColumns().getNames(); if (pre_column_names.empty()) pre_column_names.push_back(column_names[0]); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index cd776a661ed..c57d71dd31b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -256,14 +256,14 @@ StoragePolicyPtr MergeTreeData::getStoragePolicy() const static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name, bool allow_nullable_key) { - for (const ExpressionAction & action : expr.getActions()) + for (const auto & action : expr.getActions()) { - if (action.type == ExpressionAction::ARRAY_JOIN) + if (action.node->type == ActionsDAG::Type::ARRAY_JOIN) throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); - if (action.type == ExpressionAction::APPLY_FUNCTION) + if (action.node->type == ActionsDAG::Type::FUNCTION) { - IFunctionBase & func = *action.function_base; + IFunctionBase & func = *action.node->function_base; if (!func.isDeterministic()) throw Exception(key_name + " key cannot contain non-deterministic functions, " "but contains function " + func.getName(), @@ -437,7 +437,7 @@ void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & 
new_pa /// Add all columns used in the partition key to the min-max index. const NamesAndTypesList & minmax_idx_columns_with_types = new_partition_key.expression->getRequiredColumnsWithTypes(); - minmax_idx_expr = std::make_shared(minmax_idx_columns_with_types, global_context); + minmax_idx_expr = std::make_shared(minmax_idx_columns_with_types)->buildExpressions(); for (const NameAndTypePair & column : minmax_idx_columns_with_types) { minmax_idx_columns.emplace_back(column.name); @@ -1401,10 +1401,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S { /// Forbid altering columns inside partition key expressions because it can change partition ID format. auto partition_key_expr = old_metadata.getPartitionKey().expression; - for (const ExpressionAction & action : partition_key_expr->getActions()) + for (const auto & action : partition_key_expr->getActions()) { - auto action_columns = action.getNeededColumns(); - columns_alter_type_forbidden.insert(action_columns.begin(), action_columns.end()); + for (const auto * child : action.node->children) + columns_alter_type_forbidden.insert(child->result_name); } /// But allow to alter columns without expressions under certain condition. 
@@ -1421,10 +1421,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S if (old_metadata.hasSortingKey()) { auto sorting_key_expr = old_metadata.getSortingKey().expression; - for (const ExpressionAction & action : sorting_key_expr->getActions()) + for (const auto & action : sorting_key_expr->getActions()) { - auto action_columns = action.getNeededColumns(); - columns_alter_type_forbidden.insert(action_columns.begin(), action_columns.end()); + for (const auto * child : action.node->children) + columns_alter_type_forbidden.insert(child->result_name); } for (const String & col : sorting_key_expr->getRequiredColumns()) columns_alter_type_metadata_only.insert(col); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8c1dc845d26..b4d08b2b718 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -706,7 +706,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( /// Projection, that needed to drop columns, which have appeared by execution /// of some extra expressions, and to allow execute the same expressions later. /// NOTE: It may lead to double computation of expressions. 
- ExpressionActionsPtr result_projection; + ActionsDAGPtr result_projection; if (select.final()) { @@ -784,9 +784,10 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( if (result_projection) { - res.addSimpleTransform([&result_projection](const Block & header) + auto result_projection_actions = result_projection->buildExpressions(); + res.addSimpleTransform([&result_projection_actions](const Block & header) { - return std::make_shared(header, result_projection); + return std::make_shared(header, result_projection_actions); }); } @@ -802,9 +803,10 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) { - res.addSimpleTransform([&query_info](const Block & header) + auto remove_actions = query_info.prewhere_info->remove_columns_actions->buildExpressions(); + res.addSimpleTransform([&remove_actions](const Block & header) { - return std::make_shared(header, query_info.prewhere_info->remove_columns_actions); + return std::make_shared(header, remove_actions); }); } @@ -956,11 +958,12 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( } } -static ExpressionActionsPtr createProjection(const Pipe & pipe, const MergeTreeData & data) +static ActionsDAGPtr createProjection(const Pipe & pipe) { const auto & header = pipe.getHeader(); - auto projection = std::make_shared(header.getNamesAndTypesList(), data.global_context); - projection->add(ExpressionAction::project(header.getNames())); + auto projection = std::make_shared(header.getNamesAndTypesList()); + projection->removeUnusedActions(header.getNames()); + projection->projectInput(); return projection; } @@ -976,7 +979,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( const Names & virt_columns, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - ExpressionActionsPtr & out_projection) const + ActionsDAGPtr & out_projection) const { size_t sum_marks = 0; const InputOrderInfoPtr & 
input_order_info = query_info.input_order_info; @@ -1182,7 +1185,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( input_order_info->direction, 1); /// Drop temporary columns, added by 'sorting_key_prefix_expr' - out_projection = createProjection(pipe, data); + out_projection = createProjection(pipe); pipe.addSimpleTransform([sorting_key_prefix_expr](const Block & header) { return std::make_shared(header, sorting_key_prefix_expr); @@ -1210,7 +1213,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( const Names & virt_columns, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - ExpressionActionsPtr & out_projection) const + ActionsDAGPtr & out_projection) const { const auto data_settings = data.getSettings(); size_t sum_marks = 0; @@ -1259,7 +1262,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( /// Drop temporary columns, added by 'sorting_key_expr' if (!out_projection) - out_projection = createProjection(pipe, data); + out_projection = createProjection(pipe); pipe.addSimpleTransform([&metadata_snapshot](const Block & header) { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 5894d6e044b..2217d570a56 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -73,7 +73,7 @@ private: const Names & virt_columns, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - ExpressionActionsPtr & out_projection) const; + ActionsDAGPtr & out_projection) const; Pipe spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, @@ -86,7 +86,7 @@ private: const Names & virt_columns, const Settings & settings, const MergeTreeReaderSettings & reader_settings, - ExpressionActionsPtr & out_projection) const; + ActionsDAGPtr & out_projection) const; /// Get the approximate value (bottom estimate - only by full marks) 
of the number of rows falling under the index. size_t getApproximateTotalRowsToRead( diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 726e405b3e8..8dcd7fa688f 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -489,11 +489,20 @@ size_t MergeTreeRangeReader::ReadResult::countBytesInResultFilter(const IColumn: MergeTreeRangeReader::MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, - const PrewhereInfoPtr & prewhere_, + ExpressionActionsPtr prewhere_alias_actions_, + ExpressionActionsPtr prewhere_actions_, + String prewhere_column_name_, + bool remove_prewhere_column_, + bool prewhere_need_filter_, bool last_reader_in_chain_) : merge_tree_reader(merge_tree_reader_) , index_granularity(&(merge_tree_reader->data_part->index_granularity)), prev_reader(prev_reader_) - , prewhere(prewhere_), last_reader_in_chain(last_reader_in_chain_), is_initialized(true) + , prewhere_alias_actions(std::move(prewhere_alias_actions_)) + , prewhere_actions(std::move(prewhere_actions_)) + , prewhere_column_name(std::move(prewhere_column_name_)) + , remove_prewhere_column(remove_prewhere_column_) + , prewhere_need_filter(prewhere_need_filter_) + , last_reader_in_chain(last_reader_in_chain_), is_initialized(true) { if (prev_reader) sample_block = prev_reader->getSampleBlock(); @@ -501,16 +510,16 @@ MergeTreeRangeReader::MergeTreeRangeReader( for (const auto & name_and_type : merge_tree_reader->getColumns()) sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); - if (prewhere) + if (prewhere_actions) { - if (prewhere->alias_actions) - prewhere->alias_actions->execute(sample_block, true); + if (prewhere_alias_actions) + prewhere_alias_actions->execute(sample_block, true); - if (prewhere->prewhere_actions) - prewhere->prewhere_actions->execute(sample_block, true); + if 
(prewhere_actions) + prewhere_actions->execute(sample_block, true); - if (prewhere->remove_prewhere_column) - sample_block.erase(prewhere->prewhere_column_name); + if (remove_prewhere_column) + sample_block.erase(prewhere_column_name); } } @@ -794,7 +803,7 @@ Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) { - if (!prewhere) + if (!prewhere_actions) return; const auto & header = merge_tree_reader->getColumns(); @@ -825,14 +834,14 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); - if (prewhere->alias_actions) - prewhere->alias_actions->execute(block); + if (prewhere_alias_actions) + prewhere_alias_actions->execute(block); /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. 
result.block_before_prewhere = block; - prewhere->prewhere_actions->execute(block); + prewhere_actions->execute(block); - prewhere_column_pos = block.getPositionByName(prewhere->prewhere_column_name); + prewhere_column_pos = block.getPositionByName(prewhere_column_name); result.columns.clear(); result.columns.reserve(block.columns()); @@ -860,7 +869,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r if (result.totalRowsPerGranule() == 0) result.setFilterConstFalse(); /// If we need to filter in PREWHERE - else if (prewhere->need_filter || result.need_filter) + else if (prewhere_need_filter || result.need_filter) { /// If there is a filter and without optimized if (result.getFilter() && last_reader_in_chain) @@ -901,11 +910,11 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r /// Check if the PREWHERE column is needed if (!result.columns.empty()) { - if (prewhere->remove_prewhere_column) + if (remove_prewhere_column) result.columns.erase(result.columns.begin() + prewhere_column_pos); else result.columns[prewhere_column_pos] = - getSampleBlock().getByName(prewhere->prewhere_column_name).type-> + getSampleBlock().getByName(prewhere_column_name).type-> createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); } } @@ -913,7 +922,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r else { result.columns[prewhere_column_pos] = result.getFilterHolder()->convertToFullColumnIfConst(); - if (getSampleBlock().getByName(prewhere->prewhere_column_name).type->isNullable()) + if (getSampleBlock().getByName(prewhere_column_name).type->isNullable()) result.columns[prewhere_column_pos] = makeNullable(std::move(result.columns[prewhere_column_pos])); result.clearFilter(); // Acting as a flag to not filter in PREWHERE } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 381b87ecffd..d41ca6b56eb 100644 --- 
a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -24,7 +24,11 @@ public: MergeTreeRangeReader( IMergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, - const PrewhereInfoPtr & prewhere_, + ExpressionActionsPtr prewhere_alias_actions_, + ExpressionActionsPtr prewhere_actions_, + String prewhere_column_name_, + bool remove_prewhere_column_, + bool prewhere_need_filter_, bool last_reader_in_chain_); MergeTreeRangeReader() = default; @@ -217,7 +221,12 @@ private: IMergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; MergeTreeRangeReader * prev_reader = nullptr; /// If not nullptr, read from prev_reader firstly. - PrewhereInfoPtr prewhere; + + ExpressionActionsPtr prewhere_alias_actions; + ExpressionActionsPtr prewhere_actions; + String prewhere_column_name; + bool remove_prewhere_column; + bool prewhere_need_filter; Stream stream; diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 37f07ad1876..48412f3dfa0 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -72,7 +72,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage bool found_function = false; for (const auto & action : elements_actions[i]->getActions()) { - if (action.type != ExpressionAction::APPLY_FUNCTION) + if (action.node->type != ActionsDAG::Type::FUNCTION) continue; if (found_function) @@ -83,13 +83,13 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage else found_function = true; - if (action.argument_names.size() != 1 || action.argument_names.at(0) != sorting_key_columns[i]) + if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i]) { current_direction = 0; break; } - const auto & func = *action.function_base; + const auto & func = *action.node->function_base; if 
(!func.hasInformationAboutMonotonicity()) { current_direction = 0; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 9a5d0cc6338..cbf2c816d87 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -12,27 +12,30 @@ namespace DB class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr<ActionsDAG>; + struct PrewhereInfo { /// Actions which are executed in order to alias columns are used for prewhere actions. - ExpressionActionsPtr alias_actions; + ActionsDAGPtr alias_actions; /// Actions which are executed on block in order to get filter column for prewhere step. - ExpressionActionsPtr prewhere_actions; + ActionsDAGPtr prewhere_actions; /// Actions which are executed after reading from storage in order to remove unused columns. - ExpressionActionsPtr remove_columns_actions; + ActionsDAGPtr remove_columns_actions; String prewhere_column_name; bool remove_prewhere_column = false; bool need_filter = false; PrewhereInfo() = default; - explicit PrewhereInfo(ExpressionActionsPtr prewhere_actions_, String prewhere_column_name_) + explicit PrewhereInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} }; /// Helper struct to store all the information about the filter expression.
struct FilterInfo { - ExpressionActionsPtr actions; + ActionsDAGPtr actions; String column_name; bool do_remove_column = false; }; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index cf1eaa1f46d..a4d78865f4f 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -278,7 +278,7 @@ Pipe StorageBuffer::read( pipe_from_buffers.addSimpleTransform([&](const Block & header) { return std::make_shared( - header, query_info.prewhere_info->prewhere_actions, + header, query_info.prewhere_info->prewhere_actions->buildExpressions(), query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); }); @@ -286,7 +286,7 @@ Pipe StorageBuffer::read( { pipe_from_buffers.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, query_info.prewhere_info->alias_actions); + return std::make_shared(header, query_info.prewhere_info->alias_actions->buildExpressions()); }); } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b858239d637..be3b70a5e44 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -201,9 +201,9 @@ bool isExpressionActionsDeterministics(const ExpressionActionsPtr & actions) { for (const auto & action : actions->getActions()) { - if (action.type != ExpressionAction::APPLY_FUNCTION) + if (action.node->type != ActionsDAG::Type::FUNCTION) continue; - if (!action.function_base->isDeterministic()) + if (!action.node->function_base->isDeterministic()) return false; } return true; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 7f55badf819..50a74ddf7f0 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -30,7 +30,7 @@ TTLAggregateDescription::TTLAggregateDescription(const TTLAggregateDescription & , expression_result_column_name(other.expression_result_column_name) { if (other.expression) - expression = 
std::make_shared<ExpressionActions>(*other.expression); + expression = other.expression->clone(); } TTLAggregateDescription & TTLAggregateDescription::operator=(const TTLAggregateDescription & other) @@ -41,7 +41,7 @@ TTLAggregateDescription & TTLAggregateDescription::operator=(const TTLAggregateD column_name = other.column_name; expression_result_column_name = other.expression_result_column_name; if (other.expression) - expression = std::make_shared<ExpressionActions>(*other.expression); + expression = other.expression->clone(); else expression.reset(); return *this; @@ -54,9 +54,9 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin { for (const auto & action : ttl_expression->getActions()) { - if (action.type == ExpressionAction::APPLY_FUNCTION) + if (action.node->type == ActionsDAG::Type::FUNCTION) { - IFunctionBase & func = *action.function_base; + IFunctionBase & func = *action.node->function_base; if (!func.isDeterministic()) throw Exception( "TTL expression cannot contain non-deterministic functions, " @@ -92,10 +92,10 @@ TTLDescription::TTLDescription(const TTLDescription & other) , recompression_codec(other.recompression_codec) { if (other.expression) - expression = std::make_shared<ExpressionActions>(*other.expression); + expression = other.expression->clone(); if (other.where_expression) - where_expression = std::make_shared<ExpressionActions>(*other.where_expression); + where_expression = other.where_expression->clone(); } TTLDescription & TTLDescription::operator=(const TTLDescription & other) @@ -110,13 +110,13 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) expression_ast.reset(); if (other.expression) - expression = std::make_shared<ExpressionActions>(*other.expression); + expression = other.expression->clone(); else expression.reset(); result_column = other.result_column; if (other.where_expression) - where_expression = std::make_shared<ExpressionActions>(*other.where_expression); + where_expression = other.where_expression->clone(); else where_expression.reset(); diff --git
a/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.reference b/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.reference index e48bca390db..b0c7614fe30 100644 --- a/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.reference +++ b/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.reference @@ -1,9 +1,9 @@ 1 (NULL,'') a -1 (NULL,'') \N 1 (NULL,'') b -\N (123,'Hello') a -\N (123,'Hello') \N -\N (123,'Hello') b +1 (NULL,'') \N 3 (456,NULL) a -3 (456,NULL) \N 3 (456,NULL) b +3 (456,NULL) \N +\N (123,'Hello') a +\N (123,'Hello') b +\N (123,'Hello') \N diff --git a/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.sql b/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.sql index ce0894ba107..50a8e2b7d74 100644 --- a/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.sql +++ b/tests/queries/0_stateless/00526_array_join_with_arrays_of_nullable.sql @@ -1 +1 @@ -SELECT x, y, arrayJoin(['a', NULL, 'b']) AS z FROM system.one ARRAY JOIN [1, NULL, 3] AS x, [(NULL, ''), (123, 'Hello'), (456, NULL)] AS y; +SELECT x, y, arrayJoin(['a', NULL, 'b']) AS z FROM system.one ARRAY JOIN [1, NULL, 3] AS x, [(NULL, ''), (123, 'Hello'), (456, NULL)] AS y order by x, y, z; diff --git a/tests/queries/0_stateless/01508_explain_header.reference b/tests/queries/0_stateless/01508_explain_header.reference index 50216432e14..2b818ca9547 100644 --- a/tests/queries/0_stateless/01508_explain_header.reference +++ b/tests/queries/0_stateless/01508_explain_header.reference @@ -1,7 +1,7 @@ Expression (Projection) Header: x UInt8 Expression (Before ORDER BY and SELECT) - Header: _dummy UInt8 + Header: dummy UInt8 1 UInt8 ReadFromStorage (Read from SystemOne) Header: dummy UInt8