From 8a2e1870c757f3018698efc82f589cfc33c1c84a Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 7 Jun 2024 02:20:03 -0400 Subject: [PATCH 01/61] Fix alter query clone with SQL security --- src/Parsers/ASTAlterQuery.cpp | 3 +++ src/Parsers/ASTSQLSecurity.cpp | 2 +- ..._create_view_with_sql_security_option.reference | 3 +++ .../02884_create_view_with_sql_security_option.sh | 14 ++++++++++++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 90b63d2ce6f..58eeb7c4cbf 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -60,6 +60,8 @@ ASTPtr ASTAlterCommand::clone() const res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); if (select) res->select = res->children.emplace_back(select->clone()).get(); + if (sql_security) + res->sql_security = res->children.emplace_back(sql_security->clone()).get(); if (rename_to) res->rename_to = res->children.emplace_back(rename_to->clone()).get(); @@ -522,6 +524,7 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&settings_changes)); f(reinterpret_cast(&settings_resets)); f(reinterpret_cast(&select)); + f(reinterpret_cast(&sql_security)); f(reinterpret_cast(&rename_to)); } diff --git a/src/Parsers/ASTSQLSecurity.cpp b/src/Parsers/ASTSQLSecurity.cpp index d6f1c21d035..74408747290 100644 --- a/src/Parsers/ASTSQLSecurity.cpp +++ b/src/Parsers/ASTSQLSecurity.cpp @@ -7,7 +7,7 @@ namespace DB void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - if (!type.has_value()) + if (!type) return; if (definer || is_definer_current_user) diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 0589fdeef04..a03343c8cb3 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -35,5 +35,8 @@ OK 2 2 6 6 9 9 +===== TestInsertChain ===== 1000 1000 +===== TestOnCluster ===== +1 diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index f32aee44bee..dd869cd9988 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -260,6 +260,8 @@ EOF ${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_row_2" +echo "===== TestInsertChain =====" + ${CLICKHOUSE_CLIENT} --multiquery < Date: Fri, 7 Jun 2024 17:56:24 +0200 Subject: [PATCH 02/61] Fix crash on destroying AccessControl: add explicit shutdown. --- src/Access/AccessControl.cpp | 19 ++++++++++++++++++- src/Access/AccessControl.h | 3 +++ src/Access/DiskAccessStorage.cpp | 15 ++++++++++++--- src/Access/DiskAccessStorage.h | 2 ++ src/Access/IAccessStorage.h | 5 +++++ src/Access/MultipleAccessStorage.cpp | 26 ++++++++++++++++++++++++-- src/Access/MultipleAccessStorage.h | 3 +++ src/Access/ReplicatedAccessStorage.cpp | 12 ++++++++++++ src/Access/ReplicatedAccessStorage.h | 2 ++ src/Interpreters/Context.cpp | 3 +++ 10 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c3bb42160ad..58ca5cdc435 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -261,7 +261,24 @@ AccessControl::AccessControl() } -AccessControl::~AccessControl() = default; +AccessControl::~AccessControl() +{ + try + { + shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + +void AccessControl::shutdown() +{ + MultipleAccessStorage::shutdown(); + removeAllStorages(); +} void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d1537219a06..bfaf256ad48 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -53,6 +53,9 @@ public: AccessControl(); ~AccessControl() override; + /// Shutdown the access control and stops all background activity. + void shutdown() override; + /// Initializes access storage (user directories). void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index fe698b32816..c1633ee950f 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -194,11 +194,9 @@ DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String DiskAccessStorage::~DiskAccessStorage() { - stopListsWritingThread(); - try { - writeLists(); + shutdown(); } catch (...) { @@ -207,6 +205,17 @@ DiskAccessStorage::~DiskAccessStorage() } +void DiskAccessStorage::shutdown() +{ + stopListsWritingThread(); + + { + std::lock_guard lock{mutex}; + writeLists(); + } +} + + String DiskAccessStorage::getStorageParamsJSON() const { std::lock_guard lock{mutex}; diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 5d94008b34f..38172b26970 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -18,6 +18,8 @@ public: DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_); ~DiskAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } String getStorageParamsJSON() const override; diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 4f980bf9212..e88b1601f32 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -44,6 +44,11 @@ public: explicit IAccessStorage(const String & storage_name_) : storage_name(storage_name_) {} virtual ~IAccessStorage() = default; + /// If the AccessStorage has to do some complicated work when destroying - do it in advance. + /// For example, if the AccessStorage contains any threads for background work - ask them to complete and wait for completion. + /// By default, does nothing. + virtual void shutdown() {} + /// Returns the name of this storage. const String & getStorageName() const { return storage_name; } virtual const char * getStorageType() const = 0; diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index a8b508202b5..35b94de38c7 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -34,11 +34,23 @@ MultipleAccessStorage::MultipleAccessStorage(const String & storage_name_) MultipleAccessStorage::~MultipleAccessStorage() { - /// It's better to remove the storages in the reverse order because they could depend on each other somehow. + try + { + shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void MultipleAccessStorage::shutdown() +{ + /// It's better to shutdown the storages in the reverse order because they could depend on each other somehow. const auto storages = getStoragesPtr(); for (const auto & storage : *storages | boost::adaptors::reversed) { - removeStorage(storage); + storage->shutdown(); } } @@ -72,6 +84,16 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove) ids_cache.clear(); } +void MultipleAccessStorage::removeAllStorages() +{ + /// It's better to remove the storages in the reverse order because they could depend on each other somehow. + const auto storages = getStoragesPtr(); + for (const auto & storage : *storages | boost::adaptors::reversed) + { + removeStorage(storage); + } +} + std::vector MultipleAccessStorage::getStorages() { return *getStoragesPtr(); diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 005e6e2b9cd..e1543c59b67 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -21,6 +21,8 @@ public: explicit MultipleAccessStorage(const String & storage_name_ = STORAGE_TYPE); ~MultipleAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } bool isReadOnly() const override; bool isReadOnly(const UUID & id) const override; @@ -32,6 +34,7 @@ public: void setStorages(const std::vector & storages); void addStorage(const StoragePtr & new_storage); void removeStorage(const StoragePtr & storage_to_remove); + void removeAllStorages(); std::vector getStorages(); std::vector getStorages() const; std::shared_ptr> getStoragesPtr(); diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index cd9a86a1bd2..415b757483e 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -66,6 +66,18 @@ ReplicatedAccessStorage::ReplicatedAccessStorage( } ReplicatedAccessStorage::~ReplicatedAccessStorage() +{ + try + { + shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void ReplicatedAccessStorage::shutdown() { stopWatchingThread(); } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index cddb20860f7..f8518226997 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -23,6 +23,8 @@ public: ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup); ~ReplicatedAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } void startPeriodicReloading() override { startWatchingThread(); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5c9ae4716b9..df86f156a4e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -674,6 +674,9 @@ struct ContextSharedPart : boost::noncopyable } } + LOG_TRACE(log, "Shutting down AccessControl"); + access_control->shutdown(); + { std::lock_guard lock(mutex); From 85289219f12219f74efd6ebc932707c7b3d1f2ce Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 11 Jun 2024 18:03:44 +0000 Subject: [PATCH 03/61] Remove flags drom ActionsDAG (part 1). --- src/Interpreters/ActionsDAG.cpp | 3 - src/Interpreters/ActionsDAG.h | 17 +- src/Interpreters/ActionsVisitor.cpp | 56 +++---- src/Interpreters/ActionsVisitor.h | 15 +- src/Interpreters/ExpressionActions.cpp | 19 +-- src/Interpreters/ExpressionActions.h | 36 ++-- src/Interpreters/ExpressionAnalyzer.cpp | 214 ++++++++++++------------ src/Interpreters/ExpressionAnalyzer.h | 56 +++---- src/Interpreters/WindowDescription.h | 4 +- 9 files changed, 206 insertions(+), 214 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 54db8980e12..271f8f7474e 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1150,8 +1150,6 @@ void ActionsDAG::project(const NamesWithAliases & projection) } removeUnusedActions(); - projectInput(); - projected_output = true; } bool ActionsDAG::tryRestoreColumn(const std::string & column_name) @@ -1526,7 +1524,6 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( actions_dag->outputs.swap(projection); actions_dag->removeUnusedActions(); - actions_dag->projectInput(); return actions_dag; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 150fa84762f..ae2cb4129d5 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -103,13 +103,11 @@ private: NodeRawConstPtrs inputs; NodeRawConstPtrs outputs; - bool project_input = false; - bool projected_output = false; - public: ActionsDAG() = default; ActionsDAG(ActionsDAG &&) = default; ActionsDAG(const ActionsDAG &) = delete; + ActionsDAG & operator=(ActionsDAG &&) = default; ActionsDAG & operator=(const ActionsDAG &) = delete; explicit ActionsDAG(const NamesAndTypesList & inputs_); explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_); @@ -179,10 +177,6 @@ public: /// Return true if column was removed from inputs. bool removeUnusedResult(const std::string & column_name); - void projectInput(bool project = true) { project_input = project; } - bool isInputProjected() const { return project_input; } - bool isOutputProjected() const { return projected_output; } - /// Remove actions that are not needed to compute output nodes void removeUnusedActions(bool allow_remove_inputs = true, bool allow_constant_folding = true); @@ -510,4 +504,13 @@ struct ActionDAGNodes ActionsDAG::NodeRawConstPtrs nodes; }; +struct ActionsAndFlags +{ + ActionsDAG actions; + bool project_input = false; + bool projected_output = false; +}; + +using ActionsAndFlagsPtr = std::shared_ptr; + } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 0bdd4c089f1..b41c4509bd3 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -486,16 +486,12 @@ public: } }; -ScopeStack::Level::~Level() = default; -ScopeStack::Level::Level() = default; -ScopeStack::Level::Level(Level &&) noexcept = default; - ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, size_t subquery_depth_, std::reference_wrapper source_columns_, - ActionsDAGPtr actions_dag, + ActionsDAG actions_dag, PreparedSetsPtr prepared_sets_, bool no_subqueries_, bool no_makeset_, @@ -531,13 +527,12 @@ std::vector ActionsMatcher::Data::getAllColumnNames() const return index.getAllNames(); } -ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_) : WithContext(context_) +ScopeStack::ScopeStack(ActionsDAG actions_dag, ContextPtr context_) : WithContext(context_) { - auto & level = stack.emplace_back(); - level.actions_dag = std::move(actions_dag); - level.index = std::make_unique(level.actions_dag->getOutputs()); + auto & level = stack.emplace_back(ScopeStack::Level{std::move(actions_dag), {}, {}}); + level.index = std::make_unique(level.actions_dag.getOutputs()); - for (const auto & node : level.actions_dag->getOutputs()) + for (const auto & node : level.actions_dag.getOutputs()) if (node->type == ActionsDAG::ActionType::INPUT) level.inputs.emplace(node->result_name); } @@ -545,22 +540,21 @@ ScopeStack::ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_) : WithCon void ScopeStack::pushLevel(const NamesAndTypesList & input_columns) { auto & level = stack.emplace_back(); - level.actions_dag = std::make_shared(); - level.index = std::make_unique(level.actions_dag->getOutputs()); + level.index = std::make_unique(level.actions_dag.getOutputs()); const auto & prev = stack[stack.size() - 2]; for (const auto & input_column : input_columns) { - const auto & node = level.actions_dag->addInput(input_column.name, input_column.type); + const auto & node = level.actions_dag.addInput(input_column.name, input_column.type); level.index->addNode(&node); level.inputs.emplace(input_column.name); } - for (const auto & node : prev.actions_dag->getOutputs()) + for (const auto & node : prev.actions_dag.getOutputs()) { if (!level.index->contains(node->result_name)) { - const auto & input = level.actions_dag->addInput({node->column, node->result_type, node->result_name}); + const auto & input = level.actions_dag.addInput({node->column, node->result_type, node->result_name}); level.index->addNode(&input); } } @@ -585,12 +579,12 @@ size_t ScopeStack::getColumnLevel(const std::string & name) void ScopeStack::addColumn(ColumnWithTypeAndName column) { - const auto & node = stack[0].actions_dag->addColumn(std::move(column)); + const auto & node = stack[0].actions_dag.addColumn(std::move(column)); stack[0].index->addNode(&node); for (size_t j = 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -599,12 +593,12 @@ void ScopeStack::addAlias(const std::string & name, std::string alias) { auto level = getColumnLevel(name); const auto & source = stack[level].index->getNode(name); - const auto & node = stack[level].actions_dag->addAlias(source, std::move(alias)); + const auto & node = stack[level].actions_dag.addAlias(source, std::move(alias)); stack[level].index->addNode(&node); for (size_t j = level + 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -618,12 +612,12 @@ void ScopeStack::addArrayJoin(const std::string & source_name, std::string resul throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression with arrayJoin cannot depend on lambda argument: {}", source_name); - const auto & node = stack.front().actions_dag->addArrayJoin(*source_node, std::move(result_name)); + const auto & node = stack.front().actions_dag.addArrayJoin(*source_node, std::move(result_name)); stack.front().index->addNode(&node); for (size_t j = 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } @@ -642,17 +636,17 @@ void ScopeStack::addFunction( for (const auto & argument : argument_names) children.push_back(&stack[level].index->getNode(argument)); - const auto & node = stack[level].actions_dag->addFunction(function, std::move(children), std::move(result_name)); + const auto & node = stack[level].actions_dag.addFunction(function, std::move(children), std::move(result_name)); stack[level].index->addNode(&node); for (size_t j = level + 1; j < stack.size(); ++j) { - const auto & input = stack[j].actions_dag->addInput({node.column, node.result_type, node.result_name}); + const auto & input = stack[j].actions_dag.addInput({node.column, node.result_type, node.result_name}); stack[j].index->addNode(&input); } } -ActionsDAGPtr ScopeStack::popLevel() +ActionsDAG ScopeStack::popLevel() { auto res = std::move(stack.back().actions_dag); stack.pop_back(); @@ -661,12 +655,12 @@ ActionsDAGPtr ScopeStack::popLevel() std::string ScopeStack::dumpNames() const { - return stack.back().actions_dag->dumpNames(); + return stack.back().actions_dag.dumpNames(); } const ActionsDAG & ScopeStack::getLastActions() const { - return *stack.back().actions_dag; + return stack.back().actions_dag; } const ScopeStack::Index & ScopeStack::getLastActionsIndex() const @@ -989,7 +983,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data.set_size_limit, data.subquery_depth, data.source_columns, - std::make_shared(data.source_columns), + ActionsDAG(data.source_columns), data.prepared_sets, data.no_subqueries, data.no_makeset, @@ -1008,10 +1002,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } auto dag = index_hint_data.getActions(); - dag->project(args); + dag.project(args); auto index_hint = std::make_shared(); - index_hint->setActions(std::move(dag)); + index_hint->setActions(std::make_shared(std::move(dag))); // Arguments are removed. We add function instead of constant column to avoid constant folding. data.addFunction(std::make_unique(index_hint), {}, column_name); @@ -1271,10 +1265,10 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto lambda_dag = data.actions_stack.popLevel(); String result_name = lambda->arguments->children.at(1)->getColumnName(); - lambda_dag->removeUnusedActions(Names(1, result_name)); + lambda_dag.removeUnusedActions(Names(1, result_name)); auto lambda_actions = std::make_shared( - lambda_dag, + std::make_shared(lambda_dag), ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 046c7387ee8..a0064637939 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB { @@ -43,20 +44,16 @@ struct ScopeStack : WithContext struct Level { - ActionsDAGPtr actions_dag; + ActionsDAG actions_dag; IndexPtr index; NameSet inputs; - - Level(); - Level(Level &&) noexcept; - ~Level(); }; using Levels = std::vector; Levels stack; - ScopeStack(ActionsDAGPtr actions_dag, ContextPtr context_); + ScopeStack(ActionsDAG actions_dag, ContextPtr context_); void pushLevel(const NamesAndTypesList & input_columns); @@ -67,7 +64,7 @@ struct ScopeStack : WithContext void addArrayJoin(const std::string & source_name, std::string result_name); void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name); - ActionsDAGPtr popLevel(); + ActionsDAG popLevel(); const ActionsDAG & getLastActions() const; const Index & getLastActionsIndex() const; @@ -147,7 +144,7 @@ public: SizeLimits set_size_limit_, size_t subquery_depth_, std::reference_wrapper source_columns_, - ActionsDAGPtr actions_dag, + ActionsDAG actions_dag, PreparedSetsPtr prepared_sets_, bool no_subqueries_, bool no_makeset_, @@ -182,7 +179,7 @@ public: actions_stack.addFunction(function, argument_names, std::move(result_name)); } - ActionsDAGPtr getActions() + ActionsDAG getActions() { return actions_stack.popLevel(); } diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 04f29f35c3c..3ba448a0da0 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -757,11 +757,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run, } } - if (actions_dag->isInputProjected()) - { - block.clear(); - } - else if (allow_duplicates_in_input) + if (allow_duplicates_in_input) { /// This case is the same as when the input is projected /// since we do not need any input columns. @@ -862,7 +858,7 @@ std::string ExpressionActions::dumpActions() const for (const auto & output_column : output_columns) ss << output_column.name << " " << output_column.type->getName() << "\n"; - ss << "\nproject input: " << actions_dag->isInputProjected() << "\noutput positions:"; + ss << "\noutput positions:"; for (auto pos : result_positions) ss << " " << pos; ss << "\n"; @@ -926,7 +922,6 @@ JSONBuilder::ItemPtr ExpressionActions::toTree() const map->add("Actions", std::move(actions_array)); map->add("Outputs", std::move(outputs_array)); map->add("Positions", std::move(positions_array)); - map->add("Project Input", actions_dag->isInputProjected()); return map; } @@ -980,7 +975,7 @@ void ExpressionActionsChain::addStep(NameSet non_constant_inputs) if (column.column && isColumnConst(*column.column) && non_constant_inputs.contains(column.name)) column.column = nullptr; - steps.push_back(std::make_unique(std::make_shared(columns))); + steps.push_back(std::make_unique(std::make_shared(ActionsDAG(columns), false, false))); } void ExpressionActionsChain::finalize() @@ -1129,14 +1124,14 @@ void ExpressionActionsChain::JoinStep::finalize(const NameSet & required_output_ std::swap(result_columns, new_result_columns); } -ActionsDAGPtr & ExpressionActionsChain::Step::actions() +ActionsAndFlagsPtr & ExpressionActionsChain::Step::actions() { - return typeid_cast(*this).actions_dag; + return typeid_cast(*this).actions_and_flags; } -const ActionsDAGPtr & ExpressionActionsChain::Step::actions() const +const ActionsAndFlagsPtr & ExpressionActionsChain::Step::actions() const { - return typeid_cast(*this).actions_dag; + return typeid_cast(*this).actions_and_flags; } } diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index cb467004d29..b8ac10fd80e 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -173,48 +173,48 @@ struct ExpressionActionsChain : WithContext /// Remove unused result and update required columns virtual void finalize(const NameSet & required_output_) = 0; /// Add projections to expression - virtual void prependProjectInput() const = 0; + virtual void prependProjectInput() = 0; virtual std::string dump() const = 0; /// Only for ExpressionActionsStep - ActionsDAGPtr & actions(); - const ActionsDAGPtr & actions() const; + ActionsAndFlagsPtr & actions(); + const ActionsAndFlagsPtr & actions() const; }; struct ExpressionActionsStep : public Step { - ActionsDAGPtr actions_dag; + ActionsAndFlagsPtr actions_and_flags; - explicit ExpressionActionsStep(ActionsDAGPtr actions_dag_, Names required_output_ = Names()) + explicit ExpressionActionsStep(ActionsAndFlagsPtr actiactions_and_flags_, Names required_output_ = Names()) : Step(std::move(required_output_)) - , actions_dag(std::move(actions_dag_)) + , actions_and_flags(std::move(actiactions_and_flags_)) { } NamesAndTypesList getRequiredColumns() const override { - return actions_dag->getRequiredColumns(); + return actions_and_flags->actions.getRequiredColumns(); } ColumnsWithTypeAndName getResultColumns() const override { - return actions_dag->getResultColumns(); + return actions_and_flags->actions.getResultColumns(); } void finalize(const NameSet & required_output_) override { - if (!actions_dag->isOutputProjected()) - actions_dag->removeUnusedActions(required_output_); + if (!actions_and_flags->projected_output) + actions_and_flags->actions.removeUnusedActions(required_output_); } - void prependProjectInput() const override + void prependProjectInput() override { - actions_dag->projectInput(); + actions_and_flags->project_input = true; } std::string dump() const override { - return actions_dag->dumpDAG(); + return actions_and_flags->actions.dumpDAG(); } }; @@ -229,7 +229,7 @@ struct ExpressionActionsChain : WithContext NamesAndTypesList getRequiredColumns() const override { return required_columns; } ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const NameSet & required_output_) override; - void prependProjectInput() const override {} /// TODO: remove unused columns before ARRAY JOIN ? + void prependProjectInput() override {} /// TODO: remove unused columns before ARRAY JOIN ? std::string dump() const override { return "ARRAY JOIN"; } }; @@ -245,7 +245,7 @@ struct ExpressionActionsChain : WithContext NamesAndTypesList getRequiredColumns() const override { return required_columns; } ColumnsWithTypeAndName getResultColumns() const override { return result_columns; } void finalize(const NameSet & required_output_) override; - void prependProjectInput() const override {} /// TODO: remove unused columns before JOIN ? + void prependProjectInput() override {} /// TODO: remove unused columns before JOIN ? std::string dump() const override { return "JOIN"; } }; @@ -263,7 +263,7 @@ struct ExpressionActionsChain : WithContext steps.clear(); } - ActionsDAGPtr getLastActions(bool allow_empty = false) + ActionsAndFlagsPtr getLastActions(bool allow_empty = false) { if (steps.empty()) { @@ -272,7 +272,7 @@ struct ExpressionActionsChain : WithContext throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty ExpressionActionsChain"); } - return typeid_cast(steps.back().get())->actions_dag; + return typeid_cast(steps.back().get())->actions_and_flags; } Step & getLastStep() @@ -286,7 +286,7 @@ struct ExpressionActionsChain : WithContext Step & lastStep(const NamesAndTypesList & columns) { if (steps.empty()) - steps.emplace_back(std::make_unique(std::make_shared(columns))); + steps.emplace_back(std::make_unique(std::make_shared(ActionsDAG(columns), false, false))); return *steps.back(); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d80d5cd5b93..530bace3b1e 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -186,7 +186,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. initGlobalSubqueriesAndExternalTables(do_global, is_explain); - auto temp_actions = std::make_shared(sourceColumns()); + ActionsDAG temp_actions(sourceColumns()); columns_after_array_join = getColumnsAfterArrayJoin(temp_actions, sourceColumns()); columns_after_join = analyzeJoin(temp_actions, columns_after_array_join); /// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns. @@ -199,7 +199,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( analyzeAggregation(temp_actions); } -NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns) +NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns) { const auto * select_query = query->as(); if (!select_query) @@ -213,14 +213,14 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a getRootActionsNoMakeSet(array_join_expression_list, actions, false); auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left); - auto sample_columns = actions->getResultColumns(); + auto sample_columns = actions.getResultColumns(); array_join->prepare(sample_columns); - actions = std::make_shared(sample_columns); + actions = ActionsDAG(sample_columns); NamesAndTypesList new_columns_after_array_join; NameSet added_columns; - for (auto & column : actions->getResultColumns()) + for (auto & column : actions.getResultColumns()) { if (syntax->array_join_result_to_source.contains(column.name)) { @@ -236,7 +236,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a return new_columns_after_array_join; } -NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns) +NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns) { const auto * select_query = query->as(); if (!select_query) @@ -246,9 +246,9 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const if (join) { getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false); - auto sample_columns = actions->getNamesAndTypesList(); + auto sample_columns = actions.getNamesAndTypesList(); syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true); - actions = std::make_shared(sample_columns); + actions = ActionsDAG(sample_columns); } NamesAndTypesList result_columns = src_columns; @@ -256,7 +256,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const return result_columns; } -void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) +void ExpressionAnalyzer::analyzeAggregation(ActionsDAG & temp_actions) { /** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions), * as well as a set of columns obtained after the aggregation, if any, @@ -272,7 +272,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) if (!has_aggregation) { - aggregated_columns = temp_actions->getNamesAndTypesList(); + aggregated_columns = temp_actions.getNamesAndTypesList(); return; } @@ -321,7 +321,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) ssize_t group_size = group_elements_ast.size(); const auto & column_name = group_elements_ast[j]->getColumnName(); - const auto * node = temp_actions->tryFindInOutputs(column_name); + const auto * node = temp_actions.tryFindInOutputs(column_name); if (!node) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); @@ -375,7 +375,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) getRootActionsNoMakeSet(group_asts[i], temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); - const auto * node = temp_actions->tryFindInOutputs(column_name); + const auto * node = temp_actions.tryFindInOutputs(column_name); if (!node) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); @@ -434,7 +434,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) has_const_aggregation_keys = select_query->group_by_with_constant_keys; } else - aggregated_columns = temp_actions->getNamesAndTypesList(); + aggregated_columns = temp_actions.getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) aggregated_columns.emplace_back(desc.column_name, desc.function->getResultType()); @@ -465,7 +465,7 @@ SetPtr ExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & subquery_o return storage_set->getSet(); } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -485,7 +485,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -507,7 +507,7 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP void ExpressionAnalyzer::getRootActionsForHaving( - const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) + const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -528,7 +528,7 @@ void ExpressionAnalyzer::getRootActionsForHaving( } -void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions) +void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions) { LogAST log; ActionsVisitor::Data visitor_data( @@ -548,7 +548,7 @@ void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bo } -void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions) +void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAG & actions, AggregateDescriptions & descriptions) { for (const ASTPtr & ast : aggregates()) { @@ -567,7 +567,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr for (size_t i = 0; i < arguments.size(); ++i) { const std::string & name = arguments[i]->getColumnName(); - const auto * dag_node = actions->tryFindInOutputs(name); + const auto * dag_node = actions.tryFindInOutputs(name); if (!dag_node) { throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, @@ -658,7 +658,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, with_alias->getColumnName(), 1 /* direction */, 1 /* nulls_direction */)); - auto actions_dag = std::make_shared(aggregated_columns); + ActionsDAG actions_dag(aggregated_columns); getRootActions(column_ast, false, actions_dag); desc.partition_by_actions.push_back(std::move(actions_dag)); } @@ -679,7 +679,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, order_by_element.direction, order_by_element.nulls_direction)); - auto actions_dag = std::make_shared(aggregated_columns); + ActionsDAG actions_dag(aggregated_columns); getRootActions(column_ast, false, actions_dag); desc.order_by_actions.push_back(std::move(actions_dag)); } @@ -720,7 +720,7 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, } } -void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) +void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions) { auto current_context = getContext(); @@ -737,13 +737,13 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) desc, elem.definition.get()); auto [it, inserted] = window_descriptions.insert( - {desc.window_name, desc}); + {elem.name, std::move(desc)}); if (!inserted) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Window '{}' is defined twice in the WINDOW clause", - desc.window_name); + elem.name); } } } @@ -776,7 +776,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) for (size_t i = 0; i < arguments.size(); ++i) { const std::string & name = arguments[i]->getColumnName(); - const auto * node = actions->tryFindInOutputs(name); + const auto * node = actions.tryFindInOutputs(name); if (!node) { @@ -817,13 +817,14 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) { const auto & definition = function_node.window_definition->as< const ASTWindowDefinition &>(); + auto default_window_name = definition.getDefaultWindowName(); WindowDescription desc; - desc.window_name = definition.getDefaultWindowName(); + desc.window_name = default_window_name; makeWindowDescriptionFromAST(*current_context, window_descriptions, desc, &definition); auto [it, inserted] = window_descriptions.insert( - {desc.window_name, desc}); + {default_window_name, std::move(desc)}); if (!inserted) { @@ -871,7 +872,7 @@ const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() cons } /// "Big" ARRAY JOIN. -ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool array_join_is_left) const +ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAG & actions, bool array_join_is_left) const { NameSet result_columns; for (const auto & result_source : syntax->array_join_result_to_source) @@ -879,8 +880,8 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr /// Assign new names to columns, if needed. if (result_source.first != result_source.second) { - const auto & node = actions->findInOutputs(result_source.second); - actions->getOutputs().push_back(&actions->addAlias(node, result_source.first)); + const auto & node = actions.findInOutputs(result_source.second); + actions.getOutputs().push_back(&actions.addAlias(node, result_source.first)); } /// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names. @@ -890,7 +891,7 @@ ArrayJoinActionPtr ExpressionAnalyzer::addMultipleArrayJoinAction(ActionsDAGPtr return std::make_shared(result_columns, array_join_is_left, getContext()); } -ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types) +ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, ActionsAndFlagsPtr & before_array_join, bool only_types) { const auto * select_query = getSelectQuery(); @@ -900,9 +901,9 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - getRootActions(array_join_expression_list, only_types, step.actions()); + getRootActions(array_join_expression_list, only_types, step.actions()->actions); - auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); + auto array_join = addMultipleArrayJoinAction(step.actions()->actions, is_array_join_left); before_array_join = chain.getLastActions(); chain.steps.push_back(std::make_unique(array_join, step.getResultColumns())); @@ -916,17 +917,19 @@ bool SelectQueryExpressionAnalyzer::appendJoinLeftKeys(ExpressionActionsChain & { ExpressionActionsChain::Step & step = chain.lastStep(columns_after_array_join); - getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()); + getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions()->actions); return true; } JoinPtr SelectQueryExpressionAnalyzer::appendJoin( ExpressionActionsChain & chain, - ActionsDAGPtr & converting_join_columns) + ActionsAndFlagsPtr & converting_join_columns) { const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); - JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_join_columns); + auto tmp_actions = std::make_shared(std::move(converting_join_columns->actions)); + JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, tmp_actions); + converting_join_columns->actions = std::move(*tmp_actions); if (converting_join_columns) { @@ -1065,7 +1068,6 @@ static std::unique_ptr buildJoinedPlan( rename_dag->getOutputs()[pos] = &alias; } } - rename_dag->projectInput(); auto rename_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(rename_dag)); rename_step->setStepDescription("Rename joined columns"); joined_plan->addStep(std::move(rename_step)); @@ -1166,45 +1168,45 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( return join; } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( +ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); if (!select_query->prewhere()) - return nullptr; + return {}; Names first_action_names; if (!chain.steps.empty()) first_action_names = chain.steps.front()->getRequiredColumns().getNames(); auto & step = chain.lastStep(sourceColumns()); - getRootActions(select_query->prewhere(), only_types, step.actions()); + getRootActions(select_query->prewhere(), only_types, step.actions()->actions); String prewhere_column_name = select_query->prewhere()->getColumnName(); step.addRequiredOutput(prewhere_column_name); - const auto & node = step.actions()->findInOutputs(prewhere_column_name); + const auto & node = step.actions()->actions.findInOutputs(prewhere_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", filter_type->getName()); - ActionsDAGPtr prewhere_actions; + ActionsAndFlagsPtr prewhere_actions; { /// Remove unused source_columns from prewhere actions. - auto tmp_actions_dag = std::make_shared(sourceColumns()); + ActionsDAG tmp_actions_dag(sourceColumns()); getRootActions(select_query->prewhere(), only_types, tmp_actions_dag); /// Constants cannot be removed since they can be used in other parts of the query. /// And if they are not used anywhere, except PREWHERE, they will be removed on the next step. - tmp_actions_dag->removeUnusedActions( + tmp_actions_dag.removeUnusedActions( NameSet{prewhere_column_name}, /* allow_remove_inputs= */ true, /* allow_constant_folding= */ false); - auto required_columns = tmp_actions_dag->getRequiredColumnsNames(); + auto required_columns = tmp_actions_dag.getRequiredColumnsNames(); NameSet required_source_columns(required_columns.begin(), required_columns.end()); required_source_columns.insert(first_action_names.begin(), first_action_names.end()); - auto names = step.actions()->getNames(); + auto names = step.actions()->actions.getNames(); NameSet name_set(names.begin(), names.end()); for (const auto & column : sourceColumns()) @@ -1213,13 +1215,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( Names required_output(name_set.begin(), name_set.end()); prewhere_actions = chain.getLastActions(); - prewhere_actions->removeUnusedActions(required_output); + prewhere_actions->actions.removeUnusedActions(required_output); } { - ActionsDAGPtr actions; + ActionsDAG actions; - auto required_columns = prewhere_actions->getRequiredColumns(); + auto required_columns = prewhere_actions->actions.getRequiredColumns(); NameSet prewhere_input_names; for (const auto & col : required_columns) prewhere_input_names.insert(col.name); @@ -1263,11 +1265,11 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = std::make_shared(std::move(required_columns)); + actions = ActionsDAG(required_columns); } else { - ColumnsWithTypeAndName columns = prewhere_actions->getResultColumns(); + ColumnsWithTypeAndName columns = prewhere_actions->actions.getResultColumns(); for (const auto & column : sourceColumns()) { @@ -1278,7 +1280,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = std::make_shared(std::move(columns)); + actions = ActionsDAG(columns); } chain.steps.emplace_back( @@ -1300,12 +1302,12 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions()); + getRootActions(select_query->where(), only_types, step.actions()->actions); auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); - const auto & node = step.actions()->findInOutputs(where_column_name); + const auto & node = step.actions()->actions.findInOutputs(where_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in WHERE: {}", @@ -1332,7 +1334,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (const auto & ast_element : ast->children) { step.addRequiredOutput(ast_element->getColumnName()); - getRootActions(ast_element, only_types, step.actions()); + getRootActions(ast_element, only_types, step.actions()->actions); } } } @@ -1341,7 +1343,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain for (const auto & ast : asts) { step.addRequiredOutput(ast->getColumnName()); - getRootActions(ast, only_types, step.actions()); + getRootActions(ast, only_types, step.actions()->actions); } } @@ -1349,7 +1351,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { for (auto & child : asts) { - auto actions_dag = std::make_shared(columns_after_join); + ActionsDAG actions_dag(columns_after_join); getRootActions(child, only_types, actions_dag); group_by_elements_actions.emplace_back( std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); @@ -1387,7 +1389,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression const ASTFunction & node = typeid_cast(*ast); if (node.arguments) for (auto & argument : node.arguments->children) - getRootActions(argument, only_types, step.actions()); + getRootActions(argument, only_types, step.actions()->actions); } } @@ -1409,7 +1411,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // recursively together with (1b) as ASTFunction::window_definition. if (getSelectQuery()->window()) { - getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()); + getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()->actions); } for (const auto & [_, w] : window_descriptions) @@ -1420,7 +1422,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // definitions (1a). // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. - getRootActionsNoMakeSet(f.function_node->clone(), step.actions()); + getRootActionsNoMakeSet(f.function_node->clone(), step.actions()->actions); // (2b) Required function argument columns. for (const auto & a : f.function_node->arguments->children) @@ -1442,17 +1444,17 @@ void SelectQueryExpressionAnalyzer::appendExpressionsAfterWindowFunctions(Expres ExpressionActionsChain::Step & step = chain.lastStep(columns_after_window); for (const auto & expression : syntax->expressions_with_window_function) - getRootActionsForWindowFunctions(expression->clone(), true, step.actions()); + getRootActionsForWindowFunctions(expression->clone(), true, step.actions()->actions); } -void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) +void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAG & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) { const auto * select_query = getAggregatingQuery(); if (!select_query->groupBy() || !(select_query->group_by_with_rollup || select_query->group_by_with_cube)) return; - auto source_columns = before_aggregation->getResultColumns(); + auto source_columns = before_aggregation.getResultColumns(); ColumnsWithTypeAndName result_columns; for (const auto & source_column : source_columns) @@ -1462,9 +1464,9 @@ void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & befor else result_columns.push_back(source_column); } - ExpressionActionsChain::Step & step = chain.lastStep(before_aggregation->getNamesAndTypesList()); + ExpressionActionsChain::Step & step = chain.lastStep(before_aggregation.getNamesAndTypesList()); - step.actions() = ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position); + step.actions()->actions = std::move(*ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position)); } void SelectQueryExpressionAnalyzer::appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node) @@ -1495,7 +1497,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActionsForHaving(select_query->having(), only_types, step.actions()); + getRootActionsForHaving(select_query->having(), only_types, step.actions()->actions); step.addRequiredOutput(select_query->having()->getColumnName()); @@ -1508,13 +1510,13 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->select(), only_types, step.actions()); + getRootActions(select_query->select(), only_types, step.actions()->actions); for (const auto & child : select_query->select()->children) appendSelectSkipWindowExpressions(step, child); } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, +ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions & order_by_elements_actions) { const auto * select_query = getSelectQuery(); @@ -1538,7 +1540,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); } - getRootActions(select_query->orderBy(), only_types, step.actions()); + getRootActions(select_query->orderBy(), only_types, step.actions()->actions); bool with_fill = false; @@ -1601,7 +1603,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai for (const auto & child : select_query->orderBy()->children) { auto actions_dag = std::make_shared(columns_after_join); - getRootActions(child, only_types, actions_dag); + getRootActions(child, only_types, *actions_dag); order_by_elements_actions.emplace_back( std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1628,7 +1630,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->limitBy(), only_types, step.actions()); + getRootActions(select_query->limitBy(), only_types, step.actions()->actions); NameSet existing_column_names; for (const auto & column : aggregated_columns) @@ -1657,7 +1659,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain return true; } -ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const +ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const { const auto * select_query = getSelectQuery(); @@ -1706,14 +1708,15 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio } auto actions = chain.getLastActions(); - actions->project(result_columns); + actions->actions.project(result_columns); if (!required_result_columns.empty()) { result_columns.clear(); for (const auto & column : required_result_columns) result_columns.emplace_back(column, std::string{}); - actions->project(result_columns); + actions->actions.project(result_columns); + actions->project_input = actions->projected_output = true; } return actions; @@ -1723,14 +1726,14 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types) { ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns()); - getRootActions(expr, only_types, step.actions()); + getRootActions(expr, only_types, step.actions()->actions); step.addRequiredOutput(expr->getColumnName()); } - -ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_result) +ActionsAndFlagsPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_result) { - auto actions_dag = std::make_shared(aggregated_columns); + ActionsAndFlagsPtr res; + ActionsDAG actions_dag(aggregated_columns); NamesWithAliases result_columns; Names result_names; @@ -1757,9 +1760,12 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r if (add_aliases) { if (project_result) - actions_dag->project(result_columns); + { + actions_dag.project(result_columns); + res->project_input = res->projected_output = true; + } else - actions_dag->addAliases(result_columns); + actions_dag.addAliases(result_columns); } if (!(add_aliases && project_result)) @@ -1775,10 +1781,11 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r } } - actions_dag->removeUnusedActions(name_set); + actions_dag.removeUnusedActions(name_set); } - return actions_dag; + res->actions = std::move(actions_dag); + return res; } ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result, CompileExpressions compile_expressions) @@ -1790,7 +1797,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool proje ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) { auto actions = std::make_shared(constant_inputs); - getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); + getRootActions(query, true /* no_makeset_for_subqueries */, *actions, true /* only_consts */); return actions; } @@ -1805,7 +1812,7 @@ std::unique_ptr SelectQueryExpressionAnalyzer::getJoinedPlan() return std::move(joined_plan); } -ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions() +ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::simpleSelectActions() { ExpressionActionsChain new_chain(getContext()); appendSelect(new_chain, false); @@ -1950,7 +1957,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( { where_step_num = chain.steps.size() - 1; before_where = chain.getLastActions(); - if (allowEarlyConstantFolding(*before_where, settings)) + if (allowEarlyConstantFolding(before_where->actions, settings)) { Block before_where_sample; if (chain.steps.size() > 1) @@ -1960,7 +1967,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (sanitizeBlock(before_where_sample)) { ExpressionActions( - before_where, + std::make_shared(before_where->actions.clone()), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); auto & column_elem @@ -1986,7 +1993,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_aggregation = chain.getLastActions(); if (settings.group_by_use_nulls) - query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); + query_analyzer.appendGroupByModifiers(before_aggregation->actions, chain, only_types); auto columns_before_aggregation = finalize_chain(chain); @@ -2033,8 +2040,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( true); auto & step = chain.lastStep(query_analyzer.aggregated_columns); - auto & actions = step.actions(); - actions = ActionsDAG::merge(std::move(*actions), std::move(*converting)); + auto & actions = step.actions()->actions; + actions = std::move(*ActionsDAG::merge(std::move(actions), std::move(*converting))); } } @@ -2070,13 +2077,13 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( // the main SELECT, similar to what we do for aggregate functions. if (has_window) { - query_analyzer.makeWindowDescriptions(chain.getLastActions()); + query_analyzer.makeWindowDescriptions(chain.getLastActions()->actions); query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage); // Build a list of output columns of the window step. // 1) We need the columns that are the output of ExpressionActions. - for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) + for (const auto & x : chain.getLastActions()->actions.getNamesAndTypesList()) { query_analyzer.columns_after_window.push_back(x); } @@ -2113,7 +2120,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( finalize_chain(chain); query_analyzer.appendExpressionsAfterWindowFunctions(chain, only_types || !first_stage); - for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) + for (const auto & x : chain.getLastActions()->actions.getNamesAndTypesList()) { query_analyzer.columns_after_window.push_back(x); } @@ -2173,7 +2180,6 @@ void ExpressionAnalysisResult::finalize( if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); - prewhere_info->prewhere_actions->projectInput(false); NameSet columns_to_remove; for (const auto & [name, can_remove] : step.required_output) @@ -2206,9 +2212,9 @@ void ExpressionAnalysisResult::finalize( void ExpressionAnalysisResult::removeExtraColumns() const { if (hasWhere()) - before_where->projectInput(); + before_where->project_input = true; if (hasHaving()) - before_having->projectInput(); + before_having->project_input = true; } void ExpressionAnalysisResult::checkActions() const @@ -2238,7 +2244,7 @@ std::string ExpressionAnalysisResult::dump() const if (before_array_join) { - ss << "before_array_join " << before_array_join->dumpDAG() << "\n"; + ss << "before_array_join " << before_array_join->actions.dumpDAG() << "\n"; } if (array_join) @@ -2248,12 +2254,12 @@ std::string ExpressionAnalysisResult::dump() const if (before_join) { - ss << "before_join " << before_join->dumpDAG() << "\n"; + ss << "before_join " << before_join->actions.dumpDAG() << "\n"; } if (before_where) { - ss << "before_where " << before_where->dumpDAG() << "\n"; + ss << "before_where " << before_where->actions.dumpDAG() << "\n"; } if (prewhere_info) @@ -2268,32 +2274,32 @@ std::string ExpressionAnalysisResult::dump() const if (before_aggregation) { - ss << "before_aggregation " << before_aggregation->dumpDAG() << "\n"; + ss << "before_aggregation " << before_aggregation->actions.dumpDAG() << "\n"; } if (before_having) { - ss << "before_having " << before_having->dumpDAG() << "\n"; + ss << "before_having " << before_having->actions.dumpDAG() << "\n"; } if (before_window) { - ss << "before_window " << before_window->dumpDAG() << "\n"; + ss << "before_window " << before_window->actions.dumpDAG() << "\n"; } if (before_order_by) { - ss << "before_order_by " << before_order_by->dumpDAG() << "\n"; + ss << "before_order_by " << before_order_by->actions.dumpDAG() << "\n"; } if (before_limit_by) { - ss << "before_limit_by " << before_limit_by->dumpDAG() << "\n"; + ss << "before_limit_by " << before_limit_by->actions.dumpDAG() << "\n"; } if (final_projection) { - ss << "final_projection " << final_projection->dumpDAG() << "\n"; + ss << "final_projection " << final_projection->actions.dumpDAG() << "\n"; } if (!selected_columns.empty()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 941194e69ff..43608cab412 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -117,7 +117,7 @@ public: /// If add_aliases, only the calculated values in the desired order and add aliases. /// If also project_result, than only aliases remain in the output block. /// Otherwise, only temporary columns will be deleted from the block. - ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true); + ActionsAndFlagsPtr getActionsDAG(bool add_aliases, bool project_result); ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no); /// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants. @@ -139,7 +139,7 @@ public: const WindowDescriptions & windowDescriptions() const { return window_descriptions; } void makeWindowDescriptionFromAST(const Context & context, const WindowDescriptions & existing_descriptions, WindowDescription & desc, const IAST * ast); - void makeWindowDescriptions(ActionsDAGPtr actions); + void makeWindowDescriptions(ActionsDAG & actions); /** Checks if subquery is not a plain StorageSet. * Because while making set we will read data from StorageSet which is not allowed. @@ -172,34 +172,34 @@ protected: /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain); - ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; + ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAG & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. */ - void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAG & actions, bool only_consts = false); - void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions, bool only_consts = false); - void getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions); + void getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAG & actions); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, * or after all the actions that are normally performed before aggregation. * Set has_aggregation = true if there is GROUP BY or at least one aggregate function. */ - void analyzeAggregation(ActionsDAGPtr & temp_actions); - void makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions); + void analyzeAggregation(ActionsDAG & temp_actions); + void makeAggregateDescriptions(ActionsDAG & actions, AggregateDescriptions & descriptions); const ASTSelectQuery * getSelectQuery() const; bool isRemoteStorage() const; - NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); - NamesAndTypesList analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns); + NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns); + NamesAndTypesList analyzeJoin(ActionsDAG & actions, const NamesAndTypesList & src_columns); AggregationKeysInfo getAggregationKeysInfo() const noexcept { @@ -231,20 +231,20 @@ struct ExpressionAnalysisResult bool use_grouping_set_key = false; - ActionsDAGPtr before_array_join; + ActionsAndFlagsPtr before_array_join; ArrayJoinActionPtr array_join; - ActionsDAGPtr before_join; - ActionsDAGPtr converting_join_columns; + ActionsAndFlagsPtr before_join; + ActionsAndFlagsPtr converting_join_columns; JoinPtr join; - ActionsDAGPtr before_where; - ActionsDAGPtr before_aggregation; - ActionsDAGPtr before_having; + ActionsAndFlagsPtr before_where; + ActionsAndFlagsPtr before_aggregation; + ActionsAndFlagsPtr before_having; String having_column_name; bool remove_having_filter = false; - ActionsDAGPtr before_window; - ActionsDAGPtr before_order_by; - ActionsDAGPtr before_limit_by; - ActionsDAGPtr final_projection; + ActionsAndFlagsPtr before_window; + ActionsAndFlagsPtr before_order_by; + ActionsAndFlagsPtr before_limit_by; + ActionsAndFlagsPtr final_projection; /// Columns from the SELECT list, before renaming them to aliases. Used to /// perform SELECT DISTINCT. @@ -351,12 +351,12 @@ public: /// Tables that will need to be sent to remote servers for distributed query processing. const TemporaryTablesMapping & getExternalTables() const { return external_tables; } - ActionsDAGPtr simpleSelectActions(); + ActionsAndFlagsPtr simpleSelectActions(); /// These appends are public only for tests void appendSelect(ExpressionActionsChain & chain, bool only_types); /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. - ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const; + ActionsAndFlagsPtr appendProjectResult(ExpressionActionsChain & chain) const; private: StorageMetadataPtr metadata_snapshot; @@ -386,13 +386,13 @@ private: */ /// Before aggregation: - ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types); + ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsAndFlagsPtr & before_array_join, bool only_types); bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types); - JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns); + JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsAndFlagsPtr & converting_join_columns); /// remove_filter is set in ExpressionActionsChain::finalize(); /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). - ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types); + ActionsAndFlagsPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types); bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); @@ -401,12 +401,12 @@ private: void appendExpressionsAfterWindowFunctions(ExpressionActionsChain & chain, bool only_types); void appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node); - void appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool only_types); + void appendGroupByModifiers(ActionsDAG & before_aggregation, ExpressionActionsChain & chain, bool only_types); /// After aggregation: bool appendHaving(ExpressionActionsChain & chain, bool only_types); /// appendSelect - ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); + ActionsAndFlagsPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &); bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); /// appendProjectResult }; diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index c26e4517c9a..15004189f77 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -93,8 +93,8 @@ struct WindowDescription // then by ORDER BY. This field holds this combined sort order. SortDescription full_sort_description; - std::vector partition_by_actions; - std::vector order_by_actions; + std::vector partition_by_actions; + std::vector order_by_actions; WindowFrame frame; From 17760a93dfc29c78861799b1d5632db49b2eb5ac Mon Sep 17 00:00:00 2001 From: 0x01f Date: Tue, 28 May 2024 13:07:23 +0800 Subject: [PATCH 04/61] Add SelectQueriesWithPrimaryKeyUsage in ProfileEvents --- src/Common/ProfileEvents.cpp | 1 + .../QueryPlan/ReadFromMergeTree.cpp | 15 +++++--- ...t_queries_with_primary_key_usage.reference | 24 +++++++++++++ ..._select_queries_with_primary_key_usage.sql | 35 +++++++++++++++++++ 4 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.reference create mode 100644 tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.sql diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 8c8e2163aad..10c08b60dde 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -14,6 +14,7 @@ M(QueriesWithSubqueries, "Count queries with all subqueries") \ M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \ M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \ + M(SelectQueriesWithPrimaryKeyUsage, "Count SELECT queries with primary key as a WHERE condition.") \ M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \ M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \ M(AsyncInsertRows, "Number of rows inserted by asynchronous INSERT queries.") \ diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 6f0fa55c349..8b14034c63a 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -120,6 +120,7 @@ namespace ProfileEvents extern const Event SelectedParts; extern const Event SelectedRanges; extern const Event SelectedMarks; + extern const Event SelectQueriesWithPrimaryKeyUsage; } namespace DB @@ -1617,11 +1618,17 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); - if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) + if (indexes->key_condition.alwaysUnknownOrTrue()) { - throw Exception(ErrorCodes::INDEX_NOT_USED, - "Primary key ({}) is not used and setting 'force_primary_key' is set", - fmt::join(primary_key_column_names, ", ")); + if (settings.force_primary_key) + { + throw Exception(ErrorCodes::INDEX_NOT_USED, + "Primary key ({}) is not used and setting 'force_primary_key' is set", + fmt::join(primary_key_column_names, ", ")); + } + } else + { + ProfileEvents::increment(ProfileEvents::SelectQueriesWithPrimaryKeyUsage); } LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); diff --git a/tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.reference b/tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.reference new file mode 100644 index 00000000000..d5563921583 --- /dev/null +++ b/tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.reference @@ -0,0 +1,24 @@ +1 +100 +100 +100 +100 +Row 1: +────── +queries_with_primary_key_usage: 1 + +Row 2: +────── +queries_with_primary_key_usage: 1 + +Row 3: +────── +queries_with_primary_key_usage: 1 + +Row 4: +────── +queries_with_primary_key_usage: 0 + +Row 5: +────── +queries_with_primary_key_usage: 0 diff --git a/tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.sql b/tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.sql new file mode 100644 index 00000000000..6b7aa6fb3ee --- /dev/null +++ b/tests/queries/0_stateless/03164_select_queries_with_primary_key_usage.sql @@ -0,0 +1,35 @@ +-- Tests for ProfileEvents "SelectedMarksByPrimaryKeyUsage" +set log_queries=1; +set log_queries_min_type='QUERY_FINISH'; + +DROP TABLE IF EXISTS 03164_select_queries_with_primary_key_usage; +CREATE TABLE 03164_select_queries_with_primary_key_usage +( + a Int64, b Int64, c Int64 +) ENGINE = MergeTree ORDER BY a; + +ALTER TABLE 03164_select_queries_with_primary_key_usage ADD PROJECTION b_projection ( + SELECT * ORDER BY b +); + +INSERT INTO 03164_select_queries_with_primary_key_usage SELECT number, number + 100, number + 1000 FROM numbers(100); + +SELECT count(*) FROM 03164_select_queries_with_primary_key_usage WHERE a >= 0 and b <= 100; +SELECT count(*) FROM 03164_select_queries_with_primary_key_usage WHERE a >= 0; +SELECT count(*) FROM 03164_select_queries_with_primary_key_usage WHERE b >= 100; +SELECT count(*) FROM 03164_select_queries_with_primary_key_usage WHERE c >= 1000; +SELECT count(*) FROM 03164_select_queries_with_primary_key_usage; + +SYSTEM FLUSH LOGS; + +SELECT + IF (ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] > 0, 1, 0) AS queries_with_primary_key_usage +FROM + system.query_log +WHERE + current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM 03164_select_queries_with_primary_key_usage%' +ORDER BY query +FORMAT Vertical; + +DROP TABLE IF EXISTS 03164_select_queries_with_primary_key_usage; From 6e9285791d7c8b3812f038114618bf1abf694997 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 12 Jun 2024 13:34:03 +0200 Subject: [PATCH 05/61] Revert "Revert "Small fix for 02340_parts_refcnt_mergetree"" --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index b100f96befa..e7d95d8db72 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -58,7 +58,7 @@ function check_refcnt_for_table() $CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3" # Kill the query gracefully. - kill -INT $PID + kill -INT $PID ||: wait $PID grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED rm -f "${log_file:?}" From ae42f8635cbf4d18c10272a5c3e9ff74df60eb0e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 12 Jun 2024 16:53:32 +0000 Subject: [PATCH 06/61] Remove flags drom ActionsDAG (part 2). --- src/Interpreters/ActionsDAG.cpp | 51 +++++----- src/Interpreters/ActionsDAG.h | 5 +- src/Interpreters/ActionsVisitor.cpp | 9 +- src/Interpreters/ActionsVisitor.h | 4 + src/Interpreters/ExpressionActions.cpp | 9 +- src/Interpreters/ExpressionActions.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 39 ++++---- src/Interpreters/ExpressionAnalyzer.h | 6 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 + src/Interpreters/InterpreterSelectQuery.cpp | 93 +++++++++---------- src/Interpreters/InterpreterSelectQuery.h | 12 +-- src/Interpreters/MutationsInterpreter.cpp | 20 ++-- src/Interpreters/WindowDescription.h | 4 +- src/Planner/ActionsChain.cpp | 20 ++-- src/Planner/ActionsChain.h | 8 +- src/Planner/CollectTableExpressionData.cpp | 4 +- src/Planner/Planner.cpp | 34 +++++-- src/Planner/PlannerActionsVisitor.cpp | 11 ++- src/Planner/PlannerActionsVisitor.h | 2 +- src/Planner/PlannerExpressionAnalysis.cpp | 80 +++++++++------- src/Planner/PlannerExpressionAnalysis.h | 14 +-- src/Planner/PlannerJoinTree.cpp | 13 +-- src/Planner/PlannerJoins.cpp | 4 +- src/Planner/Utils.cpp | 8 +- src/Planner/Utils.h | 2 +- .../Optimizations/optimizeReadInOrder.cpp | 2 - .../optimizeUseAggregateProjection.cpp | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 7 +- .../Transforms/AddingDefaultsTransform.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/StorageMaterializedView.cpp | 1 - src/Storages/StorageMerge.cpp | 4 +- 32 files changed, 264 insertions(+), 214 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 271f8f7474e..da53f197fd8 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -786,9 +786,6 @@ Block ActionsDAG::updateHeader(const Block & header) const for (auto & col : result_columns) res.insert(std::move(col)); - if (isInputProjected()) - return res; - res.reserve(header.columns() - pos_to_remove.size()); for (size_t i = 0; i < header.columns(); i++) { @@ -1152,6 +1149,33 @@ void ActionsDAG::project(const NamesWithAliases & projection) removeUnusedActions(); } +void ActionsDAG::appendInputsForUnusedColumns(const Block & sample_block) +{ + std::unordered_map> names_map; + size_t num_columns = sample_block.columns(); + for (size_t pos = 0; pos < num_columns; ++pos) + names_map[sample_block.getByPosition(pos).name].push_back(pos); + + for (const auto * input : inputs) + { + auto & positions = names_map[input->result_name]; + if (positions.empty()) + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, + "Not found column {} in block {}", input->result_name, sample_block.dumpStructure()); + + positions.pop_front(); + } + + for (const auto & [_, positions] : names_map) + { + for (auto pos : positions) + { + const auto & col = sample_block.getByPosition(pos); + addInput(col.name, col.type); + } + } +} + bool ActionsDAG::tryRestoreColumn(const std::string & column_name) { for (const auto * output_node : outputs) @@ -1225,8 +1249,6 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name) ActionsDAGPtr ActionsDAG::clone() const { auto actions = std::make_shared(); - actions->project_input = project_input; - actions->projected_output = projected_output; std::unordered_map copy_map; @@ -1320,9 +1342,6 @@ std::string ActionsDAG::dumpDAG() const out << ' ' << map[node]; out << '\n'; - out << "Project input: " << project_input << '\n'; - out << "Projected output: " << projected_output << '\n'; - return out.str(); } @@ -1581,10 +1600,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) auto it = first_result.find(input_node->result_name); if (it == first_result.end() || it->second.empty()) { - if (first.project_input) - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, - "Cannot find column {} in ActionsDAG result", input_node->result_name); - first.inputs.push_back(input_node); } else @@ -1620,13 +1635,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) } } - /// Update output nodes. - if (second.project_input) - { - first.outputs.swap(second.outputs); - first.project_input = true; - } - else { /// Add not removed result from first actions. for (const auto * output_node : first.outputs) @@ -1642,8 +1650,6 @@ void ActionsDAG::mergeInplace(ActionsDAG && second) } first.nodes.splice(first.nodes.end(), std::move(second.nodes)); - - first.projected_output = second.projected_output; } void ActionsDAG::mergeNodes(ActionsDAG && second, NodeRawConstPtrs * out_outputs) @@ -2039,7 +2045,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & } auto res = split(split_nodes); - res.second->project_input = project_input; return res; } @@ -2083,7 +2088,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameS dumpDAG()); auto res = split(split_nodes); - res.second->project_input = project_input; return res; } @@ -2155,7 +2159,6 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & co std::unordered_set split_nodes = {node}; auto res = split(split_nodes); - res.second->project_input = project_input; return res; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index ae2cb4129d5..d251f66a129 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -166,9 +166,12 @@ public: /// Call addAlias several times. void addAliases(const NamesWithAliases & aliases); - /// Add alias actions and remove unused columns from outputs. Also specify result columns order in outputs. + /// Add alias actions. Also specify result columns order in outputs. void project(const NamesWithAliases & projection); + /// Add input for every column from sample_block which is not mapped to existing input. + void appendInputsForUnusedColumns(const Block & sample_block); + /// If column is not in outputs, try to find it in nodes and insert back into outputs. bool tryRestoreColumn(const std::string & column_name); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b41c4509bd3..b292d02ccaa 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -392,6 +392,9 @@ Block createBlockForSet( } +ScopeStack::Level::Level() = default; +ScopeStack::Level::~Level() = default; +ScopeStack::Level::Level(Level &&) = default; FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets) @@ -529,7 +532,9 @@ std::vector ActionsMatcher::Data::getAllColumnNames() const ScopeStack::ScopeStack(ActionsDAG actions_dag, ContextPtr context_) : WithContext(context_) { - auto & level = stack.emplace_back(ScopeStack::Level{std::move(actions_dag), {}, {}}); + ScopeStack::Level tmp; + tmp.actions_dag = std::move(actions_dag); + auto & level = stack.emplace_back(std::move(tmp)); level.index = std::make_unique(level.actions_dag.getOutputs()); for (const auto & node : level.actions_dag.getOutputs()) @@ -1268,7 +1273,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & lambda_dag.removeUnusedActions(Names(1, result_name)); auto lambda_actions = std::make_shared( - std::make_shared(lambda_dag), + std::make_shared(std::move(lambda_dag)), ExpressionActionsSettings::fromContext(data.getContext(), CompileExpressions::yes)); DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index a0064637939..e13bd82be06 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -47,6 +47,10 @@ struct ScopeStack : WithContext ActionsDAG actions_dag; IndexPtr index; NameSet inputs; + + ~Level(); + Level(); + Level(Level &&); }; using Levels = std::vector; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 3ba448a0da0..d25d5b53226 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -49,8 +49,9 @@ namespace ErrorCodes static std::unordered_set processShortCircuitFunctions(const ActionsDAG & actions_dag, ShortCircuitFunctionEvaluation short_circuit_function_evaluation); -ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_) - : settings(settings_) +ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_, bool project_inputs_) + : project_inputs(project_inputs_) + , settings(settings_) { actions_dag = actions_dag_->clone(); @@ -757,6 +758,10 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run, } } + if (project_inputs) + { + block.clear(); + } if (allow_duplicates_in_input) { /// This case is the same as when the input is projected diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index b8ac10fd80e..7c6af41c04d 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -79,11 +79,13 @@ private: ColumnNumbers result_positions; Block sample_block; + bool project_inputs = false; + ExpressionActionsSettings settings; public: ExpressionActions() = delete; - explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}); + explicit ExpressionActions(ActionsDAGPtr actions_dag_, const ExpressionActionsSettings & settings_ = {}, bool project_inputs_ = false); ExpressionActions(const ExpressionActions &) = default; ExpressionActions & operator=(const ExpressionActions &) = default; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 530bace3b1e..c53cdd0d2ed 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -658,8 +658,8 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, with_alias->getColumnName(), 1 /* direction */, 1 /* nulls_direction */)); - ActionsDAG actions_dag(aggregated_columns); - getRootActions(column_ast, false, actions_dag); + auto actions_dag = std::make_shared(aggregated_columns); + getRootActions(column_ast, false, *actions_dag); desc.partition_by_actions.push_back(std::move(actions_dag)); } } @@ -679,8 +679,8 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, order_by_element.direction, order_by_element.nulls_direction)); - ActionsDAG actions_dag(aggregated_columns); - getRootActions(column_ast, false, actions_dag); + auto actions_dag = std::make_shared(aggregated_columns); + getRootActions(column_ast, false, *actions_dag); desc.order_by_actions.push_back(std::move(actions_dag)); } } @@ -1068,6 +1068,7 @@ static std::unique_ptr buildJoinedPlan( rename_dag->getOutputs()[pos] = &alias; } } + rename_dag->appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header); auto rename_step = std::make_unique(joined_plan->getCurrentDataStream(), std::move(rename_dag)); rename_step->setStepDescription("Rename joined columns"); joined_plan->addStep(std::move(rename_step)); @@ -1219,7 +1220,7 @@ ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::appendPrewhere( } { - ActionsDAG actions; + auto actions = std::make_shared(); auto required_columns = prewhere_actions->actions.getRequiredColumns(); NameSet prewhere_input_names; @@ -1265,7 +1266,7 @@ ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = ActionsDAG(required_columns); + actions->actions = ActionsDAG(required_columns); } else { @@ -1280,7 +1281,7 @@ ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - actions = ActionsDAG(columns); + actions->actions = ActionsDAG(columns); } chain.steps.emplace_back( @@ -1351,8 +1352,8 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain { for (auto & child : asts) { - ActionsDAG actions_dag(columns_after_join); - getRootActions(child, only_types, actions_dag); + auto actions_dag = std::make_shared(columns_after_join); + getRootActions(child, only_types, *actions_dag); group_by_elements_actions.emplace_back( std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes))); } @@ -1730,9 +1731,8 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const step.addRequiredOutput(expr->getColumnName()); } -ActionsAndFlagsPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_result) +ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_unused_result) { - ActionsAndFlagsPtr res; ActionsDAG actions_dag(aggregated_columns); NamesWithAliases result_columns; Names result_names; @@ -1759,16 +1759,15 @@ ActionsAndFlagsPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool proj if (add_aliases) { - if (project_result) + if (remove_unused_result) { actions_dag.project(result_columns); - res->project_input = res->projected_output = true; } else actions_dag.addAliases(result_columns); } - if (!(add_aliases && project_result)) + if (!(add_aliases && remove_unused_result)) { NameSet name_set(result_names.begin(), result_names.end()); /// We will not delete the original columns. @@ -1784,14 +1783,13 @@ ActionsAndFlagsPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool proj actions_dag.removeUnusedActions(name_set); } - res->actions = std::move(actions_dag); - return res; + return std::make_unique(std::move(actions_dag)); } -ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result, CompileExpressions compile_expressions) +ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remove_unused_result, CompileExpressions compile_expressions) { return std::make_shared( - getActionsDAG(add_aliases, project_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions)); + getActionsDAG(add_aliases, remove_unused_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions), remove_unused_result); } ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) @@ -1925,7 +1923,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( { /// Prewhere is always the first one. prewhere_step_num = 0; - prewhere_info = std::make_shared(actions, query.prewhere()->getColumnName()); + auto dag = std::make_shared(std::move(actions->actions)); + prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) { @@ -1967,7 +1966,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (sanitizeBlock(before_where_sample)) { ExpressionActions( - std::make_shared(before_where->actions.clone()), + before_where->actions.clone(), ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); auto & column_elem diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 43608cab412..6fa50bb70c9 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -115,10 +115,10 @@ public: /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression. /// If add_aliases, only the calculated values in the desired order and add aliases. - /// If also project_result, than only aliases remain in the output block. + /// If also remove_unused_result, than only aliases remain in the output block. /// Otherwise, only temporary columns will be deleted from the block. - ActionsAndFlagsPtr getActionsDAG(bool add_aliases, bool project_result); - ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no); + ActionsDAGPtr getActionsDAG(bool add_aliases, bool remove_unused_result = true); + ExpressionActionsPtr getActions(bool add_aliases, bool remove_unused_result = true, CompileExpressions compile_expressions = CompileExpressions::no); /// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants. /// Does not execute subqueries. diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 927bafe4bfb..91bfa863bd9 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -386,6 +386,8 @@ Chain InterpreterInsertQuery::buildPreSinkChain( auto adding_missing_defaults_actions = std::make_shared(adding_missing_defaults_dag); + std::cerr << adding_missing_defaults_actions->dumpActions() << std::endl; + /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) out.addSource(std::make_shared(query_sample_block, adding_missing_defaults_actions)); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b72399df2c1..fac3c0637b5 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -124,6 +124,7 @@ namespace ErrorCodes extern const int UNKNOWN_IDENTIFIER; extern const int BAD_ARGUMENTS; extern const int SUPPORT_IS_DISABLED; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. @@ -175,11 +176,10 @@ FilterDAGInfoPtr generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = TreeRewriter(context).analyzeSelect(query_ast, TreeRewriterResult({}, storage, storage_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot, {}, false, {}, prepared_sets); - filter_info->actions = analyzer.simpleSelectActions(); + filter_info->actions = std::make_unique(std::move(analyzer.simpleSelectActions()->actions)); filter_info->column_name = expr_list->children.at(0)->getColumnName(); filter_info->actions->removeUnusedActions(NameSet{filter_info->column_name}); - filter_info->actions->projectInput(false); for (const auto * node : filter_info->actions->getInputs()) filter_info->actions->getOutputs().push_back(node); @@ -1078,15 +1078,15 @@ Block InterpreterSelectQuery::getSampleBlockImpl() // with this code. See // https://github.com/ClickHouse/ClickHouse/issues/19857 for details. if (analysis_result.before_window) - return analysis_result.before_window->getResultColumns(); + return analysis_result.before_window->actions.getResultColumns(); // NOTE: should not handle before_limit_by specially since // WithMergeableState does not process LIMIT BY - return analysis_result.before_order_by->getResultColumns(); + return analysis_result.before_order_by->actions.getResultColumns(); } - Block header = analysis_result.before_aggregation->getResultColumns(); + Block header = analysis_result.before_aggregation->actions.getResultColumns(); Block res; @@ -1124,18 +1124,18 @@ Block InterpreterSelectQuery::getSampleBlockImpl() // It's different from selected_columns, see the comment above for // WithMergeableState stage. if (analysis_result.before_window) - return analysis_result.before_window->getResultColumns(); + return analysis_result.before_window->actions.getResultColumns(); // In case of query on remote shards executed up to // WithMergeableStateAfterAggregation*, they can process LIMIT BY, // since the initiator will not apply LIMIT BY again. if (analysis_result.before_limit_by) - return analysis_result.before_limit_by->getResultColumns(); + return analysis_result.before_limit_by->actions.getResultColumns(); - return analysis_result.before_order_by->getResultColumns(); + return analysis_result.before_order_by->actions.getResultColumns(); } - return analysis_result.final_projection->getResultColumns(); + return analysis_result.final_projection->actions.getResultColumns(); } @@ -1636,12 +1636,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(query_plan.getCurrentDataStream(), expressions.before_array_join); - before_array_join_step->setStepDescription("Before ARRAY JOIN"); - query_plan.addStep(std::move(before_array_join_step)); - } + executeExpression(query_plan, expressions.before_array_join, "Before ARRAY JOIN"); if (expressions.array_join) { @@ -1653,23 +1648,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

( - query_plan.getCurrentDataStream(), - expressions.before_join); - before_join_step->setStepDescription("Before JOIN"); - query_plan.addStep(std::move(before_join_step)); - } + executeExpression(query_plan, expressions.before_join, "Before JOIN"); /// Optional step to convert key columns to common supertype. if (expressions.converting_join_columns) - { - QueryPlanStepPtr convert_join_step = std::make_unique( - query_plan.getCurrentDataStream(), - expressions.converting_join_columns); - convert_join_step->setStepDescription("Convert JOIN columns"); - query_plan.addStep(std::move(convert_join_step)); - } + executeExpression(query_plan, expressions.converting_join_columns, "Convert JOIN columns"); if (expressions.hasJoin()) { @@ -2113,7 +2096,6 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. analysis.prewhere_info = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); - analysis.prewhere_info->prewhere_actions->projectInput(false); analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column; analysis.prewhere_info->need_filter = true; analysis.filter_info = nullptr; @@ -2124,7 +2106,6 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis /// Add row level security actions to prewhere. analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; analysis.prewhere_info->row_level_column_name = analysis.filter_info->column_name; - analysis.prewhere_info->row_level_filter->projectInput(false); analysis.filter_info = nullptr; } } @@ -2333,7 +2314,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle } if (analysis_result.hasWhere()) { - filter_nodes.push_back(&analysis_result.before_where->findInOutputs(analysis_result.where_column_name)); + filter_nodes.push_back(&analysis_result.before_where->actions.findInOutputs(analysis_result.where_column_name)); } auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes); @@ -2442,7 +2423,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc auto column = ColumnAggregateFunction::create(func); column->insertFrom(place); - Block header = analysis_result.before_aggregation->getResultColumns(); + Block header = analysis_result.before_aggregation->actions.getResultColumns(); size_t arguments_size = desc.argument_names.size(); DataTypes argument_types(arguments_size); for (size_t j = 0; j < arguments_size; ++j) @@ -2604,7 +2585,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Possible filters: row-security, additional filter, replica filter (before array join), where (after array join) query_info.has_filters_and_no_array_join_before_filter = row_policy_filter || additional_filter_info || parallel_replicas_custom_filter_info - || (analysis_result.hasWhere() && !analysis_result.before_where->hasArrayJoin() && !analysis_result.array_join); + || (analysis_result.hasWhere() && !analysis_result.before_where->actions.hasArrayJoin() && !analysis_result.array_join); storage->read(query_plan, required_columns, storage_snapshot, query_info, context, processing_stage, max_block_size, max_streams); if (context->hasQueryContext() && !options.is_internal) @@ -2646,10 +2627,14 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } } -void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) +void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, bool remove_filter) { + auto dag = expression->actions.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + auto where_step = std::make_unique( - query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter); + query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter); where_step->setStepDescription("WHERE"); query_plan.addStep(std::move(where_step)); @@ -2723,11 +2708,9 @@ static GroupingSetsParamsList getAggregatorGroupingSetsParams(const SelectQueryE return result; } -void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) +void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) { - auto expression_before_aggregation = std::make_unique(query_plan.getCurrentDataStream(), expression); - expression_before_aggregation->setStepDescription("Before GROUP BY"); - query_plan.addStep(std::move(expression_before_aggregation)); + executeExpression(query_plan, expression, "Before GROUP BY"); AggregateDescriptions aggregates = query_analyzer->aggregates(); const Settings & settings = context->getSettingsRef(); @@ -2818,10 +2801,14 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool } -void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter) +void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, bool remove_filter) { + auto dag = expression->actions.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + auto having_step - = std::make_unique(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), remove_filter); + = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter); having_step->setStepDescription("HAVING"); query_plan.addStep(std::move(having_step)); @@ -2829,15 +2816,19 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( - QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final) + QueryPlan & query_plan, bool has_having, const ActionsAndFlagsPtr & expression, bool remove_filter, bool overflow_row, bool final) { + auto dag = expression->actions.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + const Settings & settings = context->getSettingsRef(); auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), query_analyzer->aggregates(), overflow_row, - expression, + std::move(dag), has_having ? getSelectQuery().having()->getColumnName() : "", remove_filter, settings.totals_mode, @@ -2870,12 +2861,16 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific query_plan.addStep(std::move(step)); } -void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description) +void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, const std::string & description) { if (!expression) return; - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression); + auto dag = expression->actions.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(dag)); expression_step->setStepDescription(description); query_plan.addStep(std::move(expression_step)); @@ -3045,11 +3040,9 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st } -void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ActionsDAGPtr & expression) +void InterpreterSelectQuery::executeProjection(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression) { - auto projection_step = std::make_unique(query_plan.getCurrentDataStream(), expression); - projection_step->setStepDescription("Projection"); - query_plan.addStep(std::move(projection_step)); + executeExpression(query_plan, expression, "Projection"); } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index e89a1e5febf..c4012180b0f 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -174,13 +174,13 @@ private: /// Different stages of query execution. void executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan); - void executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); + void executeWhere(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, bool remove_filter); void executeAggregation( - QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); + QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info); void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final, bool has_grouping_sets); - void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final); - void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter); - static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); + void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsAndFlagsPtr & expression, bool remove_filter, bool overflow_row, bool final); + void executeHaving(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, bool remove_filter); + static void executeExpression(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression, const std::string & description); /// FIXME should go through ActionsDAG to behave as a proper function void executeWindow(QueryPlan & query_plan); void executeOrder(QueryPlan & query_plan, InputOrderInfoPtr sorting_info); @@ -191,7 +191,7 @@ private: void executeLimitBy(QueryPlan & query_plan); void executeLimit(QueryPlan & query_plan); void executeOffset(QueryPlan & query_plan); - static void executeProjection(QueryPlan & query_plan, const ActionsDAGPtr & expression); + static void executeProjection(QueryPlan & query_plan, const ActionsAndFlagsPtr & expression); void executeDistinct(QueryPlan & query_plan, bool before_order, Names columns, bool pre_distinct); void executeExtremes(QueryPlan & query_plan); void executeSubqueriesInSetsAndJoins(QueryPlan & query_plan); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index ba33b70b59c..309ab2691a9 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -1137,9 +1137,9 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s for (const auto & kv : stage.column_to_updated) { auto column_name = kv.second->getColumnName(); - const auto & dag_node = actions->findInOutputs(column_name); - const auto & alias = actions->addAlias(dag_node, kv.first); - actions->addOrReplaceInOutputs(alias); + const auto & dag_node = actions->actions.findInOutputs(column_name); + const auto & alias = actions->actions.addAlias(dag_node, kv.first); + actions->actions.addOrReplaceInOutputs(alias); } } @@ -1202,7 +1202,7 @@ void MutationsInterpreter::Source::read( { ActionsDAG::NodeRawConstPtrs nodes(num_filters); for (size_t i = 0; i < num_filters; ++i) - nodes[i] = &steps[i]->actions()->findInOutputs(names[i]); + nodes[i] = &steps[i]->actions()->actions.findInOutputs(names[i]); filter = ActionsDAG::buildFilterActionsDAG(nodes); } @@ -1273,18 +1273,24 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v for (size_t i = 0; i < stage.expressions_chain.steps.size(); ++i) { const auto & step = stage.expressions_chain.steps[i]; - if (step->actions()->hasArrayJoin()) + if (step->actions()->actions.hasArrayJoin()) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "arrayJoin is not allowed in mutations"); if (i < stage.filter_column_names.size()) { + auto dag = step->actions()->actions.clone(); + if (step->actions()->project_input) + dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute DELETEs. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions(), stage.filter_column_names[i], false)); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag, stage.filter_column_names[i], false)); } else { + auto dag = step->actions()->actions.clone(); + if (step->actions()->project_input) + dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); /// Execute UPDATE or final projection. - plan.addStep(std::make_unique(plan.getCurrentDataStream(), step->actions())); + plan.addStep(std::make_unique(plan.getCurrentDataStream(), dag)); } } diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index 15004189f77..c26e4517c9a 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -93,8 +93,8 @@ struct WindowDescription // then by ORDER BY. This field holds this combined sort order. SortDescription full_sort_description; - std::vector partition_by_actions; - std::vector order_by_actions; + std::vector partition_by_actions; + std::vector order_by_actions; WindowFrame frame; diff --git a/src/Planner/ActionsChain.cpp b/src/Planner/ActionsChain.cpp index c5438b5d2d4..85d50429bab 100644 --- a/src/Planner/ActionsChain.cpp +++ b/src/Planner/ActionsChain.cpp @@ -11,7 +11,7 @@ namespace DB { -ActionsChainStep::ActionsChainStep(ActionsDAGPtr actions_, +ActionsChainStep::ActionsChainStep(ActionsAndFlagsPtr actions_, bool use_actions_nodes_as_output_columns_, ColumnsWithTypeAndName additional_output_columns_) : actions(std::move(actions_)) @@ -28,12 +28,12 @@ void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input auto child_input_columns_copy = child_input_columns; std::unordered_set output_nodes_names; - output_nodes_names.reserve(actions->getOutputs().size()); + output_nodes_names.reserve(actions->actions.getOutputs().size()); - for (auto & output_node : actions->getOutputs()) + for (auto & output_node : actions->actions.getOutputs()) output_nodes_names.insert(output_node->result_name); - for (const auto & node : actions->getNodes()) + for (const auto & node : actions->actions.getNodes()) { auto it = child_input_columns_copy.find(node.result_name); if (it == child_input_columns_copy.end()) @@ -45,20 +45,20 @@ void ActionsChainStep::finalizeInputAndOutputColumns(const NameSet & child_input if (output_nodes_names.contains(node.result_name)) continue; - actions->getOutputs().push_back(&node); + actions->actions.getOutputs().push_back(&node); output_nodes_names.insert(node.result_name); } - actions->removeUnusedActions(); + actions->actions.removeUnusedActions(); /// TODO: Analyzer fix ActionsDAG input and constant nodes with same name - actions->projectInput(); + actions->project_input = true; initialize(); } void ActionsChainStep::dump(WriteBuffer & buffer) const { buffer << "DAG" << '\n'; - buffer << actions->dumpDAG(); + buffer << actions->actions.dumpDAG(); if (!available_output_columns.empty()) { @@ -84,7 +84,7 @@ String ActionsChainStep::dump() const void ActionsChainStep::initialize() { - auto required_columns_names = actions->getRequiredColumnsNames(); + auto required_columns_names = actions->actions.getRequiredColumnsNames(); input_columns_names = NameSet(required_columns_names.begin(), required_columns_names.end()); available_output_columns.clear(); @@ -93,7 +93,7 @@ void ActionsChainStep::initialize() { std::unordered_set available_output_columns_names; - for (const auto & node : actions->getNodes()) + for (const auto & node : actions->actions.getNodes()) { if (available_output_columns_names.contains(node.result_name)) continue; diff --git a/src/Planner/ActionsChain.h b/src/Planner/ActionsChain.h index 4907fdbad87..ab5823b99d7 100644 --- a/src/Planner/ActionsChain.h +++ b/src/Planner/ActionsChain.h @@ -48,18 +48,18 @@ public: * If use_actions_nodes_as_output_columns = true output columns are initialized using actions dag nodes. * If additional output columns are specified they are added to output columns. */ - explicit ActionsChainStep(ActionsDAGPtr actions_, + explicit ActionsChainStep(ActionsAndFlagsPtr actions_, bool use_actions_nodes_as_output_columns = true, ColumnsWithTypeAndName additional_output_columns_ = {}); /// Get actions - ActionsDAGPtr & getActions() + ActionsAndFlagsPtr & getActions() { return actions; } /// Get actions - const ActionsDAGPtr & getActions() const + const ActionsAndFlagsPtr & getActions() const { return actions; } @@ -98,7 +98,7 @@ public: private: void initialize(); - ActionsDAGPtr actions; + ActionsAndFlagsPtr actions; bool use_actions_nodes_as_output_columns = true; diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 27b5909c13b..d5e39a9f123 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -90,7 +90,7 @@ public: ActionsDAGPtr alias_column_actions_dag = std::make_shared(); PlannerActionsVisitor actions_visitor(planner_context, false); - auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); + auto outputs = actions_visitor.visit(*alias_column_actions_dag, column_node->getExpression()); if (outputs.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected single output in actions dag for alias column {}. Actual {}", column_node->dumpTree(), outputs.size()); @@ -340,7 +340,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); + auto expression_nodes = visitor.visit(*prewhere_actions_dag, query_tree_node); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. In query {}", diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index b40e23a9553..b117a0cd34b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -99,6 +99,7 @@ namespace ErrorCodes extern const int TOO_DEEP_SUBQUERIES; extern const int NOT_IMPLEMENTED; extern const int SUPPORT_IS_DISABLED; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; } namespace @@ -329,12 +330,16 @@ public: }; void addExpressionStep(QueryPlan & query_plan, - const ActionsDAGPtr & expression_actions, + const ActionsAndFlagsPtr & expression_actions, const std::string & step_description, std::vector & result_actions_to_execute) { - result_actions_to_execute.push_back(expression_actions); - auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), expression_actions); + auto actions = expression_actions->actions.clone(); + if (expression_actions->project_input) + actions->appendInputsForUnusedColumns( query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); + auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); expression_step->setStepDescription(step_description); query_plan.addStep(std::move(expression_step)); } @@ -344,9 +349,13 @@ void addFilterStep(QueryPlan & query_plan, const std::string & step_description, std::vector & result_actions_to_execute) { - result_actions_to_execute.push_back(filter_analysis_result.filter_actions); + auto actions = filter_analysis_result.filter_actions->actions.clone(); + if (filter_analysis_result.filter_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); auto where_step = std::make_unique(query_plan.getCurrentDataStream(), - filter_analysis_result.filter_actions, + actions, filter_analysis_result.filter_column_name, filter_analysis_result.remove_filter_column); where_step->setStepDescription(step_description); @@ -545,14 +554,21 @@ void addTotalsHavingStep(QueryPlan & query_plan, const auto & having_analysis_result = expression_analysis_result.getHaving(); bool need_finalize = !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); + ActionsDAGPtr actions; if (having_analysis_result.filter_actions) - result_actions_to_execute.push_back(having_analysis_result.filter_actions); + { + actions = having_analysis_result.filter_actions->actions.clone(); + if (having_analysis_result.filter_actions->project_input) + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + + result_actions_to_execute.push_back(actions); + } auto totals_having_step = std::make_unique( query_plan.getCurrentDataStream(), aggregation_analysis_result.aggregate_descriptions, query_analysis_result.aggregate_overflow_row, - having_analysis_result.filter_actions, + actions, having_analysis_result.filter_column_name, having_analysis_result.remove_filter_column, settings.totals_mode, @@ -728,12 +744,12 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, auto & interpolate_node_typed = interpolate_node->as(); PlannerActionsVisitor planner_actions_visitor(planner_context); - auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + auto expression_to_interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getExpression()); if (expression_to_interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression to interpolate expected to have single action node"); - auto interpolate_expression_nodes = planner_actions_visitor.visit(interpolate_actions_dag, + auto interpolate_expression_nodes = planner_actions_visitor.visit(*interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); if (interpolate_expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interpolate expression expected to have single action node"); diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 837307ba2ca..f5e71acee38 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -886,7 +886,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi for (const auto & argument : function_node.getArguments()) { - auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(index_hint_actions_dag, argument); + auto index_hint_argument_expression_dag_nodes = actions_visitor.visit(*index_hint_actions_dag, argument); for (auto & expression_dag_node : index_hint_argument_expression_dag_nodes) { @@ -1013,10 +1013,13 @@ PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_c , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) {} -ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node) +ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node) { - PlannerActionsVisitorImpl actions_visitor_impl(actions_dag, planner_context, use_column_identifier_as_action_node_name); - return actions_visitor_impl.visit(expression_node); + auto ptr = std::make_shared(std::move(actions_dag)); + PlannerActionsVisitorImpl actions_visitor_impl(ptr, planner_context, use_column_identifier_as_action_node_name); + auto res = actions_visitor_impl.visit(expression_node); + actions_dag = std::move(*ptr); + return res; } String calculateActionNodeName(const QueryTreeNodePtr & node, diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 8506c309171..6bb32047327 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -37,7 +37,7 @@ public: * Necessary actions are not added in actions dag output. * Returns query tree expression node actions dag nodes. */ - ActionsDAG::NodeRawConstPtrs visit(ActionsDAGPtr actions_dag, QueryTreeNodePtr expression_node); + ActionsDAG::NodeRawConstPtrs visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node); private: const PlannerContextPtr planner_context; diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index f0a2845c3e8..b2223d9c51d 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -44,8 +44,9 @@ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_no { FilterAnalysisResult result; - result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); - result.filter_column_name = result.filter_actions->getOutputs().at(0)->result_name; + result.filter_actions = std::make_shared(); + result.filter_actions->actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); + result.filter_column_name = result.filter_actions->actions.getOutputs().at(0)->result_name; actions_chain.addStep(std::make_unique(result.filter_actions)); return result; @@ -111,8 +112,9 @@ std::optional analyzeAggregation(const QueryTreeNodeP Names aggregation_keys; - ActionsDAGPtr before_aggregation_actions = std::make_shared(input_columns); - before_aggregation_actions->getOutputs().clear(); + ActionsAndFlagsPtr before_aggregation_actions = std::make_shared(); + before_aggregation_actions->actions = ActionsDAG(input_columns); + before_aggregation_actions->actions.getOutputs().clear(); std::unordered_set before_aggregation_actions_output_node_names; @@ -147,7 +149,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, grouping_set_key_node); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->actions, grouping_set_key_node); aggregation_keys.reserve(expression_dag_nodes.size()); for (auto & expression_dag_node : expression_dag_nodes) @@ -160,7 +162,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto column_after_aggregation = group_by_use_nulls && expression_dag_node->column != nullptr ? makeNullableSafe(expression_dag_node->column) : expression_dag_node->column; available_columns_after_aggregation.emplace_back(std::move(column_after_aggregation), expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->actions.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -199,7 +201,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP if (constant_key && !aggregates_descriptions.empty() && (!check_constants_for_group_by_key || canRemoveConstantFromGroupByKey(*constant_key))) continue; - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, group_by_key_node); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->actions, group_by_key_node); aggregation_keys.reserve(expression_dag_nodes.size()); for (auto & expression_dag_node : expression_dag_nodes) @@ -211,7 +213,7 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto column_after_aggregation = group_by_use_nulls && expression_dag_node->column != nullptr ? makeNullableSafe(expression_dag_node->column) : expression_dag_node->column; available_columns_after_aggregation.emplace_back(std::move(column_after_aggregation), expression_type_after_aggregation, expression_dag_node->result_name); aggregation_keys.push_back(expression_dag_node->result_name); - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->actions.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -225,13 +227,13 @@ std::optional analyzeAggregation(const QueryTreeNodeP auto & aggregate_function_node_typed = aggregate_function_node->as(); for (const auto & aggregate_function_node_argument : aggregate_function_node_typed.getArguments().getNodes()) { - auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions, aggregate_function_node_argument); + auto expression_dag_nodes = actions_visitor.visit(before_aggregation_actions->actions, aggregate_function_node_argument); for (auto & expression_dag_node : expression_dag_nodes) { if (before_aggregation_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_aggregation_actions->getOutputs().push_back(expression_dag_node); + before_aggregation_actions->actions.getOutputs().push_back(expression_dag_node); before_aggregation_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -278,8 +280,9 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query PlannerActionsVisitor actions_visitor(planner_context); - ActionsDAGPtr before_window_actions = std::make_shared(input_columns); - before_window_actions->getOutputs().clear(); + ActionsAndFlagsPtr before_window_actions = std::make_shared(); + before_window_actions->actions = ActionsDAG(input_columns); + before_window_actions->actions.getOutputs().clear(); std::unordered_set before_window_actions_output_node_names; @@ -288,25 +291,25 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query auto & window_function_node_typed = window_function_node->as(); auto & window_node = window_function_node_typed.getWindowNode()->as(); - auto expression_dag_nodes = actions_visitor.visit(before_window_actions, window_function_node_typed.getArgumentsNode()); + auto expression_dag_nodes = actions_visitor.visit(before_window_actions->actions, window_function_node_typed.getArgumentsNode()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->actions.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } - expression_dag_nodes = actions_visitor.visit(before_window_actions, window_node.getPartitionByNode()); + expression_dag_nodes = actions_visitor.visit(before_window_actions->actions, window_node.getPartitionByNode()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->actions.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } @@ -317,14 +320,14 @@ std::optional analyzeWindow(const QueryTreeNodePtr & query for (auto & sort_node : order_by_node_list.getNodes()) { auto & sort_node_typed = sort_node->as(); - expression_dag_nodes = actions_visitor.visit(before_window_actions, sort_node_typed.getExpression()); + expression_dag_nodes = actions_visitor.visit(before_window_actions->actions, sort_node_typed.getExpression()); for (auto & expression_dag_node : expression_dag_nodes) { if (before_window_actions_output_node_names.contains(expression_dag_node->result_name)) continue; - before_window_actions->getOutputs().push_back(expression_dag_node); + before_window_actions->actions.getOutputs().push_back(expression_dag_node); before_window_actions_output_node_names.insert(expression_dag_node->result_name); } } @@ -357,7 +360,8 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - auto projection_actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), input_columns, planner_context); + auto projection_actions = std::make_shared(); + projection_actions->actions = buildActionsDAGFromExpressionNode(query_node.getProjectionNode(), input_columns, planner_context); auto projection_columns = query_node.getProjectionColumns(); size_t projection_columns_size = projection_columns.size(); @@ -366,7 +370,7 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, NamesWithAliases projection_column_names_with_display_aliases; projection_column_names_with_display_aliases.reserve(projection_columns_size); - auto & projection_actions_outputs = projection_actions->getOutputs(); + auto & projection_actions_outputs = projection_actions->actions.getOutputs(); size_t projection_outputs_size = projection_actions_outputs.size(); if (projection_columns_size != projection_outputs_size) @@ -404,8 +408,9 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, const PlannerContextPtr & planner_context, ActionsChain & actions_chain) { - ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); - auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); + auto before_sort_actions = std::make_shared(); + before_sort_actions->actions = ActionsDAG(input_columns); + auto & before_sort_actions_outputs = before_sort_actions->actions.getOutputs(); before_sort_actions_outputs.clear(); PlannerActionsVisitor actions_visitor(planner_context); @@ -419,7 +424,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (const auto & sort_node : order_by_node_list.getNodes()) { auto & sort_node_typed = sort_node->as(); - auto expression_dag_nodes = actions_visitor.visit(before_sort_actions, sort_node_typed.getExpression()); + auto expression_dag_nodes = actions_visitor.visit(before_sort_actions->actions, sort_node_typed.getExpression()); has_with_fill |= sort_node_typed.withFill(); for (auto & action_dag_node : expression_dag_nodes) @@ -435,7 +440,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, if (has_with_fill) { for (auto & output_node : before_sort_actions_outputs) - output_node = &before_sort_actions->materializeNode(*output_node); + output_node = &before_sort_actions->actions.materializeNode(*output_node); } /// We add only INPUT columns necessary for INTERPOLATE expression in before ORDER BY actions DAG @@ -444,7 +449,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, auto & interpolate_list_node = query_node.getInterpolate()->as(); PlannerActionsVisitor interpolate_actions_visitor(planner_context); - auto interpolate_actions_dag = std::make_shared(); + ActionsDAG interpolate_actions_dag; for (auto & interpolate_node : interpolate_list_node.getNodes()) { @@ -453,10 +458,10 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, } std::unordered_map before_sort_actions_inputs_name_to_node; - for (const auto & node : before_sort_actions->getInputs()) + for (const auto & node : before_sort_actions->actions.getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); - for (const auto & node : interpolate_actions_dag->getNodes()) + for (const auto & node : interpolate_actions_dag.getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || node.type != ActionsDAG::ActionType::INPUT) @@ -466,7 +471,7 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, if (input_node_it == before_sort_actions_inputs_name_to_node.end()) { auto input_column = ColumnWithTypeAndName{node.column, node.result_type, node.result_name}; - const auto * input_node = &before_sort_actions->addInput(std::move(input_column)); + const auto * input_node = &before_sort_actions->actions.addInput(std::move(input_column)); auto [it, _] = before_sort_actions_inputs_name_to_node.emplace(node.result_name, input_node); input_node_it = it; } @@ -491,22 +496,23 @@ LimitByAnalysisResult analyzeLimitBy(const QueryNode & query_node, const NameSet & required_output_nodes_names, ActionsChain & actions_chain) { - auto before_limit_by_actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), input_columns, planner_context); + auto before_limit_by_actions = std::make_shared(); + before_limit_by_actions->actions = buildActionsDAGFromExpressionNode(query_node.getLimitByNode(), input_columns, planner_context); NameSet limit_by_column_names_set; Names limit_by_column_names; - limit_by_column_names.reserve(before_limit_by_actions->getOutputs().size()); - for (auto & output_node : before_limit_by_actions->getOutputs()) + limit_by_column_names.reserve(before_limit_by_actions->actions.getOutputs().size()); + for (auto & output_node : before_limit_by_actions->actions.getOutputs()) { limit_by_column_names_set.insert(output_node->result_name); limit_by_column_names.push_back(output_node->result_name); } - for (const auto & node : before_limit_by_actions->getNodes()) + for (const auto & node : before_limit_by_actions->actions.getNodes()) { if (required_output_nodes_names.contains(node.result_name) && !limit_by_column_names_set.contains(node.result_name)) - before_limit_by_actions->getOutputs().push_back(&node); + before_limit_by_actions->actions.getOutputs().push_back(&node); } auto actions_step_before_limit_by = std::make_unique(before_limit_by_actions); @@ -591,7 +597,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo if (sort_analysis_result_optional.has_value() && planner_query_processing_info.isFirstStage() && planner_query_processing_info.getToStage() != QueryProcessingStage::Complete) { const auto & before_order_by_actions = sort_analysis_result_optional->before_order_by_actions; - for (const auto & output_node : before_order_by_actions->getOutputs()) + for (const auto & output_node : before_order_by_actions->actions.getOutputs()) required_output_nodes_names.insert(output_node->result_name); } @@ -647,8 +653,10 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo } } - auto project_names_actions = std::make_shared(project_names_input); - project_names_actions->project(projection_analysis_result.projection_column_names_with_display_aliases); + auto project_names_actions = std::make_shared(); + project_names_actions->actions = ActionsDAG(project_names_input); + project_names_actions->actions.project(projection_analysis_result.projection_column_names_with_display_aliases); + project_names_actions->project_input = true; actions_chain.addStep(std::make_unique(project_names_actions)); actions_chain.finalize(); diff --git a/src/Planner/PlannerExpressionAnalysis.h b/src/Planner/PlannerExpressionAnalysis.h index 0773272e49a..3a9ed903bbc 100644 --- a/src/Planner/PlannerExpressionAnalysis.h +++ b/src/Planner/PlannerExpressionAnalysis.h @@ -17,22 +17,22 @@ namespace DB struct ProjectionAnalysisResult { - ActionsDAGPtr projection_actions; + ActionsAndFlagsPtr projection_actions; Names projection_column_names; NamesWithAliases projection_column_names_with_display_aliases; - ActionsDAGPtr project_names_actions; + ActionsAndFlagsPtr project_names_actions; }; struct FilterAnalysisResult { - ActionsDAGPtr filter_actions; + ActionsAndFlagsPtr filter_actions; std::string filter_column_name; bool remove_filter_column = false; }; struct AggregationAnalysisResult { - ActionsDAGPtr before_aggregation_actions; + ActionsAndFlagsPtr before_aggregation_actions; Names aggregation_keys; AggregateDescriptions aggregate_descriptions; GroupingSetsParamsList grouping_sets_parameters_list; @@ -41,19 +41,19 @@ struct AggregationAnalysisResult struct WindowAnalysisResult { - ActionsDAGPtr before_window_actions; + ActionsAndFlagsPtr before_window_actions; std::vector window_descriptions; }; struct SortAnalysisResult { - ActionsDAGPtr before_order_by_actions; + ActionsAndFlagsPtr before_order_by_actions; bool has_with_fill = false; }; struct LimitByAnalysisResult { - ActionsDAGPtr before_limit_by_actions; + ActionsAndFlagsPtr before_limit_by_actions; Names limit_by_column_names; }; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index efc449402b9..18c3744f864 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -79,6 +79,7 @@ namespace ErrorCodes extern const int TOO_MANY_COLUMNS; extern const int UNSUPPORTED_METHOD; extern const int BAD_ARGUMENTS; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; } namespace @@ -1072,7 +1073,7 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP } } - cast_actions_dag->projectInput(); + cast_actions_dag->appendInputsForUnusedColumns( plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); plan_to_add_cast.addStep(std::move(cast_join_columns_step)); @@ -1118,12 +1119,12 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ join_table_expression, planner_context); - join_clauses_and_actions.left_join_expressions_actions->projectInput(); + join_clauses_and_actions.left_join_expressions_actions->appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header); auto left_join_expressions_actions_step = std::make_unique(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions); left_join_expressions_actions_step->setStepDescription("JOIN actions"); left_plan.addStep(std::move(left_join_expressions_actions_step)); - join_clauses_and_actions.right_join_expressions_actions->projectInput(); + join_clauses_and_actions.right_join_expressions_actions->appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header); auto right_join_expressions_actions_step = std::make_unique(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions); right_join_expressions_actions_step->setStepDescription("JOIN actions"); right_plan.addStep(std::move(right_join_expressions_actions_step)); @@ -1175,7 +1176,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ output_node = &cast_actions_dag->addCast(*output_node, cast_type, output_node->result_name); } - cast_actions_dag->projectInput(); + cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN USING columns"); @@ -1570,7 +1571,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ array_join_column_names.insert(array_join_column_identifier); auto & array_join_expression_column = array_join_expression->as(); - auto expression_dag_index_nodes = actions_visitor.visit(array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); + auto expression_dag_index_nodes = actions_visitor.visit(*array_join_action_dag, array_join_expression_column.getExpressionOrThrow()); for (auto & expression_dag_index_node : expression_dag_index_nodes) { @@ -1580,7 +1581,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ } } - array_join_action_dag->projectInput(); + array_join_action_dag->appendInputsForUnusedColumns(plan.getCurrentDataStream().header); join_tree_query_plan.actions_dags.push_back(array_join_action_dag); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index c410b04f209..84efdd21336 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -183,7 +183,7 @@ const ActionsDAG::Node * appendExpression( const JoinNode & join_node) { PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(dag, expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*dag, expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", @@ -603,7 +603,7 @@ JoinClausesAndActions buildJoinClausesAndActions( { auto mixed_join_expressions_actions = std::make_shared(mixed_table_expression_columns); PlannerActionsVisitor join_expression_visitor(planner_context); - auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(*mixed_join_expressions_actions, join_expression); if (join_expression_dag_node_raw_pointers.size() != 1) throw Exception( ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage()); diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 4a74bf413d3..18a6d297838 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -213,14 +213,14 @@ StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptio return {limits, leaf_limits}; } -ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, +ActionsDAG buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context) { - ActionsDAGPtr action_dag = std::make_shared(input_columns); + ActionsDAG action_dag(input_columns); PlannerActionsVisitor actions_visitor(planner_context); auto expression_dag_index_nodes = actions_visitor.visit(action_dag, expression_node); - action_dag->getOutputs() = std::move(expression_dag_index_nodes); + action_dag.getOutputs() = std::move(expression_dag_index_nodes); return action_dag; } @@ -443,7 +443,7 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, auto filter_actions_dag = std::make_shared(); PlannerActionsVisitor actions_visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = actions_visitor.visit(filter_actions_dag, filter_query_tree); + auto expression_nodes = actions_visitor.visit(*filter_actions_dag, filter_query_tree); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filter actions must return single output node. Actual {}", diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 4706f552c9d..3172847f053 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -47,7 +47,7 @@ StorageLimits buildStorageLimits(const Context & context, const SelectQueryOptio * Inputs are not used for actions dag outputs. * Only root query tree expression node is used as actions dag output. */ -ActionsDAGPtr buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, +ActionsDAG buildActionsDAGFromExpressionNode(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index c175cd516ac..537555afa2a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -176,8 +176,6 @@ static void appendExpression(ActionsDAGPtr & dag, const ActionsDAGPtr & expressi dag->mergeInplace(std::move(*expression->clone())); else dag = expression->clone(); - - dag->projectInput(false); } /// This function builds a common DAG which is a merge of DAGs from Filter and Expression steps chain. diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 4017670ad14..7eca8ed74f9 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -77,7 +77,7 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( AggregateProjectionInfo info; info.context = interpreter.getContext(); - info.before_aggregation = analysis_result.before_aggregation; + info.before_aggregation = analysis_result.before_aggregation->actions.clone(); info.keys = query_analyzer->aggregationKeys().getNames(); info.aggregates = query_analyzer->aggregates(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 887a95da60d..750bb5600b3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -850,8 +850,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_ static ActionsDAGPtr createProjection(const Block & header) { auto projection = std::make_shared(header.getNamesAndTypesList()); - projection->removeUnusedActions(header.getNames()); - projection->projectInput(); + // projection->removeUnusedActions(header.getNames()); return projection; } @@ -2010,6 +2009,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons Block cur_header = pipe.getHeader(); + bool project_inputs = result_projection != nullptr; auto append_actions = [&result_projection](ActionsDAGPtr actions) { if (!result_projection) @@ -2035,6 +2035,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result_projection) { + if (project_inputs) + result_projection->appendInputsForUnusedColumns(pipe.getHeader()); + auto projection_actions = std::make_shared(result_projection); pipe.addSimpleTransform([&](const Block & header) { diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index e6c2bcec2c8..7945b3999c1 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -178,7 +178,7 @@ void AddingDefaultsTransform::transform(Chunk & chunk) auto dag = evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), columns, context, false); if (dag) { - auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + auto actions = std::make_shared(std::move(dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes), true); actions->execute(evaluate_block); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index cd706dab9ae..ea1d9cd8131 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7050,7 +7050,7 @@ ActionDAGNodes MergeTreeData::getFiltersForPrimaryKeyAnalysis(const InterpreterS filter_nodes.nodes.push_back(&additional_filter_info->actions->findInOutputs(additional_filter_info->column_name)); if (before_where) - filter_nodes.nodes.push_back(&before_where->findInOutputs(where_column_name)); + filter_nodes.nodes.push_back(&before_where->actions.findInOutputs(where_column_name)); return filter_nodes; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 735f51e1f32..9625c13197c 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -273,7 +273,6 @@ void StorageMaterializedView::read( * They may be added in case of distributed query with JOIN. * In that case underlying table returns joined columns as well. */ - converting_actions->projectInput(false); auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); converting_step->setStepDescription("Convert target table structure to MaterializedView structure"); query_plan.addStep(std::move(converting_step)); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 4c678a1228b..bb27715ddd1 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -964,7 +964,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo } PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - actions_visitor.visit(filter_actions_dag, column_node); + actions_visitor.visit(*filter_actions_dag, column_node); } column_names_as_aliases = filter_actions_dag->getRequiredColumnsNames(); if (column_names_as_aliases.empty()) @@ -1513,7 +1513,7 @@ void ReadFromMerge::convertAndFilterSourceStream( query_analysis_pass.run(query_tree, local_context); PlannerActionsVisitor actions_visitor(modified_query_info.planner_context, false /*use_column_identifier_as_action_node_name*/); - const auto & nodes = actions_visitor.visit(actions_dag, query_tree); + const auto & nodes = actions_visitor.visit(*actions_dag, query_tree); if (nodes.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size()); From 2de54fa4d304ea1db42e7c4cc608bbc236b39d93 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 13 Jun 2024 12:21:49 +0100 Subject: [PATCH 07/61] incorporate suggestion from azat --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index e7d95d8db72..5720a8a69e9 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -16,7 +16,7 @@ function check_refcnt_for_table() -- queue may hold the parts lock for awhile as well system stop pulling replication log $table; " - $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(200)" + $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into $table select number, number%4 from numbers(2000)" local query_id query_id="$table-$(random_str 10)" @@ -35,7 +35,7 @@ function check_refcnt_for_table() ) # Notes: - # - query may sleep 0.1*(200/4)=5 seconds maximum, it is enough to check system.parts + # - query may sleep 0.1*(2000/4)=5 seconds maximum, it is enough to check system.parts # - "part = 1" condition should prune all parts except first # - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" & From f82c173c209e2cc7b2e8aa3e9ad134e86c4c02cf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 13 Jun 2024 16:23:35 +0000 Subject: [PATCH 08/61] Fixing some tests. --- src/Interpreters/ActionsDAG.cpp | 31 +++++++++++++++++-------- src/Interpreters/ExpressionAnalyzer.cpp | 23 +++++++++++------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index da53f197fd8..92d8e3ace9e 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1149,6 +1149,21 @@ void ActionsDAG::project(const NamesWithAliases & projection) removeUnusedActions(); } +static void appendInputsFromNamesMap( + ActionsDAG & dag, + const ColumnsWithTypeAndName & source_columns, + const std::unordered_map> & names_map) +{ + for (const auto & [_, positions] : names_map) + { + for (auto pos : positions) + { + const auto & col = source_columns[pos]; + dag.addInput(col.name, col.type); + } + } +} + void ActionsDAG::appendInputsForUnusedColumns(const Block & sample_block) { std::unordered_map> names_map; @@ -1166,14 +1181,7 @@ void ActionsDAG::appendInputsForUnusedColumns(const Block & sample_block) positions.pop_front(); } - for (const auto & [_, positions] : names_map) - { - for (auto pos : positions) - { - const auto & col = sample_block.getByPosition(pos); - addInput(col.name, col.type); - } - } + appendInputsFromNamesMap(*this, sample_block.getColumnsWithTypeAndName(), names_map); } bool ActionsDAG::tryRestoreColumn(const std::string & column_name) @@ -1426,7 +1434,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( FunctionOverloadResolverPtr func_builder_materialize = std::make_unique(std::make_shared()); - std::map> inputs; + std::unordered_map> inputs; if (mode == MatchColumnsMode::Name) { size_t input_nodes_size = actions_dag->inputs.size(); @@ -1542,7 +1550,10 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( } actions_dag->outputs.swap(projection); - actions_dag->removeUnusedActions(); + actions_dag->removeUnusedActions(false); + + if (mode == MatchColumnsMode::Name) + appendInputsFromNamesMap(*actions_dag, source, inputs); return actions_dag; } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index c53cdd0d2ed..b4fa852916e 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1789,7 +1789,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool remove_un ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool remove_unused_result, CompileExpressions compile_expressions) { return std::make_shared( - getActionsDAG(add_aliases, remove_unused_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions), remove_unused_result); + getActionsDAG(add_aliases, remove_unused_result), ExpressionActionsSettings::fromContext(getContext(), compile_expressions), add_aliases && remove_unused_result); } ActionsDAGPtr ExpressionAnalyzer::getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs) @@ -1850,14 +1850,16 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ssize_t where_step_num = -1; ssize_t having_step_num = -1; + ActionsAndFlagsPtr prewhere_dag_and_flags; + auto finalize_chain = [&](ExpressionActionsChain & chain) -> ColumnsWithTypeAndName { if (prewhere_step_num >= 0) { ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); - auto required_columns_ = prewhere_info->prewhere_actions->getRequiredColumnsNames(); - NameSet required_source_columns(required_columns_.begin(), required_columns_.end()); + auto prewhere_required_columns = prewhere_dag_and_flags->actions.getRequiredColumnsNames(); + NameSet required_source_columns(prewhere_required_columns.begin(), prewhere_required_columns.end()); /// Add required columns to required output in order not to remove them after prewhere execution. /// TODO: add sampling and final execution to common chain. for (const auto & column : additional_required_columns_after_prewhere) @@ -1869,6 +1871,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); + if (prewhere_dag_and_flags) + { + auto dag = std::make_shared(std::move(prewhere_dag_and_flags->actions)); + prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); + } + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); auto res = chain.getLastStep().getResultColumns(); @@ -1919,20 +1927,19 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( filter_info->do_remove_column = true; } - if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage)) + if (prewhere_dag_and_flags = query_analyzer.appendPrewhere(chain, !first_stage); prewhere_dag_and_flags) { /// Prewhere is always the first one. prewhere_step_num = 0; - auto dag = std::make_shared(std::move(actions->actions)); - prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); - if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings)) + if (allowEarlyConstantFolding(prewhere_dag_and_flags->actions, settings)) { Block before_prewhere_sample = source_header; if (sanitizeBlock(before_prewhere_sample)) { + auto dag = prewhere_dag_and_flags->actions.clone(); ExpressionActions( - prewhere_info->prewhere_actions, + dag, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample); auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName()); /// If the filter column is a constant, record it. From 3f7286dc51bcffe499f1da45fdfe75ae4504aa3c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 13 Jun 2024 18:08:00 +0000 Subject: [PATCH 09/61] Fixing build. --- src/Planner/PlannerExpressionAnalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 2e597a7c466..738b8c1e9d5 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -48,7 +48,7 @@ std::optional analyzeFilter(const QueryTreeNodePtr & filte result.filter_actions = std::make_shared(); result.filter_actions->actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context); - const auto * output = result.filter_actions->getOutputs().at(0); + const auto * output = result.filter_actions->actions.getOutputs().at(0); if (output->column && ConstantFilterDescription(*output->column).always_true) return {}; From c349df44af8fcdf0e39fc85dab346f463832f5ee Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 13 Jun 2024 18:14:13 +0000 Subject: [PATCH 10/61] Fixing some tests. --- src/Interpreters/ActionsVisitor.cpp | 5 ++--- src/Interpreters/ActionsVisitor.h | 3 ++- src/Interpreters/ExpressionActions.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 -- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b292d02ccaa..89de76d2815 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -532,9 +532,8 @@ std::vector ActionsMatcher::Data::getAllColumnNames() const ScopeStack::ScopeStack(ActionsDAG actions_dag, ContextPtr context_) : WithContext(context_) { - ScopeStack::Level tmp; - tmp.actions_dag = std::move(actions_dag); - auto & level = stack.emplace_back(std::move(tmp)); + auto & level = stack.emplace_back(); + level.actions_dag = std::move(actions_dag); level.index = std::make_unique(level.actions_dag.getOutputs()); for (const auto & node : level.actions_dag.getOutputs()) diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index e13bd82be06..7e3b7fcb6bb 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -53,7 +54,7 @@ struct ScopeStack : WithContext Level(Level &&); }; - using Levels = std::vector; + using Levels = std::deque; Levels stack; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d25d5b53226..e19c4b0cd20 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -762,7 +762,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run, { block.clear(); } - if (allow_duplicates_in_input) + else if (allow_duplicates_in_input) { /// This case is the same as when the input is projected /// since we do not need any input columns. diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 91bfa863bd9..927bafe4bfb 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -386,8 +386,6 @@ Chain InterpreterInsertQuery::buildPreSinkChain( auto adding_missing_defaults_actions = std::make_shared(adding_missing_defaults_dag); - std::cerr << adding_missing_defaults_actions->dumpActions() << std::endl; - /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) out.addSource(std::make_shared(query_sample_block, adding_missing_defaults_actions)); From 57e84c0344119d1628eed1a5e6f048cae0505b5c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 14 Jun 2024 09:40:11 +0000 Subject: [PATCH 11/61] Fixing style. --- src/Interpreters/InterpreterSelectQuery.cpp | 1 - src/Planner/Planner.cpp | 3 +-- src/Planner/PlannerJoinTree.cpp | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 51d602b0bc8..68967f331f0 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -124,7 +124,6 @@ namespace ErrorCodes extern const int UNKNOWN_IDENTIFIER; extern const int BAD_ARGUMENTS; extern const int SUPPORT_IS_DISABLED; - extern const int NOT_FOUND_COLUMN_IN_BLOCK; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 8e8183c0762..5496a7e4257 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -99,7 +99,6 @@ namespace ErrorCodes extern const int TOO_DEEP_SUBQUERIES; extern const int NOT_IMPLEMENTED; extern const int SUPPORT_IS_DISABLED; - extern const int NOT_FOUND_COLUMN_IN_BLOCK; } namespace @@ -336,7 +335,7 @@ void addExpressionStep(QueryPlan & query_plan, { auto actions = expression_actions->actions.clone(); if (expression_actions->project_input) - actions->appendInputsForUnusedColumns( query_plan.getCurrentDataStream().header); + actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); result_actions_to_execute.push_back(actions); auto expression_step = std::make_unique(query_plan.getCurrentDataStream(), actions); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index fc3cd16f8ed..43b223172e6 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -81,7 +81,6 @@ namespace ErrorCodes extern const int TOO_MANY_COLUMNS; extern const int UNSUPPORTED_METHOD; extern const int BAD_ARGUMENTS; - extern const int NOT_FOUND_COLUMN_IN_BLOCK; } namespace @@ -1133,7 +1132,7 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP } } - cast_actions_dag->appendInputsForUnusedColumns( plan_to_add_cast.getCurrentDataStream().header); + cast_actions_dag->appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header); auto cast_join_columns_step = std::make_unique(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag)); cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable"); plan_to_add_cast.addStep(std::move(cast_join_columns_step)); From 9d416da6010db16d0cde24d0c3fba1fe4dab97a1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 14 Jun 2024 10:23:26 +0000 Subject: [PATCH 12/61] Another fix. --- src/Interpreters/ActionsDAG.cpp | 3 --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 4 ---- 2 files changed, 7 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 92d8e3ace9e..c01142ff01a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1552,9 +1552,6 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( actions_dag->outputs.swap(projection); actions_dag->removeUnusedActions(false); - if (mode == MatchColumnsMode::Name) - appendInputsFromNamesMap(*actions_dag, source, inputs); - return actions_dag; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8ee3654067c..5f94367b46d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -2010,7 +2010,6 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons Block cur_header = pipe.getHeader(); - bool project_inputs = result_projection != nullptr; auto append_actions = [&result_projection](ActionsDAGPtr actions) { if (!result_projection) @@ -2036,9 +2035,6 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (result_projection) { - if (project_inputs) - result_projection->appendInputsForUnusedColumns(pipe.getHeader()); - auto projection_actions = std::make_shared(result_projection); pipe.addSimpleTransform([&](const Block & header) { From fc6c714c11ee0ba67b3ffa3bbcc4a2cb79772322 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 14 Jun 2024 10:54:30 +0000 Subject: [PATCH 13/61] Fix some crashes. --- src/Interpreters/ExpressionAnalyzer.cpp | 9 ++++++--- src/Interpreters/InterpreterSelectQuery.cpp | 10 +++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b4fa852916e..2e7bc69402f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -927,12 +927,15 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin( { const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); - auto tmp_actions = std::make_shared(std::move(converting_join_columns->actions)); - JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, tmp_actions); - converting_join_columns->actions = std::move(*tmp_actions); + ActionsDAGPtr converting_actions; + if (converting_join_columns) + converting_actions = std::make_shared(std::move(converting_join_columns->actions)); + + JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_actions); if (converting_join_columns) { + converting_join_columns->actions = std::move(*converting_actions); chain.steps.push_back(std::make_unique(converting_join_columns)); chain.addStep(); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 68967f331f0..a1ad10ff623 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2770,9 +2770,13 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions void InterpreterSelectQuery::executeTotalsAndHaving( QueryPlan & query_plan, bool has_having, const ActionsAndFlagsPtr & expression, bool remove_filter, bool overflow_row, bool final) { - auto dag = expression->actions.clone(); - if (expression->project_input) - dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + ActionsDAGPtr dag; + if (expression) + { + dag = expression->actions.clone(); + if (expression->project_input) + dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header); + } const Settings & settings = context->getSettingsRef(); From 3bc720e7a7824e81137504e9eb684e268d7654e1 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 14 Jun 2024 15:33:19 +0200 Subject: [PATCH 14/61] Fix nonsense docs --- .../reference/stddevpop.md | 2 +- .../aggregate-functions/reference/varpop.md | 81 +++---------- .../reference/varpopstable.md | 52 +++++++++ .../aggregate-functions/reference/varsamp.md | 106 ++++-------------- .../reference/varsampstable.md | 63 +++++++++++ 5 files changed, 156 insertions(+), 148 deletions(-) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/varpopstable.md create mode 100644 docs/en/sql-reference/aggregate-functions/reference/varsampstable.md diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md index d2406197ecc..ece9f722c45 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md @@ -25,7 +25,7 @@ stddevPop(x) **Returned value** -Square root of standard deviation of `x`. [Float64](../../data-types/float.md). +- Square root of standard deviation of `x`. [Float64](../../data-types/float.md). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index fcabeb4c6a8..be57115f146 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -4,30 +4,29 @@ slug: "/en/sql-reference/aggregate-functions/reference/varpop" sidebar_position: 32 --- -This page covers the `varPop` and `varPopStable` functions available in ClickHouse. - ## varPop -Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. +Calculates the population variance. + +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`varPopStable`](../reference/varpopstable.md) function. It works slower but provides a lower computational error. +::: **Syntax** ```sql -covarPop(x, y) +varPop(x) ``` +Alias: `VAR_POP`. + **Parameters** -- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) -- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns an integer of type `Float64`. - -**Implementation details** - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). +- Returns the population variance of `x`. [`Float64`](../../data-types/float.md). **Example** @@ -37,69 +36,21 @@ Query: DROP TABLE IF EXISTS test_data; CREATE TABLE test_data ( - x Int32, - y Int32 + x UInt8, ) ENGINE = Memory; -INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8); +INSERT INTO test_data VALUES (3), (3), (3), (4), (4), (5), (5), (7), (11), (15); SELECT - covarPop(x, y) AS covar_pop + varPop(x) AS var_pop FROM test_data; ``` Result: ```response -3 -``` - -## varPopStable - -Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations. - -**Syntax** - -```sql -covarPopStable(x, y) -``` - -**Parameters** - -- `x`: The first data column. [String literal](../../syntax#syntax-string-literal) -- `y`: The second data column. [Expression](../../syntax#syntax-expressions) - -**Returned value** - -Returns an integer of type `Float64`. - -**Implementation details** - -Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. - -**Example** - -Query: - -```sql -DROP TABLE IF EXISTS test_data; -CREATE TABLE test_data -( - x Int32, - y Int32 -) -ENGINE = Memory; - -INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8); - -SELECT - covarPopStable(x, y) AS covar_pop_stable -FROM test_data; -``` - -Result: - -```response -0.5999999999999999 +┌─var_pop─┐ +│ 14.4 │ +└─────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md new file mode 100644 index 00000000000..811506c2fa8 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md @@ -0,0 +1,52 @@ +--- +title: "varPopStable" +slug: "/en/sql-reference/aggregate-functions/reference/varpopstable" +sidebar_position: 32 +--- + +## varPopStable + +Returns the population variance. Unlike [`varPop`](../reference/varpop.md), this function uses a [numerically stable](https://en.wikipedia.org/wiki/Numerical_stability) algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varPopStable(x) +``` + +Alias: `VAR_POP_STABLE`. + +**Parameters** + +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the population variance of `x`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x UInt8, +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (3),(3),(3),(4),(4),(5),(5),(7),(11),(15); + +SELECT + varPopStable(x) AS var_pop_stable +FROM test_data; +``` + +Result: + +```response +┌─var_pop_stable─┐ +│ 14.4 │ +└────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index be669a16ae8..4182265cff2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -4,33 +4,37 @@ slug: /en/sql-reference/aggregate-functions/reference/varsamp sidebar_position: 33 --- -This page contains information on the `varSamp` and `varSampStable` ClickHouse functions. - ## varSamp Calculate the sample variance of a data set. +:::note +This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`varSampStable`](../reference/varsampstable.md) function. It works slower but provides a lower computational error. +::: + **Syntax** ```sql -varSamp(expr) +varSamp(x) ``` +Alias: `VAR_SAMP`. + **Parameters** -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns a Float64 value representing the sample variance of the input data set. +- Returns the sample variance of the input data set `x`. [Float64](../../data-types/float.md). **Implementation details** -The `varSamp()` function calculates the sample variance using the following formula: +The `varSamp` function calculates the sample variance using the following formula: -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ Where: @@ -38,91 +42,29 @@ Where: - `mean(x)` is the arithmetic mean of the data set. - `n` is the number of data points in the data set. -The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable). +The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use [`varPop`](../reference/varpop.md) instead. **Example** Query: ```sql -CREATE TABLE example_table +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data ( - id UInt64, - value Float64 + x Float64 ) -ENGINE = MergeTree -ORDER BY id; +ENGINE = Memory; -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); -SELECT varSamp(value) FROM example_table; +SELECT round(varSamp(x),3) AS var_samp FROM test_data; ``` Response: ```response -0.8650000000000091 -``` - -## varSampStable - -Calculate the sample variance of a data set using a numerically stable algorithm. - -**Syntax** - -```sql -varSampStable(expr) -``` - -**Parameters** - -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) - -**Returned value** - -The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set. - -**Implementation details** - -The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function): - -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` - -Where: -- `x` is each individual data point in the data set. -- `mean(x)` is the arithmetic mean of the data set. -- `n` is the number of data points in the data set. - -The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. - -Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead. - -**Example** - -Query: - -```sql -CREATE TABLE example_table -( - id UInt64, - value Float64 -) -ENGINE = MergeTree -ORDER BY id; - -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); - -SELECT varSampStable(value) FROM example_table; -``` - -Response: - -```response -0.865 -``` - -This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic. +┌─var_samp─┐ +│ 0.865 │ +└──────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md new file mode 100644 index 00000000000..04650b50666 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md @@ -0,0 +1,63 @@ +--- +title: "varSampStable" +slug: /en/sql-reference/aggregate-functions/reference/varsampstable +sidebar_position: 33 +--- + +## varSampStable + +Calculate the sample variance of a data set. Unlike [`varSamp`](../reference/varsamp.md), this function uses a numerically stable algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varSampStable(x) +``` + +Alias: `VAR_SAMP_STABLE` + +**Parameters** + +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the sample variance of the input data set. [Float64](../../data-types/float.md). + +**Implementation details** + +The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](../reference/varsamp.md): + +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ + +Where: +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Float64 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); + +SELECT round(varSampStable(x),3) AS var_samp_stable FROM test_data; +``` + +Response: + +```response +┌─var_samp_stable─┐ +│ 0.865 │ +└─────────────────┘ +``` From 398b3cc22f9a56cfdb2c97d277626b4755418a9e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 14 Jun 2024 15:49:39 +0200 Subject: [PATCH 15/61] Update tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh Co-authored-by: Azat Khuzhin --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 5720a8a69e9..1e268fa53d3 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -35,7 +35,7 @@ function check_refcnt_for_table() ) # Notes: - # - query may sleep 0.1*(2000/4)=5 seconds maximum, it is enough to check system.parts + # - query may sleep 0.1*(2000/4)=50 seconds maximum, it is enough to check system.parts # - "part = 1" condition should prune all parts except first # - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier $CLICKHOUSE_CLIENT "${args[@]}" -q "select sleepEachRow(0.1) from $table where part = 1" & From c503f9b51ec78ec8f23be3ffc8721060981b390f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 14 Jun 2024 15:50:20 +0200 Subject: [PATCH 16/61] Update tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh --- tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 1e268fa53d3..bd018018789 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -58,7 +58,7 @@ function check_refcnt_for_table() $CLICKHOUSE_CLIENT -q "select table, name, refcount>=6 from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and refcount >= 3" # Kill the query gracefully. - kill -INT $PID ||: + kill -INT $PID wait $PID grep -F Exception "$log_file" | grep -v -F QUERY_WAS_CANCELLED rm -f "${log_file:?}" From 5a577915aba1f927124d340d0b27da39a9db5bad Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 14 Jun 2024 15:43:43 +0000 Subject: [PATCH 17/61] Fixing more tests. --- src/Interpreters/ActionsDAG.cpp | 4 ++-- src/Interpreters/ExpressionAnalyzer.cpp | 9 ++++----- src/Storages/StorageMaterializedView.cpp | 1 + .../02576_predicate_push_down_sorting_fix.reference | 9 +++++---- .../0_stateless/03095_window_functions_qualify.reference | 5 +++-- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index c01142ff01a..6e11dc96769 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2754,8 +2754,8 @@ void ActionsDAG::removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions std::unordered_set used_inputs; for (const auto * input : inputs) { - if (removes_filter && input == predicate) - continue; + // if (removes_filter && input == predicate) + // continue; used_inputs.insert(input); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2e7bc69402f..d0f037ad9e1 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -928,13 +928,11 @@ JoinPtr SelectQueryExpressionAnalyzer::appendJoin( const ColumnsWithTypeAndName & left_sample_columns = chain.getLastStep().getResultColumns(); ActionsDAGPtr converting_actions; - if (converting_join_columns) - converting_actions = std::make_shared(std::move(converting_join_columns->actions)); - JoinPtr join = makeJoin(*syntax->ast_join, left_sample_columns, converting_actions); - if (converting_join_columns) + if (converting_actions) { + converting_join_columns = std::make_shared(); converting_join_columns->actions = std::move(*converting_actions); chain.steps.push_back(std::make_unique(converting_join_columns)); chain.addStep(); @@ -1720,9 +1718,9 @@ ActionsAndFlagsPtr SelectQueryExpressionAnalyzer::appendProjectResult(Expression for (const auto & column : required_result_columns) result_columns.emplace_back(column, std::string{}); actions->actions.project(result_columns); - actions->project_input = actions->projected_output = true; } + actions->project_input = actions->projected_output = true; return actions; } @@ -1878,6 +1876,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( { auto dag = std::make_shared(std::move(prewhere_dag_and_flags->actions)); prewhere_info = std::make_shared(std::move(dag), query.prewhere()->getColumnName()); + prewhere_dag_and_flags.reset(); } finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 9625c13197c..316f398b476 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -273,6 +273,7 @@ void StorageMaterializedView::read( * They may be added in case of distributed query with JOIN. * In that case underlying table returns joined columns as well. */ + converting_actions->removeUnusedActions(); auto converting_step = std::make_unique(query_plan.getCurrentDataStream(), converting_actions); converting_step->setStepDescription("Convert target table structure to MaterializedView structure"); query_plan.addStep(std::move(converting_step)); diff --git a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference index dd107065380..d391c365ea7 100644 --- a/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference +++ b/tests/queries/0_stateless/02576_predicate_push_down_sorting_fix.reference @@ -1,10 +1,11 @@ Expression ((Project names + (Projection + ))) Header: number UInt64 Actions: INPUT : 0 -> __table2.number UInt64 : 0 - ALIAS __table2.number :: 0 -> number UInt64 : 1 - ALIAS number :: 1 -> __table1.number UInt64 : 0 - ALIAS __table1.number :: 0 -> number UInt64 : 1 -Positions: 1 + INPUT :: 1 -> ignore(2_UInt8) UInt8 : 1 + ALIAS __table2.number :: 0 -> number UInt64 : 2 + ALIAS number :: 2 -> __table1.number UInt64 : 0 + ALIAS __table1.number :: 0 -> number UInt64 : 2 +Positions: 2 Sorting (Sorting for ORDER BY) Header: ignore(2_UInt8) UInt8 __table2.number UInt64 diff --git a/tests/queries/0_stateless/03095_window_functions_qualify.reference b/tests/queries/0_stateless/03095_window_functions_qualify.reference index c74a212428b..9ffbe64f85e 100644 --- a/tests/queries/0_stateless/03095_window_functions_qualify.reference +++ b/tests/queries/0_stateless/03095_window_functions_qualify.reference @@ -48,8 +48,9 @@ Positions: 2 0 INPUT :: 1 -> count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) UInt64 : 1 INPUT : 2 -> count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) UInt64 : 2 COLUMN Const(UInt8) -> 4_UInt8 UInt8 : 3 - FUNCTION equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) :: 2, 4_UInt8 :: 3) -> equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)), 4_UInt8) UInt8 : 4 - Positions: 4 0 1 + INPUT :: 3 -> modulo(__table1.number, 3_UInt8) UInt8 : 4 + FUNCTION equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)) :: 2, 4_UInt8 :: 3) -> equals(count() OVER (PARTITION BY modulo(__table1.number, 3_UInt8)), 4_UInt8) UInt8 : 5 + Positions: 5 0 1 Window (Window step for window \'PARTITION BY modulo(__table1.number, 3_UInt8)\') Header: modulo(__table1.number, 3_UInt8) UInt8 __table1.number UInt64 From f4b01dd132156bb48a267c2122eaf9960237be12 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 15 Jun 2024 08:42:07 +0200 Subject: [PATCH 18/61] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 64ff3e8e2cb..2191d877d75 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2847,7 +2847,9 @@ variantElement variantType varint varpop +varpopstable varsamp +varsampstable vectorized vectorscan vendoring From b845a242cc1f616eca3919a9d509afabc63b4f7f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 17 Jun 2024 11:06:22 +0000 Subject: [PATCH 19/61] Fixing build. --- src/Interpreters/tests/gtest_actions_visitor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/tests/gtest_actions_visitor.cpp b/src/Interpreters/tests/gtest_actions_visitor.cpp index 3de39ae6bfa..28e83306c53 100644 --- a/src/Interpreters/tests/gtest_actions_visitor.cpp +++ b/src/Interpreters/tests/gtest_actions_visitor.cpp @@ -31,7 +31,7 @@ TEST(ActionsVisitor, VisitLiteral) size_limits_for_set, size_t(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), std::make_shared(), false /* no_subqueries */, false /* no_makeset */, @@ -39,7 +39,7 @@ TEST(ActionsVisitor, VisitLiteral) info); ActionsVisitor(visitor_data).visit(ast); auto actions = visitor_data.getActions(); - ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), expect_type->getTypeId()); + ASSERT_EQ(actions.getResultColumns().back().type->getTypeId(), expect_type->getTypeId()); } TEST(ActionsVisitor, VisitLiteralWithType) @@ -61,7 +61,7 @@ TEST(ActionsVisitor, VisitLiteralWithType) size_limits_for_set, size_t(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), std::make_shared(), false /* no_subqueries */, false /* no_makeset */, @@ -69,5 +69,5 @@ TEST(ActionsVisitor, VisitLiteralWithType) info); ActionsVisitor(visitor_data).visit(ast); auto actions = visitor_data.getActions(); - ASSERT_EQ(actions->getResultColumns().back().type->getTypeId(), date_type->getTypeId()); + ASSERT_EQ(actions.getResultColumns().back().type->getTypeId(), date_type->getTypeId()); } From 84f70ba664c6d9562e0b1bbfa62ffea432f505a6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 17 Jun 2024 16:09:55 +0000 Subject: [PATCH 20/61] Trying to fix more tests. --- src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/ActionsVisitor.h | 2 +- src/Interpreters/ExpressionActions.h | 7 ++++++- src/Interpreters/ExpressionAnalyzer.cpp | 4 +++- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 89de76d2815..9e56d740e5e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -394,7 +394,7 @@ Block createBlockForSet( ScopeStack::Level::Level() = default; ScopeStack::Level::~Level() = default; -ScopeStack::Level::Level(Level &&) = default; +ScopeStack::Level::Level(Level &&) noexcept = default; FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets) diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 7e3b7fcb6bb..46d2d60e461 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -51,7 +51,7 @@ struct ScopeStack : WithContext ~Level(); Level(); - Level(Level &&); + Level(Level &&) noexcept; }; using Levels = std::deque; diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 7c6af41c04d..c762abc1149 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -288,10 +288,15 @@ struct ExpressionActionsChain : WithContext Step & lastStep(const NamesAndTypesList & columns) { if (steps.empty()) - steps.emplace_back(std::make_unique(std::make_shared(ActionsDAG(columns), false, false))); + return addStep(columns); return *steps.back(); } + Step & addStep(const NamesAndTypesList & columns) + { + return *steps.emplace_back(std::make_unique(std::make_shared(ActionsDAG(columns), false, false))); + } + std::string dumpChain() const; }; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d0f037ad9e1..ff65475d9af 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1466,7 +1466,9 @@ void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAG & before_a else result_columns.push_back(source_column); } - ExpressionActionsChain::Step & step = chain.lastStep(before_aggregation.getNamesAndTypesList()); + auto required_output = chain.getLastStep().required_output; + ExpressionActionsChain::Step & step = chain.addStep(before_aggregation.getNamesAndTypesList()); + step.required_output = std::move(required_output); step.actions()->actions = std::move(*ActionsDAG::makeConvertingActions(source_columns, result_columns, ActionsDAG::MatchColumnsMode::Position)); } From a2f3bdbe83e59d3bbb11fb994cd6905994b46385 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 17 Jun 2024 10:09:21 +0000 Subject: [PATCH 21/61] Remove unused headers --- src/Common/SystemLogBase.h | 2 -- src/Interpreters/AsynchronousMetricLog.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 95906c63349..9441578c9bc 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -1,9 +1,7 @@ #pragma once -#include #include #include -#include #include #include diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 739b2aa5b56..2ce1d929592 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -8,8 +8,6 @@ #include #include -#include -#include #include From 0ae0ca1a95756e9d784b5a8e4b2d5d93a0e1a0cb Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 17 Jun 2024 15:02:50 +0000 Subject: [PATCH 22/61] Add system.error_log that flushes periodically system.errors to disk --- programs/server/config.xml | 12 ++++ src/Common/ErrorCodes.cpp | 1 - src/Common/ErrorCodes.h | 3 +- src/Common/SystemLogBase.cpp | 1 + src/Common/SystemLogBase.h | 3 +- src/Interpreters/ErrorLog.cpp | 121 +++++++++++++++++++++++++++++++++ src/Interpreters/ErrorLog.h | 60 ++++++++++++++++ src/Interpreters/SystemLog.cpp | 12 ++++ src/Interpreters/SystemLog.h | 2 + 9 files changed, 212 insertions(+), 3 deletions(-) create mode 100644 src/Interpreters/ErrorLog.cpp create mode 100644 src/Interpreters/ErrorLog.h diff --git a/programs/server/config.xml b/programs/server/config.xml index b7a4b8dd0e9..e3228813482 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1155,6 +1155,18 @@ false + + + system + error_log
+ 7500 + 1048576 + 8192 + 524288 + 1000 + false +
+ + system error_log
diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example index 9fc188e97aa..acc21fff5bb 100644 --- a/programs/server/config.yaml.example +++ b/programs/server/config.yaml.example @@ -726,6 +726,13 @@ metric_log: flush_interval_milliseconds: 7500 collect_interval_milliseconds: 1000 +# Error log contains rows with current values of errors collected with "collect_interval_milliseconds" interval. +error_log: + database: system + table: error_log + flush_interval_milliseconds: 7500 + collect_interval_milliseconds: 1000 + # Asynchronous metric log contains values of metrics from # system.asynchronous_metrics. asynchronous_metric_log: diff --git a/tests/integration/test_MemoryTracking/configs/no_system_log.xml b/tests/integration/test_MemoryTracking/configs/no_system_log.xml index 3218dae4dc7..7d80c7fbf78 100644 --- a/tests/integration/test_MemoryTracking/configs/no_system_log.xml +++ b/tests/integration/test_MemoryTracking/configs/no_system_log.xml @@ -5,6 +5,7 @@ + diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 8363d42f9af..d8662fad011 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1485,6 +1485,7 @@ def test_backup_all(exclude_system_log_tables): "processors_profile_log", "asynchronous_insert_log", "backup_log", + "error_log", ] exclude_from_backup += ["system." + table_name for table_name in log_tables] diff --git a/tests/integration/test_config_xml_full/configs/config.d/error_log.xml b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml new file mode 100644 index 00000000000..903d8699f5c --- /dev/null +++ b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml @@ -0,0 +1,8 @@ + + + system + error_log
+ 7500 + 1000 +
+
diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index 628e1432350..61aa0a5c724 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -756,6 +756,14 @@ 1000
+ + + system + error_log
+ 7500 + 1000 +
+