diff --git a/dbms/src/DataStreams/tests/expression_stream.cpp b/dbms/src/DataStreams/tests/expression_stream.cpp index b63d9d1f3b5..26eb8c856f1 100644 --- a/dbms/src/DataStreams/tests/expression_stream.cpp +++ b/dbms/src/DataStreams/tests/expression_stream.cpp @@ -39,7 +39,7 @@ try NamesAndTypesList source_columns = {{"number", std::make_shared()}}; auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, source_columns); - ExpressionAnalyzer analyzer(ast, syntax_result, context); + SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context); ExpressionActionsChain chain(context); analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain); diff --git a/dbms/src/DataStreams/tests/filter_stream.cpp b/dbms/src/DataStreams/tests/filter_stream.cpp index 4199ba40b53..d36cce7e909 100644 --- a/dbms/src/DataStreams/tests/filter_stream.cpp +++ b/dbms/src/DataStreams/tests/filter_stream.cpp @@ -44,7 +44,7 @@ try NamesAndTypesList source_columns = {{"number", std::make_shared()}}; auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, source_columns); - ExpressionAnalyzer analyzer(ast, syntax_result, context); + SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context); ExpressionActionsChain chain(context); analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain); diff --git a/dbms/src/Interpreters/ActionsVisitor.cpp b/dbms/src/Interpreters/ActionsVisitor.cpp index 523343a288e..c519d75b812 100644 --- a/dbms/src/Interpreters/ActionsVisitor.cpp +++ b/dbms/src/Interpreters/ActionsVisitor.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -19,8 +21,6 @@ #include #include -#include -#include #include #include #include @@ -228,346 +228,350 @@ const Block & ScopeStack::getSampleBlock() const return stack.back().actions->getSampleBlock(); } - -ActionsVisitor::ActionsVisitor( - const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions, - PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, - bool no_subqueries_, bool only_consts_, bool no_storage_or_local_, std::ostream * ostr_) -: context(context_), - set_size_limit(set_size_limit_), - subquery_depth(subquery_depth_), - source_columns(source_columns_), - prepared_sets(prepared_sets_), - subqueries_for_sets(subqueries_for_sets_), - no_subqueries(no_subqueries_), - only_consts(only_consts_), - no_storage_or_local(no_storage_or_local_), - visit_depth(0), - ostr(ostr_), - actions_stack(actions, context) +struct CachedColumnName { + String cached; + + const String & get(const ASTPtr & ast) + { + if (cached.empty()) + cached = ast->getColumnName(); + return cached; + } +}; + +bool ActionsMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child) +{ + /// Visit children themself + if (node->as() || + node->as() || + node->as()) + return false; + + /// Do not go to FROM, JOIN, UNION. + if (child->as() || + child->as()) + return false; + + return true; } -void ActionsVisitor::visit(const ASTPtr & ast) +void ActionsMatcher::visit(const ASTPtr & ast, Data & data) { - DumpASTNode dump(*ast, ostr, visit_depth, "getActions"); + if (const auto * identifier = ast->as()) + visit(*identifier, ast, data); + else if (const auto * node = ast->as()) + visit(*node, ast, data); + else if (const auto * literal = ast->as()) + visit(*literal, ast, data); +} - String ast_column_name; - auto getColumnName = [&ast, &ast_column_name]() - { - if (ast_column_name.empty()) - ast_column_name = ast->getColumnName(); - - return ast_column_name; - }; - - /// If the result of the calculation already exists in the block. - if ((ast->as() || ast->as()) && actions_stack.getSampleBlock().has(getColumnName())) +void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr & ast, Data & data) +{ + CachedColumnName column_name; + if (data.hasColumn(column_name.get(ast))) return; - if (const auto * identifier = ast->as()) + if (!data.only_consts) { - if (!only_consts && !actions_stack.getSampleBlock().has(getColumnName())) - { - /// The requested column is not in the block. - /// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY. + /// The requested column is not in the block. + /// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY. - bool found = false; - for (const auto & column_name_type : source_columns) - if (column_name_type.name == getColumnName()) - found = true; + bool found = false; + for (const auto & column_name_type : data.source_columns) + if (column_name_type.name == column_name.get(ast)) + found = true; - if (found) - throw Exception("Column " + getColumnName() + " is not under aggregate function and not in GROUP BY.", - ErrorCodes::NOT_AN_AGGREGATE); + if (found) + throw Exception("Column " + column_name.get(ast) + " is not under aggregate function and not in GROUP BY.", + ErrorCodes::NOT_AN_AGGREGATE); - /// Special check for WITH statement alias. Add alias action to be able to use this alias. - if (identifier->prefer_alias_to_column_name && !identifier->alias.empty()) - actions_stack.addAction(ExpressionAction::addAliases({{identifier->name, identifier->alias}})); - } + /// Special check for WITH statement alias. Add alias action to be able to use this alias. + if (identifier.prefer_alias_to_column_name && !identifier.alias.empty()) + data.addAction(ExpressionAction::addAliases({{identifier.name, identifier.alias}})); } - else if (const auto * node = ast->as()) +} + +void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data) +{ + CachedColumnName column_name; + if (data.hasColumn(column_name.get(ast))) + return; + + if (node.name == "lambda") + throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION); + + /// Function arrayJoin. + if (node.name == "arrayJoin") { - if (node->name == "lambda") - throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION); + if (node.arguments->children.size() != 1) + throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH); - /// Function arrayJoin. - if (node->name == "arrayJoin") + ASTPtr arg = node.arguments->children.at(0); + visit(arg, data); + if (!data.only_consts) { - if (node->arguments->children.size() != 1) - throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH); + String result_name = column_name.get(ast); + data.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name)); + NameSet joined_columns; + joined_columns.insert(result_name); + data.addAction(ExpressionAction::arrayJoin(joined_columns, false, data.context)); + } - ASTPtr arg = node->arguments->children.at(0); - visit(arg); - if (!only_consts) + return; + } + + SetPtr prepared_set; + if (functionIsInOrGlobalInOperator(node.name)) + { + /// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything). + visit(node.arguments->children.at(0), data); + + if (!data.no_subqueries) + { + /// Transform tuple or subquery into a set. + prepared_set = makeSet(node, data); + } + else + { + if (!data.only_consts) { - String result_name = getColumnName(); - actions_stack.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name)); - NameSet joined_columns; - joined_columns.insert(result_name); - actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns, false, context)); - } + /// We are in the part of the tree that we are not going to compute. You just need to define types. + /// Do not subquery and create sets. We treat "IN" as "ignoreExceptNull" function. + data.addAction(ExpressionAction::applyFunction( + FunctionFactory::instance().get("ignoreExceptNull", data.context), + { node.arguments->children.at(0)->getColumnName() }, + column_name.get(ast))); + } return; } + } - SetPtr prepared_set; - if (functionIsInOrGlobalInOperator(node->name)) + /// A special function `indexHint`. Everything that is inside it is not calculated + /// (and is used only for index analysis, see KeyCondition). + if (node.name == "indexHint") + { + data.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( + ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared(), + column_name.get(ast)))); + return; + } + + if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name)) + return; + + /// Context object that we pass to function should live during query. + const Context & function_context = data.context.hasQueryContext() + ? data.context.getQueryContext() + : data.context; + + FunctionBuilderPtr function_builder; + try + { + function_builder = FunctionFactory::instance().get(node.name, function_context); + } + catch (DB::Exception & e) + { + auto hints = AggregateFunctionFactory::instance().getHints(node.name); + if (!hints.empty()) + e.addMessage("Or unknown aggregate function " + node.name + ". Maybe you meant: " + toString(hints)); + e.rethrow(); + } + + Names argument_names; + DataTypes argument_types; + bool arguments_present = true; + + /// If the function has an argument-lambda expression, you need to determine its type before the recursive call. + bool has_lambda_arguments = false; + + for (size_t arg = 0; arg < node.arguments->children.size(); ++arg) + { + auto & child = node.arguments->children[arg]; + auto child_column_name = child->getColumnName(); + + const auto * lambda = child->as(); + if (lambda && lambda->name == "lambda") { - /// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything). - visit(node->arguments->children.at(0)); + /// If the argument is a lambda expression, just remember its approximate type. + if (lambda->arguments->children.size() != 2) + throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!no_subqueries) + const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as(); + + if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") + throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); + + has_lambda_arguments = true; + argument_types.emplace_back(std::make_shared(DataTypes(lambda_args_tuple->arguments->children.size()))); + /// Select the name in the next cycle. + argument_names.emplace_back(); + } + else if (functionIsInOrGlobalInOperator(node.name) && arg == 1 && prepared_set) + { + ColumnWithTypeAndName column; + column.type = std::make_shared(); + + /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, + /// so that sets with the same literal representation do not fuse together (they can have different types). + if (!prepared_set->empty()) + column.name = getUniqueName(data.getSampleBlock(), "__set"); + else + column.name = child_column_name; + + if (!data.hasColumn(column.name)) { - /// Transform tuple or subquery into a set. - prepared_set = makeSet(node, actions_stack.getSampleBlock()); + column.column = ColumnSet::create(1, prepared_set); + + data.addAction(ExpressionAction::addColumn(column)); + } + + argument_types.push_back(column.type); + argument_names.push_back(column.name); + } + else + { + /// If the argument is not a lambda expression, call it recursively and find out its type. + visit(child, data); + std::string name = child_column_name; + if (data.hasColumn(name)) + { + argument_types.push_back(data.getSampleBlock().getByName(name).type); + argument_names.push_back(name); } else { - if (!only_consts) - { - /// We are in the part of the tree that we are not going to compute. You just need to define types. - /// Do not subquery and create sets. We treat "IN" as "ignoreExceptNull" function. - - actions_stack.addAction(ExpressionAction::applyFunction( - FunctionFactory::instance().get("ignoreExceptNull", context), - { node->arguments->children.at(0)->getColumnName() }, - getColumnName())); - } - return; + if (data.only_consts) + arguments_present = false; + else + throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); } } + } - /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see KeyCondition). - if (node->name == "indexHint") + if (data.only_consts && !arguments_present) + return; + + if (has_lambda_arguments && !data.only_consts) + { + function_builder->getLambdaArgumentTypes(argument_types); + + /// Call recursively for lambda expressions. + for (size_t i = 0; i < node.arguments->children.size(); ++i) { - actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( - ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared(), - getColumnName()))); - return; - } - - if (AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) - return; - - /// Context object that we pass to function should live during query. - const Context & function_context = context.hasQueryContext() - ? context.getQueryContext() - : context; - - FunctionBuilderPtr function_builder; - try - { - function_builder = FunctionFactory::instance().get(node->name, function_context); - } - catch (DB::Exception & e) - { - auto hints = AggregateFunctionFactory::instance().getHints(node->name); - if (!hints.empty()) - e.addMessage("Or unknown aggregate function " + node->name + ". Maybe you meant: " + toString(hints)); - e.rethrow(); - } - - Names argument_names; - DataTypes argument_types; - bool arguments_present = true; - - /// If the function has an argument-lambda expression, you need to determine its type before the recursive call. - bool has_lambda_arguments = false; - - for (size_t arg = 0; arg < node->arguments->children.size(); ++arg) - { - auto & child = node->arguments->children[arg]; - auto child_column_name = child->getColumnName(); + ASTPtr child = node.arguments->children[i]; const auto * lambda = child->as(); if (lambda && lambda->name == "lambda") { - /// If the argument is a lambda expression, just remember its approximate type. - if (lambda->arguments->children.size() != 2) - throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - + const DataTypeFunction * lambda_type = typeid_cast(argument_types[i].get()); const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as(); + const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children; + NamesAndTypesList lambda_arguments; - if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") - throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); - - has_lambda_arguments = true; - argument_types.emplace_back(std::make_shared(DataTypes(lambda_args_tuple->arguments->children.size()))); - /// Select the name in the next cycle. - argument_names.emplace_back(); - } - else if (functionIsInOrGlobalInOperator(node->name) && arg == 1 && prepared_set) - { - ColumnWithTypeAndName column; - column.type = std::make_shared(); - - /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, - /// so that sets with the same literal representation do not fuse together (they can have different types). - if (!prepared_set->empty()) - column.name = getUniqueName(actions_stack.getSampleBlock(), "__set"); - else - column.name = child_column_name; - - if (!actions_stack.getSampleBlock().has(column.name)) + for (size_t j = 0; j < lambda_arg_asts.size(); ++j) { - column.column = ColumnSet::create(1, prepared_set); + auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]); + if (!opt_arg_name) + throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - actions_stack.addAction(ExpressionAction::addColumn(column)); + lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]); } - argument_types.push_back(column.type); - argument_names.push_back(column.name); + data.actions_stack.pushLevel(lambda_arguments); + visit(lambda->arguments->children.at(1), data); + ExpressionActionsPtr lambda_actions = data.actions_stack.popLevel(); + + String result_name = lambda->arguments->children.at(1)->getColumnName(); + lambda_actions->finalize(Names(1, result_name)); + DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; + + Names captured; + Names required = lambda_actions->getRequiredColumns(); + for (const auto & required_arg : required) + if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end()) + captured.push_back(required_arg); + + /// We can not name `getColumnName()`, + /// because it does not uniquely define the expression (the types of arguments can be different). + String lambda_name = getUniqueName(data.getSampleBlock(), "__lambda"); + + auto function_capture = std::make_shared( + lambda_actions, captured, lambda_arguments, result_type, result_name); + data.addAction(ExpressionAction::applyFunction(function_capture, captured, lambda_name)); + + argument_types[i] = std::make_shared(lambda_type->getArgumentTypes(), result_type); + argument_names[i] = lambda_name; } - else - { - /// If the argument is not a lambda expression, call it recursively and find out its type. - visit(child); - std::string name = child_column_name; - if (actions_stack.getSampleBlock().has(name)) - { - argument_types.push_back(actions_stack.getSampleBlock().getByName(name).type); - argument_names.push_back(name); - } - else - { - if (only_consts) - arguments_present = false; - else - throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); - } - } - } - - if (only_consts && !arguments_present) - return; - - if (has_lambda_arguments && !only_consts) - { - function_builder->getLambdaArgumentTypes(argument_types); - - /// Call recursively for lambda expressions. - for (size_t i = 0; i < node->arguments->children.size(); ++i) - { - ASTPtr child = node->arguments->children[i]; - - const auto * lambda = child->as(); - if (lambda && lambda->name == "lambda") - { - const DataTypeFunction * lambda_type = typeid_cast(argument_types[i].get()); - const auto * lambda_args_tuple = lambda->arguments->children.at(0)->as(); - const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children; - NamesAndTypesList lambda_arguments; - - for (size_t j = 0; j < lambda_arg_asts.size(); ++j) - { - auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]); - if (!opt_arg_name) - throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - - lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]); - } - - actions_stack.pushLevel(lambda_arguments); - visit(lambda->arguments->children.at(1)); - ExpressionActionsPtr lambda_actions = actions_stack.popLevel(); - - String result_name = lambda->arguments->children.at(1)->getColumnName(); - lambda_actions->finalize(Names(1, result_name)); - DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; - - Names captured; - Names required = lambda_actions->getRequiredColumns(); - for (const auto & required_arg : required) - if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end()) - captured.push_back(required_arg); - - /// We can not name `getColumnName()`, - /// because it does not uniquely define the expression (the types of arguments can be different). - String lambda_name = getUniqueName(actions_stack.getSampleBlock(), "__lambda"); - - auto function_capture = std::make_shared( - lambda_actions, captured, lambda_arguments, result_type, result_name); - actions_stack.addAction(ExpressionAction::applyFunction(function_capture, captured, lambda_name)); - - argument_types[i] = std::make_shared(lambda_type->getArgumentTypes(), result_type); - argument_names[i] = lambda_name; - } - } - } - - if (only_consts) - { - for (const auto & argument_name : argument_names) - { - if (!actions_stack.getSampleBlock().has(argument_name)) - { - arguments_present = false; - break; - } - } - } - - if (arguments_present) - { - actions_stack.addAction( - ExpressionAction::applyFunction(function_builder, argument_names, getColumnName())); } } - else if (const auto * literal = ast->as()) - { - DataTypePtr type = applyVisitor(FieldToDataType(), literal->value); - ColumnWithTypeAndName column; - column.column = type->createColumnConst(1, convertFieldToType(literal->value, *type)); - column.type = type; - column.name = getColumnName(); - - actions_stack.addAction(ExpressionAction::addColumn(column)); - } - else + if (data.only_consts) { - for (auto & child : ast->children) + for (const auto & argument_name : argument_names) { - /// Do not go to FROM, JOIN, UNION. - if (!child->as() && !child->as()) - visit(child); + if (!data.hasColumn(argument_name)) + { + arguments_present = false; + break; + } } } + + if (arguments_present) + { + data.addAction(ExpressionAction::applyFunction(function_builder, argument_names, column_name.get(ast))); + } } -SetPtr ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_block) +void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data) +{ + CachedColumnName column_name; + if (data.hasColumn(column_name.get(ast))) + return; + + DataTypePtr type = applyVisitor(FieldToDataType(), literal.value); + + ColumnWithTypeAndName column; + column.column = type->createColumnConst(1, convertFieldToType(literal.value, *type)); + column.type = type; + column.name = column_name.get(ast); + + data.addAction(ExpressionAction::addColumn(column)); +} + +SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data) { /** You need to convert the right argument to a set. * This can be a table name, a value, a value enumeration, or a subquery. * The enumeration of values is parsed as a function `tuple`. */ - const IAST & args = *node->arguments; + const IAST & args = *node.arguments; const ASTPtr & arg = args.children.at(1); + const Block & sample_block = data.getSampleBlock(); /// If the subquery or table name for SELECT. const auto * identifier = arg->as(); if (arg->as() || identifier) { auto set_key = PreparedSetKey::forSubquery(*arg); - if (prepared_sets.count(set_key)) - return prepared_sets.at(set_key); + if (data.prepared_sets.count(set_key)) + return data.prepared_sets.at(set_key); /// A special case is if the name of the table is specified on the right side of the IN statement, /// and the table has the type Set (a previously prepared set). if (identifier) { DatabaseAndTableWithAlias database_table(*identifier); - StoragePtr table = context.tryGetTable(database_table.database, database_table.table); + StoragePtr table = data.context.tryGetTable(database_table.database, database_table.table); if (table) { StorageSet * storage_set = dynamic_cast(table.get()); if (storage_set) { - prepared_sets[set_key] = storage_set->getSet(); + data.prepared_sets[set_key] = storage_set->getSet(); return storage_set->getSet(); } } @@ -576,25 +580,25 @@ SetPtr ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bl /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. String set_id = arg->getColumnName(); - SubqueryForSet & subquery_for_set = subqueries_for_sets[set_id]; + SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id]; /// If you already created a Set with the same subquery / table. if (subquery_for_set.set) { - prepared_sets[set_key] = subquery_for_set.set; + data.prepared_sets[set_key] = subquery_for_set.set; return subquery_for_set.set; } - SetPtr set = std::make_shared(set_size_limit, false); + SetPtr set = std::make_shared(data.set_size_limit, false); /** The following happens for GLOBAL INs: * - in the addExternalStorage function, the IN (SELECT ...) subquery is replaced with IN _data1, * in the subquery_for_set object, this subquery is set as source and the temporary table _data1 as the table. * - this function shows the expression IN_data1. */ - if (!subquery_for_set.source && no_storage_or_local) + if (!subquery_for_set.source && data.no_storage_or_local) { - auto interpreter = interpretSubquery(arg, context, subquery_depth, {}); + auto interpreter = interpretSubquery(arg, data.context, data.subquery_depth, {}); subquery_for_set.source = std::make_shared( interpreter->getSampleBlock(), [interpreter]() mutable { return interpreter->execute().in; }); @@ -627,13 +631,13 @@ SetPtr ActionsVisitor::makeSet(const ASTFunction * node, const Block & sample_bl } subquery_for_set.set = set; - prepared_sets[set_key] = set; + data.prepared_sets[set_key] = set; return set; } else { /// An explicit enumeration of values in parentheses. - return makeExplicitSet(node, sample_block, false, context, set_size_limit, prepared_sets); + return makeExplicitSet(&node, sample_block, false, data.context, data.set_size_limit, data.prepared_sets); } } diff --git a/dbms/src/Interpreters/ActionsVisitor.h b/dbms/src/Interpreters/ActionsVisitor.h index 4d03f758f61..def72c7ad1c 100644 --- a/dbms/src/Interpreters/ActionsVisitor.h +++ b/dbms/src/Interpreters/ActionsVisitor.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -52,38 +53,81 @@ struct ScopeStack const Block & getSampleBlock() const; }; +class ASTIdentifier; +class ASTFunction; +class ASTLiteral; /// Collect ExpressionAction from AST. Returns PreparedSets and SubqueriesForSets too. -class ActionsVisitor +class ActionsMatcher { public: - ActionsVisitor(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, - const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions, - PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, - bool no_subqueries_, bool only_consts_, bool no_storage_or_local_, std::ostream * ostr_ = nullptr); + using Visitor = ConstInDepthNodeVisitor; - void visit(const ASTPtr & ast, ExpressionActionsPtr & actions) + struct Data { - visit(ast); - actions = actions_stack.popLevel(); - } + const Context & context; + SizeLimits set_size_limit; + size_t subquery_depth; + const NamesAndTypesList & source_columns; + PreparedSets & prepared_sets; + SubqueriesForSets & subqueries_for_sets; + bool no_subqueries; + bool only_consts; + bool no_storage_or_local; + size_t visit_depth; + ScopeStack actions_stack; + + Data(const Context & context_, SizeLimits set_size_limit_, size_t subquery_depth_, + const NamesAndTypesList & source_columns_, const ExpressionActionsPtr & actions, + PreparedSets & prepared_sets_, SubqueriesForSets & subqueries_for_sets_, + bool no_subqueries_, bool only_consts_, bool no_storage_or_local_) + : context(context_), + set_size_limit(set_size_limit_), + subquery_depth(subquery_depth_), + source_columns(source_columns_), + prepared_sets(prepared_sets_), + subqueries_for_sets(subqueries_for_sets_), + no_subqueries(no_subqueries_), + only_consts(only_consts_), + no_storage_or_local(no_storage_or_local_), + visit_depth(0), + actions_stack(actions, context) + {} + + void updateActions(ExpressionActionsPtr & actions) + { + actions = actions_stack.popLevel(); + } + + void addAction(const ExpressionAction & action) + { + actions_stack.addAction(action); + } + + const Block & getSampleBlock() const + { + return actions_stack.getSampleBlock(); + } + + /// Does result of the calculation already exists in the block. + bool hasColumn(const String & columnName) const + { + return actions_stack.getSampleBlock().has(columnName); + } + }; + + static void visit(const ASTPtr & ast, Data & data); + static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); private: - const Context & context; - SizeLimits set_size_limit; - size_t subquery_depth; - const NamesAndTypesList & source_columns; - PreparedSets & prepared_sets; - SubqueriesForSets & subqueries_for_sets; - const bool no_subqueries; - const bool only_consts; - const bool no_storage_or_local; - mutable size_t visit_depth; - std::ostream * ostr; - ScopeStack actions_stack; - void visit(const ASTPtr & ast); - SetPtr makeSet(const ASTFunction * node, const Block & sample_block); + static void visit(const ASTIdentifier & identifier, const ASTPtr & ast, Data & data); + static void visit(const ASTFunction & node, const ASTPtr & ast, Data & data); + static void visit(const ASTLiteral & literal, const ASTPtr & ast, Data & data); + + static SetPtr makeSet(const ASTFunction & node, Data & data); }; +using ActionsVisitor = ActionsMatcher::Visitor; + } diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index 44e145dba8a..1bd6c83d073 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -80,7 +80,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( ASTPtr query = expression_list; auto syntax_result = SyntaxAnalyzer(context).analyze(query, columns_from_joined_table, required_columns); - ExpressionAnalyzer analyzer(query, syntax_result, context, required_columns_set); + ExpressionAnalyzer analyzer(query, syntax_result, context); return analyzer.getActions(true, false); } diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index db76acd5ac3..cb026af20dc 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -35,6 +35,7 @@ private: friend class SyntaxAnalyzer; friend struct SyntaxAnalyzerResult; friend class ExpressionAnalyzer; + friend class SelectQueryExpressionAnalyzer; Names key_names_left; Names key_names_right; /// Duplicating names are qualified. diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 8d4eecf2df8..5d12de724d0 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -54,8 +54,8 @@ #include #include #include - #include + #include #include #include @@ -76,11 +76,9 @@ ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & query_, const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, - const NameSet & required_result_columns_, size_t subquery_depth_, bool do_global) - : ExpressionAnalyzerData(required_result_columns_) - , query(query_), context(context_), settings(context.getSettings()) + : query(query_), context(context_), settings(context.getSettings()) , subquery_depth(subquery_depth_) , syntax(syntax_analyzer_result_) { @@ -236,7 +234,7 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) } -void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) +void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) { auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); if (prepared_sets.count(set_key)) @@ -262,7 +260,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_ /// Perfomance optimisation for IN() if storage supports it. -void ExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) +void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) { if (!node || !storage() || !storage()->supportsIndexForIn()) return; @@ -317,10 +315,11 @@ void ExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor actions_visitor(context, settings.size_limits_for_set, subquery_depth, + ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth, sourceColumns(), actions, prepared_sets, subqueries_for_sets, - no_subqueries, only_consts, !isRemoteStorage(), log.stream()); - actions_visitor.visit(ast, actions); + no_subqueries, only_consts, !isRemoteStorage()); + ActionsVisitor(visitor_data, log.stream()).visit(ast); + visitor_data.updateActions(actions); } @@ -361,7 +360,7 @@ const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const return select_query; } -const ASTSelectQuery * ExpressionAnalyzer::getAggregatingQuery() const +const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const { if (!has_aggregation) throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR); @@ -393,7 +392,7 @@ void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actio actions->add(ExpressionAction::arrayJoin(result_columns, array_join_is_left, context)); } -bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); @@ -435,7 +434,7 @@ static void appendRequiredColumns( required_columns.insert(column.name); } -bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); @@ -552,7 +551,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty return true; } -bool ExpressionAnalyzer::appendPrewhere( +bool SelectQueryExpressionAnalyzer::appendPrewhere( ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) { const auto * select_query = getSelectQuery(); @@ -626,7 +625,7 @@ bool ExpressionAnalyzer::appendPrewhere( return true; } -bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); @@ -644,7 +643,7 @@ bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_t return true; } -bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getAggregatingQuery(); @@ -664,7 +663,7 @@ bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only return true; } -void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types) +void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getAggregatingQuery(); @@ -697,7 +696,7 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai getRootActions(argument, only_types, step.actions); } -bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getAggregatingQuery(); @@ -713,7 +712,7 @@ bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_ return true; } -void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types) +void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); @@ -726,7 +725,7 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_ step.required_output.push_back(child->getColumnName()); } -bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); @@ -750,7 +749,7 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only return true; } -bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types) +bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getSelectQuery(); @@ -779,7 +778,7 @@ bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only return true; } -void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const +void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const { const auto * select_query = getSelectQuery(); @@ -867,7 +866,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions() return actions; } -void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const +void SelectQueryExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const { for (const auto & name_and_type : aggregation_keys) key_names.emplace_back(name_and_type.name); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 2e70662f196..04938675737 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -31,9 +31,6 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr; /// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately. struct ExpressionAnalyzerData { - /// If non-empty, ignore all expressions in not from this list. - NameSet required_result_columns; - SubqueriesForSets subqueries_for_sets; PreparedSets prepared_sets; @@ -49,11 +46,6 @@ struct ExpressionAnalyzerData /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. Tables external_tables; - -protected: - ExpressionAnalyzerData(const NameSet & required_result_columns_) - : required_result_columns(required_result_columns_) - {} }; @@ -61,7 +53,7 @@ protected: * * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. */ -class ExpressionAnalyzer : private ExpressionAnalyzerData, private boost::noncopyable +class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable { private: /// Extracts settings to enlight which are used (and avoid copy of others). @@ -83,51 +75,14 @@ private: }; public: + /// Ctor for non-select queries. Generally its usage is: + /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions(); ExpressionAnalyzer( const ASTPtr & query_, const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, - const Context & context_, - const NameSet & required_result_columns_ = {}, - size_t subquery_depth_ = 0, - bool do_global_ = false); - - /// Does the expression have aggregate functions or a GROUP BY or HAVING section. - bool hasAggregation() const { return has_aggregation; } - - /// Get a list of aggregation keys and descriptions of aggregate functions if the query contains GROUP BY. - void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const; - - /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query. - * - * Example usage: - * ExpressionActionsChain chain; - * analyzer.appendWhere(chain); - * chain.addStep(); - * analyzer.appendSelect(chain); - * analyzer.appendOrderBy(chain); - * chain.finalize(); - * - * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way - * shouldn't be executed, they are only needed to get a list of columns with their types. - */ - - /// Before aggregation: - bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types); - bool appendJoin(ExpressionActionsChain & chain, bool only_types); - /// remove_filter is set in ExpressionActionsChain::finalize(); - /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). - bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); - bool appendWhere(ExpressionActionsChain & chain, bool only_types); - bool appendGroupBy(ExpressionActionsChain & chain, bool only_types); - void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); - - /// After aggregation: - bool appendHaving(ExpressionActionsChain & chain, bool only_types); - void appendSelect(ExpressionActionsChain & chain, bool only_types); - bool appendOrderBy(ExpressionActionsChain & chain, bool only_types); - bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); - /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. - void appendProjectResult(ExpressionActionsChain & chain) const; + const Context & context_) + : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false) + {} void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types); @@ -148,21 +103,17 @@ public: */ const SubqueriesForSets & getSubqueriesForSets() const { return subqueries_for_sets; } - const PreparedSets & getPreparedSets() const { return prepared_sets; } - - /** Tables that will need to be sent to remote servers for distributed query processing. - */ - const Tables & getExternalTables() const { return external_tables; } - /// Get intermediates for tests const ExpressionAnalyzerData & getAnalyzedData() const { return *this; } - /// Create Set-s that we can from IN section to use the index on them. - void makeSetsForIndex(const ASTPtr & node); +protected: + ExpressionAnalyzer( + const ASTPtr & query_, + const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, + const Context & context_, + size_t subquery_depth_, + bool do_global_); - bool hasGlobalSubqueries() { return has_global_subqueries; } - -private: ASTPtr query; const Context & context; const ExtractedSettings settings; @@ -197,7 +148,75 @@ private: void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const; const ASTSelectQuery * getSelectQuery() const; - const ASTSelectQuery * getAggregatingQuery() const; + + bool isRemoteStorage() const; +}; + +/// SelectQuery specific ExpressionAnalyzer part. +class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer +{ +public: + SelectQueryExpressionAnalyzer( + const ASTPtr & query_, + const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, + const Context & context_, + const NameSet & required_result_columns_ = {}, + size_t subquery_depth_ = 0, + bool do_global_ = false) + : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, subquery_depth_, do_global_) + , required_result_columns(required_result_columns_) + {} + + /// Does the expression have aggregate functions or a GROUP BY or HAVING section. + bool hasAggregation() const { return has_aggregation; } + bool hasGlobalSubqueries() { return has_global_subqueries; } + + /// Get a list of aggregation keys and descriptions of aggregate functions if the query contains GROUP BY. + void getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const; + + const PreparedSets & getPreparedSets() const { return prepared_sets; } + + /// Tables that will need to be sent to remote servers for distributed query processing. + const Tables & getExternalTables() const { return external_tables; } + + /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query. + * + * Example usage: + * ExpressionActionsChain chain; + * analyzer.appendWhere(chain); + * chain.addStep(); + * analyzer.appendSelect(chain); + * analyzer.appendOrderBy(chain); + * chain.finalize(); + * + * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way + * shouldn't be executed, they are only needed to get a list of columns with their types. + */ + + /// Before aggregation: + bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types); + bool appendJoin(ExpressionActionsChain & chain, bool only_types); + /// remove_filter is set in ExpressionActionsChain::finalize(); + /// Columns in `additional_required_columns` will not be removed (they can be used for e.g. sampling or FINAL modifier). + bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns); + bool appendWhere(ExpressionActionsChain & chain, bool only_types); + bool appendGroupBy(ExpressionActionsChain & chain, bool only_types); + void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); + + /// After aggregation: + bool appendHaving(ExpressionActionsChain & chain, bool only_types); + void appendSelect(ExpressionActionsChain & chain, bool only_types); + bool appendOrderBy(ExpressionActionsChain & chain, bool only_types); + bool appendLimitBy(ExpressionActionsChain & chain, bool only_types); + /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases. + void appendProjectResult(ExpressionActionsChain & chain) const; + + /// Create Set-s that we can from IN section to use the index on them. + void makeSetsForIndex(const ASTPtr & node); + +private: + /// If non-empty, ignore all expressions not from this list. + NameSet required_result_columns; /** * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. @@ -205,7 +224,7 @@ private: */ void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); - bool isRemoteStorage() const; + const ASTSelectQuery * getAggregatingQuery() const; }; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index db65c8a95b1..e73fb669015 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -144,7 +144,7 @@ String generateFilterActions(ExpressionActionsPtr & actions, const StoragePtr & /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = SyntaxAnalyzer(context).analyze(query_ast, storage->getColumns().getAllPhysical()); - ExpressionAnalyzer analyzer(query_ast, syntax_result, context); + SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context); ExpressionActionsChain new_chain(context); analyzer.appendSelect(new_chain, false); actions = new_chain.getLastActions(); @@ -296,7 +296,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage, NamesAndTypesList()); - query_analyzer = std::make_unique( + query_analyzer = std::make_unique( query_ptr, syntax_analyzer_result, context, NameSet(required_result_column_names.begin(), required_result_column_names.end()), options.subquery_depth, !options.only_analyze); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 0e2cfcd4c7b..103d7578464 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -248,7 +248,7 @@ private: Context context; NamesAndTypesList source_columns; SyntaxAnalyzerResultPtr syntax_analyzer_result; - std::unique_ptr query_analyzer; + std::unique_ptr query_analyzer; SelectQueryInfo query_info; /// How many streams we ask for storage to produce, and in how many threads we will do further processing. diff --git a/dbms/src/Interpreters/tests/expression.cpp b/dbms/src/Interpreters/tests/expression.cpp index 2dbf01d148b..a818fde089b 100644 --- a/dbms/src/Interpreters/tests/expression.cpp +++ b/dbms/src/Interpreters/tests/expression.cpp @@ -56,7 +56,7 @@ int main(int argc, char ** argv) }; auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, columns); - ExpressionAnalyzer analyzer(ast, syntax_result, context); + SelectQueryExpressionAnalyzer analyzer(ast, syntax_result, context); ExpressionActionsChain chain(context); analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain);