diff --git a/dbms/src/Interpreters/ColumnNamesContext.cpp b/dbms/src/Interpreters/ColumnNamesContext.cpp new file mode 100644 index 00000000000..1d17106d3e7 --- /dev/null +++ b/dbms/src/Interpreters/ColumnNamesContext.cpp @@ -0,0 +1,113 @@ +#include +#include + +namespace DB +{ + +bool ColumnNamesContext::addTableAliasIfAny(const IAST & ast) +{ + String alias = ast.tryGetAlias(); + if (alias.empty()) + return false; + + table_aliases.insert(alias); + return true; +} + +bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast, bool is_public) +{ + String alias = ast.tryGetAlias(); + if (alias.empty()) + return false; + + if (required_names.count(alias)) + masked_columns.insert(alias); + + if (is_public) + public_columns.insert(alias); + column_aliases.insert(alias); + return true; +} + +void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node, bool is_public) +{ + if (!node.general()) + return; + + required_names.insert(node.name); + + if (!addColumnAliasIfAny(node, is_public) && is_public) + public_columns.insert(node.name); +} + +bool ColumnNamesContext::addArrayJoinAliasIfAny(const IAST & ast) +{ + String alias = ast.tryGetAlias(); + if (alias.empty()) + return false; + + array_join_columns.insert(alias); + return true; +} + +void ColumnNamesContext::addArrayJoinIdentifier(const ASTIdentifier & node) +{ + array_join_columns.insert(node.name); +} + +NameSet ColumnNamesContext::requiredColumns() const +{ + NameSet required; + for (const auto & name : required_names) + { + String table_name = Nested::extractTableName(name); + + /// Tech debt. There's its own logic for ARRAY JOIN columns. 
+ if (array_join_columns.count(name) || array_join_columns.count(table_name)) + continue; + + if (!column_aliases.count(name) || masked_columns.count(name)) + required.insert(name); + } + return required; +} + +std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols) +{ + os << "required_names: "; + for (const auto & x : cols.required_names) + os << "'" << x << "' "; + os << "source_tables: "; + for (const auto & x : cols.tables) + { + auto alias = x.alias(); + auto name = x.name(); + if (alias && name) + os << "'" << *alias << "'/'" << *name << "' "; + else if (alias) + os << "'" << *alias << "' "; + else if (name) + os << "'" << *name << "' "; + } + os << "table_aliases: "; + for (const auto & x : cols.table_aliases) + os << "'" << x << "' "; + os << "private_aliases: "; + for (const auto & x : cols.private_aliases) + os << "'" << x << "' "; + os << "column_aliases: "; + for (const auto & x : cols.column_aliases) + os << "'" << x << "' "; + os << "public_columns: "; + for (const auto & x : cols.public_columns) + os << "'" << x << "' "; + os << "masked_columns: "; + for (const auto & x : cols.masked_columns) + os << "'" << x << "' "; + os << "array_join_columns: "; + for (const auto & x : cols.array_join_columns) + os << "'" << x << "' "; + return os; +} + +} diff --git a/dbms/src/Interpreters/ColumnNamesContext.h b/dbms/src/Interpreters/ColumnNamesContext.h new file mode 100644 index 00000000000..63ad5a4e2e4 --- /dev/null +++ b/dbms/src/Interpreters/ColumnNamesContext.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +/// Information about table and column names extracted from ASTSelectQuery block. Do not include info from subselects. 
+struct ColumnNamesContext +{ + struct JoinedTable + { + const ASTTableExpression * expr; /// may be null: the visitor stores nullptr when the element has no table expression child + const ASTTableJoin * join; /// may be null: the visitor stores nullptr when the element has no join child + + std::optional alias() const + { + String alias; + if (expr && expr->database_and_table_name) + alias = expr->database_and_table_name->tryGetAlias(); + else if (expr && expr->table_function) + alias = expr->table_function->tryGetAlias(); + else if (expr && expr->subquery) + alias = expr->subquery->tryGetAlias(); + if (!alias.empty()) + return alias; + return {}; + } + + std::optional name() const + { + if (auto * node = expr ? expr->database_and_table_name.get() : nullptr) + if (auto * identifier = typeid_cast(node)) + return identifier->name; + return {}; + } + + std::optional joinKind() const + { + if (join) + return join->kind; + return {}; + } + }; + + NameSet required_names; + NameSet table_aliases; + NameSet private_aliases; + NameSet column_aliases; + NameSet masked_columns; + NameSet public_columns; + NameSet array_join_columns; + std::vector tables; /// ordered list of visited tables in FROM section with joins (an entry's expr and join may be null) + bool has_table_join = false; + bool has_array_join = false; + + bool addTableAliasIfAny(const IAST & ast); + bool addColumnAliasIfAny(const IAST & ast, bool is_public = false); + void addColumnIdentifier(const ASTIdentifier & node, bool is_public = false); + bool addArrayJoinAliasIfAny(const IAST & ast); + void addArrayJoinIdentifier(const ASTIdentifier & node); + + NameSet requiredColumns() const; +}; + +std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols); + +} diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index e488aa5d81b..92e26786e95 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -106,10 +106,10 @@ ExpressionAnalyzer::ExpressionAnalyzer( removeDuplicateColumns(source_columns); } - /// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. 
+ /// Delete the unnecessary from `source_columns` list. Form `columns_added_by_join`. collectUsedColumns(); /// external_tables, subqueries_for_sets for global subqueries. /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. initGlobalSubqueriesAndExternalTables(); @@ -1001,6 +1001,15 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio aggregates = aggregate_descriptions; } +/// Strips the first dot-separated prefix: db.table.column -> table.column / table.column -> column. A name without '.' is returned unchanged. +static String cropDatabaseOrTableName(const String & name) +{ + size_t pos = name.find('.'); + if (pos != std::string::npos) + return name.substr(pos + 1); + return name; +} + void ExpressionAnalyzer::collectUsedColumns() { /** Calculate which columns are required to execute the expression. @@ -1008,83 +1017,131 @@ void ExpressionAnalyzer::collectUsedColumns() * After execution, columns will only contain the list of columns needed to read from the table. 
*/ - NameSet required; - NameSet ignored; + RequiredSourceColumnsVisitor::Data columns_context; + RequiredSourceColumnsVisitor(columns_context).visit(query); - NameSet available_columns; - for (const auto & column : source_columns) - available_columns.insert(column.name); + NameSet required = columns_context.requiredColumns(); - if (select_query && select_query->array_join_expression_list()) +#if 0 + std::cerr << "Query: " << query << std::endl; + std::cerr << "CTX: " << columns_context << std::endl; + std::cerr << "source_columns: "; + for (const auto & name : source_columns) + std::cerr << "'" << name.name << "' "; + std::cerr << "required: "; + for (const auto & name : required) + std::cerr << "'" << name << "' "; + std::cerr << std::endl; +#endif + + if (columns_context.has_table_join) { - ASTs & expressions = select_query->array_join_expression_list()->children; - for (size_t i = 0; i < expressions.size(); ++i) + const AnalyzedJoin & analyzed_join = analyzedJoin(); +#if 0 + std::cerr << "key_names_left: "; + for (const auto & name : analyzed_join.key_names_left) + std::cerr << "'" << name << "' "; + std::cerr << "key_names_right: "; + for (const auto & name : analyzed_join.key_names_right) + std::cerr << "'" << name << "' "; + std::cerr << "columns_from_joined_table: "; + for (const auto & column : analyzed_join.columns_from_joined_table) + std::cerr << "'" << column.name_and_type.name << '/' << column.original_name << "' "; + std::cerr << "available_joined_columns: "; + for (const auto & column : analyzed_join.available_joined_columns) + std::cerr << "'" << column.name_and_type.name << '/' << column.original_name << "' "; + std::cerr << std::endl; +#endif + NameSet available_columns; + for (const auto & name : source_columns) + available_columns.insert(name.name); + + NameSet right_keys; + for (const auto & right_key_name : analyzed_join.key_names_right) + right_keys.insert(right_key_name); + + /** You also need to ignore the identifiers of the columns that 
are obtained by JOIN. + * (Do not assume that they are required for reading from the "left" table). + */ + columns_added_by_join.clear(); + for (const auto & joined_column : analyzed_join.available_joined_columns) { - /// Ignore the top-level identifiers from the ARRAY JOIN section. - /// Then add them separately. - if (typeid_cast(expressions[i].get())) + auto & name = joined_column.name_and_type.name; + if (required.count(name) && !available_columns.count(name)) { - ignored.insert(expressions[i]->getColumnName()); - } - else - { - /// Nothing needs to be ignored for expressions in ARRAY JOIN. - NameSet empty; - RequiredSourceColumnsVisitor::Data visitor_data{available_columns, required, empty, empty, empty}; - RequiredSourceColumnsVisitor(visitor_data).visit(expressions[i]); - } + columns_added_by_join.push_back(joined_column); + required.erase(name); - ignored.insert(expressions[i]->getAliasOrColumnName()); + /// Some columns from right join key may be used in query. These columns will be appended to block during join. 
+ if (right_keys.count(name)) + columns_added_by_join_from_right_keys.insert(name); + } } + + /// @fix filter required columns according to misqualified names in JOIN ON + if (columns_context.has_table_join && + columns_context.tables.size() >= 2 && + columns_context.tables[1].join && + columns_context.tables[1].join->on_expression) + { + NameSet fixed_required; + + for (const auto & req_name : required) + { + bool collated = false; + String cropped_name = req_name; + static constexpr size_t max_column_prefix = 2; + + for (size_t i = 0; i < max_column_prefix && !collated; ++i) + { + cropped_name = cropDatabaseOrTableName(cropped_name); + + if (available_columns.count(cropped_name)) + { + fixed_required.insert(cropped_name); + collated = true; + break; + } + + for (const auto & joined_column : analyzed_join.available_joined_columns) + { + auto & name = joined_column.name_and_type.name; + + if (cropped_name == name) + { + columns_added_by_join.push_back(joined_column); + if (right_keys.count(name)) + columns_added_by_join_from_right_keys.insert(name); + collated = true; + break; + } + } + } + + if (!collated) + fixed_required.insert(req_name); + } + + required.swap(fixed_required); + } + + /// @note required_columns_from_joined_table is output + joined_block_actions = analyzed_join.createJoinedBlockActions( + columns_added_by_join, select_query, context, required_columns_from_joined_table); } - /** You also need to ignore the identifiers of the columns that are obtained by JOIN. - * (Do not assume that they are required for reading from the "left" table). 
- */ - NameSet available_joined_columns; - for (const auto & joined_column : analyzedJoin().available_joined_columns) - available_joined_columns.insert(joined_column.name_and_type.name); - - NameSet required_joined_columns; - - for (const auto & left_key_ast : syntax->analyzed_join.key_asts_left) + if (columns_context.has_array_join) { - NameSet empty; - RequiredSourceColumnsVisitor::Data columns_data{available_columns, required, ignored, empty, required_joined_columns}; - ASTPtr tmp = left_key_ast; - RequiredSourceColumnsVisitor(columns_data).visit(tmp); + /// Insert the columns required for the ARRAY JOIN calculation into the required columns list. + NameSet array_join_sources; + for (const auto & result_source : syntax->array_join_result_to_source) + array_join_sources.insert(result_source.second); + + for (const auto & column_name_type : source_columns) + if (array_join_sources.count(column_name_type.name)) + required.insert(column_name_type.name); } - RequiredSourceColumnsVisitor::Data columns_visitor_data{available_columns, required, ignored, - available_joined_columns, required_joined_columns}; - RequiredSourceColumnsVisitor(columns_visitor_data).visit(query); - - columns_added_by_join = analyzedJoin().available_joined_columns; - for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end();) - { - if (required_joined_columns.count(it->name_and_type.name)) - ++it; - else - columns_added_by_join.erase(it++); - } - - joined_block_actions = analyzedJoin().createJoinedBlockActions( - columns_added_by_join, select_query, context, required_columns_from_joined_table); - - /// Some columns from right join key may be used in query. This columns will be appended to block during join. 
- for (const auto & right_key_name : analyzedJoin().key_names_right) - if (required_joined_columns.count(right_key_name)) - columns_added_by_join_from_right_keys.insert(right_key_name); - - /// Insert the columns required for the ARRAY JOIN calculation into the required columns list. - NameSet array_join_sources; - for (const auto & result_source : syntax->array_join_result_to_source) - array_join_sources.insert(result_source.second); - - for (const auto & column_name_type : source_columns) - if (array_join_sources.count(column_name_type.name)) - required.insert(column_name_type.name); - /// You need to read at least one column to find the number of rows. if (select_query && required.empty()) required.insert(ExpressionActions::getSmallestColumn(source_columns)); @@ -1118,9 +1175,17 @@ void ExpressionAnalyzer::collectUsedColumns() } if (!unknown_required_source_columns.empty()) + { + std::stringstream ss; + ss << columns_context; + ss << "source_columns: "; + for (const auto & name : source_columns) + ss << "'" << name.name << "' "; + throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin() - + (select_query && !select_query->tables ? ". Note that there is no tables (FROM clause) in your query" : ""), - ErrorCodes::UNKNOWN_IDENTIFIER); + + (select_query && !select_query->tables ? ". Note that there is no tables (FROM clause) in your query" : "") + + ", context: " + ss.str(), ErrorCodes::UNKNOWN_IDENTIFIER); + } } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index ba1e3a252d0..5c89df50863 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -232,8 +232,7 @@ private: const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; } /** Remove all unnecessary columns from the list of all available columns of the table (`columns`). 
- * At the same time, form a set of unknown columns (`unknown_required_source_columns`), - * as well as the columns added by JOIN (`columns_added_by_join`). + * At the same time, form a set of columns added by JOIN (`columns_added_by_join`). */ void collectUsedColumns(); diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp new file mode 100644 index 00000000000..b4ce3281d09 --- /dev/null +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -0,0 +1,232 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static std::vector extractNamesFromLambda(const ASTFunction & node) +{ + if (node.arguments->children.size() != 2) + throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTFunction * lambda_args_tuple = typeid_cast(node.arguments->children[0].get()); + + if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") + throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); + + std::vector names; + for (auto & child : lambda_args_tuple->arguments->children) + { + ASTIdentifier * identifier = typeid_cast(child.get()); + if (!identifier) + throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); + + names.push_back(identifier->name); + } + + return names; +} + +bool RequiredSourceColumnsMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) +{ + if (typeid_cast(child.get())) + return false; + + /// Processed. Do not need children. + if (typeid_cast(node.get()) || + typeid_cast(node.get()) || + typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + + if (auto * f = typeid_cast(node.get())) + { + /// "indexHint" is a special function for index analysis. 
Everything that is inside it is not calculated. @sa KeyCondition + /// "lambda" visit children itself. + if (f->name == "indexHint" || f->name == "lambda") + return false; + } + + return true; +} + +std::vector RequiredSourceColumnsMatcher::visit(ASTPtr & ast, Data & data) +{ + /// results are columns + + if (auto * t = typeid_cast(ast.get())) + { + data.addColumnAliasIfAny(*ast); + visit(*t, ast, data); + return {}; + } + if (auto * t = typeid_cast(ast.get())) + { + data.addColumnAliasIfAny(*ast); + visit(*t, ast, data); + return {}; + } + + /// results are tables + + if (auto * t = typeid_cast(ast.get())) + { + visit(*t, ast, data); + return {}; + } + + if (auto * t = typeid_cast(ast.get())) + { + //data.addTableAliasIfAny(*ast); alias is attached to child + visit(*t, ast, data); + return {}; + } + if (auto * t = typeid_cast(ast.get())) + { + data.addTableAliasIfAny(*ast); + return visit(*t, ast, data); + } + if (auto * t = typeid_cast(ast.get())) + { + data.addTableAliasIfAny(*ast); + return {}; + } + + /// other + + if (auto * t = typeid_cast(ast.get())) + { + data.has_array_join = true; + return visit(*t, ast, data); + } + + return {}; +} + +std::vector RequiredSourceColumnsMatcher::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) +{ + /// special case for top-level SELECT items: they are publics + for (auto & node : select.select_expression_list->children) + { + if (auto * identifier = typeid_cast(node.get())) + data.addColumnIdentifier(*identifier, true); + else + data.addColumnAliasIfAny(*node, true); + } + + std::vector out; + for (auto & node : select.children) + if (node != select.select_expression_list) + out.push_back(&node); + + /// revisit select_expression_list (with children) when all the aliases are set + out.push_back(&select.select_expression_list); + return out; +} + +void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPtr &, Data & data) +{ + if (node.name.empty()) + throw Exception("Expected not empty 
name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!data.private_aliases.count(node.name)) + data.addColumnIdentifier(node); +} + +void RequiredSourceColumnsMatcher::visit(const ASTFunction & node, const ASTPtr &, Data & data) +{ + /// Do not add formal parameters of the lambda expression + if (node.name == "lambda") + { + Names local_aliases; + for (const auto & name : extractNamesFromLambda(node)) + if (data.private_aliases.insert(name).second) + local_aliases.push_back(name); + + /// visit child with masked local aliases + visit(node.arguments->children[1], data); + + for (const auto & name : local_aliases) + data.private_aliases.erase(name); + } +} + +void RequiredSourceColumnsMatcher::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) +{ + ASTTableExpression * expr = nullptr; + ASTTableJoin * join = nullptr; + + for (auto & child : node.children) + { + if (auto * e = typeid_cast(child.get())) + expr = e; + if (auto * j = typeid_cast(child.get())) + join = j; + } + + if (join) + data.has_table_join = true; + data.tables.emplace_back(ColumnNamesContext::JoinedTable{expr, join}); +} + +std::vector RequiredSourceColumnsMatcher::visit(ASTTableExpression & node, const ASTPtr &, Data & data) +{ + /// ASTIdentifiers here are tables. Do not visit them as generic ones. 
+ if (node.database_and_table_name) + data.addTableAliasIfAny(*node.database_and_table_name); + + std::vector out; + if (node.table_function) + { + data.addTableAliasIfAny(*node.table_function); + out.push_back(&node.table_function); + } + + if (node.subquery) + { + data.addTableAliasIfAny(*node.subquery); + out.push_back(&node.subquery); + } + + return out; +} + +std::vector RequiredSourceColumnsMatcher::visit(const ASTArrayJoin & node, const ASTPtr &, Data & data) +{ + ASTPtr expression_list = node.expression_list; + if (!expression_list || expression_list->children.empty()) + throw Exception("Expected not empty expression_list", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + std::vector out; + + /// Tech debt. Ignore ARRAY JOIN top-level identifiers and aliases. There's its own logic for them. + for (auto & expr : expression_list->children) + { + data.addArrayJoinAliasIfAny(*expr); + + if (auto * identifier = typeid_cast(expr.get())) + { + data.addArrayJoinIdentifier(*identifier); + continue; + } + + out.push_back(&expr); + } + + return out; +} + +} diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h index 859326acbe8..3e107111e96 100644 --- a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h @@ -1,140 +1,45 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "InDepthNodeVisitor.h" +#include +#include namespace DB { namespace ErrorCodes { - extern const int TYPE_MISMATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -/** Get a set of necessary columns to read from the table. - * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. - * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. 
- * Put in required_joined_columns the set of columns available from JOIN and needed. - */ +class ASTIdentifier; +class ASTFunction; +class ASTSelectQuery; +struct ASTTablesInSelectQueryElement; +struct ASTArrayJoin; +struct ASTTableExpression; + class RequiredSourceColumnsMatcher { public: - struct Data - { - const NameSet & available_columns; - NameSet & required_source_columns; - NameSet & ignored_names; - const NameSet & available_joined_columns; - NameSet & required_joined_columns; - }; + using Data = ColumnNamesContext; static constexpr const char * label = "RequiredSourceColumns"; - static bool needChildVisit(ASTPtr & node, const ASTPtr & child) - { - /// We will not go to the ARRAY JOIN section, because we need to look at the names of non-ARRAY-JOIN columns. - /// There, `collectUsedColumns` will send us separately. - if (typeid_cast(child.get()) || - typeid_cast(child.get()) || - typeid_cast(child.get()) || - typeid_cast(child.get())) - return false; - - /// Processed. Do not need children. - if (typeid_cast(node.get())) - return false; - - if (auto * f = typeid_cast(node.get())) - { - /// "indexHint" is a special function for index analysis. Everything that is inside it is not calculated. @sa KeyCondition - /// "lambda" visit children itself. - if (f->name == "indexHint" || f->name == "lambda") - return false; - } - - return true; - } - - /** Find all the identifiers in the query. - * We will use depth first search in AST. - * In this case - * - for lambda functions we will not take formal parameters; - * - do not go into subqueries (they have their own identifiers); - * - there is some exception for the ARRAY JOIN clause (it has a slightly different identifiers); - * - we put identifiers available from JOIN in required_joined_columns. 
- */ - static std::vector visit(ASTPtr & ast, Data & data) - { - if (auto * t = typeid_cast(ast.get())) - visit(*t, ast, data); - if (auto * t = typeid_cast(ast.get())) - visit(*t, ast, data); - return {}; - } + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); + static std::vector visit(ASTPtr & ast, Data & data); private: - static void visit(const ASTIdentifier & node, const ASTPtr &, Data & data) - { - if (node.general() - && !data.ignored_names.count(node.name) - && !data.ignored_names.count(Nested::extractTableName(node.name))) - { - /// Read column from left table if has. - if (!data.available_joined_columns.count(node.name) || data.available_columns.count(node.name)) - data.required_source_columns.insert(node.name); - else - data.required_joined_columns.insert(node.name); - } - } - - static void visit(const ASTFunction & node, const ASTPtr &, Data & data) - { - NameSet & ignored_names = data.ignored_names; - - if (node.name == "lambda") - { - if (node.arguments->children.size() != 2) - throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - ASTFunction * lambda_args_tuple = typeid_cast(node.arguments->children.at(0).get()); - - if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") - throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); - - /// You do not need to add formal parameters of the lambda expression in required_source_columns. - Names added_ignored; - for (auto & child : lambda_args_tuple->arguments->children) - { - ASTIdentifier * identifier = typeid_cast(child.get()); - if (!identifier) - throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - - String & name = identifier->name; - if (!ignored_names.count(name)) - { - ignored_names.insert(name); - added_ignored.push_back(name); - } - } - - /// @note It's a special case where we visit children inside the matcher, not in visitor. 
- visit(node.arguments->children[1], data); - - for (size_t i = 0; i < added_ignored.size(); ++i) - ignored_names.erase(added_ignored[i]); - } - } + static void visit(const ASTIdentifier & node, const ASTPtr &, Data & data); + static void visit(const ASTFunction & node, const ASTPtr &, Data & data); + static void visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data); + static std::vector visit(ASTTableExpression & node, const ASTPtr &, Data & data); + static std::vector visit(const ASTArrayJoin & node, const ASTPtr &, Data & data); + static std::vector visit(ASTSelectQuery & select, const ASTPtr &, Data & data); }; -/// Get a set of necessary columns to read from the table. -using RequiredSourceColumnsVisitor = InDepthNodeVisitor; +/// Extracts all the information about columns and tables from ASTSelectQuery block into ColumnNamesContext object. +/// It doesn't use anything but AST. It visits nodes from bottom to top except ASTFunction content to get aliases in the right manner. +/// @note There's some ambiguity with nested column names that can't be solved without schema. +using RequiredSourceColumnsVisitor = InDepthNodeVisitor; }