rewrite ExpressionAnalyzer.collectUsedColumns CLICKHOUSE-3996

This commit is contained in:
chertus 2018-12-26 17:43:25 +03:00
parent 8f306e8b45
commit 6ea13516f0
6 changed files with 576 additions and 188 deletions

View File

@ -0,0 +1,113 @@
#include <Interpreters/ColumnNamesContext.h>
#include <DataTypes/NestedUtils.h>
namespace DB
{
bool ColumnNamesContext::addTableAliasIfAny(const IAST & ast)
{
String alias = ast.tryGetAlias();
if (alias.empty())
return false;
table_aliases.insert(alias);
return true;
}
bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast, bool is_public)
{
String alias = ast.tryGetAlias();
if (alias.empty())
return false;
if (required_names.count(alias))
masked_columns.insert(alias);
if (is_public)
public_columns.insert(alias);
column_aliases.insert(alias);
return true;
}
/// Registers a column identifier: its name goes into `required_names`, its alias (if any) is recorded
/// via addColumnAliasIfAny(). A public identifier without an alias is exposed under its own name.
/// Identifiers for which `general()` is false are skipped entirely.
void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node, bool is_public)
{
    if (node.general())
    {
        required_names.insert(node.name);

        /// the alias must be registered regardless of is_public, so call it unconditionally
        const bool has_alias = addColumnAliasIfAny(node, is_public);
        if (is_public && !has_alias)
            public_columns.insert(node.name);
    }
}
bool ColumnNamesContext::addArrayJoinAliasIfAny(const IAST & ast)
{
String alias = ast.tryGetAlias();
if (alias.empty())
return false;
array_join_columns.insert(alias);
return true;
}
/// Records the name of an identifier used in ARRAY JOIN in `array_join_columns`.
void ColumnNamesContext::addArrayJoinIdentifier(const ASTIdentifier & node)
{
    const auto & column_name = node.name;
    array_join_columns.insert(column_name);
}
/// Builds the set of required column names out of `required_names`:
///  - names listed in `array_join_columns` (directly or via their nested table name) are dropped;
///  - names that are column aliases are dropped unless they are also in `masked_columns`.
NameSet ColumnNamesContext::requiredColumns() const
{
    NameSet result;

    for (const auto & column : required_names)
    {
        const String nested_table = Nested::extractTableName(column);

        /// Tech debt. There's its own logic for ARRAY JOIN columns.
        const bool from_array_join = array_join_columns.count(column) != 0
            || array_join_columns.count(nested_table) != 0;
        if (from_array_join)
            continue;

        const bool is_alias = column_aliases.count(column) != 0;
        const bool is_masked = masked_columns.count(column) != 0;
        if (is_masked || !is_alias)
            result.insert(column);
    }

    return result;
}
std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols)
{
os << "required_names: ";
for (const auto & x : cols.required_names)
os << "'" << x << "' ";
os << "source_tables: ";
for (const auto & x : cols.tables)
{
auto alias = x.alias();
auto name = x.name();
if (alias && name)
os << "'" << *alias << "'/'" << *name << "' ";
else if (alias)
os << "'" << *alias << "' ";
else if (name)
os << "'" << *name << "' ";
}
os << "table_aliases: ";
for (const auto & x : cols.table_aliases)
os << "'" << x << "' ";
os << "private_aliases: ";
for (const auto & x : cols.private_aliases)
os << "'" << x << "' ";
os << "column_aliases: ";
for (const auto & x : cols.column_aliases)
os << "'" << x << "' ";
os << "public_columns: ";
for (const auto & x : cols.public_columns)
os << "'" << x << "' ";
os << "masked_columns: ";
for (const auto & x : cols.masked_columns)
os << "'" << x << "' ";
os << "array_join_columns: ";
for (const auto & x : cols.array_join_columns)
os << "'" << x << "' ";
return os;
}
}

View File

@ -0,0 +1,74 @@
#pragma once
#include <ostream>
#include <optional>
#include <Common/typeid_cast.h>
#include <Core/Names.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
/// Information about table and column names extracted from ASTSelectQuery block. Do not include info from subselects.
struct ColumnNamesContext
{
    /// One element of the FROM section: the table expression and the JOIN description found next to it.
    /// Either pointer may be null: the visitor stores whatever children of the element it has found,
    /// so an element without a table expression yields `expr == nullptr`.
    struct JoinedTable
    {
        const ASTTableExpression * expr = nullptr;
        const ASTTableJoin * join = nullptr;

        /// Alias of the table / table function / subquery, or nullopt when there is none
        /// (including the case when there is no table expression at all).
        std::optional<String> alias() const
        {
            String alias;
            if (expr)
            {
                if (expr->database_and_table_name)
                    alias = expr->database_and_table_name->tryGetAlias();
                else if (expr->table_function)
                    alias = expr->table_function->tryGetAlias();
                else if (expr->subquery)
                    alias = expr->subquery->tryGetAlias();
            }

            if (!alias.empty())
                return alias;
            return {};
        }

        /// Name of the table when it is given by an identifier, nullopt otherwise.
        std::optional<String> name() const
        {
            if (expr)
                if (auto * node = expr->database_and_table_name.get())
                    if (auto * identifier = typeid_cast<const ASTIdentifier *>(node))
                        return identifier->name;
            return {};
        }

        /// Kind of the JOIN, nullopt when the element has no JOIN description.
        std::optional<ASTTableJoin::Kind> joinKind() const
        {
            if (join)
                return join->kind;
            return {};
        }
    };

    NameSet required_names;     /// names of all column identifiers added via addColumnIdentifier()
    NameSet table_aliases;      /// aliases added via addTableAliasIfAny()
    NameSet private_aliases;    /// names temporarily hidden from collection (formal parameters of lambdas)
    NameSet column_aliases;     /// aliases added via addColumnAliasIfAny()
    NameSet masked_columns;     /// column aliases that coincide with an already required name
    NameSet public_columns;     /// names and aliases added with is_public = true
    NameSet array_join_columns; /// top-level identifiers and aliases from ARRAY JOIN
    std::vector<JoinedTable> tables; /// ordered list of visited tables in FROM section with joins
    bool has_table_join = false;
    bool has_array_join = false;

    /// The add*IfAny() methods return true if the node has an alias and it was recorded.
    bool addTableAliasIfAny(const IAST & ast);
    bool addColumnAliasIfAny(const IAST & ast, bool is_public = false);
    void addColumnIdentifier(const ASTIdentifier & node, bool is_public = false);
    bool addArrayJoinAliasIfAny(const IAST & ast);
    void addArrayJoinIdentifier(const ASTIdentifier & node);

    /// Required names without ARRAY JOIN columns and without (not masked) column aliases.
    NameSet requiredColumns() const;
};
std::ostream & operator << (std::ostream & os, const ColumnNamesContext & cols);
}

View File

@ -106,10 +106,10 @@ ExpressionAnalyzer::ExpressionAnalyzer(
removeDuplicateColumns(source_columns);
}
/// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`.
/// Delete the unnecessary from `source_columns` list. Form `columns_added_by_join`.
collectUsedColumns();
/// external_tables, subqueries_for_sets for global subqueries.
/// external_tables, subqueries_for_sets for global subqueries.
/// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers.
initGlobalSubqueriesAndExternalTables();
@ -1001,6 +1001,15 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio
aggregates = aggregate_descriptions;
}
/// Drops the leading qualifier, i.e. everything up to and including the first dot:
/// db.table.column -> table.column / table.column -> column
/// A name without dots is returned as is.
static String cropDatabaseOrTableName(const String & name)
{
    const auto first_dot = name.find('.');
    return first_dot == std::string::npos ? name : name.substr(first_dot + 1);
}
void ExpressionAnalyzer::collectUsedColumns()
{
/** Calculate which columns are required to execute the expression.
@ -1008,83 +1017,131 @@ void ExpressionAnalyzer::collectUsedColumns()
* After execution, columns will only contain the list of columns needed to read from the table.
*/
NameSet required;
NameSet ignored;
RequiredSourceColumnsVisitor::Data columns_context;
RequiredSourceColumnsVisitor(columns_context).visit(query);
NameSet available_columns;
for (const auto & column : source_columns)
available_columns.insert(column.name);
NameSet required = columns_context.requiredColumns();
if (select_query && select_query->array_join_expression_list())
#if 0
std::cerr << "Query: " << query << std::endl;
std::cerr << "CTX: " << columns_context << std::endl;
std::cerr << "source_columns: ";
for (const auto & name : source_columns)
std::cerr << "'" << name.name << "' ";
std::cerr << "required: ";
for (const auto & name : required)
std::cerr << "'" << name << "' ";
std::cerr << std::endl;
#endif
if (columns_context.has_table_join)
{
ASTs & expressions = select_query->array_join_expression_list()->children;
for (size_t i = 0; i < expressions.size(); ++i)
const AnalyzedJoin & analyzed_join = analyzedJoin();
#if 0
std::cerr << "key_names_left: ";
for (const auto & name : analyzed_join.key_names_left)
std::cerr << "'" << name << "' ";
std::cerr << "key_names_right: ";
for (const auto & name : analyzed_join.key_names_right)
std::cerr << "'" << name << "' ";
std::cerr << "columns_from_joined_table: ";
for (const auto & column : analyzed_join.columns_from_joined_table)
std::cerr << "'" << column.name_and_type.name << '/' << column.original_name << "' ";
std::cerr << "available_joined_columns: ";
for (const auto & column : analyzed_join.available_joined_columns)
std::cerr << "'" << column.name_and_type.name << '/' << column.original_name << "' ";
std::cerr << std::endl;
#endif
NameSet avaliable_columns;
for (const auto & name : source_columns)
avaliable_columns.insert(name.name);
NameSet right_keys;
for (const auto & right_key_name : analyzed_join.key_names_right)
right_keys.insert(right_key_name);
/** You also need to ignore the identifiers of the columns that are obtained by JOIN.
* (Do not assume that they are required for reading from the "left" table).
*/
columns_added_by_join.clear();
for (const auto & joined_column : analyzed_join.available_joined_columns)
{
/// Ignore the top-level identifiers from the ARRAY JOIN section.
/// Then add them separately.
if (typeid_cast<ASTIdentifier *>(expressions[i].get()))
auto & name = joined_column.name_and_type.name;
if (required.count(name) && !avaliable_columns.count(name))
{
ignored.insert(expressions[i]->getColumnName());
}
else
{
/// Nothing needs to be ignored for expressions in ARRAY JOIN.
NameSet empty;
RequiredSourceColumnsVisitor::Data visitor_data{available_columns, required, empty, empty, empty};
RequiredSourceColumnsVisitor(visitor_data).visit(expressions[i]);
}
columns_added_by_join.push_back(joined_column);
required.erase(name);
ignored.insert(expressions[i]->getAliasOrColumnName());
/// Some columns from right join key may be used in query. This columns will be appended to block during join.
if (right_keys.count(name))
columns_added_by_join_from_right_keys.insert(name);
}
}
/// @fix filter required columns according to misqualified names in JOIN ON
if (columns_context.has_table_join &&
columns_context.tables.size() >= 2 &&
columns_context.tables[1].join &&
columns_context.tables[1].join->on_expression)
{
NameSet fixed_required;
for (const auto & req_name : required)
{
bool collated = false;
String cropped_name = req_name;
static const constexpr size_t max_column_prefix = 2;
for (size_t i = 0; i < max_column_prefix && !collated; ++i)
{
cropped_name = cropDatabaseOrTableName(cropped_name);
if (avaliable_columns.count(cropped_name))
{
fixed_required.insert(cropped_name);
collated = true;
break;
}
for (const auto & joined_column : analyzed_join.available_joined_columns)
{
auto & name = joined_column.name_and_type.name;
if (cropped_name == name)
{
columns_added_by_join.push_back(joined_column);
if (right_keys.count(name))
columns_added_by_join_from_right_keys.insert(name);
collated = true;
break;
}
}
}
if (!collated)
fixed_required.insert(req_name);
}
required.swap(fixed_required);
}
/// @note required_columns_from_joined_table is output
joined_block_actions = analyzed_join.createJoinedBlockActions(
columns_added_by_join, select_query, context, required_columns_from_joined_table);
}
/** You also need to ignore the identifiers of the columns that are obtained by JOIN.
* (Do not assume that they are required for reading from the "left" table).
*/
NameSet available_joined_columns;
for (const auto & joined_column : analyzedJoin().available_joined_columns)
available_joined_columns.insert(joined_column.name_and_type.name);
NameSet required_joined_columns;
for (const auto & left_key_ast : syntax->analyzed_join.key_asts_left)
if (columns_context.has_array_join)
{
NameSet empty;
RequiredSourceColumnsVisitor::Data columns_data{available_columns, required, ignored, empty, required_joined_columns};
ASTPtr tmp = left_key_ast;
RequiredSourceColumnsVisitor(columns_data).visit(tmp);
/// Insert the columns required for the ARRAY JOIN calculation into the required columns list.
NameSet array_join_sources;
for (const auto & result_source : syntax->array_join_result_to_source)
array_join_sources.insert(result_source.second);
for (const auto & column_name_type : source_columns)
if (array_join_sources.count(column_name_type.name))
required.insert(column_name_type.name);
}
RequiredSourceColumnsVisitor::Data columns_visitor_data{available_columns, required, ignored,
available_joined_columns, required_joined_columns};
RequiredSourceColumnsVisitor(columns_visitor_data).visit(query);
columns_added_by_join = analyzedJoin().available_joined_columns;
for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end();)
{
if (required_joined_columns.count(it->name_and_type.name))
++it;
else
columns_added_by_join.erase(it++);
}
joined_block_actions = analyzedJoin().createJoinedBlockActions(
columns_added_by_join, select_query, context, required_columns_from_joined_table);
/// Some columns from right join key may be used in query. This columns will be appended to block during join.
for (const auto & right_key_name : analyzedJoin().key_names_right)
if (required_joined_columns.count(right_key_name))
columns_added_by_join_from_right_keys.insert(right_key_name);
/// Insert the columns required for the ARRAY JOIN calculation into the required columns list.
NameSet array_join_sources;
for (const auto & result_source : syntax->array_join_result_to_source)
array_join_sources.insert(result_source.second);
for (const auto & column_name_type : source_columns)
if (array_join_sources.count(column_name_type.name))
required.insert(column_name_type.name);
/// You need to read at least one column to find the number of rows.
if (select_query && required.empty())
required.insert(ExpressionActions::getSmallestColumn(source_columns));
@ -1118,9 +1175,17 @@ void ExpressionAnalyzer::collectUsedColumns()
}
if (!unknown_required_source_columns.empty())
{
std::stringstream ss;
ss << columns_context;
ss << "source_columns: ";
for (const auto & name : source_columns)
ss << "'" << name.name << "' ";
throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin()
+ (select_query && !select_query->tables ? ". Note that there is no tables (FROM clause) in your query" : ""),
ErrorCodes::UNKNOWN_IDENTIFIER);
+ (select_query && !select_query->tables ? ". Note that there is no tables (FROM clause) in your query" : "")
+ ", context: " + ss.str(), ErrorCodes::UNKNOWN_IDENTIFIER);
}
}

View File

@ -232,8 +232,7 @@ private:
const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; }
/** Remove all unnecessary columns from the list of all available columns of the table (`columns`).
* At the same time, form a set of unknown columns (`unknown_required_source_columns`),
* as well as the columns added by JOIN (`columns_added_by_join`).
* At the same time, form a set of columns added by JOIN (`columns_added_by_join`).
*/
void collectUsedColumns();

View File

@ -0,0 +1,232 @@
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Common/typeid_cast.h>
#include <Core/Names.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
static std::vector<String> extractNamesFromLambda(const ASTFunction & node)
{
if (node.arguments->children.size() != 2)
throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTFunction * lambda_args_tuple = typeid_cast<ASTFunction *>(node.arguments->children[0].get());
if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);
std::vector<String> names;
for (auto & child : lambda_args_tuple->arguments->children)
{
ASTIdentifier * identifier = typeid_cast<ASTIdentifier *>(child.get());
if (!identifier)
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
names.push_back(identifier->name);
}
return names;
}
/// Tells the visitor whether it should descend from `node` into `child` by itself.
bool RequiredSourceColumnsMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
{
    /// SELECT children are never visited automatically.
    if (typeid_cast<ASTSelectQuery *>(child.get()))
        return false;

    IAST * parent = node.get();

    /// Processed. Do not need children.
    const bool handled_by_matcher = typeid_cast<ASTIdentifier *>(parent)
        || typeid_cast<ASTTableExpression *>(parent)
        || typeid_cast<ASTArrayJoin *>(parent)
        || typeid_cast<ASTSelectQuery *>(parent);
    if (handled_by_matcher)
        return false;

    /// "indexHint" is a special function for index analysis. Everything that is inside it is not calculated. @sa KeyCondition
    /// "lambda" visit children itself.
    if (const auto * function = typeid_cast<ASTFunction *>(parent))
        return function->name != "indexHint" && function->name != "lambda";

    return true;
}
/// Entry point of the matcher: dispatches on the concrete type of `ast`.
/// Returns the nodes the visitor has to visit additionally (empty for most node types).
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTPtr & ast, Data & data)
{
    IAST * raw = ast.get();

    /// Nodes whose results are columns: remember the alias, then process the node itself.
    if (auto * identifier = typeid_cast<ASTIdentifier *>(raw))
    {
        data.addColumnAliasIfAny(*ast);
        visit(*identifier, ast, data);
        return {};
    }

    if (auto * function = typeid_cast<ASTFunction *>(raw))
    {
        data.addColumnAliasIfAny(*ast);
        visit(*function, ast, data);
        return {};
    }

    /// Nodes whose results are tables.
    if (auto * tables_element = typeid_cast<ASTTablesInSelectQueryElement *>(raw))
    {
        visit(*tables_element, ast, data);
        return {};
    }

    if (auto * table_expression = typeid_cast<ASTTableExpression *>(raw))
    {
        /// No addTableAliasIfAny() here: the alias is attached to the child.
        visit(*table_expression, ast, data);
        return {};
    }

    if (auto * select = typeid_cast<ASTSelectQuery *>(raw))
    {
        data.addTableAliasIfAny(*ast);
        return visit(*select, ast, data);
    }

    if (typeid_cast<ASTSubquery *>(raw))
    {
        data.addTableAliasIfAny(*ast);
        return {};
    }

    /// Everything else.
    if (auto * array_join = typeid_cast<ASTArrayJoin *>(raw))
    {
        data.has_array_join = true;
        return visit(*array_join, ast, data);
    }

    return {};
}
/// Registers the top-level SELECT items as public columns / aliases and returns the children
/// of the query to visit: everything except the select list first, the select list last.
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTSelectQuery & select, const ASTPtr &, Data & data)
{
    ASTPtr & select_list = select.select_expression_list;

    /// special case for top-level SELECT items: they are publics
    for (auto & item : select_list->children)
    {
        auto * identifier = typeid_cast<ASTIdentifier *>(item.get());
        if (identifier)
            data.addColumnIdentifier(*identifier, true);
        else
            data.addColumnAliasIfAny(*item, true);
    }

    std::vector<ASTPtr *> to_visit;
    for (auto & child : select.children)
    {
        if (child == select_list)
            continue;
        to_visit.push_back(&child);
    }

    /// revisit select_expression_list (with children) when all the aliases are set
    to_visit.push_back(&select_list);
    return to_visit;
}
/// Registers the identifier as a column unless its name is currently listed in `private_aliases`.
/// Throws if the identifier has an empty name.
void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPtr &, Data & data)
{
    const auto & column_name = node.name;
    if (column_name.empty())
        throw Exception("Expected not empty name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    const bool is_private = data.private_aliases.count(column_name) != 0;
    if (is_private)
        return;

    data.addColumnIdentifier(node);
}
void RequiredSourceColumnsMatcher::visit(const ASTFunction & node, const ASTPtr &, Data & data)
{
/// Do not add formal parameters of the lambda expression
if (node.name == "lambda")
{
Names local_aliases;
for (const auto & name : extractNamesFromLambda(node))
if (data.private_aliases.insert(name).second)
local_aliases.push_back(name);
/// visit child with masked local aliases
visit(node.arguments->children[1], data);
for (const auto & name : local_aliases)
data.private_aliases.erase(name);
}
}
/// Appends one FROM element to `data.tables`: the table expression and the JOIN description
/// found among the children of `node` (either may be absent and is then stored as nullptr).
/// Sets `data.has_table_join` when a JOIN description is present.
void RequiredSourceColumnsMatcher::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data)
{
    ASTTableExpression * table_expression = nullptr;
    ASTTableJoin * table_join = nullptr;

    /// if there are several children of the same kind, the last one wins
    for (auto & child : node.children)
    {
        IAST * raw = child.get();
        if (auto * as_expression = typeid_cast<ASTTableExpression *>(raw))
            table_expression = as_expression;
        if (auto * as_join = typeid_cast<ASTTableJoin *>(raw))
            table_join = as_join;
    }

    if (table_join != nullptr)
        data.has_table_join = true;

    data.tables.emplace_back(ColumnNamesContext::JoinedTable{table_expression, table_join});
}
/// Records the table aliases of a table expression and returns the children to visit:
/// the table function and/or the subquery (the table name itself is not visited).
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(ASTTableExpression & node, const ASTPtr &, Data & data)
{
    std::vector<ASTPtr *> to_visit;

    /// ASTIdentifiers here are tables. Do not visit them as generic ones.
    if (node.database_and_table_name)
        data.addTableAliasIfAny(*node.database_and_table_name);

    for (ASTPtr * part : {&node.table_function, &node.subquery})
    {
        if (!*part)
            continue;
        data.addTableAliasIfAny(**part);
        to_visit.push_back(part);
    }

    return to_visit;
}
std::vector<ASTPtr *> RequiredSourceColumnsMatcher::visit(const ASTArrayJoin & node, const ASTPtr &, Data & data)
{
ASTPtr expression_list = node.expression_list;
if (!expression_list || expression_list->children.empty())
throw Exception("Expected not empty expression_list", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
std::vector<ASTPtr *> out;
/// Tech debt. Ignore ARRAY JOIN top-level identifiers and aliases. There's its own logic for them.
for (auto & expr : expression_list->children)
{
data.addArrayJoinAliasIfAny(*expr);
if (auto * identifier = typeid_cast<ASTIdentifier *>(expr.get()))
{
data.addArrayJoinIdentifier(*identifier);
continue;
}
out.push_back(&expr);
}
return out;
}
}

View File

@ -1,140 +1,45 @@
#pragma once
#include <Core/Names.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <DataTypes/NestedUtils.h>
#include <Common/typeid_cast.h>
#include "InDepthNodeVisitor.h"
#include <Interpreters/ColumnNamesContext.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Get a set of necessary columns to read from the table.
* In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified.
* The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table.
* Put in required_joined_columns the set of columns available from JOIN and needed.
*/
class ASTIdentifier;
class ASTFunction;
class ASTSelectQuery;
struct ASTTablesInSelectQueryElement;
struct ASTArrayJoin;
struct ASTTableExpression;
class RequiredSourceColumnsMatcher
{
public:
struct Data
{
const NameSet & available_columns;
NameSet & required_source_columns;
NameSet & ignored_names;
const NameSet & available_joined_columns;
NameSet & required_joined_columns;
};
using Data = ColumnNamesContext;
static constexpr const char * label = "RequiredSourceColumns";
static bool needChildVisit(ASTPtr & node, const ASTPtr & child)
{
/// We will not go to the ARRAY JOIN section, because we need to look at the names of non-ARRAY-JOIN columns.
/// There, `collectUsedColumns` will send us separately.
if (typeid_cast<ASTSelectQuery *>(child.get()) ||
typeid_cast<ASTArrayJoin *>(child.get()) ||
typeid_cast<ASTTableExpression *>(child.get()) ||
typeid_cast<ASTTableJoin *>(child.get()))
return false;
/// Processed. Do not need children.
if (typeid_cast<ASTIdentifier *>(node.get()))
return false;
if (auto * f = typeid_cast<ASTFunction *>(node.get()))
{
/// "indexHint" is a special function for index analysis. Everything that is inside it is not calculated. @sa KeyCondition
/// "lambda" visit children itself.
if (f->name == "indexHint" || f->name == "lambda")
return false;
}
return true;
}
/** Find all the identifiers in the query.
* We will use depth first search in AST.
* In this case
* - for lambda functions we will not take formal parameters;
* - do not go into subqueries (they have their own identifiers);
* - there is some exception for the ARRAY JOIN clause (it has a slightly different identifiers);
* - we put identifiers available from JOIN in required_joined_columns.
*/
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data)
{
if (auto * t = typeid_cast<ASTIdentifier *>(ast.get()))
visit(*t, ast, data);
if (auto * t = typeid_cast<ASTFunction *>(ast.get()))
visit(*t, ast, data);
return {};
}
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
private:
static void visit(const ASTIdentifier & node, const ASTPtr &, Data & data)
{
if (node.general()
&& !data.ignored_names.count(node.name)
&& !data.ignored_names.count(Nested::extractTableName(node.name)))
{
/// Read column from left table if has.
if (!data.available_joined_columns.count(node.name) || data.available_columns.count(node.name))
data.required_source_columns.insert(node.name);
else
data.required_joined_columns.insert(node.name);
}
}
static void visit(const ASTFunction & node, const ASTPtr &, Data & data)
{
NameSet & ignored_names = data.ignored_names;
if (node.name == "lambda")
{
if (node.arguments->children.size() != 2)
throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTFunction * lambda_args_tuple = typeid_cast<ASTFunction *>(node.arguments->children.at(0).get());
if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);
/// You do not need to add formal parameters of the lambda expression in required_source_columns.
Names added_ignored;
for (auto & child : lambda_args_tuple->arguments->children)
{
ASTIdentifier * identifier = typeid_cast<ASTIdentifier *>(child.get());
if (!identifier)
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
String & name = identifier->name;
if (!ignored_names.count(name))
{
ignored_names.insert(name);
added_ignored.push_back(name);
}
}
/// @note It's a special case where we visit children inside the matcher, not in visitor.
visit(node.arguments->children[1], data);
for (size_t i = 0; i < added_ignored.size(); ++i)
ignored_names.erase(added_ignored[i]);
}
}
static void visit(const ASTIdentifier & node, const ASTPtr &, Data & data);
static void visit(const ASTFunction & node, const ASTPtr &, Data & data);
static void visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data);
static std::vector<ASTPtr *> visit(ASTTableExpression & node, const ASTPtr &, Data & data);
static std::vector<ASTPtr *> visit(const ASTArrayJoin & node, const ASTPtr &, Data & data);
static std::vector<ASTPtr *> visit(ASTSelectQuery & select, const ASTPtr &, Data & data);
};
/// Get a set of necessary columns to read from the table.
using RequiredSourceColumnsVisitor = InDepthNodeVisitor<RequiredSourceColumnsMatcher, true>;
/// Extracts all the information about columns and tables from ASTSelectQuery block into ColumnNamesContext object.
/// It doesn't use anything but the AST. It visits nodes from bottom to top, except for ASTFunction content, to get aliases in the right manner.
/// @note There's some ambiguity with nested column names that can't be resolved without the schema.
using RequiredSourceColumnsVisitor = InDepthNodeVisitor<RequiredSourceColumnsMatcher, false>;
}