2019-01-24 14:22:58 +00:00
|
|
|
#include <Poco/String.h>
|
2018-09-12 05:41:09 +00:00
|
|
|
#include <Core/Names.h>
|
|
|
|
#include <Interpreters/QueryNormalizer.h>
|
2019-01-25 15:42:24 +00:00
|
|
|
#include <Interpreters/IdentifierSemantic.h>
|
2019-01-24 14:22:58 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2021-08-05 16:52:00 +00:00
|
|
|
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
2018-09-12 05:41:09 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
2019-06-16 16:47:47 +00:00
|
|
|
#include <Parsers/ASTQueryParameter.h>
|
2018-09-12 05:41:09 +00:00
|
|
|
#include <Parsers/ASTTablesInSelectQuery.h>
|
2022-04-07 21:41:05 +00:00
|
|
|
#include <Parsers/ASTInterpolateElement.h>
|
2018-09-12 05:41:09 +00:00
|
|
|
#include <Common/StringUtils/StringUtils.h>
|
2019-10-08 18:42:22 +00:00
|
|
|
#include <Common/quoteString.h>
|
2019-12-12 08:57:25 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2018-09-12 05:41:09 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes raised by the query normalizer in this file; the constants themselves are defined elsewhere.
namespace ErrorCodes
{
    /// Raised by CheckASTDepth when the nesting level exceeds settings.max_ast_depth.
    extern const int TOO_DEEP_AST;
    /// Raised when substituting an alias would refer back to itself.
    extern const int CYCLIC_ALIASES;
    /// Raised when an ASTQueryParameter is still present in the tree being normalized.
    extern const int UNKNOWN_QUERY_PARAMETER;
    /// Raised when a query argument is used in a function other than `view`.
    extern const int BAD_ARGUMENTS;
}
|
|
|
|
|
2019-01-24 14:22:58 +00:00
|
|
|
|
2019-01-11 14:09:23 +00:00
|
|
|
class CheckASTDepth
|
2018-09-12 05:41:09 +00:00
|
|
|
{
|
2019-01-11 14:09:23 +00:00
|
|
|
public:
|
2020-03-18 03:27:32 +00:00
|
|
|
explicit CheckASTDepth(QueryNormalizer::Data & data_)
|
2019-01-11 14:09:23 +00:00
|
|
|
: data(data_)
|
|
|
|
{
|
|
|
|
if (data.level > data.settings.max_ast_depth)
|
|
|
|
throw Exception("Normalized AST is too deep. Maximum: " + toString(data.settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST);
|
|
|
|
++data.level;
|
|
|
|
}
|
2018-09-12 05:41:09 +00:00
|
|
|
|
2019-01-11 14:09:23 +00:00
|
|
|
~CheckASTDepth()
|
2018-09-12 05:41:09 +00:00
|
|
|
{
|
2019-01-11 14:09:23 +00:00
|
|
|
--data.level;
|
2018-09-12 05:41:09 +00:00
|
|
|
}
|
2019-01-11 14:09:23 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
QueryNormalizer::Data & data;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class RestoreAliasOnExitScope
|
|
|
|
{
|
|
|
|
public:
|
2020-03-18 03:27:32 +00:00
|
|
|
explicit RestoreAliasOnExitScope(String & alias_)
|
2019-01-11 14:09:23 +00:00
|
|
|
: alias(alias_)
|
|
|
|
, copy(alias_)
|
|
|
|
{}
|
|
|
|
|
|
|
|
~RestoreAliasOnExitScope()
|
2018-09-12 05:41:09 +00:00
|
|
|
{
|
2019-01-11 14:09:23 +00:00
|
|
|
alias = copy;
|
2018-09-12 05:41:09 +00:00
|
|
|
}
|
|
|
|
|
2019-01-11 14:09:23 +00:00
|
|
|
private:
|
|
|
|
String & alias;
|
|
|
|
const String copy;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-01-11 17:14:17 +00:00
|
|
|
/// Substitutes an identifier that names an alias with the aliased expression.
///
/// `node` is the identifier and `ast` is the pointer that owns it. On substitution `ast` is re-pointed
/// either to the aliased node itself or, if the identifier carries its own alias, to a clone of the
/// aliased node with that alias set. `ast` is left untouched when no substitution applies.
///
/// Throws CYCLIC_ALIASES when the identifier refers to the alias currently being expanded and
/// data.allow_self_aliases is not set, or when the substitution would form a loop of aliases.
void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
    /// Nothing to do when the identifier does not yield a column name.
    if (!IdentifierSemantic::getColumnName(node))
        return;

    /// Only used before `ast` is reassigned below (reassignment may release the node).
    const auto & node_name = node.name();

    if (data.settings.prefer_column_name_to_alias)
    {
        if (data.source_columns_set.find(node_name) != data.source_columns_set.end())
            return;
    }

    const String & current_alias = data.current_alias;
    const bool refers_to_current_alias = current_alias == node_name;

    if (refers_to_current_alias && !data.allow_self_aliases)
        throw Exception(ErrorCodes::CYCLIC_ALIASES, "Self referencing of {} to {}. Cyclic alias", backQuote(current_alias), backQuote(node_name));

    /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column").
    if (refers_to_current_alias)
        return;

    auto it_alias = data.aliases.find(node_name);
    if (it_alias == data.aliases.end())
        return;

    if (!IdentifierSemantic::canBeAlias(node))
        return;

    /// We are alias for other column (node.name), but we are alias by
    /// ourselves to some other column
    const auto & alias_node = it_alias->second;

    const String target_alias_or_name = alias_node->getAliasOrColumnName();
    /// Computed once and reused for both loop checks below.
    const std::optional<String> target_column_name = IdentifierSemantic::getColumnName(alias_node);
    const String node_alias = ast->tryGetAlias();

    /// We have loop of multiple aliases
    const bool is_multi_alias_loop = data.current_asts.contains(alias_node.get());
    /// Our alias points to node.name, direct loop
    const bool is_direct_loop = node_name == target_alias_or_name && target_column_name && node_alias == *target_column_name;

    if (is_multi_alias_loop || is_direct_loop)
        throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);

    /// Let's replace it with the corresponding tree node.
    if (node_alias.empty() || node_alias == target_alias_or_name)
    {
        ast = alias_node;
        return;
    }

    /// Avoid infinite recursion here
    const bool is_cycle = target_column_name && *target_column_name == node_name;
    if (is_cycle)
        return;

    /// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a.
    ast = alias_node->clone();
    ast->setAlias(node_alias);
}
|
|
|
|
|
2019-02-14 12:31:13 +00:00
|
|
|
/// Normalizes the ON expression of a JOIN, if the element has a join with such an expression.
void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data)
{
    if (!node.table_join)
        return;

    auto & table_join = node.table_join->as<ASTTableJoin &>();
    if (!table_join.on_expression)
        return;

    visit(table_join.on_expression, data);
}
|
|
|
|
|
|
|
|
static bool needVisitChild(const ASTPtr & child)
|
|
|
|
{
|
2022-04-11 13:47:27 +00:00
|
|
|
/// exclude interpolate elements - they are not subject for normalization and will be processed in filling transform
|
2022-04-07 21:41:05 +00:00
|
|
|
return !(child->as<ASTSelectQuery>() || child->as<ASTTableExpression>() || child->as<ASTInterpolateElement>());
|
2019-01-11 17:14:17 +00:00
|
|
|
}
|
2018-09-12 05:41:09 +00:00
|
|
|
|
2019-01-11 17:14:17 +00:00
|
|
|
/// special visitChildren() for ASTSelectQuery
|
2019-04-09 14:22:35 +00:00
|
|
|
void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data)
|
2019-01-11 17:14:17 +00:00
|
|
|
{
|
2019-04-09 14:22:35 +00:00
|
|
|
for (auto & child : select.children)
|
2021-03-30 13:51:45 +00:00
|
|
|
{
|
2021-05-11 18:30:16 +00:00
|
|
|
if (needVisitChild(child))
|
2019-02-14 12:31:13 +00:00
|
|
|
visit(child, data);
|
2021-03-30 13:51:45 +00:00
|
|
|
}
|
2019-04-09 14:59:06 +00:00
|
|
|
|
2019-01-11 17:14:17 +00:00
|
|
|
/// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children,
|
|
|
|
/// but also in where_expression and having_expression.
|
2019-04-09 14:22:35 +00:00
|
|
|
if (select.prewhere())
|
2019-04-09 14:59:06 +00:00
|
|
|
visit(select.refPrewhere(), data);
|
2019-04-09 14:22:35 +00:00
|
|
|
if (select.where())
|
2019-04-09 14:59:06 +00:00
|
|
|
visit(select.refWhere(), data);
|
2019-04-09 14:22:35 +00:00
|
|
|
if (select.having())
|
2019-04-09 14:59:06 +00:00
|
|
|
visit(select.refHaving(), data);
|
2019-01-11 17:14:17 +00:00
|
|
|
}
|
2018-09-12 05:41:09 +00:00
|
|
|
|
2019-01-11 17:14:17 +00:00
|
|
|
/// Don't go into subqueries.
|
|
|
|
/// Don't go into select query. It processes children itself.
|
|
|
|
/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters
|
|
|
|
/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
|
2020-12-28 09:56:38 +00:00
|
|
|
void QueryNormalizer::visitChildren(IAST * node, Data & data)
|
2019-01-11 17:14:17 +00:00
|
|
|
{
|
2020-12-25 04:59:17 +00:00
|
|
|
if (auto * func_node = node->as<ASTFunction>())
|
2018-09-12 05:41:09 +00:00
|
|
|
{
|
2020-09-09 07:41:38 +00:00
|
|
|
if (func_node->tryGetQueryArgument())
|
2020-08-28 14:07:14 +00:00
|
|
|
{
|
|
|
|
if (func_node->name != "view")
|
|
|
|
throw Exception("Query argument can only be used in the `view` TableFunction", ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
/// Don't go into query argument.
|
|
|
|
return;
|
|
|
|
}
|
2021-08-05 16:52:00 +00:00
|
|
|
|
|
|
|
/// For lambda functions we need to avoid replacing lambda parameters with external aliases, for example,
|
|
|
|
/// Select 1 as x, arrayMap(x -> x + 2, [1, 2, 3])
|
|
|
|
/// shouldn't be replaced with Select 1 as x, arrayMap(x -> **(1 as x)** + 2, [1, 2, 3])
|
|
|
|
Aliases extracted_aliases;
|
|
|
|
if (func_node->name == "lambda")
|
|
|
|
{
|
|
|
|
Names lambda_aliases = RequiredSourceColumnsMatcher::extractNamesFromLambda(*func_node);
|
|
|
|
for (const auto & name : lambda_aliases)
|
|
|
|
{
|
|
|
|
auto it = data.aliases.find(name);
|
|
|
|
if (it != data.aliases.end())
|
|
|
|
{
|
|
|
|
extracted_aliases.insert(data.aliases.extract(it));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-28 09:56:38 +00:00
|
|
|
/// We skip the first argument. We also assume that the lambda function can not have parameters.
|
|
|
|
size_t first_pos = 0;
|
|
|
|
if (func_node->name == "lambda")
|
|
|
|
first_pos = 1;
|
2018-09-12 05:41:09 +00:00
|
|
|
|
2020-12-28 09:56:38 +00:00
|
|
|
if (func_node->arguments)
|
2019-02-14 12:31:13 +00:00
|
|
|
{
|
2020-12-28 09:56:38 +00:00
|
|
|
auto & func_children = func_node->arguments->children;
|
2019-02-14 12:31:13 +00:00
|
|
|
|
2020-12-28 09:56:38 +00:00
|
|
|
for (size_t i = first_pos; i < func_children.size(); ++i)
|
2020-12-04 02:15:44 +00:00
|
|
|
{
|
2020-12-28 09:56:38 +00:00
|
|
|
auto & child = func_children[i];
|
|
|
|
|
|
|
|
if (needVisitChild(child))
|
|
|
|
visit(child, data);
|
2020-12-04 02:15:44 +00:00
|
|
|
}
|
2018-09-12 05:41:09 +00:00
|
|
|
}
|
2020-12-28 10:08:38 +00:00
|
|
|
|
2021-01-13 19:29:52 +00:00
|
|
|
if (func_node->window_definition)
|
2020-12-28 09:56:38 +00:00
|
|
|
{
|
2021-01-13 19:29:52 +00:00
|
|
|
visitChildren(func_node->window_definition.get(), data);
|
2020-12-28 09:56:38 +00:00
|
|
|
}
|
2021-08-05 16:52:00 +00:00
|
|
|
|
|
|
|
for (auto & it : extracted_aliases)
|
|
|
|
{
|
|
|
|
data.aliases.insert(it);
|
|
|
|
}
|
2018-09-12 05:41:09 +00:00
|
|
|
}
|
2019-03-11 13:22:51 +00:00
|
|
|
else if (!node->as<ASTSelectQuery>())
|
2018-09-12 05:41:09 +00:00
|
|
|
{
|
2019-01-11 17:14:17 +00:00
|
|
|
for (auto & child : node->children)
|
2019-02-14 12:31:13 +00:00
|
|
|
if (needVisitChild(child))
|
|
|
|
visit(child, data);
|
2018-09-12 05:41:09 +00:00
|
|
|
}
|
2019-01-11 17:14:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Normalizes the subtree rooted at `ast`; `ast` may be re-pointed to a different node as a result.
///
/// The result for every visited root is remembered in data.finished_asts, and roots currently being
/// processed are tracked in data.current_asts. Throws UNKNOWN_QUERY_PARAMETER if an ASTQueryParameter
/// is encountered; exceptions from the depth check and from the nested visits propagate to the caller.
void QueryNormalizer::visit(ASTPtr & ast, Data & data)
{
    CheckASTDepth depth_guard(data);
    RestoreAliasOnExitScope alias_guard(data.current_alias);

    auto & finished = data.finished_asts;
    auto & in_progress = data.current_asts;

    /// Reuse the stored result if this root has been handled before.
    if (auto done = finished.find(ast); done != finished.end())
    {
        ast = done->second;
        return;
    }

    ASTPtr initial_ast = ast;
    in_progress.insert(initial_ast.get());

    /// A non-empty alias on this node becomes the current alias until the guard above restores the old one.
    if (String own_alias = ast->tryGetAlias(); !own_alias.empty())
        data.current_alias = own_alias;

    if (auto * node_id = ast->as<ASTIdentifier>())
    {
        visit(*node_id, ast, data);
    }
    else if (auto * node_tables = ast->as<ASTTablesInSelectQueryElement>())
    {
        visit(*node_tables, ast, data);
    }
    else if (auto * node_select = ast->as<ASTSelectQuery>())
    {
        visit(*node_select, ast, data);
    }
    else if (auto * node_param = ast->as<ASTQueryParameter>())
    {
        throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER);
    }
    else if (auto * node_function = ast->as<ASTFunction>())
    {
        if (node_function->parameters)
            visit(node_function->parameters, data);
    }

    /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias.
    const bool root_replaced = ast.get() != initial_ast.get();
    if (root_replaced)
        visit(ast, data);
    else
        visitChildren(ast.get(), data);

    in_progress.erase(initial_ast.get());
    in_progress.erase(ast.get());

    if (data.ignore_alias && !ast->tryGetAlias().empty())
        ast->setAlias("");

    finished[initial_ast] = ast;

    /// @note can not place it in CheckASTDepth dtor cause of exception.
    if (data.level != 1)
        return;

    try
    {
        ast->checkSize(data.settings.max_expanded_ast_elements);
    }
    catch (Exception & e)
    {
        e.addMessage("(after expansion of aliases)");
        throw;
    }
}
|
|
|
|
|
|
|
|
}
|