ClickHouse/dbms/src/Interpreters/ExpressionAnalyzer.cpp
2013-06-03 11:19:31 +00:00

1014 lines
32 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <DB/DataTypes/FieldToDataType.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTAsterisk.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTSubquery.h>
#include <DB/Parsers/ASTSet.h>
#include <DB/Parsers/ASTOrderByElement.h>
#include <DB/DataTypes/DataTypeSet.h>
#include <DB/DataTypes/DataTypeTuple.h>
#include <DB/DataTypes/DataTypeExpression.h>
#include <DB/Functions/FunctionsMiscellaneous.h>
#include <DB/Columns/ColumnSet.h>
#include <DB/Columns/ColumnExpression.h>
#include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Storages/StorageMergeTree.h>
#include <DB/Storages/StorageDistributed.h>
namespace DB
{
/// Returns a pointer to the `alias` member of the node if it is a function,
/// an identifier or a literal; returns NULL for any other kind of node.
static std::string * GetAlias(ASTPtr & ast)
{
    ASTFunction * as_function = dynamic_cast<ASTFunction *>(&*ast);
    if (as_function)
        return &as_function->alias;

    ASTIdentifier * as_identifier = dynamic_cast<ASTIdentifier *>(&*ast);
    if (as_identifier)
        return &as_identifier->alias;

    ASTLiteral * as_literal = dynamic_cast<ASTLiteral *>(&*ast);
    if (as_literal)
        return &as_literal->alias;

    return NULL;
}
void ExpressionAnalyzer::init()
{
select_query = dynamic_cast<ASTSelectQuery *>(&*ast);
has_aggregation = false;
createAliasesDict(ast); /// Если есть агрегатные функции, присвоит has_aggregation=true.
normalizeTree();
/// Найдем агрегатные функции.
if (select_query && (select_query->group_expression_list || select_query->having_expression))
has_aggregation = true;
ExpressionActions temp_actions(columns, settings);
getAggregatesImpl(ast, temp_actions);
if (has_aggregation)
{
assertSelect();
/// Найдем ключи агрегации.
if (select_query->group_expression_list)
{
const ASTs & group_asts = select_query->group_expression_list->children;
for (size_t i = 0; i < group_asts.size(); ++i)
{
getActionsImpl(group_asts[i], true, false, temp_actions);
NameAndTypePair key;
key.first = group_asts[i]->getColumnName();
key.second = temp_actions.getSampleBlock().getByName(key.first).type;
aggregation_keys.push_back(key);
}
}
aggregated_columns = aggregation_keys;
for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
{
AggregateDescription & desc = aggregate_descriptions[i];
aggregated_columns.push_back(NameAndTypePair(desc.column_name, desc.function->getReturnType()));
}
}
else
{
aggregated_columns = columns;
}
}
/// Linear search for the column called `name` in `cols`.
/// Returns an iterator to the first match, or cols.end() if there is none.
NamesAndTypesList::iterator ExpressionAnalyzer::findColumn(const String & name, NamesAndTypesList & cols)
{
    NamesAndTypesList::iterator pos = cols.begin();
    while (pos != cols.end() && !(pos->first == name))
        ++pos;
    return pos;
}
/// Fills `aliases` with every aliased node of the tree.
/// Throws MULTIPLE_EXPRESSIONS_FOR_ALIAS if two different expressions share an alias.
void ExpressionAnalyzer::createAliasesDict(ASTPtr & ast)
{
    /// Bottom-up traversal. Do not descend into subqueries.
    ASTs & children = ast->children;
    for (size_t idx = 0; idx < children.size(); ++idx)
    {
        if (dynamic_cast<ASTSelectQuery *>(&*children[idx]))
            continue;
        createAliasesDict(children[idx]);
    }

    std::string * alias = GetAlias(ast);
    if (!alias || alias->empty())
        return;

    Aliases::iterator known = aliases.find(*alias);
    if (known != aliases.end() && ast->getTreeID() != known->second->getTreeID())
        throw Exception("Different expressions with the same alias " + *alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);

    aliases[*alias] = ast;
}
/// Looks up the table a SELECT query reads from via context.tryGetTable().
/// Returns an empty StoragePtr if the tree is not a SELECT, has no table,
/// or reads from a subquery.
StoragePtr ExpressionAnalyzer::getTable()
{
    const ASTSelectQuery * select = dynamic_cast<const ASTSelectQuery *>(&*ast);
    if (!select)
        return StoragePtr();

    if (!select->table)
        return StoragePtr();

    /// FROM (subquery) - there is no table to look up.
    if (dynamic_cast<const ASTSelectQuery *>(&*select->table))
        return StoragePtr();

    /// An empty database name is passed when the query does not specify one.
    String database;
    if (select->database)
        database = dynamic_cast<const ASTIdentifier &>(*select->database).name;
    else
        database = "";

    const String & table = dynamic_cast<const ASTIdentifier &>(*select->table).name;
    return context.tryGetTable(database, table);
}
bool ExpressionAnalyzer::needSignRewrite()
{
if (settings.sign_rewrite && storage)
{
if (const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage))
return merge_tree->getName() == "CollapsingMergeTree";
if (const StorageDistributed * distributed = dynamic_cast<const StorageDistributed *>(&*storage))
return !distributed->getSignColumnName().empty();
}
return false;
}
/// Returns the sign column name reported by the storage when it is a StorageMergeTree
/// or a StorageDistributed; returns an empty string for any other storage.
String ExpressionAnalyzer::getSignColumnName()
{
    const StorageMergeTree * merge_tree = dynamic_cast<const StorageMergeTree *>(&*storage);
    if (merge_tree)
        return merge_tree->getSignColumnName();

    const StorageDistributed * distributed = dynamic_cast<const StorageDistributed *>(&*storage);
    if (distributed)
        return distributed->getSignColumnName();

    return "";
}
/// Creates a new identifier node that refers to the sign column.
/// The node takes its source range from the root of the analyzed tree.
ASTPtr ExpressionAnalyzer::createSignColumn()
{
    ASTIdentifier * identifier = new ASTIdentifier(ast->range, sign_column_name);
    ASTPtr identifier_node = identifier;

    identifier->name = sign_column_name;

    return identifier_node;
}
/// Builds the replacement for count(): sum(Sign). The alias of `node` is carried over.
ASTPtr ExpressionAnalyzer::rewriteCount(const ASTFunction * node)
{
    /// Argument list: Sign
    ASTExpressionList * sum_args = new ASTExpressionList;
    ASTPtr sum_args_node = sum_args;
    sum_args->children.push_back(createSignColumn());

    /// sum(Sign)
    ASTFunction * sum_func = new ASTFunction;
    ASTPtr sum_func_node = sum_func;
    sum_func->name = "sum";
    sum_func->alias = node->alias;
    sum_func->arguments = sum_args_node;
    sum_func->children.push_back(sum_args_node);

    return sum_func_node;
}
/// Builds the replacement for sum(x): sum(multiply(Sign, x)), where x is the first
/// argument of `node`. The alias of `node` is carried over to the new sum.
ASTPtr ExpressionAnalyzer::rewriteSum(const ASTFunction * node)
{
    /// Argument list of the product: Sign, x
    ASTExpressionList * product_args = new ASTExpressionList;
    ASTPtr product_args_node = product_args;
    product_args->children.push_back(createSignColumn());
    product_args->children.push_back(node->arguments->children[0]);

    /// multiply(Sign, x)
    ASTFunction * product = new ASTFunction;
    ASTPtr product_node = product;
    product->name = "multiply";
    product->arguments = product_args_node;
    product->children.push_back(product_args_node);

    /// Argument list of the sum: the product
    ASTExpressionList * sum_args = new ASTExpressionList;
    ASTPtr sum_args_node = sum_args;
    sum_args->children.push_back(product_node);

    /// sum(multiply(Sign, x))
    ASTFunction * sum_func = new ASTFunction;
    ASTPtr sum_func_node = sum_func;
    sum_func->name = "sum";
    sum_func->alias = node->alias;
    sum_func->arguments = sum_args_node;
    sum_func->children.push_back(sum_args_node);

    return sum_func_node;
}
/// Builds the replacement for avg(x): divide(sum(Sign * x), sum(Sign)).
/// Only the resulting divide keeps the alias of `node`.
ASTPtr ExpressionAnalyzer::rewriteAvg(const ASTFunction * node)
{
    /// A copy of the node without an alias, used to build the numerator and the denominator.
    ASTPtr unaliased_node = node->clone();
    ASTFunction * unaliased = dynamic_cast<ASTFunction *>(&*unaliased_node);
    unaliased->alias = "";

    /// Argument list of the division: sum(Sign * x), sum(Sign)
    ASTExpressionList * quotient_args = new ASTExpressionList;
    ASTPtr quotient_args_node = quotient_args;
    quotient_args->children.push_back(rewriteSum(unaliased));
    quotient_args->children.push_back(rewriteCount(unaliased));

    /// sum(Sign * x) / sum(Sign)
    ASTFunction * quotient = new ASTFunction;
    ASTPtr quotient_node = quotient;
    quotient->name = "divide";
    quotient->alias = node->alias;
    quotient->arguments = quotient_args_node;
    quotient->children.push_back(quotient_args_node);

    return quotient_node;
}
/// If `ast` is a call to count, sum or avg, replaces it in place with the
/// sign-aware equivalent built by rewriteCount / rewriteSum / rewriteAvg.
/// Any other node is left untouched.
void ExpressionAnalyzer::considerSignRewrite(ASTPtr & ast)
{
    ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast);
    if (!node)
        return;

    /// `name` refers into the node currently held by `ast`. Once `ast` is reassigned
    /// that node may be destroyed (if `ast` was its last owner), so the name must not
    /// be examined again afterwards - hence a mutually exclusive else-if chain.
    const String & name = node->name;
    if (name == "count")
        ast = rewriteCount(node);
    else if (name == "sum")
        ast = rewriteSum(node);
    else if (name == "avg")
        ast = rewriteAvg(node);
}
void ExpressionAnalyzer::normalizeTree()
{
SetOfASTs tmp_set;
MapOfASTs tmp_map;
if (needSignRewrite())
sign_column_name = getSignColumnName();
normalizeTreeImpl(ast, tmp_map, tmp_set);
}
/// Normalizes the subtree rooted at `ast` in place: applies sign-rewrite to aggregate calls
/// (when sign_column_name is set), substitutes aliases, expands asterisks into column
/// identifiers, and assigns `kind` to function nodes.
/// finished_asts - nodes that have already been processed (mapped to what they were replaced with)
/// current_asts - nodes in the current call stack of this method
/// Throws CYCLIC_ALIASES, UNKNOWN_IDENTIFIER or QUERY_SECTION_DOESNT_MAKE_SENSE (see below).
void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts)
{
/// Meeting a node that is still being processed higher up the stack means aliases refer to each other.
if (current_asts.count(ast))
{
throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES);
}
/// A node processed earlier is simply replaced with its stored result.
if (finished_asts.count(ast))
{
ast = finished_asts[ast];
return;
}
/// Keep the original node: `ast` may be replaced below, but bookkeeping is keyed by the original.
ASTPtr initial_ast = ast;
current_asts.insert(initial_ast);
/// Actions performed top-down.
if (!sign_column_name.empty())
considerSignRewrite(ast);
if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
{
if (node->kind == ASTIdentifier::Column)
{
/// If this is an alias
Aliases::const_iterator jt = aliases.find(node->name);
if (jt != aliases.end())
{
/// Replace it with the corresponding tree node
ast = jt->second;
normalizeTreeImpl(ast, finished_asts, current_asts);
}
else
{
/// Check whether sign-rewrite makes sense
/// NOTE(review): identifiers produced by createSignColumn() carry this same name; if they are
/// visited as children of a rewritten aggregate they would seem to hit this check too - confirm.
if (node->name == sign_column_name)
throw Exception("Requested Sign column while sign-rewrite is on.", ErrorCodes::QUERY_SECTION_DOESNT_MAKE_SENSE);
if (findColumn(node->name) == columns.end())
throw Exception("Unknown identifier: " + node->name, ErrorCodes::UNKNOWN_IDENTIFIER);
}
}
}
else if (ASTExpressionList * node = dynamic_cast<ASTExpressionList *>(&*ast))
{
/// Replace * with the list of columns.
ASTs & asts = node->children;
/// Iterate backwards so that inserting the expanded columns does not shift the indices still to be visited.
for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
{
if (ASTAsterisk * asterisk = dynamic_cast<ASTAsterisk *>(&*asts[i]))
{
ASTs all_columns;
for (NamesAndTypesList::const_iterator it = columns.begin(); it != columns.end(); ++it)
all_columns.push_back(new ASTIdentifier(asterisk->range, it->first));
asts.erase(asts.begin() + i);
asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end());
}
}
}
/// Recursive calls. Do not descend into subqueries.
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
if (!dynamic_cast<ASTSelectQuery *>(&**it))
normalizeTreeImpl(*it, finished_asts, current_asts);
/// Actions performed bottom-up.
/// If the WHERE or HAVING section consists of a single alias, the reference must be replaced
/// not only in children, but also in where_expression and having_expression.
if (ASTSelectQuery * select = dynamic_cast<ASTSelectQuery *>(&*ast))
{
if (select->where_expression)
normalizeTreeImpl(select->where_expression, finished_asts, current_asts);
if (select->having_expression)
normalizeTreeImpl(select->having_expression, finished_asts, current_asts);
}
/// Classify function nodes; the checks are made in this order, so "lambda" wins over everything else.
if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
{
if (node->name == "lambda")
{
node->kind = ASTFunction::LAMBDA_EXPRESSION;
}
else if (context.getAggregateFunctionFactory().isAggregateFunctionName(node->name))
{
node->kind = ASTFunction::AGGREGATE_FUNCTION;
}
else if (node->name == "arrayJoin")
{
node->kind = ASTFunction::ARRAY_JOIN;
}
else
{
node->kind = ASTFunction::FUNCTION;
}
}
/// Leave the "in progress" set (both the original and the possibly substituted node)
/// and record what the original node was turned into.
current_asts.erase(initial_ast);
current_asts.erase(ast);
finished_asts[initial_ast] = ast;
}
/// Replaces the second argument of `node` (a function such as in / notIn) with an ASTSet node.
/// `actions` is consulted for the type of the left argument when the set is given as explicit values.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT if the second argument is neither a subquery, a tuple
/// function, nor an already built ASTSet.
void ExpressionAnalyzer::makeSet(ASTFunction * node, ExpressionActions & actions)
{
/** The right argument must be converted to a set.
* It may be an enumeration of values or a subquery.
* An enumeration of values is parsed as the function tuple.
*/
IAST & args = *node->arguments;
/// Reference into the argument list: assigning to `arg` below replaces the node in the tree.
ASTPtr & arg = args.children[1];
if (dynamic_cast<ASTSubquery *>(&*arg))
{
/// Execute the subquery, turn the result into a set, and put this set in place of the subquery.
InterpreterSelectQuery interpreter(arg->children[0], context, QueryProcessingStage::Complete, subquery_depth + 1);
ASTSet * ast_set = new ASTSet(arg->getColumnName());
ast_set->set = new Set;
ast_set->set->create(interpreter.execute());
arg = ast_set;
}
else if (ASTFunction * set_func = dynamic_cast<ASTFunction *>(&*arg))
{
/// The case of explicit enumeration of values.
if (set_func->name != "tuple")
throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
/// Element types of the set are taken from the left argument.
DataTypes set_element_types;
ASTPtr & left_arg = args.children[0];
ASTFunction * left_arg_tuple = dynamic_cast<ASTFunction *>(&*left_arg);
if (left_arg_tuple && left_arg_tuple->name == "tuple")
{
/// Left side is a tuple: one element type per tuple component.
for (ASTs::const_iterator it = left_arg_tuple->arguments->children.begin();
it != left_arg_tuple->arguments->children.end();
++it)
set_element_types.push_back(actions.getSampleBlock().getByName((*it)->getColumnName()).type);
}
else
{
/// Left side is a single expression; for an array its nested type is used.
DataTypePtr left_type = actions.getSampleBlock().getByName(left_arg->getColumnName()).type;
if (DataTypeArray * array_type = dynamic_cast<DataTypeArray *>(&*left_type))
set_element_types.push_back(array_type->getNestedType());
else
set_element_types.push_back(left_type);
}
ASTSet * ast_set = new ASTSet(arg->getColumnName());
ast_set->set = new Set;
ast_set->set->create(set_element_types, set_func->arguments);
arg = ast_set;
}
/// An argument that is already an ASTSet is accepted as is.
else if (!dynamic_cast<ASTSet *>(&*arg))
throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
/// Returns prefix + N for the smallest N >= 1 such that the block has no column with that name.
static std::string getUniqueName(const Block & block, const std::string & prefix)
{
    for (int suffix = 1; ; ++suffix)
    {
        if (!block.has(prefix + Poco::NumberFormatter::format(suffix)))
            return prefix + Poco::NumberFormatter::format(suffix);
    }
}
/// Appends to `actions` the steps needed to compute the expression `ast`.
/// no_subqueries - do not build sets for in / notIn; a placeholder column is added instead.
/// only_consts - skip arrayJoin and lambda handling, and apply a function only when all of
///   its arguments are already present in the sample block.
/// Nodes that are not functions, literals or sets are only traversed through their children.
void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool only_consts, ExpressionActions & actions)
{
/// If the result of the computation is already in the block.
if ((dynamic_cast<ASTFunction *>(&*ast) || dynamic_cast<ASTLiteral *>(&*ast) || dynamic_cast<ASTSet *>(&*ast))
&& actions.getSampleBlock().has(ast->getColumnName()))
return;
if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
{
/// A lambda is only valid as a function argument; that case is handled inside the FUNCTION branch below.
if (node->kind == ASTFunction::LAMBDA_EXPRESSION)
throw Exception("Unexpected expression", ErrorCodes::UNEXPECTED_EXPRESSION);
if (node->kind == ASTFunction::ARRAY_JOIN)
{
if (node->arguments->children.size() != 1)
throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH);
ASTPtr arg = node->arguments->children[0];
getActionsImpl(arg, no_subqueries, only_consts, actions);
if (!only_consts)
actions.add(ExpressionActions::Action::arrayJoin(arg->getColumnName(), node->getColumnName()));
return;
}
/// Note: nodes of kind AGGREGATE_FUNCTION match none of the branches here and add nothing.
if (node->kind == ASTFunction::FUNCTION)
{
if (node->name == "in" || node->name == "notIn")
{
if (!no_subqueries)
{
/// Find the type of the first argument (getActionsImpl will later be called for it again and will have no effect).
getActionsImpl(node->arguments->children[0], no_subqueries, only_consts, actions);
/// Turn the tuple or subquery into a set.
makeSet(node, actions);
}
else
{
/// We are in a part of the tree that we are not going to evaluate. Only the types need to be determined.
/// Do not execute subqueries or build sets. Insert an arbitrary column of the right type.
ColumnWithNameAndType fake_column;
fake_column.name = node->getColumnName();
fake_column.type = new DataTypeUInt8;
fake_column.column = new ColumnConstUInt8(1, 0);
actions.add(ExpressionActions::Action::addColumn(fake_column));
getActionsImpl(node->arguments, no_subqueries, only_consts, actions);
return;
}
}
FunctionPtr function = context.getFunctionFactory().get(node->name, context);
Names argument_names;
DataTypes argument_types;
/// If the function has a lambda-expression argument, its type must be determined before the recursive call.
bool has_lambda_arguments = false;
for (size_t i = 0; i < node->arguments->children.size(); ++i)
{
ASTPtr child = node->arguments->children[i];
ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
if (lambda && lambda->name == "lambda")
{
/// If the argument is a lambda function, only remember its approximate type.
if (lambda->arguments->children.size() != 2)
throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);
if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);
has_lambda_arguments = true;
/// Only the number of lambda parameters is known at this point; their types are left empty.
argument_types.push_back(new DataTypeExpression(DataTypes(lambda_args_tuple->arguments->children.size())));
/// We cannot use the name child->getColumnName(),
/// because it does not uniquely identify the expression (the argument types may differ).
argument_names.push_back(getUniqueName(actions.getSampleBlock(), "__lambda"));
}
else
{
/// If the argument is not a lambda function, recurse and find out its type.
getActionsImpl(child, no_subqueries, only_consts, actions);
std::string name = child->getColumnName();
argument_types.push_back(actions.getSampleBlock().getByName(name).type);
argument_names.push_back(name);
}
}
if (has_lambda_arguments && !only_consts)
{
/// Called with the argument types gathered so far; the loop below then reads the lambda
/// parameter types back from argument_types[i].
function->getLambdaArgumentTypes(argument_types);
/// Recurse for the lambda expressions.
for (size_t i = 0; i < node->arguments->children.size(); ++i)
{
ASTPtr child = node->arguments->children[i];
ASTFunction * lambda = dynamic_cast<ASTFunction *>(&*child);
if (lambda && lambda->name == "lambda")
{
DataTypeExpression * lambda_type = dynamic_cast<DataTypeExpression *>(&*argument_types[i]);
ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*lambda->arguments->children[0]);
ASTs lambda_arg_asts = lambda_args_tuple->arguments->children;
NamesAndTypes lambda_args;
/// The lambda body sees the columns required by the outer actions, with the lambda
/// parameters added (or overriding the type of a column with the same name).
NamesAndTypesList lambda_columns = actions.getRequiredColumnsWithTypes();
for (size_t j = 0; j < lambda_arg_asts.size(); ++j)
{
ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*lambda_arg_asts[j]);
if (!identifier)
throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);
String arg_name = identifier->name;
NameAndTypePair arg(arg_name, lambda_type->getArgumentTypes()[j]);
NamesAndTypesList::iterator it = findColumn(arg_name, lambda_columns);
if (it != lambda_columns.end())
it->second = arg.second;
else
lambda_columns.push_back(arg);
lambda_args.push_back(arg);
}
/// The lambda body gets its own actions, finalized to produce just the body's result column.
ExpressionActionsPtr lambda_actions = new ExpressionActions(lambda_columns, settings);
getActionsImpl(lambda->arguments->children[1], no_subqueries, only_consts, *lambda_actions);
String result_name = lambda->arguments->children[1]->getColumnName();
lambda_actions->finalize(Names(1, result_name));
DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type;
/// Add the lambda to the outer actions as a column, under the unique name reserved for it above.
ColumnWithNameAndType lambda_column;
lambda_column.column = new ColumnExpression(1, lambda_actions, lambda_args, result_type, result_name);
lambda_column.type = argument_types[i];
lambda_column.name = argument_names[i];
actions.add(ExpressionActions::Action::addColumn(lambda_column));
}
}
}
/// With only_consts, the function is applied only if every argument is already available in the block.
bool should_add = true;
if (only_consts)
{
for (size_t i = 0; i < argument_names.size(); ++i)
{
if (!actions.getSampleBlock().has(argument_names[i]))
{
should_add = false;
break;
}
}
}
if (should_add)
actions.add(ExpressionActions::Action::applyFunction(function, argument_names, node->getColumnName()));
}
}
else if (ASTLiteral * node = dynamic_cast<ASTLiteral *>(&*ast))
{
/// A literal becomes a constant column whose type is derived from the value.
DataTypePtr type = apply_visitor(FieldToDataType(), node->value);
ColumnWithNameAndType column;
column.column = type->createConstColumn(1, node->value);
column.type = type;
column.name = node->getColumnName();
actions.add(ExpressionActions::Action::addColumn(column));
}
else if (ASTSet * node = dynamic_cast<ASTSet *>(&*ast))
{
/// A set in the IN section.
ColumnWithNameAndType column;
column.column = new ColumnSet(1, node->set);
column.type = new DataTypeSet;
column.name = node->getColumnName();
actions.add(ExpressionActions::Action::addColumn(column));
}
else
{
/// Any other node: just process its children.
for (ASTs::iterator it = ast->children.begin(); it != ast->children.end(); ++it)
getActionsImpl(*it, no_subqueries, only_consts, actions);
}
}
/// Walks the tree and appends one AggregateDescription to aggregate_descriptions for every
/// distinct (by column name) aggregate function call; sets has_aggregation when one is found.
/// `actions` receives the steps needed to compute the aggregate arguments, so their types can be read.
/// Does not descend into the arguments of an aggregate function in search of further aggregates.
void ExpressionAnalyzer::getAggregatesImpl(ASTPtr ast, ExpressionActions & actions)
{
ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast);
if (node && node->kind == ASTFunction::AGGREGATE_FUNCTION)
{
has_aggregation = true;
AggregateDescription aggregate;
aggregate.column_name = node->getColumnName();
/// Skip an aggregate that has already been registered under the same column name.
for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
if (aggregate_descriptions[i].column_name == aggregate.column_name)
return;
ASTs & arguments = node->arguments->children;
aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
/// Subqueries are not executed here (no_subqueries = true).
getActionsImpl(arguments[i], true, false, actions);
const std::string & name = arguments[i]->getColumnName();
types[i] = actions.getSampleBlock().getByName(name).type;
aggregate.argument_names[i] = name;
}
aggregate.function = context.getAggregateFunctionFactory().get(node->name, types);
if (node->parameters)
{
/// Parameters of the aggregate function must all be literals.
ASTs & parameters = dynamic_cast<ASTExpressionList &>(*node->parameters).children;
Row params_row(parameters.size());
for (size_t i = 0; i < parameters.size(); ++i)
{
ASTLiteral * lit = dynamic_cast<ASTLiteral *>(&*parameters[i]);
if (!lit)
throw Exception("Parameters to aggregate functions must be literals", ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS);
params_row[i] = lit->value;
}
aggregate.function->setParameters(params_row);
}
aggregate.function->setArguments(types);
/// NOTE(review): `ast` is a by-value parameter of this function, so a replacement made by
/// considerSignRewrite only changes this local copy, and `aggregate` was filled before the call.
/// Confirm whether this call is meant to have any effect.
if (!sign_column_name.empty())
considerSignRewrite(ast);
aggregate_descriptions.push_back(aggregate);
}
else
{
/// Not an aggregate function: keep looking in the children.
for (size_t i = 0; i < ast->children.size(); ++i)
{
getAggregatesImpl(ast->children[i], actions);
}
}
}
/// Throws LOGICAL_ERROR unless the analyzed tree is a SELECT query.
void ExpressionAnalyzer::assertSelect()
{
    if (select_query)
        return;

    throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
}
/// Throws LOGICAL_ERROR unless the query has aggregation.
void ExpressionAnalyzer::assertAggregation()
{
    if (has_aggregation)
        return;

    throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
}
/// If the chain has no steps yet, stores the settings in it and adds a first step
/// whose actions start from `columns`. A non-empty chain is left unchanged.
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, NamesAndTypesList & columns)
{
    if (!chain.steps.empty())
        return;

    chain.settings = settings;
    chain.steps.push_back(ExpressionActionsChain::Step(new ExpressionActions(columns, settings)));
}
/// Adds the computation of the WHERE expression to the last step of the chain and marks
/// its column as required output. Returns false if the query has no WHERE section.
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain)
{
    assertSelect();

    ASTPtr & where = select_query->where_expression;
    if (!where)
        return false;

    initChain(chain, columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    last_step.required_output.push_back(where->getColumnName());
    getActionsImpl(where, false, false, *last_step.actions);

    return true;
}
/// Adds the computation of every GROUP BY key to the last step of the chain and marks the
/// keys as required output. Returns false if the query has no GROUP BY section.
bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain)
{
    assertAggregation();

    if (!select_query->group_expression_list)
        return false;

    initChain(chain, columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    ASTs keys = select_query->group_expression_list->children;
    for (ASTs::iterator key = keys.begin(); key != keys.end(); ++key)
    {
        last_step.required_output.push_back((*key)->getColumnName());
        getActionsImpl(*key, false, false, *last_step.actions);
    }

    return true;
}
/// Marks all arguments of all aggregate functions as required output of the last step and
/// adds the actions computing them, looking for aggregates in SELECT, HAVING and ORDER BY.
void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain)
{
    assertAggregation();

    initChain(chain, columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    for (size_t agg = 0; agg != aggregate_descriptions.size(); ++agg)
    {
        const Names & argument_names = aggregate_descriptions[agg].argument_names;
        for (size_t arg = 0; arg != argument_names.size(); ++arg)
            last_step.required_output.push_back(argument_names[arg]);
    }

    getActionsBeforeAggregationImpl(select_query->select_expression_list, *last_step.actions);

    if (select_query->having_expression)
        getActionsBeforeAggregationImpl(select_query->having_expression, *last_step.actions);

    if (select_query->order_expression_list)
        getActionsBeforeAggregationImpl(select_query->order_expression_list, *last_step.actions);
}
/// Adds the computation of the HAVING expression (over the aggregated columns) to the last
/// step of the chain and marks its column as required output.
/// Returns false if the query has no HAVING section.
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain)
{
    assertAggregation();

    ASTPtr & having = select_query->having_expression;
    if (!having)
        return false;

    initChain(chain, aggregated_columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    last_step.required_output.push_back(having->getColumnName());
    getActionsImpl(having, false, false, *last_step.actions);

    return true;
}
/// Adds the computation of the SELECT expressions (over the aggregated columns) to the last
/// step of the chain and marks each of them as required output.
void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain)
{
    assertSelect();

    initChain(chain, aggregated_columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    getActionsImpl(select_query->select_expression_list, false, false, *last_step.actions);

    ASTs selected = select_query->select_expression_list->children;
    for (ASTs::iterator expr = selected.begin(); expr != selected.end(); ++expr)
        last_step.required_output.push_back((*expr)->getColumnName());
}
/// Adds the computation of the ORDER BY expressions to the last step of the chain and marks
/// the sorted expressions as required output. Returns false if there is no ORDER BY section.
/// Throws UNKNOWN_TYPE_OF_AST_NODE if an element is not an ASTOrderByElement with one child.
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain)
{
    assertSelect();

    if (!select_query->order_expression_list)
        return false;

    initChain(chain, aggregated_columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    getActionsImpl(select_query->order_expression_list, false, false, *last_step.actions);

    ASTs elements = select_query->order_expression_list->children;
    for (ASTs::iterator element = elements.begin(); element != elements.end(); ++element)
    {
        ASTOrderByElement * order_element = dynamic_cast<ASTOrderByElement *>(&**element);
        if (!order_element || order_element->children.size() != 1)
            throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

        /// The single child of an order element is the expression to sort by.
        ASTPtr sort_expression = order_element->children[0];
        last_step.required_output.push_back(sort_expression->getColumnName());
    }

    return true;
}
/// Adds a projection to the last step of the chain that keeps the SELECT expressions,
/// renamed to their aliases; the aliases become the required output of the step.
void ExpressionAnalyzer::appendProjectResult(DB::ExpressionActionsChain & chain)
{
    assertSelect();

    initChain(chain, aggregated_columns);
    ExpressionActionsChain::Step & last_step = chain.steps.back();

    NamesWithAliases projection;
    ASTs selected = select_query->select_expression_list->children;
    for (ASTs::iterator expr = selected.begin(); expr != selected.end(); ++expr)
    {
        NameWithAlias renamed((*expr)->getColumnName(), (*expr)->getAlias());
        projection.push_back(renamed);
        last_step.required_output.push_back(projection.back().second);
    }

    last_step.actions->add(ExpressionActions::Action::project(projection));
}
/// Returns the sample block of the SELECT result: the SELECT expressions are computed over
/// the aggregated columns (without executing subqueries) and projected to their aliases.
Block ExpressionAnalyzer::getSelectSampleBlock()
{
    assertSelect();

    ExpressionActions sample_actions(aggregated_columns, settings);
    NamesWithAliases projection;

    ASTs selected = select_query->select_expression_list->children;
    for (ASTs::iterator expr = selected.begin(); expr != selected.end(); ++expr)
    {
        projection.push_back(NameWithAlias((*expr)->getColumnName(), (*expr)->getAlias()));
        getActionsImpl(*expr, true, false, sample_actions);
    }

    sample_actions.add(ExpressionActions::Action::project(projection));
    return sample_actions.getSampleBlock();
}
/// Walks the tree and, for every aggregate function found, adds to `actions` the steps that
/// compute its arguments. Does not descend into the arguments looking for further aggregates.
void ExpressionAnalyzer::getActionsBeforeAggregationImpl(ASTPtr ast, ExpressionActions & actions)
{
    ASTFunction * func = dynamic_cast<ASTFunction *>(&*ast);
    const bool is_aggregate = func && func->kind == ASTFunction::AGGREGATE_FUNCTION;

    if (!is_aggregate)
    {
        for (size_t idx = 0; idx != ast->children.size(); ++idx)
            getActionsBeforeAggregationImpl(ast->children[idx], actions);
        return;
    }

    ASTs & aggregate_args = func->arguments->children;
    for (size_t idx = 0; idx != aggregate_args.size(); ++idx)
        getActionsImpl(aggregate_args[idx], false, false, actions);
}
/// Builds the actions that compute the analyzed expression (or every element of an
/// expression list) from `columns`, project the results to their aliases, and keep
/// only those result columns.
ExpressionActionsPtr ExpressionAnalyzer::getActions()
{
    ExpressionActionsPtr actions = new ExpressionActions(columns, settings);

    /// Treat a single expression as a list of one element so both cases share the same code.
    ASTs asts;
    if (ASTExpressionList * node = dynamic_cast<ASTExpressionList *>(&*ast))
        asts = node->children;
    else
        asts = ASTs(1, ast);

    NamesWithAliases result_columns;
    Names result_names;
    for (size_t i = 0; i < asts.size(); ++i)
    {
        result_columns.push_back(NameWithAlias(asts[i]->getColumnName(), asts[i]->getAlias()));
        /// The projection below renames each column to its alias, so the final output must be
        /// requested by alias (as appendProjectResult does), not by the pre-projection name.
        result_names.push_back(result_columns.back().second);
        getActionsImpl(asts[i], false, false, *actions);
    }

    actions->add(ExpressionActions::Action::project(result_columns));
    actions->finalize(result_names);
    return actions;
}
/// Builds actions starting from an empty column list, in only_consts mode and without
/// executing subqueries. Throws LOGICAL_ERROR if the expression has aggregation.
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
    if (has_aggregation)
        throw Exception("Expression has aggregation", ErrorCodes::LOGICAL_ERROR);

    ExpressionActionsPtr const_actions = new ExpressionActions(NamesAndTypesList(), settings);

    getActionsImpl(ast, true, true, *const_actions);

    return const_actions;
}
/// Appends the names of the aggregation keys to `key_names` and overwrites `aggregates`
/// with the collected aggregate function descriptions.
void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates)
{
    NamesAndTypesList::iterator key = aggregation_keys.begin();
    while (key != aggregation_keys.end())
    {
        key_names.push_back(key->first);
        ++key;
    }

    aggregates = aggregate_descriptions;
}
/// Returns the names of the columns referenced by the expression, each listed once.
Names ExpressionAnalyzer::getRequiredColumns()
{
    NamesSet required_names;
    NamesSet lambda_parameters;

    getRequiredColumnsImpl(ast, required_names, lambda_parameters);

    return Names(required_names.begin(), required_names.end());
}
/// Collects into `required_columns` the names of all column identifiers in the subtree `ast`,
/// except those listed in `ignored_names`.
/// Inside a lambda expression its parameters are temporarily added to `ignored_names`,
/// so that they are not reported as required columns.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH or TYPE_MISMATCH for a malformed lambda.
void ExpressionAnalyzer::getRequiredColumnsImpl(ASTPtr ast, NamesSet & required_columns, NamesSet & ignored_names)
{
    if (ASTIdentifier * node = dynamic_cast<ASTIdentifier *>(&*ast))
    {
        if (node->kind == ASTIdentifier::Column && !ignored_names.count(node->name))
            required_columns.insert(node->name);
        return;
    }

    if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
    {
        if (node->kind == ASTFunction::LAMBDA_EXPRESSION)
        {
            if (node->arguments->children.size() != 2)
                throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

            ASTFunction * lambda_args_tuple = dynamic_cast<ASTFunction *>(&*node->arguments->children[0]);
            if (!lambda_args_tuple || lambda_args_tuple->name != "tuple")
                throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH);

            /// The lambda parameters must not be added to required_columns.
            /// The parameter declarations are the arguments of the tuple function; its direct
            /// `children` hold the argument list node itself, not the individual identifiers.
            const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children;

            /// Remember only the names added here, so an outer lambda's parameters stay ignored afterwards.
            Names added_ignored;
            for (size_t i = 0; i < lambda_arg_asts.size(); ++i)
            {
                ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&*lambda_arg_asts[i]);
                if (!identifier)
                    throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH);

                const std::string & name = identifier->name;
                if (!ignored_names.count(name))
                {
                    ignored_names.insert(name);
                    added_ignored.push_back(name);
                }
            }

            /// Only the lambda body can reference real columns.
            getRequiredColumnsImpl(node->arguments->children[1], required_columns, ignored_names);

            for (size_t i = 0; i < added_ignored.size(); ++i)
                ignored_names.erase(added_ignored[i]);

            return;
        }
    }

    for (size_t i = 0; i < ast->children.size(); ++i)
        getRequiredColumnsImpl(ast->children[i], required_columns, ignored_names);
}
}