ClickHouse/dbms/src/Interpreters/ExpressionAnalyzer.cpp

855 lines
31 KiB
C++
Raw Normal View History

#include <Poco/Util/Application.h>
2016-11-20 12:43:20 +00:00
#include <Poco/String.h>
#include <Core/Block.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectQuery.h>
2018-02-26 09:05:06 +00:00
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/formatAST.h>
#include <Parsers/DumpASTNode.h>
2017-12-25 00:38:45 +00:00
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>
2018-08-28 14:40:07 +00:00
#include <Columns/IColumn.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/InJoinSubqueriesPreprocessor.h>
#include <Interpreters/LogicalExpressionsOptimizer.h>
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/Set.h>
2019-09-03 16:56:32 +00:00
#include <Interpreters/AnalyzedJoin.h>
2019-09-09 19:43:37 +00:00
#include <Interpreters/Join.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/parseAggregateFunctionParameters.h>
2015-04-16 06:12:35 +00:00
#include <Storages/StorageDistributed.h>
#include <Storages/StorageJoin.h>
#include <DataStreams/copyData.h>
#include <DataStreams/IBlockInputStream.h>
#include <Dictionaries/IDictionary.h>
#include <Common/typeid_cast.h>
#include <Common/StringUtils/StringUtils.h>
2017-06-06 17:18:32 +00:00
#include <ext/range.h>
#include <DataTypes/DataTypeFactory.h>
#include <Functions/FunctionsMiscellaneous.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
2018-07-27 16:21:43 +00:00
#include <Parsers/queryToString.h>
#include <Interpreters/interpretSubquery.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/ExternalTablesVisitor.h>
#include <Interpreters/GlobalSubqueriesVisitor.h>
2019-08-12 19:27:09 +00:00
#include <Interpreters/GetAggregatesVisitor.h>
namespace DB
{
2018-10-10 16:23:27 +00:00
using LogAST = DebugASTLog<false>; /// set to true to enable logs
namespace ErrorCodes
{
extern const int UNKNOWN_IDENTIFIER;
extern const int LOGICAL_ERROR;
}
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
ExpressionAnalyzer::ExpressionAnalyzer(
2018-08-27 17:58:43 +00:00
const ASTPtr & query_,
const SyntaxAnalyzerResultPtr & syntax_analyzer_result_,
const Context & context_,
size_t subquery_depth_,
2019-08-13 13:48:09 +00:00
bool do_global)
: query(query_), context(context_), settings(context.getSettings())
2019-08-13 13:48:09 +00:00
, subquery_depth(subquery_depth_)
2018-11-09 17:23:48 +00:00
, syntax(syntax_analyzer_result_)
{
2017-04-02 17:37:49 +00:00
/// external_tables, subqueries_for_sets for global subqueries.
/// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers.
2019-08-13 13:48:09 +00:00
initGlobalSubqueriesAndExternalTables(do_global);
/// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns.
2017-04-02 17:37:49 +00:00
/// This analysis should be performed after processing global subqueries, because otherwise,
/// if the aggregate function contains a global subquery, then `analyzeAggregation` method will save
/// in `aggregate_descriptions` the information about the parameters of this aggregate function, among which
/// global subquery. Then, when you call `initGlobalSubqueriesAndExternalTables` method, this
/// the global subquery will be replaced with a temporary table, resulting in aggregate_descriptions
/// will contain out-of-date information, which will lead to an error when the query is executed.
analyzeAggregation();
}
2018-10-17 11:06:46 +00:00
bool ExpressionAnalyzer::isRemoteStorage() const
{
return storage() && storage()->isRemote();
}
void ExpressionAnalyzer::analyzeAggregation()
{
2017-04-02 17:37:49 +00:00
/** Find aggregation keys (aggregation_keys), information about aggregate functions (aggregate_descriptions),
* as well as a set of columns obtained after the aggregation, if any,
* or after all the actions that are usually performed before aggregation (aggregated_columns).
*
2017-04-02 17:37:49 +00:00
* Everything below (compiling temporary ExpressionActions) - only for the purpose of query analysis (type output).
*/
2019-03-11 13:22:51 +00:00
auto * select_query = query->as<ASTSelectQuery>();
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
if (select_query)
{
bool is_array_join_left;
ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left);
if (array_join_expression_list)
{
getRootActions(array_join_expression_list, true, temp_actions);
addMultipleArrayJoinAction(temp_actions, is_array_join_left);
array_join_columns.clear();
for (auto & column : temp_actions->getSampleBlock().getNamesAndTypesList())
if (syntax->array_join_result_to_source.count(column.name))
array_join_columns.emplace_back(column);
}
const ASTTablesInSelectQueryElement * join = select_query->join();
if (join)
{
getRootActions(analyzedJoin().leftKeysList(), true, temp_actions);
addJoinAction(temp_actions);
}
}
has_aggregation = makeAggregateDescriptions(temp_actions);
if (select_query && (select_query->groupBy() || select_query->having()))
has_aggregation = true;
if (has_aggregation)
{
2019-08-13 13:48:09 +00:00
getSelectQuery(); /// assertSelect()
/// Find out aggregation keys.
if (select_query->groupBy())
{
NameSet unique_keys;
ASTs & group_asts = select_query->groupBy()->children;
for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
{
ssize_t size = group_asts.size();
getRootActions(group_asts[i], true, temp_actions);
const auto & column_name = group_asts[i]->getColumnName();
const auto & block = temp_actions->getSampleBlock();
if (!block.has(column_name))
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
const auto & col = block.getByName(column_name);
/// Constant expressions have non-null column pointer at this stage.
if (col.column && isColumnConst(*col.column))
{
/// But don't remove last key column if no aggregate functions, otherwise aggregation will not work.
if (!aggregate_descriptions.empty() || size > 1)
{
if (i + 1 < static_cast<ssize_t>(size))
group_asts[i] = std::move(group_asts.back());
group_asts.pop_back();
--i;
continue;
}
}
NameAndTypePair key{column_name, col.type};
/// Aggregation keys are uniqued.
if (!unique_keys.count(key.name))
{
unique_keys.insert(key.name);
aggregation_keys.push_back(key);
/// Key is no longer needed, therefore we can save a little by moving it.
aggregated_columns.push_back(std::move(key));
}
}
if (group_asts.empty())
{
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
has_aggregation = select_query->having() || aggregate_descriptions.size();
}
}
for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
{
AggregateDescription & desc = aggregate_descriptions[i];
aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType());
}
}
else
{
aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList();
}
}
2019-08-13 13:48:09 +00:00
void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
{
2017-04-02 17:37:49 +00:00
/// Adds existing external tables (not subqueries) to the external_tables dictionary.
ExternalTablesVisitor::Data tables_data{context, external_tables};
ExternalTablesVisitor(tables_data).visit(query);
2018-10-17 10:59:05 +00:00
if (do_global)
{
GlobalSubqueriesVisitor::Data subqueries_data(context, subquery_depth, isRemoteStorage(),
2018-10-17 10:59:05 +00:00
external_tables, subqueries_for_sets, has_global_subqueries);
GlobalSubqueriesVisitor(subqueries_data).visit(query);
2018-10-17 10:59:05 +00:00
}
}
void SelectQueryExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
{
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
if (prepared_sets.count(set_key))
return; /// Already prepared.
auto interpreter_subquery = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1, {});
BlockIO res = interpreter_subquery->execute();
SetPtr set = std::make_shared<Set>(settings.size_limits_for_set, true);
2018-04-19 21:34:04 +00:00
set->setHeader(res.in->getHeader());
res.in->readPrefix();
while (Block block = res.in->read())
{
/// If the limits have been exceeded, give up and let the default subquery processing actions take place.
2018-07-02 18:57:14 +00:00
if (!set->insertFromBlock(block))
return;
}
res.in->readSuffix();
prepared_sets[set_key] = std::move(set);
}
2019-08-12 19:27:09 +00:00
/// Perfomance optimisation for IN() if storage supports it.
void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
{
2019-08-12 19:27:09 +00:00
if (!node || !storage() || !storage()->supportsIndexForIn())
return;
for (auto & child : node->children)
{
/// Don't descend into subqueries.
2019-03-11 13:22:51 +00:00
if (child->as<ASTSubquery>())
continue;
/// Don't descend into lambda functions
2019-03-11 13:22:51 +00:00
const auto * func = child->as<ASTFunction>();
if (func && func->name == "lambda")
continue;
2019-08-12 19:27:09 +00:00
makeSetsForIndex(child);
}
2019-03-11 13:22:51 +00:00
const auto * func = node->as<ASTFunction>();
if (func && functionIsInOperator(func->name))
{
const IAST & args = *func->arguments;
2019-08-12 19:27:09 +00:00
const ASTPtr & left_in_operand = args.children.at(0);
2019-08-12 19:27:09 +00:00
if (storage()->mayBenefitFromIndexForIn(left_in_operand, context))
{
2018-03-16 06:39:32 +00:00
const ASTPtr & arg = args.children.at(1);
2019-03-11 13:22:51 +00:00
if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>())
{
if (settings.use_index_for_in_with_subqueries)
tryMakeSetForIndexFromSubquery(arg);
}
else
{
NamesAndTypesList temp_columns = sourceColumns();
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
temp_columns.insert(temp_columns.end(),
analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
2019-08-12 19:27:09 +00:00
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
2019-08-12 19:27:09 +00:00
getRootActions(left_in_operand, true, temp_actions);
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
2019-08-12 19:27:09 +00:00
if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName()))
makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
settings.size_limits_for_set, prepared_sets);
}
}
}
}
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(context, settings.size_limits_for_set, subquery_depth,
sourceColumns(), actions, prepared_sets, subqueries_for_sets,
no_subqueries, only_consts, !isRemoteStorage());
ActionsVisitor(visitor_data, log.stream()).visit(ast);
visitor_data.updateActions(actions);
}
bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions)
{
for (const ASTFunction * node : aggregates())
{
AggregateDescription aggregate;
aggregate.column_name = node->getColumnName();
const ASTs & arguments = node->arguments->children;
aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
getRootActions(arguments[i], true, actions);
const std::string & name = arguments[i]->getColumnName();
types[i] = actions->getSampleBlock().getByName(name).type;
aggregate.argument_names[i] = name;
}
aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array();
aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters);
aggregate_descriptions.push_back(aggregate);
}
2016-03-05 02:30:20 +00:00
return !aggregates().empty();
2016-03-05 02:30:20 +00:00
}
2019-08-13 13:48:09 +00:00
const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const
{
2019-03-11 13:22:51 +00:00
const auto * select_query = query->as<ASTSelectQuery>();
if (!select_query)
throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
2019-08-13 13:48:09 +00:00
return select_query;
}
2013-06-14 16:38:54 +00:00
const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const
{
if (!has_aggregation)
throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
2019-08-13 13:48:09 +00:00
return getSelectQuery();
}
2013-06-14 16:38:54 +00:00
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const
{
if (chain.steps.empty())
{
chain.steps.emplace_back(std::make_shared<ExpressionActions>(columns, context));
}
}
2016-07-23 02:25:09 +00:00
/// "Big" ARRAY JOIN.
void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool array_join_is_left) const
{
NameSet result_columns;
for (const auto & result_source : syntax->array_join_result_to_source)
{
/// Assign new names to columns, if needed.
if (result_source.first != result_source.second)
actions->add(ExpressionAction::copyColumn(result_source.second, result_source.first));
2017-04-02 17:37:49 +00:00
/// Make ARRAY JOIN (replace arrays with their insides) for the columns in these new names.
result_columns.insert(result_source.first);
}
actions->add(ExpressionAction::arrayJoin(result_columns, array_join_is_left, context));
}
bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getSelectQuery();
bool is_array_join_left;
ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left);
if (!array_join_expression_list)
return false;
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
getRootActions(array_join_expression_list, only_types, step.actions);
addMultipleArrayJoinAction(step.actions, is_array_join_left);
return true;
}
void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, JoinPtr join) const
{
actions->add(ExpressionAction::ordinaryJoin(syntax->analyzed_join, join));
}
bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
{
const ASTTablesInSelectQueryElement * ast_join = getSelectQuery()->join();
if (!ast_join)
return false;
JoinPtr table_join = makeTableJoin(*ast_join);
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions);
addJoinAction(step.actions, table_join);
return true;
}
static JoinPtr tryGetStorageJoin(const ASTTablesInSelectQueryElement & join_element, const Context & context)
{
const auto & table_to_join = join_element.table_expression->as<ASTTableExpression &>();
auto & join_params = join_element.table_join->as<ASTTableJoin &>();
2017-04-02 17:37:49 +00:00
/// TODO This syntax does not support specifying a database name.
if (table_to_join.database_and_table_name)
{
DatabaseAndTableWithAlias database_table(table_to_join.database_and_table_name);
StoragePtr table = context.tryGetTable(database_table.database, database_table.table);
if (table)
{
auto * storage_join = dynamic_cast<StorageJoin *>(table.get());
if (storage_join)
{
storage_join->assertCompatible(join_params.kind, join_params.strictness);
2017-04-02 17:37:49 +00:00
/// TODO Check the set of keys.
return storage_join->getJoin();
}
}
}
return {};
}
2019-09-04 16:37:05 +00:00
static ExpressionActionsPtr createJoinedBlockActions(const Context & context, const AnalyzedJoin & analyzed_join)
{
ASTPtr expression_list = analyzed_join.rightKeysList();
auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list,
analyzed_join.columnsFromJoinedTable(), analyzed_join.requiredJoinedNames());
return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false);
}
JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin(const ASTTablesInSelectQueryElement & join_element)
{
/// Two JOINs are not supported with the same subquery, but different USINGs.
auto join_hash = join_element.getTreeHash();
String join_subquery_id = toString(join_hash.first) + "_" + toString(join_hash.second);
2019-09-09 19:43:37 +00:00
SubqueryForSet & subquery_for_join = subqueries_for_sets[join_subquery_id];
/// Special case - if table name is specified on the right of JOIN, then the table has the type Join (the previously prepared mapping).
2019-09-09 19:43:37 +00:00
if (!subquery_for_join.join)
subquery_for_join.join = tryGetStorageJoin(join_element, context);
2019-09-09 19:43:37 +00:00
if (!subquery_for_join.join)
{
/// Actions which need to be calculated on joined block.
2019-09-04 16:37:05 +00:00
ExpressionActionsPtr joined_block_actions = createJoinedBlockActions(context, analyzedJoin());
2019-09-09 19:43:37 +00:00
if (!subquery_for_join.source)
{
NamesWithAliases required_columns_with_aliases =
analyzedJoin().getRequiredColumns(joined_block_actions->getSampleBlock(), joined_block_actions->getRequiredColumns());
makeSubqueryForJoin(join_element, std::move(required_columns_with_aliases), subquery_for_join);
}
/// TODO You do not need to set this up when JOIN is only needed on remote servers.
2019-09-09 19:43:37 +00:00
subquery_for_join.setJoinActions(joined_block_actions); /// changes subquery_for_join.sample_block inside
subquery_for_join.join = makeJoin(syntax->analyzed_join, subquery_for_join.sample_block);
}
return subquery_for_join.join;
}
void SelectQueryExpressionAnalyzer::makeSubqueryForJoin(const ASTTablesInSelectQueryElement & join_element,
2019-09-09 19:43:37 +00:00
NamesWithAliases && required_columns_with_aliases,
SubqueryForSet & subquery_for_set) const
{
/** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs
* - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1,
* in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`.
* - this function shows the expression JOIN _data1.
*/
Names original_columns;
for (auto & pr : required_columns_with_aliases)
original_columns.push_back(pr.first);
auto interpreter = interpretSubquery(join_element.table_expression, context, subquery_depth, original_columns);
subquery_for_set.makeSource(interpreter, std::move(required_columns_with_aliases));
}
bool SelectQueryExpressionAnalyzer::appendPrewhere(
ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getSelectQuery();
if (!select_query->prewhere())
return false;
initChain(chain, sourceColumns());
2018-04-12 09:45:24 +00:00
auto & step = chain.getLastStep();
getRootActions(select_query->prewhere(), only_types, step.actions);
String prewhere_column_name = select_query->prewhere()->getColumnName();
2018-04-12 09:45:24 +00:00
step.required_output.push_back(prewhere_column_name);
2018-06-29 11:42:44 +00:00
step.can_remove_required_output.push_back(true);
2018-04-12 09:45:24 +00:00
{
/// Remove unused source_columns from prewhere actions.
auto tmp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
getRootActions(select_query->prewhere(), only_types, tmp_actions);
2018-04-12 09:45:24 +00:00
tmp_actions->finalize({prewhere_column_name});
auto required_columns = tmp_actions->getRequiredColumns();
NameSet required_source_columns(required_columns.begin(), required_columns.end());
2018-10-04 08:58:19 +00:00
/// Add required columns to required output in order not to remove them after prewhere execution.
/// TODO: add sampling and final execution to common chain.
for (const auto & column : additional_required_columns)
{
if (required_source_columns.count(column))
{
step.required_output.push_back(column);
step.can_remove_required_output.push_back(true);
}
}
2018-04-12 09:45:24 +00:00
auto names = step.actions->getSampleBlock().getNames();
NameSet name_set(names.begin(), names.end());
for (const auto & column : sourceColumns())
2018-04-12 09:45:24 +00:00
if (required_source_columns.count(column.name) == 0)
name_set.erase(column.name);
Names required_output(name_set.begin(), name_set.end());
step.actions->finalize(required_output);
}
{
/// Add empty action with input = {prewhere actions output} + {unused source columns}
/// Reasons:
/// 1. Remove remove source columns which are used only in prewhere actions during prewhere actions execution.
/// Example: select A prewhere B > 0. B can be removed at prewhere step.
/// 2. Store side columns which were calculated during prewhere actions execution if they are used.
/// Example: select F(A) prewhere F(A) > 0. F(A) can be saved from prewhere step.
2018-06-25 13:08:35 +00:00
/// 3. Check if we can remove filter column at prewhere step. If we can, action will store single REMOVE_COLUMN.
2018-04-12 09:45:24 +00:00
ColumnsWithTypeAndName columns = step.actions->getSampleBlock().getColumnsWithTypeAndName();
auto required_columns = step.actions->getRequiredColumns();
NameSet prewhere_input_names(required_columns.begin(), required_columns.end());
NameSet unused_source_columns;
for (const auto & column : sourceColumns())
2018-04-12 09:45:24 +00:00
{
if (prewhere_input_names.count(column.name) == 0)
{
columns.emplace_back(column.type, column.name);
unused_source_columns.emplace(column.name);
}
}
chain.steps.emplace_back(std::make_shared<ExpressionActions>(std::move(columns), context));
2018-04-12 09:45:24 +00:00
chain.steps.back().additional_input = std::move(unused_source_columns);
}
return true;
}
void SelectQueryExpressionAnalyzer::appendPreliminaryFilter(ExpressionActionsChain & chain, ExpressionActionsPtr actions, String column_name)
{
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
2019-10-05 19:34:57 +00:00
// FIXME: assert(filter_info);
step.actions = std::move(actions);
step.required_output.push_back(std::move(column_name));
step.can_remove_required_output = {true};
chain.addStep();
}
bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getSelectQuery();
if (!select_query->where())
return false;
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
step.required_output.push_back(select_query->where()->getColumnName());
2018-06-29 11:42:44 +00:00
step.can_remove_required_output = {true};
2018-04-23 19:05:46 +00:00
getRootActions(select_query->where(), only_types, step.actions);
return true;
}
bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getAggregatingQuery();
if (!select_query->groupBy())
return false;
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
ASTs asts = select_query->groupBy()->children;
for (size_t i = 0; i < asts.size(); ++i)
{
step.required_output.push_back(asts[i]->getColumnName());
getRootActions(asts[i], only_types, step.actions);
}
return true;
}
void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getAggregatingQuery();
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
{
for (size_t j = 0; j < aggregate_descriptions[i].argument_names.size(); ++j)
{
step.required_output.push_back(aggregate_descriptions[i].argument_names[j]);
}
}
/// Collect aggregates removing duplicates by node.getColumnName()
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
/// @note The original recollection logic didn't remove duplicates.
GetAggregatesVisitor::Data data;
GetAggregatesVisitor(data).visit(select_query->select());
if (select_query->having())
GetAggregatesVisitor(data).visit(select_query->having());
if (select_query->orderBy())
GetAggregatesVisitor(data).visit(select_query->orderBy());
/// TODO: data.aggregates -> aggregates()
for (const ASTFunction * node : data.aggregates)
for (auto & argument : node->arguments->children)
getRootActions(argument, only_types, step.actions);
}
bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getAggregatingQuery();
if (!select_query->having())
return false;
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
step.required_output.push_back(select_query->having()->getColumnName());
getRootActions(select_query->having(), only_types, step.actions);
return true;
}
void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getSelectQuery();
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getRootActions(select_query->select(), only_types, step.actions);
for (const auto & child : select_query->select()->children)
2018-03-01 01:25:06 +00:00
step.required_output.push_back(child->getColumnName());
}
bool SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getSelectQuery();
if (!select_query->orderBy())
return false;
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getRootActions(select_query->orderBy(), only_types, step.actions);
for (auto & child : select_query->orderBy()->children)
{
const auto * ast = child->as<ASTOrderByElement>();
if (!ast || ast->children.size() < 1)
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.required_output.push_back(order_expression->getColumnName());
}
return true;
}
bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getSelectQuery();
if (!select_query->limitBy())
return false;
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
getRootActions(select_query->limitBy(), only_types, step.actions);
NameSet aggregated_names;
for (const auto & column : aggregated_columns)
{
2019-05-25 10:51:48 +00:00
step.required_output.push_back(column.name);
aggregated_names.insert(column.name);
}
for (const auto & child : select_query->limitBy()->children)
2019-05-25 10:51:48 +00:00
{
auto child_name = child->getColumnName();
if (!aggregated_names.count(child_name))
step.required_output.push_back(std::move(child_name));
}
return true;
}
void SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
{
2019-08-13 13:48:09 +00:00
const auto * select_query = getSelectQuery();
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
NamesWithAliases result_columns;
ASTs asts = select_query->select()->children;
for (size_t i = 0; i < asts.size(); ++i)
{
2018-02-26 09:05:06 +00:00
String result_name = asts[i]->getAliasOrColumnName();
if (required_result_columns.empty() || required_result_columns.count(result_name))
2018-02-26 09:05:06 +00:00
{
result_columns.emplace_back(asts[i]->getColumnName(), result_name);
step.required_output.push_back(result_columns.back().second);
}
}
step.actions->add(ExpressionAction::project(result_columns));
}
void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types)
{
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
getRootActions(expr, only_types, step.actions);
step.required_output.push_back(expr->getColumnName());
}
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
NamesWithAliases result_columns;
Names result_names;
ASTs asts;
2019-03-11 13:22:51 +00:00
if (const auto * node = query->as<ASTExpressionList>())
asts = node->children;
else
2018-08-27 17:58:43 +00:00
asts = ASTs(1, query);
for (size_t i = 0; i < asts.size(); ++i)
{
std::string name = asts[i]->getColumnName();
std::string alias;
if (add_aliases)
alias = asts[i]->getAliasOrColumnName();
else
alias = name;
result_columns.emplace_back(name, alias);
result_names.push_back(alias);
getRootActions(asts[i], false, actions);
}
if (add_aliases)
{
if (project_result)
actions->add(ExpressionAction::project(result_columns));
else
actions->add(ExpressionAction::addAliases(result_columns));
}
2018-09-05 13:04:28 +00:00
if (!(add_aliases && project_result))
{
2017-04-02 17:37:49 +00:00
/// We will not delete the original columns.
for (const auto & column_name_type : sourceColumns())
result_names.push_back(column_name_type.name);
}
actions->finalize(result_names);
return actions;
}
ExpressionActionsPtr ExpressionAnalyzer::getConstActions()
{
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(NamesAndTypesList(), context);
getRootActions(query, true, actions, true);
return actions;
}
void SelectQueryExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptions & aggregates) const
{
for (const auto & name_and_type : aggregation_keys)
key_names.emplace_back(name_and_type.name);
2015-03-12 02:22:55 +00:00
aggregates = aggregate_descriptions;
}
}