Merge remote-tracking branch 'upstream/master' into fix25

This commit is contained in:
proller 2019-08-14 16:29:20 +03:00
commit aac41f353a
7 changed files with 169 additions and 153 deletions

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit 7a2d304c21549427460428c9039009ef4bbfd899 Subproject commit 6216cc01a107ce149863411ca29013a224f80343

View File

@ -58,6 +58,7 @@
#include <Interpreters/ActionsVisitor.h> #include <Interpreters/ActionsVisitor.h>
#include <Interpreters/ExternalTablesVisitor.h> #include <Interpreters/ExternalTablesVisitor.h>
#include <Interpreters/GlobalSubqueriesVisitor.h> #include <Interpreters/GlobalSubqueriesVisitor.h>
#include <Interpreters/GetAggregatesVisitor.h>
namespace DB namespace DB
{ {
@ -68,7 +69,6 @@ using LogAST = DebugASTLog<false>; /// set to true to enable logs
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNKNOWN_IDENTIFIER; extern const int UNKNOWN_IDENTIFIER;
extern const int ILLEGAL_AGGREGATION;
extern const int EXPECTED_ALL_OR_ANY; extern const int EXPECTED_ALL_OR_ANY;
} }
@ -78,16 +78,15 @@ ExpressionAnalyzer::ExpressionAnalyzer(
const Context & context_, const Context & context_,
const NameSet & required_result_columns_, const NameSet & required_result_columns_,
size_t subquery_depth_, size_t subquery_depth_,
bool do_global_, bool do_global)
const SubqueriesForSets & subqueries_for_sets_) : ExpressionAnalyzerData(required_result_columns_)
: ExpressionAnalyzerData(required_result_columns_, subqueries_for_sets_)
, query(query_), context(context_), settings(context.getSettings()) , query(query_), context(context_), settings(context.getSettings())
, subquery_depth(subquery_depth_), do_global(do_global_) , subquery_depth(subquery_depth_)
, syntax(syntax_analyzer_result_) , syntax(syntax_analyzer_result_)
{ {
/// external_tables, subqueries_for_sets for global subqueries. /// external_tables, subqueries_for_sets for global subqueries.
/// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers.
initGlobalSubqueriesAndExternalTables(); initGlobalSubqueriesAndExternalTables(do_global);
/// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns. /// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns.
/// This analysis should be performed after processing global subqueries, because otherwise, /// This analysis should be performed after processing global subqueries, because otherwise,
@ -116,9 +115,6 @@ void ExpressionAnalyzer::analyzeAggregation()
auto * select_query = query->as<ASTSelectQuery>(); auto * select_query = query->as<ASTSelectQuery>();
if (select_query && (select_query->groupBy() || select_query->having()))
has_aggregation = true;
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context); ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
if (select_query) if (select_query)
@ -150,11 +146,13 @@ void ExpressionAnalyzer::analyzeAggregation()
} }
} }
getAggregates(query, temp_actions); has_aggregation = makeAggregateDescriptions(temp_actions);
if (select_query && (select_query->groupBy() || select_query->having()))
has_aggregation = true;
if (has_aggregation) if (has_aggregation)
{ {
assertSelect(); getSelectQuery(); /// assertSelect()
/// Find out aggregation keys. /// Find out aggregation keys.
if (select_query->groupBy()) if (select_query->groupBy())
@ -223,7 +221,7 @@ void ExpressionAnalyzer::analyzeAggregation()
} }
void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
{ {
/// Adds existing external tables (not subqueries) to the external_tables dictionary. /// Adds existing external tables (not subqueries) to the external_tables dictionary.
ExternalTablesVisitor::Data tables_data{context, external_tables}; ExternalTablesVisitor::Data tables_data{context, external_tables};
@ -238,20 +236,6 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables()
} }
void ExpressionAnalyzer::makeSetsForIndex()
{
const auto * select_query = query->as<ASTSelectQuery>();
if (storage() && select_query && storage()->supportsIndexForIn())
{
if (select_query->where())
makeSetsForIndexImpl(select_query->where());
if (select_query->prewhere())
makeSetsForIndexImpl(select_query->prewhere());
}
}
void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name) void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
{ {
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
@ -277,8 +261,12 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
} }
void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node) /// Performance optimisation for IN() if storage supports it.
void ExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
{ {
if (!node || !storage() || !storage()->supportsIndexForIn())
return;
for (auto & child : node->children) for (auto & child : node->children)
{ {
/// Don't descend into subqueries. /// Don't descend into subqueries.
@ -290,15 +278,16 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
if (func && func->name == "lambda") if (func && func->name == "lambda")
continue; continue;
makeSetsForIndexImpl(child); makeSetsForIndex(child);
} }
const auto * func = node->as<ASTFunction>(); const auto * func = node->as<ASTFunction>();
if (func && functionIsInOperator(func->name)) if (func && functionIsInOperator(func->name))
{ {
const IAST & args = *func->arguments; const IAST & args = *func->arguments;
const ASTPtr & left_in_operand = args.children.at(0);
if (storage() && storage()->mayBenefitFromIndexForIn(args.children.at(0), context)) if (storage()->mayBenefitFromIndexForIn(left_in_operand, context))
{ {
const ASTPtr & arg = args.children.at(1); const ASTPtr & arg = args.children.at(1);
if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>()) if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>())
@ -310,13 +299,13 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
{ {
NamesAndTypesList temp_columns = sourceColumns(); NamesAndTypesList temp_columns = sourceColumns();
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end()); temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
for (const auto & joined_column : columnsAddedByJoin()) temp_columns.insert(temp_columns.end(), columnsAddedByJoin().begin(), columnsAddedByJoin().end());
temp_columns.push_back(joined_column);
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context); ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
getRootActions(func->arguments->children.at(0), true, temp_actions); getRootActions(left_in_operand, true, temp_actions);
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock(); Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName())) if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName()))
makeExplicitSet(func, sample_block_with_calculated_columns, true, context, makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
settings.size_limits_for_set, prepared_sets); settings.size_limits_for_set, prepared_sets);
} }
@ -356,46 +345,19 @@ void ExpressionAnalyzer::getActionsFromJoinKeys(const ASTTableJoin & table_join,
actions = actions_visitor.popActionsLevel(); actions = actions_visitor.popActionsLevel();
} }
bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions)
void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr & actions)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); for (const ASTFunction * node : aggregates())
/// There can not be aggregate functions inside the WHERE and PREWHERE.
if (select_query && (ast.get() == select_query->where().get() || ast.get() == select_query->prewhere().get()))
{ {
assertNoAggregates(ast, "in WHERE or PREWHERE");
return;
}
/// If we are not analyzing a SELECT query, but a separate expression, then there can not be aggregate functions in it.
if (!select_query)
{
assertNoAggregates(ast, "in wrong place");
return;
}
const auto * node = ast->as<ASTFunction>();
if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
{
has_aggregation = true;
AggregateDescription aggregate; AggregateDescription aggregate;
aggregate.column_name = node->getColumnName(); aggregate.column_name = node->getColumnName();
/// Make unique aggregate functions.
for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
if (aggregate_descriptions[i].column_name == aggregate.column_name)
return;
const ASTs & arguments = node->arguments->children; const ASTs & arguments = node->arguments->children;
aggregate.argument_names.resize(arguments.size()); aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size()); DataTypes types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i) for (size_t i = 0; i < arguments.size(); ++i)
{ {
/// There can not be other aggregate functions within the aggregate functions.
assertNoAggregates(arguments[i], "inside another aggregate function");
getRootActions(arguments[i], true, actions); getRootActions(arguments[i], true, actions);
const std::string & name = arguments[i]->getColumnName(); const std::string & name = arguments[i]->getColumnName();
types[i] = actions->getSampleBlock().getByName(name).type; types[i] = actions->getSampleBlock().getByName(name).type;
@ -407,41 +369,24 @@ void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr
aggregate_descriptions.push_back(aggregate); aggregate_descriptions.push_back(aggregate);
} }
else
{ return !aggregates().empty();
for (const auto & child : ast->children)
if (!child->as<ASTSubquery>() && !child->as<ASTSelectQuery>())
getAggregates(child, actions);
}
} }
void ExpressionAnalyzer::assertNoAggregates(const ASTPtr & ast, const char * description) const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const
{
const auto * node = ast->as<ASTFunction>();
if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
throw Exception("Aggregate function " + node->getColumnName()
+ " is found " + String(description) + " in query", ErrorCodes::ILLEGAL_AGGREGATION);
for (const auto & child : ast->children)
if (!child->as<ASTSubquery>() && !child->as<ASTSelectQuery>())
assertNoAggregates(child, description);
}
void ExpressionAnalyzer::assertSelect() const
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = query->as<ASTSelectQuery>();
if (!select_query) if (!select_query)
throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR); throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
return select_query;
} }
void ExpressionAnalyzer::assertAggregation() const const ASTSelectQuery * ExpressionAnalyzer::getAggregatingQuery() const
{ {
if (!has_aggregation) if (!has_aggregation)
throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR); throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
return getSelectQuery();
} }
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const
@ -471,9 +416,7 @@ void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actio
bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types) bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
bool is_array_join_left; bool is_array_join_left;
ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left); ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left);
@ -515,9 +458,7 @@ static void appendRequiredColumns(
bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
if (!select_query->join()) if (!select_query->join())
return false; return false;
@ -626,9 +567,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
bool ExpressionAnalyzer::appendPrewhere( bool ExpressionAnalyzer::appendPrewhere(
ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns) ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
if (!select_query->prewhere()) if (!select_query->prewhere())
return false; return false;
@ -701,9 +640,7 @@ bool ExpressionAnalyzer::appendPrewhere(
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types) bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
if (!select_query->where()) if (!select_query->where())
return false; return false;
@ -721,9 +658,7 @@ bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_t
bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types) bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getAggregatingQuery();
assertAggregation();
if (!select_query->groupBy()) if (!select_query->groupBy())
return false; return false;
@ -743,9 +678,7 @@ bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only
void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types) void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getAggregatingQuery();
assertAggregation();
initChain(chain, sourceColumns()); initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back(); ExpressionActionsChain::Step & step = chain.steps.back();
@ -758,20 +691,27 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai
} }
} }
getActionsBeforeAggregation(select_query->select(), step.actions, only_types); /// Collect aggregates removing duplicates by node.getColumnName()
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
/// @note The original recollection logic didn't remove duplicates.
GetAggregatesVisitor::Data data;
GetAggregatesVisitor(data).visit(select_query->select());
if (select_query->having()) if (select_query->having())
getActionsBeforeAggregation(select_query->having(), step.actions, only_types); GetAggregatesVisitor(data).visit(select_query->having());
if (select_query->orderBy()) if (select_query->orderBy())
getActionsBeforeAggregation(select_query->orderBy(), step.actions, only_types); GetAggregatesVisitor(data).visit(select_query->orderBy());
/// TODO: data.aggregates -> aggregates()
for (const ASTFunction * node : data.aggregates)
for (auto & argument : node->arguments->children)
getRootActions(argument, only_types, step.actions);
} }
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types) bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getAggregatingQuery();
assertAggregation();
if (!select_query->having()) if (!select_query->having())
return false; return false;
@ -787,9 +727,7 @@ bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_
void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types) void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
initChain(chain, aggregated_columns); initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back(); ExpressionActionsChain::Step & step = chain.steps.back();
@ -802,9 +740,7 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types) bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
if (!select_query->orderBy()) if (!select_query->orderBy())
return false; return false;
@ -828,9 +764,7 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only
bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types) bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
if (!select_query->limitBy()) if (!select_query->limitBy())
return false; return false;
@ -859,9 +793,7 @@ bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only
void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
{ {
const auto * select_query = query->as<ASTSelectQuery>(); const auto * select_query = getSelectQuery();
assertSelect();
initChain(chain, aggregated_columns); initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back(); ExpressionActionsChain::Step & step = chain.steps.back();
@ -892,19 +824,6 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
} }
void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries)
{
const auto * node = ast->as<ASTFunction>();
if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
for (auto & argument : node->arguments->children)
getRootActions(argument, no_subqueries, actions);
else
for (auto & child : ast->children)
getActionsBeforeAggregation(child, actions, no_subqueries);
}
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{ {
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(sourceColumns(), context); ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(sourceColumns(), context);

View File

@ -51,10 +51,8 @@ struct ExpressionAnalyzerData
Tables external_tables; Tables external_tables;
protected: protected:
ExpressionAnalyzerData(const NameSet & required_result_columns_, ExpressionAnalyzerData(const NameSet & required_result_columns_)
const SubqueriesForSets & subqueries_for_sets_) : required_result_columns(required_result_columns_)
: required_result_columns(required_result_columns_),
subqueries_for_sets(subqueries_for_sets_)
{} {}
}; };
@ -91,8 +89,7 @@ public:
const Context & context_, const Context & context_,
const NameSet & required_result_columns_ = {}, const NameSet & required_result_columns_ = {},
size_t subquery_depth_ = 0, size_t subquery_depth_ = 0,
bool do_global_ = false, bool do_global_ = false);
const SubqueriesForSets & subqueries_for_set_ = {});
/// Does the expression have aggregate functions or a GROUP BY or HAVING section. /// Does the expression have aggregate functions or a GROUP BY or HAVING section.
bool hasAggregation() const { return has_aggregation; } bool hasAggregation() const { return has_aggregation; }
@ -161,7 +158,7 @@ public:
const ExpressionAnalyzerData & getAnalyzedData() const { return *this; } const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
/// Create Set-s that we can from IN section to use the index on them. /// Create Set-s that we can from IN section to use the index on them.
void makeSetsForIndex(); void makeSetsForIndex(const ASTPtr & node);
bool hasGlobalSubqueries() { return has_global_subqueries; } bool hasGlobalSubqueries() { return has_global_subqueries; }
@ -170,7 +167,6 @@ private:
const Context & context; const Context & context;
const ExtractedSettings settings; const ExtractedSettings settings;
size_t subquery_depth; size_t subquery_depth;
bool do_global; /// Do I need to prepare for execution global subqueries when analyzing the query.
SyntaxAnalyzerResultPtr syntax; SyntaxAnalyzerResultPtr syntax;
@ -178,9 +174,10 @@ private:
const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; } const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; }
const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; } const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
const NamesAndTypesList & columnsAddedByJoin() const { return syntax->columns_added_by_join; } const NamesAndTypesList & columnsAddedByJoin() const { return syntax->columns_added_by_join; }
const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables(); void initGlobalSubqueriesAndExternalTables(bool do_global);
void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const; void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const;
@ -191,22 +188,19 @@ private:
void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false); void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
void getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries);
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
* Create a set of columns aggregated_columns resulting after the aggregation, if any, * Create a set of columns aggregated_columns resulting after the aggregation, if any,
* or after all the actions that are normally performed before aggregation. * or after all the actions that are normally performed before aggregation.
* Set has_aggregation = true if there is GROUP BY or at least one aggregate function. * Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
*/ */
void analyzeAggregation(); void analyzeAggregation();
void getAggregates(const ASTPtr & ast, ExpressionActionsPtr & actions); bool makeAggregateDescriptions(ExpressionActionsPtr & actions);
void assertNoAggregates(const ASTPtr & ast, const char * description);
/// columns - the columns that are present before the transformations begin. /// columns - the columns that are present before the transformations begin.
void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const; void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const;
void assertSelect() const; const ASTSelectQuery * getSelectQuery() const;
void assertAggregation() const; const ASTSelectQuery * getAggregatingQuery() const;
/** /**
* Create Set from a subquery or a table expression in the query. The created set is suitable for using the index. * Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
@ -214,8 +208,6 @@ private:
*/ */
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name); void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
void makeSetsForIndexImpl(const ASTPtr & node);
bool isRemoteStorage() const; bool isRemoteStorage() const;
}; };

View File

@ -0,0 +1,75 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_AGGREGATION;
}
/// Matcher that walks an AST and gathers the aggregate function nodes it meets.
/// It works in one of two modes, selected through Data::assert_no_aggregates:
///  - collecting (pointer is null): every aggregate with a not-yet-seen column name is appended to Data::aggregates;
///  - asserting (pointer is set): the first aggregate encountered raises ILLEGAL_AGGREGATION.
class GetAggregatesMatcher
{
public:
    using Visitor = ConstInDepthNodeVisitor<GetAggregatesMatcher, true>;

    struct Data
    {
        /// When non-null, finding any aggregate is an error; the text is embedded into the exception message.
        const char * assert_no_aggregates = nullptr;
        /// Column names of the aggregates already stored in `aggregates` (used to skip duplicates).
        std::unordered_set<String> uniq_names;
        /// Collected aggregate function nodes, in visiting order, unique by column name.
        std::vector<const ASTFunction *> aggregates;
    };

    /// Decide whether the traversal should go from `node` down into `child`.
    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child)
    {
        /// Subqueries and nested SELECTs are never descended into.
        const bool child_is_nested_query = child->as<ASTSubquery>() || child->as<ASTSelectQuery>();
        if (child_is_nested_query)
            return false;

        /// Arguments of an aggregate function are not visited either.
        const auto * parent_func = node->as<ASTFunction>();
        return !(parent_func && isAggregateFunction(parent_func->name));
    }

    /// Entry point called for each node: only function nodes are of interest.
    static void visit(const ASTPtr & ast, Data & data)
    {
        const auto * maybe_func = ast->as<ASTFunction>();
        if (maybe_func)
            visit(*maybe_func, ast, data);
    }

private:
    /// Handle a function node: ignore non-aggregates, throw in asserting mode, otherwise record it once per column name.
    static void visit(const ASTFunction & node, const ASTPtr &, Data & data)
    {
        if (!isAggregateFunction(node.name))
            return;

        const String column_name = node.getColumnName();

        if (data.assert_no_aggregates)
            throw Exception("Aggregate function " + column_name + " is found " + String(data.assert_no_aggregates) + " in query",
                ErrorCodes::ILLEGAL_AGGREGATION);

        /// insert() reports whether the name was new, so one lookup both checks and registers it.
        if (data.uniq_names.insert(column_name).second)
            data.aggregates.push_back(&node);
    }

    /// True if `name` is registered in AggregateFunctionFactory as an aggregate function name.
    static bool isAggregateFunction(const String & name)
    {
        return AggregateFunctionFactory::instance().isAggregateFunctionName(name);
    }
};
using GetAggregatesVisitor = GetAggregatesMatcher::Visitor;
/// Throw ILLEGAL_AGGREGATION if `ast` contains an aggregate function reachable by GetAggregatesVisitor.
/// `description` says where the aggregate was found and is inserted into the exception message.
inline void assertNoAggregates(const ASTPtr & ast, const char * description)
{
    GetAggregatesVisitor::Data data;
    data.assert_no_aggregates = description;   /// switches the visitor into asserting mode

    GetAggregatesVisitor(data).visit(ast);
}
}

View File

@ -790,7 +790,8 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
if (!dry_run) if (!dry_run)
from_stage = storage->getQueryProcessingStage(context); from_stage = storage->getQueryProcessingStage(context);
query_analyzer->makeSetsForIndex(); query_analyzer->makeSetsForIndex(query.where());
query_analyzer->makeSetsForIndex(query.prewhere());
auto optimize_prewhere = [&](auto & merge_tree) auto optimize_prewhere = [&](auto & merge_tree)
{ {

View File

@ -14,6 +14,7 @@
#include <Interpreters/ExternalDictionaries.h> #include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h> #include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h> #include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/GetAggregatesVisitor.h>
#include <Parsers/ASTExpressionList.h> #include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
@ -558,6 +559,30 @@ void checkJoin(const ASTTablesInSelectQueryElement * join)
ErrorCodes::NOT_IMPLEMENTED); ErrorCodes::NOT_IMPLEMENTED);
} }
/// Collect the aggregate functions of `query` (unique by column name) and validate their placement.
/// For a SELECT query: aggregates are rejected in WHERE, in PREWHERE and inside the arguments of another aggregate.
/// For anything that is not a SELECT query: any aggregate is rejected and an empty list is returned.
/// Violations are reported by assertNoAggregates(), which throws.
std::vector<const ASTFunction *> getAggregates(const ASTPtr & query)
{
    const auto * select_query = query->as<ASTSelectQuery>();

    /// A separate expression (not a SELECT) must not contain aggregate functions at all.
    if (!select_query)
    {
        assertNoAggregates(query, "in wrong place");
        return {};
    }

    /// There can not be aggregate functions inside the WHERE and PREWHERE.
    if (select_query->where())
        assertNoAggregates(select_query->where(), "in WHERE");
    if (select_query->prewhere())
        assertNoAggregates(select_query->prewhere(), "in PREWHERE");

    GetAggregatesVisitor::Data data;
    GetAggregatesVisitor(data).visit(query);

    /// There can not be other aggregate functions within the aggregate functions.
    for (const ASTFunction * aggregate : data.aggregates)
    {
        for (const auto & argument : aggregate->arguments->children)
            assertNoAggregates(argument, "inside another aggregate function");
    }

    return data.aggregates;
}
} }
/// Calculate which columns are required to execute the expression. /// Calculate which columns are required to execute the expression.
@ -840,6 +865,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
collectJoinedColumns(result.analyzed_join, *select_query, source_columns_set, result.aliases, settings.join_use_nulls); collectJoinedColumns(result.analyzed_join, *select_query, source_columns_set, result.aliases, settings.join_use_nulls);
} }
result.aggregates = getAggregates(query);
result.collectUsedColumns(query, additional_source_columns); result.collectUsedColumns(query, additional_source_columns);
return std::make_shared<const SyntaxAnalyzerResult>(result); return std::make_shared<const SyntaxAnalyzerResult>(result);
} }

View File

@ -10,6 +10,8 @@ namespace DB
NameSet removeDuplicateColumns(NamesAndTypesList & columns); NameSet removeDuplicateColumns(NamesAndTypesList & columns);
class ASTFunction;
struct SyntaxAnalyzerResult struct SyntaxAnalyzerResult
{ {
StoragePtr storage; StoragePtr storage;
@ -22,6 +24,7 @@ struct SyntaxAnalyzerResult
NamesAndTypesList columns_added_by_join; NamesAndTypesList columns_added_by_join;
Aliases aliases; Aliases aliases;
std::vector<const ASTFunction *> aggregates;
/// Which column is needed to be ARRAY-JOIN'ed to get the specified. /// Which column is needed to be ARRAY-JOIN'ed to get the specified.
/// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v". /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".