Merge remote-tracking branch 'upstream/master' into fix25

This commit is contained in:
proller 2019-08-14 16:29:20 +03:00
commit aac41f353a
7 changed files with 169 additions and 153 deletions

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit 7a2d304c21549427460428c9039009ef4bbfd899
Subproject commit 6216cc01a107ce149863411ca29013a224f80343

View File

@ -58,6 +58,7 @@
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/ExternalTablesVisitor.h>
#include <Interpreters/GlobalSubqueriesVisitor.h>
#include <Interpreters/GetAggregatesVisitor.h>
namespace DB
{
@ -68,7 +69,6 @@ using LogAST = DebugASTLog<false>; /// set to true to enable logs
namespace ErrorCodes
{
extern const int UNKNOWN_IDENTIFIER;
extern const int ILLEGAL_AGGREGATION;
extern const int EXPECTED_ALL_OR_ANY;
}
@ -78,16 +78,15 @@ ExpressionAnalyzer::ExpressionAnalyzer(
const Context & context_,
const NameSet & required_result_columns_,
size_t subquery_depth_,
bool do_global_,
const SubqueriesForSets & subqueries_for_sets_)
: ExpressionAnalyzerData(required_result_columns_, subqueries_for_sets_)
bool do_global)
: ExpressionAnalyzerData(required_result_columns_)
, query(query_), context(context_), settings(context.getSettings())
, subquery_depth(subquery_depth_), do_global(do_global_)
, subquery_depth(subquery_depth_)
, syntax(syntax_analyzer_result_)
{
/// external_tables, subqueries_for_sets for global subqueries.
/// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers.
initGlobalSubqueriesAndExternalTables();
initGlobalSubqueriesAndExternalTables(do_global);
/// has_aggregation, aggregation_keys, aggregate_descriptions, aggregated_columns.
/// This analysis should be performed after processing global subqueries, because otherwise,
@ -116,9 +115,6 @@ void ExpressionAnalyzer::analyzeAggregation()
auto * select_query = query->as<ASTSelectQuery>();
if (select_query && (select_query->groupBy() || select_query->having()))
has_aggregation = true;
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(sourceColumns(), context);
if (select_query)
@ -150,11 +146,13 @@ void ExpressionAnalyzer::analyzeAggregation()
}
}
getAggregates(query, temp_actions);
has_aggregation = makeAggregateDescriptions(temp_actions);
if (select_query && (select_query->groupBy() || select_query->having()))
has_aggregation = true;
if (has_aggregation)
{
assertSelect();
getSelectQuery(); /// assertSelect()
/// Find out aggregation keys.
if (select_query->groupBy())
@ -223,7 +221,7 @@ void ExpressionAnalyzer::analyzeAggregation()
}
void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables()
void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
{
/// Adds existing external tables (not subqueries) to the external_tables dictionary.
ExternalTablesVisitor::Data tables_data{context, external_tables};
@ -238,20 +236,6 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables()
}
void ExpressionAnalyzer::makeSetsForIndex()
{
const auto * select_query = query->as<ASTSelectQuery>();
if (storage() && select_query && storage()->supportsIndexForIn())
{
if (select_query->where())
makeSetsForIndexImpl(select_query->where());
if (select_query->prewhere())
makeSetsForIndexImpl(select_query->prewhere());
}
}
void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name)
{
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
@ -277,8 +261,12 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
}
void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
/// Performance optimisation for IN() if storage supports it.
void ExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
{
if (!node || !storage() || !storage()->supportsIndexForIn())
return;
for (auto & child : node->children)
{
/// Don't descend into subqueries.
@ -290,15 +278,16 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
if (func && func->name == "lambda")
continue;
makeSetsForIndexImpl(child);
makeSetsForIndex(child);
}
const auto * func = node->as<ASTFunction>();
if (func && functionIsInOperator(func->name))
{
const IAST & args = *func->arguments;
const ASTPtr & left_in_operand = args.children.at(0);
if (storage() && storage()->mayBenefitFromIndexForIn(args.children.at(0), context))
if (storage()->mayBenefitFromIndexForIn(left_in_operand, context))
{
const ASTPtr & arg = args.children.at(1);
if (arg->as<ASTSubquery>() || arg->as<ASTIdentifier>())
@ -310,13 +299,13 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
{
NamesAndTypesList temp_columns = sourceColumns();
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
for (const auto & joined_column : columnsAddedByJoin())
temp_columns.push_back(joined_column);
temp_columns.insert(temp_columns.end(), columnsAddedByJoin().begin(), columnsAddedByJoin().end());
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
getRootActions(func->arguments->children.at(0), true, temp_actions);
getRootActions(left_in_operand, true, temp_actions);
Block sample_block_with_calculated_columns = temp_actions->getSampleBlock();
if (sample_block_with_calculated_columns.has(args.children.at(0)->getColumnName()))
if (sample_block_with_calculated_columns.has(left_in_operand->getColumnName()))
makeExplicitSet(func, sample_block_with_calculated_columns, true, context,
settings.size_limits_for_set, prepared_sets);
}
@ -356,46 +345,19 @@ void ExpressionAnalyzer::getActionsFromJoinKeys(const ASTTableJoin & table_join,
actions = actions_visitor.popActionsLevel();
}
void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr & actions)
bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & actions)
{
const auto * select_query = query->as<ASTSelectQuery>();
/// There can not be aggregate functions inside the WHERE and PREWHERE.
if (select_query && (ast.get() == select_query->where().get() || ast.get() == select_query->prewhere().get()))
for (const ASTFunction * node : aggregates())
{
assertNoAggregates(ast, "in WHERE or PREWHERE");
return;
}
/// If we are not analyzing a SELECT query, but a separate expression, then there can not be aggregate functions in it.
if (!select_query)
{
assertNoAggregates(ast, "in wrong place");
return;
}
const auto * node = ast->as<ASTFunction>();
if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
{
has_aggregation = true;
AggregateDescription aggregate;
aggregate.column_name = node->getColumnName();
/// Make unique aggregate functions.
for (size_t i = 0; i < aggregate_descriptions.size(); ++i)
if (aggregate_descriptions[i].column_name == aggregate.column_name)
return;
const ASTs & arguments = node->arguments->children;
aggregate.argument_names.resize(arguments.size());
DataTypes types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
/// There can not be other aggregate functions within the aggregate functions.
assertNoAggregates(arguments[i], "inside another aggregate function");
getRootActions(arguments[i], true, actions);
const std::string & name = arguments[i]->getColumnName();
types[i] = actions->getSampleBlock().getByName(name).type;
@ -407,41 +369,24 @@ void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr
aggregate_descriptions.push_back(aggregate);
}
else
{
for (const auto & child : ast->children)
if (!child->as<ASTSubquery>() && !child->as<ASTSelectQuery>())
getAggregates(child, actions);
}
return !aggregates().empty();
}
void ExpressionAnalyzer::assertNoAggregates(const ASTPtr & ast, const char * description)
{
const auto * node = ast->as<ASTFunction>();
if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
throw Exception("Aggregate function " + node->getColumnName()
+ " is found " + String(description) + " in query", ErrorCodes::ILLEGAL_AGGREGATION);
for (const auto & child : ast->children)
if (!child->as<ASTSubquery>() && !child->as<ASTSelectQuery>())
assertNoAggregates(child, description);
}
void ExpressionAnalyzer::assertSelect() const
const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const
{
const auto * select_query = query->as<ASTSelectQuery>();
if (!select_query)
throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR);
return select_query;
}
void ExpressionAnalyzer::assertAggregation() const
const ASTSelectQuery * ExpressionAnalyzer::getAggregatingQuery() const
{
if (!has_aggregation)
throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR);
return getSelectQuery();
}
void ExpressionAnalyzer::initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const
@ -471,9 +416,7 @@ void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actio
bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
bool is_array_join_left;
ASTPtr array_join_expression_list = select_query->array_join_expression_list(is_array_join_left);
@ -515,9 +458,7 @@ static void appendRequiredColumns(
bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
if (!select_query->join())
return false;
@ -626,9 +567,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
bool ExpressionAnalyzer::appendPrewhere(
ExpressionActionsChain & chain, bool only_types, const Names & additional_required_columns)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
if (!select_query->prewhere())
return false;
@ -701,9 +640,7 @@ bool ExpressionAnalyzer::appendPrewhere(
bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
if (!select_query->where())
return false;
@ -721,9 +658,7 @@ bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_t
bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertAggregation();
const auto * select_query = getAggregatingQuery();
if (!select_query->groupBy())
return false;
@ -743,9 +678,7 @@ bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only
void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertAggregation();
const auto * select_query = getAggregatingQuery();
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
@ -758,20 +691,27 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai
}
}
getActionsBeforeAggregation(select_query->select(), step.actions, only_types);
/// Collect aggregates removing duplicates by node.getColumnName()
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
/// @note The original recollection logic didn't remove duplicates.
GetAggregatesVisitor::Data data;
GetAggregatesVisitor(data).visit(select_query->select());
if (select_query->having())
getActionsBeforeAggregation(select_query->having(), step.actions, only_types);
GetAggregatesVisitor(data).visit(select_query->having());
if (select_query->orderBy())
getActionsBeforeAggregation(select_query->orderBy(), step.actions, only_types);
GetAggregatesVisitor(data).visit(select_query->orderBy());
/// TODO: data.aggregates -> aggregates()
for (const ASTFunction * node : data.aggregates)
for (auto & argument : node->arguments->children)
getRootActions(argument, only_types, step.actions);
}
bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertAggregation();
const auto * select_query = getAggregatingQuery();
if (!select_query->having())
return false;
@ -787,9 +727,7 @@ bool ExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_
void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
@ -802,9 +740,7 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_
bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
if (!select_query->orderBy())
return false;
@ -828,9 +764,7 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only
bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
if (!select_query->limitBy())
return false;
@ -859,9 +793,7 @@ bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only
void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
{
const auto * select_query = query->as<ASTSelectQuery>();
assertSelect();
const auto * select_query = getSelectQuery();
initChain(chain, aggregated_columns);
ExpressionActionsChain::Step & step = chain.steps.back();
@ -892,19 +824,6 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
}
void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries)
{
const auto * node = ast->as<ASTFunction>();
if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name))
for (auto & argument : node->arguments->children)
getRootActions(argument, no_subqueries, actions);
else
for (auto & child : ast->children)
getActionsBeforeAggregation(child, actions, no_subqueries);
}
ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result)
{
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(sourceColumns(), context);

View File

@ -51,10 +51,8 @@ struct ExpressionAnalyzerData
Tables external_tables;
protected:
ExpressionAnalyzerData(const NameSet & required_result_columns_,
const SubqueriesForSets & subqueries_for_sets_)
: required_result_columns(required_result_columns_),
subqueries_for_sets(subqueries_for_sets_)
ExpressionAnalyzerData(const NameSet & required_result_columns_)
: required_result_columns(required_result_columns_)
{}
};
@ -91,8 +89,7 @@ public:
const Context & context_,
const NameSet & required_result_columns_ = {},
size_t subquery_depth_ = 0,
bool do_global_ = false,
const SubqueriesForSets & subqueries_for_set_ = {});
bool do_global_ = false);
/// Does the expression have aggregate functions or a GROUP BY or HAVING section.
bool hasAggregation() const { return has_aggregation; }
@ -161,7 +158,7 @@ public:
const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }
/// Create the Sets that can be built from the IN section, so that the index can be used on them.
void makeSetsForIndex();
void makeSetsForIndex(const ASTPtr & node);
bool hasGlobalSubqueries() { return has_global_subqueries; }
@ -170,7 +167,6 @@ private:
const Context & context;
const ExtractedSettings settings;
size_t subquery_depth;
bool do_global; /// Do I need to prepare for execution global subqueries when analyzing the query.
SyntaxAnalyzerResultPtr syntax;
@ -178,9 +174,10 @@ private:
const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; }
const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
const NamesAndTypesList & columnsAddedByJoin() const { return syntax->columns_added_by_join; }
const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
void initGlobalSubqueriesAndExternalTables();
void initGlobalSubqueriesAndExternalTables(bool do_global);
void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const;
@ -191,22 +188,19 @@ private:
void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
void getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries);
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
* Create a set of columns aggregated_columns resulting after the aggregation, if any,
* or after all the actions that are normally performed before aggregation.
* Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
*/
void analyzeAggregation();
void getAggregates(const ASTPtr & ast, ExpressionActionsPtr & actions);
void assertNoAggregates(const ASTPtr & ast, const char * description);
bool makeAggregateDescriptions(ExpressionActionsPtr & actions);
/// columns - the columns that are present before the transformations begin.
void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const;
void assertSelect() const;
void assertAggregation() const;
const ASTSelectQuery * getSelectQuery() const;
const ASTSelectQuery * getAggregatingQuery() const;
/**
* Create Set from a subquery or a table expression in the query. The created set is suitable for using the index.
@ -214,8 +208,6 @@ private:
*/
void tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_or_table_name);
void makeSetsForIndexImpl(const ASTPtr & node);
bool isRemoteStorage() const;
};

View File

@ -0,0 +1,75 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_AGGREGATION;
}
/// Matcher (for ConstInDepthNodeVisitor) that looks for aggregate function calls in an AST.
/// Its behaviour is selected by Data::assert_no_aggregates:
///  - nullptr: every aggregate function found is appended to Data::aggregates,
///    keeping only the first node for each distinct column name;
///  - non-null: the first aggregate function found causes an ILLEGAL_AGGREGATION exception,
///    and the string is used in the message as the description of the offending place.
class GetAggregatesMatcher
{
public:
    using Visitor = ConstInDepthNodeVisitor<GetAggregatesMatcher, true>;

    struct Data
    {
        /// When set, finding an aggregate function is an error; the text is inserted into the exception message.
        const char * assert_no_aggregates = nullptr;
        /// Column names of the aggregates already collected; used to skip duplicates.
        std::unordered_set<String> uniq_names;
        /// Collected aggregate function nodes, in the order they were visited.
        /// These are non-owning pointers into the visited AST.
        std::vector<const ASTFunction *> aggregates;
    };

    /// Decides whether the traversal should descend from `node` into `child`.
    /// Subqueries and nested SELECTs are not entered, and neither are the children of an aggregate function.
    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child)
    {
        if (child->as<ASTSubquery>() || child->as<ASTSelectQuery>())
            return false;
        if (auto * func = node->as<ASTFunction>())
            if (isAggregateFunction(func->name))
                return false;
        return true;
    }

    /// Entry point called for each visited node; only function nodes are of interest.
    static void visit(const ASTPtr & ast, Data & data)
    {
        if (auto * func = ast->as<ASTFunction>())
            visit(*func, ast, data);
    }

private:
    /// Handles a function node: ignores non-aggregate functions, throws in "assert" mode,
    /// otherwise records the node unless an aggregate with the same column name was already recorded.
    static void visit(const ASTFunction & node, const ASTPtr &, Data & data)
    {
        if (!isAggregateFunction(node.name))
            return;

        if (data.assert_no_aggregates)
            throw Exception("Aggregate function " + node.getColumnName() + " is found " + String(data.assert_no_aggregates) + " in query",
                ErrorCodes::ILLEGAL_AGGREGATION);

        /// insert() reports whether the name was new, so one hash lookup both checks for and records it.
        if (data.uniq_names.insert(node.getColumnName()).second)
            data.aggregates.push_back(&node);
    }

    static bool isAggregateFunction(const String & name)
    {
        return AggregateFunctionFactory::instance().isAggregateFunctionName(name);
    }
};

using GetAggregatesVisitor = GetAggregatesMatcher::Visitor;
inline void assertNoAggregates(const ASTPtr & ast, const char * description)
{
GetAggregatesVisitor::Data data{description, {}, {}};
GetAggregatesVisitor(data).visit(ast);
}
}

View File

@ -790,7 +790,8 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
if (!dry_run)
from_stage = storage->getQueryProcessingStage(context);
query_analyzer->makeSetsForIndex();
query_analyzer->makeSetsForIndex(query.where());
query_analyzer->makeSetsForIndex(query.prewhere());
auto optimize_prewhere = [&](auto & merge_tree)
{

View File

@ -14,6 +14,7 @@
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/GetAggregatesVisitor.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -558,6 +559,30 @@ void checkJoin(const ASTTablesInSelectQueryElement * join)
ErrorCodes::NOT_IMPLEMENTED);
}
std::vector<const ASTFunction *> getAggregates(const ASTPtr & query)
{
if (const auto * select_query = query->as<ASTSelectQuery>())
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.
if (select_query->where())
assertNoAggregates(select_query->where(), "in WHERE");
if (select_query->prewhere())
assertNoAggregates(select_query->prewhere(), "in PREWHERE");
GetAggregatesVisitor::Data data;
GetAggregatesVisitor(data).visit(query);
/// There can not be other aggregate functions within the aggregate functions.
for (const ASTFunction * node : data.aggregates)
for (auto & arg : node->arguments->children)
assertNoAggregates(arg, "inside another aggregate function");
return data.aggregates;
}
else
assertNoAggregates(query, "in wrong place");
return {};
}
}
/// Calculate which columns are required to execute the expression.
@ -840,6 +865,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
collectJoinedColumns(result.analyzed_join, *select_query, source_columns_set, result.aliases, settings.join_use_nulls);
}
result.aggregates = getAggregates(query);
result.collectUsedColumns(query, additional_source_columns);
return std::make_shared<const SyntaxAnalyzerResult>(result);
}

View File

@ -10,6 +10,8 @@ namespace DB
NameSet removeDuplicateColumns(NamesAndTypesList & columns);
class ASTFunction;
struct SyntaxAnalyzerResult
{
StoragePtr storage;
@ -22,6 +24,7 @@ struct SyntaxAnalyzerResult
NamesAndTypesList columns_added_by_join;
Aliases aliases;
std::vector<const ASTFunction *> aggregates;
/// Which column is needed to be ARRAY-JOIN'ed to get the specified.
/// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".