fix optimization with 'optimize_aggregators_of_group_by_keys' and joins

This commit is contained in:
Anton Popov 2020-11-12 22:50:01 +03:00
parent 6e62108606
commit 7990c8cbad
4 changed files with 36 additions and 77 deletions

View File

@ -20,8 +20,8 @@ struct KeepAggregateFunctionMatcher
{ {
struct Data struct Data
{ {
std::unordered_set<String> & group_by_keys; const NameSet & group_by_keys;
bool & keep_aggregator; bool keep_aggregator;
}; };
using Visitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>; using Visitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>;
@ -33,7 +33,7 @@ struct KeepAggregateFunctionMatcher
static void visit(ASTFunction & function_node, Data & data) static void visit(ASTFunction & function_node, Data & data)
{ {
if ((function_node.arguments->children).empty()) if (function_node.arguments->children.empty())
{ {
data.keep_aggregator = true; data.keep_aggregator = true;
return; return;
@ -46,13 +46,10 @@ struct KeepAggregateFunctionMatcher
} }
static void visit(ASTIdentifier & ident, Data & data) static void visit(ASTIdentifier & ident, Data & data)
{
if (!data.group_by_keys.count(ident.shortName()))
{ {
/// if variable of a function is not in GROUP BY keys, this function should not be deleted /// if variable of a function is not in GROUP BY keys, this function should not be deleted
if (!data.group_by_keys.count(ident.getColumnName()))
data.keep_aggregator = true; data.keep_aggregator = true;
return;
}
} }
static void visit(const ASTPtr & ast, Data & data) static void visit(const ASTPtr & ast, Data & data)
@ -75,21 +72,21 @@ struct KeepAggregateFunctionMatcher
} }
}; };
using KeepAggregateFunctionVisitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>; using KeepAggregateFunctionVisitor = KeepAggregateFunctionMatcher::Visitor;
class SelectAggregateFunctionOfGroupByKeysMatcher class SelectAggregateFunctionOfGroupByKeysMatcher
{ {
public: public:
struct Data struct Data
{ {
std::unordered_set<String> & group_by_keys; const NameSet & group_by_keys;
}; };
static bool needChildVisit(const ASTPtr & node, const ASTPtr &) static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{ {
/// Don't descent into table functions and subqueries and special case for ArrayJoin. /// Don't descent into table functions and subqueries and special case for ArrayJoin.
return !node->as<ASTSubquery>() && return !node->as<ASTSubquery>() && !node->as<ASTTableExpression>()
!(node->as<ASTTableExpression>() || node->as<ASTSelectWithUnionQuery>() || node->as<ASTArrayJoin>()); && !node->as<ASTSelectWithUnionQuery>() && !node->as<ASTArrayJoin>();
} }
static void visit(ASTPtr & ast, Data & data) static void visit(ASTPtr & ast, Data & data)
@ -99,12 +96,11 @@ public:
if (function_node && (function_node->name == "min" || function_node->name == "max" || if (function_node && (function_node->name == "min" || function_node->name == "max" ||
function_node->name == "any" || function_node->name == "anyLast")) function_node->name == "any" || function_node->name == "anyLast"))
{ {
bool keep_aggregator = false; KeepAggregateFunctionVisitor::Data keep_data{data.group_by_keys, false};
KeepAggregateFunctionVisitor::Data keep_data{data.group_by_keys, keep_aggregator};
KeepAggregateFunctionVisitor(keep_data).visit(function_node->arguments); KeepAggregateFunctionVisitor(keep_data).visit(function_node->arguments);
/// Place argument of an aggregate function instead of function /// Place argument of an aggregate function instead of function
if (!keep_aggregator) if (!keep_data.keep_aggregator)
{ {
String alias = function_node->alias; String alias = function_node->alias;
ast = (function_node->arguments->children[0])->clone(); ast = (function_node->arguments->children[0])->clone();

View File

@ -177,44 +177,22 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
struct GroupByKeysInfo struct GroupByKeysInfo
{ {
std::unordered_set<String> key_names; ///set of keys' short names NameSet key_names; ///set of keys' short names
bool has_identifier = false;
bool has_function = false; bool has_function = false;
bool has_possible_collision = false;
}; };
GroupByKeysInfo getGroupByKeysInfo(ASTs & group_keys) GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys)
{ {
GroupByKeysInfo data; GroupByKeysInfo data;
/// filling set with short names of keys /// filling set with short names of keys
for (auto & group_key : group_keys) for (auto & group_key : group_by_keys)
{ {
if (group_key->as<ASTFunction>()) if (group_key->as<ASTFunction>())
data.has_function = true; data.has_function = true;
if (auto * group_key_ident = group_key->as<ASTIdentifier>())
{
data.has_identifier = true;
if (data.key_names.count(group_key_ident->shortName()))
{
///There may be a collision between different tables having similar variables.
///Due to the fact that we can't track these conflicts yet,
///it's better to disable some optimizations to avoid elimination necessary keys.
data.has_possible_collision = true;
}
data.key_names.insert(group_key_ident->shortName());
}
else if (auto * group_key_func = group_key->as<ASTFunction>())
{
data.key_names.insert(group_key_func->getColumnName());
}
else
{
data.key_names.insert(group_key->getColumnName()); data.key_names.insert(group_key->getColumnName());
} }
}
return data; return data;
} }
@ -225,47 +203,28 @@ void optimizeGroupByFunctionKeys(ASTSelectQuery * select_query)
if (!select_query->groupBy()) if (!select_query->groupBy())
return; return;
auto grp_by = select_query->groupBy(); auto group_by = select_query->groupBy();
auto & group_keys = grp_by->children; const auto & group_by_keys = group_by->children;
ASTs modified; ///result ASTs modified; ///result
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys); GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
if (!group_by_keys_data.has_function || group_by_keys_data.has_possible_collision) if (!group_by_keys_data.has_function)
return; return;
GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names}; GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
GroupByFunctionKeysVisitor(visitor_data).visit(grp_by); GroupByFunctionKeysVisitor(visitor_data).visit(group_by);
modified.reserve(group_keys.size()); modified.reserve(group_by_keys.size());
/// filling the result /// filling the result
for (auto & group_key : group_keys) for (auto & group_key : group_by_keys)
{
if (auto * group_key_func = group_key->as<ASTFunction>())
{
if (group_by_keys_data.key_names.count(group_key_func->getColumnName()))
modified.push_back(group_key);
continue;
}
if (auto * group_key_ident = group_key->as<ASTIdentifier>())
{
if (group_by_keys_data.key_names.count(group_key_ident->shortName()))
modified.push_back(group_key);
continue;
}
else
{
if (group_by_keys_data.key_names.count(group_key->getColumnName())) if (group_by_keys_data.key_names.count(group_key->getColumnName()))
modified.push_back(group_key); modified.push_back(group_key);
}
}
/// modifying the input /// modifying the input
grp_by->children = modified; group_by->children = modified;
} }
/// Eliminates min/max/any-aggregators of functions of GROUP BY keys /// Eliminates min/max/any-aggregators of functions of GROUP BY keys
@ -274,10 +233,8 @@ void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query, ASTP
if (!select_query->groupBy()) if (!select_query->groupBy())
return; return;
auto grp_by = select_query->groupBy(); auto & group_by_keys = select_query->groupBy()->children;
auto & group_keys = grp_by->children; GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys);
SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{group_by_keys_data.key_names}; SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(node); SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(node);

View File

@ -0,0 +1,5 @@
SET optimize_aggregators_of_group_by_keys = 1;
SELECT source.key, max(target.key) FROM (SELECT 1 key, 'x' name) source
INNER JOIN (SELECT 2 key, 'x' name) target
ON source.name = target.name
GROUP BY source.key;