mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
fix optimization with 'optimize_aggregators_of_group_by_keys' and joins
This commit is contained in:
parent
6e62108606
commit
7990c8cbad
@ -20,8 +20,8 @@ struct KeepAggregateFunctionMatcher
|
||||
{
|
||||
struct Data
|
||||
{
|
||||
std::unordered_set<String> & group_by_keys;
|
||||
bool & keep_aggregator;
|
||||
const NameSet & group_by_keys;
|
||||
bool keep_aggregator;
|
||||
};
|
||||
|
||||
using Visitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>;
|
||||
@ -33,7 +33,7 @@ struct KeepAggregateFunctionMatcher
|
||||
|
||||
static void visit(ASTFunction & function_node, Data & data)
|
||||
{
|
||||
if ((function_node.arguments->children).empty())
|
||||
if (function_node.arguments->children.empty())
|
||||
{
|
||||
data.keep_aggregator = true;
|
||||
return;
|
||||
@ -46,13 +46,10 @@ struct KeepAggregateFunctionMatcher
|
||||
}
|
||||
|
||||
static void visit(ASTIdentifier & ident, Data & data)
|
||||
{
|
||||
if (!data.group_by_keys.count(ident.shortName()))
|
||||
{
|
||||
/// if variable of a function is not in GROUP BY keys, this function should not be deleted
|
||||
if (!data.group_by_keys.count(ident.getColumnName()))
|
||||
data.keep_aggregator = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void visit(const ASTPtr & ast, Data & data)
|
||||
@ -75,21 +72,21 @@ struct KeepAggregateFunctionMatcher
|
||||
}
|
||||
};
|
||||
|
||||
using KeepAggregateFunctionVisitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>;
|
||||
using KeepAggregateFunctionVisitor = KeepAggregateFunctionMatcher::Visitor;
|
||||
|
||||
class SelectAggregateFunctionOfGroupByKeysMatcher
|
||||
{
|
||||
public:
|
||||
struct Data
|
||||
{
|
||||
std::unordered_set<String> & group_by_keys;
|
||||
const NameSet & group_by_keys;
|
||||
};
|
||||
|
||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
|
||||
{
|
||||
/// Don't descent into table functions and subqueries and special case for ArrayJoin.
|
||||
return !node->as<ASTSubquery>() &&
|
||||
!(node->as<ASTTableExpression>() || node->as<ASTSelectWithUnionQuery>() || node->as<ASTArrayJoin>());
|
||||
return !node->as<ASTSubquery>() && !node->as<ASTTableExpression>()
|
||||
&& !node->as<ASTSelectWithUnionQuery>() && !node->as<ASTArrayJoin>();
|
||||
}
|
||||
|
||||
static void visit(ASTPtr & ast, Data & data)
|
||||
@ -99,12 +96,11 @@ public:
|
||||
if (function_node && (function_node->name == "min" || function_node->name == "max" ||
|
||||
function_node->name == "any" || function_node->name == "anyLast"))
|
||||
{
|
||||
bool keep_aggregator = false;
|
||||
KeepAggregateFunctionVisitor::Data keep_data{data.group_by_keys, keep_aggregator};
|
||||
KeepAggregateFunctionVisitor::Data keep_data{data.group_by_keys, false};
|
||||
KeepAggregateFunctionVisitor(keep_data).visit(function_node->arguments);
|
||||
|
||||
/// Place argument of an aggregate function instead of function
|
||||
if (!keep_aggregator)
|
||||
if (!keep_data.keep_aggregator)
|
||||
{
|
||||
String alias = function_node->alias;
|
||||
ast = (function_node->arguments->children[0])->clone();
|
||||
|
@ -177,44 +177,22 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
|
||||
|
||||
struct GroupByKeysInfo
|
||||
{
|
||||
std::unordered_set<String> key_names; ///set of keys' short names
|
||||
bool has_identifier = false;
|
||||
NameSet key_names; ///set of keys' short names
|
||||
bool has_function = false;
|
||||
bool has_possible_collision = false;
|
||||
};
|
||||
|
||||
GroupByKeysInfo getGroupByKeysInfo(ASTs & group_keys)
|
||||
GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys)
|
||||
{
|
||||
GroupByKeysInfo data;
|
||||
|
||||
/// filling set with short names of keys
|
||||
for (auto & group_key : group_keys)
|
||||
for (auto & group_key : group_by_keys)
|
||||
{
|
||||
if (group_key->as<ASTFunction>())
|
||||
data.has_function = true;
|
||||
|
||||
if (auto * group_key_ident = group_key->as<ASTIdentifier>())
|
||||
{
|
||||
data.has_identifier = true;
|
||||
if (data.key_names.count(group_key_ident->shortName()))
|
||||
{
|
||||
///There may be a collision between different tables having similar variables.
|
||||
///Due to the fact that we can't track these conflicts yet,
|
||||
///it's better to disable some optimizations to avoid elimination necessary keys.
|
||||
data.has_possible_collision = true;
|
||||
}
|
||||
|
||||
data.key_names.insert(group_key_ident->shortName());
|
||||
}
|
||||
else if (auto * group_key_func = group_key->as<ASTFunction>())
|
||||
{
|
||||
data.key_names.insert(group_key_func->getColumnName());
|
||||
}
|
||||
else
|
||||
{
|
||||
data.key_names.insert(group_key->getColumnName());
|
||||
}
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
@ -225,47 +203,28 @@ void optimizeGroupByFunctionKeys(ASTSelectQuery * select_query)
|
||||
if (!select_query->groupBy())
|
||||
return;
|
||||
|
||||
auto grp_by = select_query->groupBy();
|
||||
auto & group_keys = grp_by->children;
|
||||
auto group_by = select_query->groupBy();
|
||||
const auto & group_by_keys = group_by->children;
|
||||
|
||||
ASTs modified; ///result
|
||||
|
||||
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys);
|
||||
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
|
||||
|
||||
if (!group_by_keys_data.has_function || group_by_keys_data.has_possible_collision)
|
||||
if (!group_by_keys_data.has_function)
|
||||
return;
|
||||
|
||||
GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
||||
GroupByFunctionKeysVisitor(visitor_data).visit(grp_by);
|
||||
GroupByFunctionKeysVisitor(visitor_data).visit(group_by);
|
||||
|
||||
modified.reserve(group_keys.size());
|
||||
modified.reserve(group_by_keys.size());
|
||||
|
||||
/// filling the result
|
||||
for (auto & group_key : group_keys)
|
||||
{
|
||||
if (auto * group_key_func = group_key->as<ASTFunction>())
|
||||
{
|
||||
if (group_by_keys_data.key_names.count(group_key_func->getColumnName()))
|
||||
modified.push_back(group_key);
|
||||
|
||||
continue;
|
||||
}
|
||||
if (auto * group_key_ident = group_key->as<ASTIdentifier>())
|
||||
{
|
||||
if (group_by_keys_data.key_names.count(group_key_ident->shortName()))
|
||||
modified.push_back(group_key);
|
||||
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto & group_key : group_by_keys)
|
||||
if (group_by_keys_data.key_names.count(group_key->getColumnName()))
|
||||
modified.push_back(group_key);
|
||||
}
|
||||
}
|
||||
|
||||
/// modifying the input
|
||||
grp_by->children = modified;
|
||||
group_by->children = modified;
|
||||
}
|
||||
|
||||
/// Eliminates min/max/any-aggregators of functions of GROUP BY keys
|
||||
@ -274,10 +233,8 @@ void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query, ASTP
|
||||
if (!select_query->groupBy())
|
||||
return;
|
||||
|
||||
auto grp_by = select_query->groupBy();
|
||||
auto & group_keys = grp_by->children;
|
||||
|
||||
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys);
|
||||
auto & group_by_keys = select_query->groupBy()->children;
|
||||
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
|
||||
|
||||
SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
||||
SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(node);
|
||||
|
@ -0,0 +1 @@
|
||||
1 2
|
@ -0,0 +1,5 @@
|
||||
SET optimize_aggregators_of_group_by_keys = 1;
|
||||
SELECT source.key, max(target.key) FROM (SELECT 1 key, 'x' name) source
|
||||
INNER JOIN (SELECT 2 key, 'x' name) target
|
||||
ON source.name = target.name
|
||||
GROUP BY source.key;
|
Loading…
Reference in New Issue
Block a user