mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Merge pull request #16951 from CurtizJ/fix-groupby-optimization
Fix optimization with 'optimize_aggregators_of_group_by_keys' and joins
This commit is contained in:
commit
038a56d243
@ -20,8 +20,8 @@ struct KeepAggregateFunctionMatcher
|
|||||||
{
|
{
|
||||||
struct Data
|
struct Data
|
||||||
{
|
{
|
||||||
std::unordered_set<String> & group_by_keys;
|
const NameSet & group_by_keys;
|
||||||
bool & keep_aggregator;
|
bool keep_aggregator;
|
||||||
};
|
};
|
||||||
|
|
||||||
using Visitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>;
|
using Visitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>;
|
||||||
@ -33,7 +33,7 @@ struct KeepAggregateFunctionMatcher
|
|||||||
|
|
||||||
static void visit(ASTFunction & function_node, Data & data)
|
static void visit(ASTFunction & function_node, Data & data)
|
||||||
{
|
{
|
||||||
if ((function_node.arguments->children).empty())
|
if (function_node.arguments->children.empty())
|
||||||
{
|
{
|
||||||
data.keep_aggregator = true;
|
data.keep_aggregator = true;
|
||||||
return;
|
return;
|
||||||
@ -46,13 +46,10 @@ struct KeepAggregateFunctionMatcher
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void visit(ASTIdentifier & ident, Data & data)
|
static void visit(ASTIdentifier & ident, Data & data)
|
||||||
{
|
|
||||||
if (!data.group_by_keys.count(ident.shortName()))
|
|
||||||
{
|
{
|
||||||
/// if variable of a function is not in GROUP BY keys, this function should not be deleted
|
/// if variable of a function is not in GROUP BY keys, this function should not be deleted
|
||||||
|
if (!data.group_by_keys.count(ident.getColumnName()))
|
||||||
data.keep_aggregator = true;
|
data.keep_aggregator = true;
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void visit(const ASTPtr & ast, Data & data)
|
static void visit(const ASTPtr & ast, Data & data)
|
||||||
@ -75,21 +72,21 @@ struct KeepAggregateFunctionMatcher
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
using KeepAggregateFunctionVisitor = InDepthNodeVisitor<KeepAggregateFunctionMatcher, true>;
|
using KeepAggregateFunctionVisitor = KeepAggregateFunctionMatcher::Visitor;
|
||||||
|
|
||||||
class SelectAggregateFunctionOfGroupByKeysMatcher
|
class SelectAggregateFunctionOfGroupByKeysMatcher
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
struct Data
|
struct Data
|
||||||
{
|
{
|
||||||
std::unordered_set<String> & group_by_keys;
|
const NameSet & group_by_keys;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
|
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
|
||||||
{
|
{
|
||||||
/// Don't descent into table functions and subqueries and special case for ArrayJoin.
|
/// Don't descent into table functions and subqueries and special case for ArrayJoin.
|
||||||
return !node->as<ASTSubquery>() &&
|
return !node->as<ASTSubquery>() && !node->as<ASTTableExpression>()
|
||||||
!(node->as<ASTTableExpression>() || node->as<ASTSelectWithUnionQuery>() || node->as<ASTArrayJoin>());
|
&& !node->as<ASTSelectWithUnionQuery>() && !node->as<ASTArrayJoin>();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void visit(ASTPtr & ast, Data & data)
|
static void visit(ASTPtr & ast, Data & data)
|
||||||
@ -99,12 +96,11 @@ public:
|
|||||||
if (function_node && (function_node->name == "min" || function_node->name == "max" ||
|
if (function_node && (function_node->name == "min" || function_node->name == "max" ||
|
||||||
function_node->name == "any" || function_node->name == "anyLast"))
|
function_node->name == "any" || function_node->name == "anyLast"))
|
||||||
{
|
{
|
||||||
bool keep_aggregator = false;
|
KeepAggregateFunctionVisitor::Data keep_data{data.group_by_keys, false};
|
||||||
KeepAggregateFunctionVisitor::Data keep_data{data.group_by_keys, keep_aggregator};
|
|
||||||
KeepAggregateFunctionVisitor(keep_data).visit(function_node->arguments);
|
KeepAggregateFunctionVisitor(keep_data).visit(function_node->arguments);
|
||||||
|
|
||||||
/// Place argument of an aggregate function instead of function
|
/// Place argument of an aggregate function instead of function
|
||||||
if (!keep_aggregator && !function_node->arguments->children.empty())
|
if (!keep_data.keep_aggregator && !function_node->arguments->children.empty())
|
||||||
{
|
{
|
||||||
String alias = function_node->alias;
|
String alias = function_node->alias;
|
||||||
ast = (function_node->arguments->children[0])->clone();
|
ast = (function_node->arguments->children[0])->clone();
|
||||||
|
@ -178,44 +178,22 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum
|
|||||||
|
|
||||||
struct GroupByKeysInfo
|
struct GroupByKeysInfo
|
||||||
{
|
{
|
||||||
std::unordered_set<String> key_names; ///set of keys' short names
|
NameSet key_names; ///set of keys' short names
|
||||||
bool has_identifier = false;
|
|
||||||
bool has_function = false;
|
bool has_function = false;
|
||||||
bool has_possible_collision = false;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
GroupByKeysInfo getGroupByKeysInfo(ASTs & group_keys)
|
GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys)
|
||||||
{
|
{
|
||||||
GroupByKeysInfo data;
|
GroupByKeysInfo data;
|
||||||
|
|
||||||
/// filling set with short names of keys
|
/// filling set with short names of keys
|
||||||
for (auto & group_key : group_keys)
|
for (const auto & group_key : group_by_keys)
|
||||||
{
|
{
|
||||||
if (group_key->as<ASTFunction>())
|
if (group_key->as<ASTFunction>())
|
||||||
data.has_function = true;
|
data.has_function = true;
|
||||||
|
|
||||||
if (auto * group_key_ident = group_key->as<ASTIdentifier>())
|
|
||||||
{
|
|
||||||
data.has_identifier = true;
|
|
||||||
if (data.key_names.count(group_key_ident->shortName()))
|
|
||||||
{
|
|
||||||
///There may be a collision between different tables having similar variables.
|
|
||||||
///Due to the fact that we can't track these conflicts yet,
|
|
||||||
///it's better to disable some optimizations to avoid elimination necessary keys.
|
|
||||||
data.has_possible_collision = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
data.key_names.insert(group_key_ident->shortName());
|
|
||||||
}
|
|
||||||
else if (auto * group_key_func = group_key->as<ASTFunction>())
|
|
||||||
{
|
|
||||||
data.key_names.insert(group_key_func->getColumnName());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
data.key_names.insert(group_key->getColumnName());
|
data.key_names.insert(group_key->getColumnName());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
@ -226,47 +204,28 @@ void optimizeGroupByFunctionKeys(ASTSelectQuery * select_query)
|
|||||||
if (!select_query->groupBy())
|
if (!select_query->groupBy())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
auto grp_by = select_query->groupBy();
|
auto group_by = select_query->groupBy();
|
||||||
auto & group_keys = grp_by->children;
|
const auto & group_by_keys = group_by->children;
|
||||||
|
|
||||||
ASTs modified; ///result
|
ASTs modified; ///result
|
||||||
|
|
||||||
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys);
|
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
|
||||||
|
|
||||||
if (!group_by_keys_data.has_function || group_by_keys_data.has_possible_collision)
|
if (!group_by_keys_data.has_function)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
||||||
GroupByFunctionKeysVisitor(visitor_data).visit(grp_by);
|
GroupByFunctionKeysVisitor(visitor_data).visit(group_by);
|
||||||
|
|
||||||
modified.reserve(group_keys.size());
|
modified.reserve(group_by_keys.size());
|
||||||
|
|
||||||
/// filling the result
|
/// filling the result
|
||||||
for (auto & group_key : group_keys)
|
for (const auto & group_key : group_by_keys)
|
||||||
{
|
|
||||||
if (auto * group_key_func = group_key->as<ASTFunction>())
|
|
||||||
{
|
|
||||||
if (group_by_keys_data.key_names.count(group_key_func->getColumnName()))
|
|
||||||
modified.push_back(group_key);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (auto * group_key_ident = group_key->as<ASTIdentifier>())
|
|
||||||
{
|
|
||||||
if (group_by_keys_data.key_names.count(group_key_ident->shortName()))
|
|
||||||
modified.push_back(group_key);
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (group_by_keys_data.key_names.count(group_key->getColumnName()))
|
if (group_by_keys_data.key_names.count(group_key->getColumnName()))
|
||||||
modified.push_back(group_key);
|
modified.push_back(group_key);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// modifying the input
|
/// modifying the input
|
||||||
grp_by->children = modified;
|
group_by->children = modified;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Eliminates min/max/any-aggregators of functions of GROUP BY keys
|
/// Eliminates min/max/any-aggregators of functions of GROUP BY keys
|
||||||
@ -275,10 +234,8 @@ void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query, ASTP
|
|||||||
if (!select_query->groupBy())
|
if (!select_query->groupBy())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
auto grp_by = select_query->groupBy();
|
const auto & group_by_keys = select_query->groupBy()->children;
|
||||||
auto & group_keys = grp_by->children;
|
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
|
||||||
|
|
||||||
GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_keys);
|
|
||||||
|
|
||||||
SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
|
||||||
SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(node);
|
SelectAggregateFunctionOfGroupByKeysVisitor(visitor_data).visit(node);
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
1 2
|
@ -0,0 +1,5 @@
|
|||||||
|
SET optimize_aggregators_of_group_by_keys = 1;
|
||||||
|
SELECT source.key, max(target.key) FROM (SELECT 1 key, 'x' name) source
|
||||||
|
INNER JOIN (SELECT 2 key, 'x' name) target
|
||||||
|
ON source.name = target.name
|
||||||
|
GROUP BY source.key;
|
Loading…
Reference in New Issue
Block a user