Allow automatic distributed_group_by_no_merge for DISTINCT on the sharding key

This commit is contained in:
Azat Khuzhin 2020-04-19 18:47:33 +03:00
parent de4a723264
commit 93d049fe64
2 changed files with 35 additions and 13 deletions

View File

@ -383,6 +383,7 @@ StoragePtr StorageDistributed::createWithOwnCluster(
bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const ASTPtr & query_ptr) const bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const ASTPtr & query_ptr) const
{ {
const auto & settings = context.getSettingsRef(); const auto & settings = context.getSettingsRef();
std::string reason;
if (settings.distributed_group_by_no_merge) if (settings.distributed_group_by_no_merge)
return true; return true;
@ -395,8 +396,20 @@ bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const AS
if (select.orderBy()) if (select.orderBy())
return false; return false;
if (select.distinct) if (select.distinct)
return false; {
for (auto & expr : select.select()->children)
{
auto id = expr->as<ASTIdentifier>();
if (!id)
return false;
if (!sharding_key_expr->getSampleBlock().has(id->name))
return false;
}
reason = "DISTINCT " + backQuote(serializeAST(*select.select(), true));
}
// This can use distributed_group_by_no_merge but in this case limit stage // This can use distributed_group_by_no_merge but in this case limit stage
// should be done later (which is not the case right now). // should be done later (which is not the case right now).
@ -405,21 +418,28 @@ bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const AS
const ASTPtr group_by = select.groupBy(); const ASTPtr group_by = select.groupBy();
if (!group_by) if (!group_by)
return false; {
if (!select.distinct)
return false;
}
else
{
// injective functions are optimized out in optimizeGroupBy()
// hence all we need to check is that column in GROUP BY matches sharding expression
auto & group_exprs = group_by->children;
if (!group_exprs.size())
throw Exception("No ASTExpressionList in GROUP BY", ErrorCodes::LOGICAL_ERROR);
// injective functions are optimized out in optimizeGroupBy() auto id = group_exprs[0]->as<ASTIdentifier>();
// hence all we need to check is that column in GROUP BY matches sharding expression if (!id)
auto & group_exprs = group_by->children; return false;
if (!group_exprs.size()) if (!sharding_key_expr->getSampleBlock().has(id->name))
throw Exception("No ASTExpressionList in GROUP BY", ErrorCodes::LOGICAL_ERROR); return false;
auto id = group_exprs[0]->as<ASTIdentifier>(); reason = "GROUP BY " + backQuote(serializeAST(*group_by, true));
if (!id) }
return false;
if (!sharding_key_expr->getSampleBlock().has(id->name))
return false;
LOG_DEBUG(log, "Force distributed_group_by_no_merge for GROUP BY " << backQuote(serializeAST(*group_by, true)) << " (injective)"); LOG_DEBUG(log, "Force distributed_group_by_no_merge for " << reason << " (injective)");
return true; return true;
} }

View File

@ -41,6 +41,8 @@ countDistinct GROUP BY number
DISTINCT DISTINCT
0 0
1 1
0
1
HAVING HAVING
LIMIT LIMIT
2 0 2 0