From 93d049fe64298973e75cb53b5c988440125d61b2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 19 Apr 2020 18:47:33 +0300 Subject: [PATCH] Allow auto distributed_group_by_no_merge for DISTINCT of sharding key --- src/Storages/StorageDistributed.cpp | 46 +++++++++++++------ ..._GROUP_BY_injective_sharding_key.reference | 2 + 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a245cf66449..237e283d918 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -383,6 +383,7 @@ StoragePtr StorageDistributed::createWithOwnCluster( bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const ASTPtr & query_ptr) const { const auto & settings = context.getSettingsRef(); + std::string reason; if (settings.distributed_group_by_no_merge) return true; @@ -395,8 +396,20 @@ bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const AS if (select.orderBy()) return false; + if (select.distinct) - return false; + { + for (auto & expr : select.select()->children) + { + auto id = expr->as(); + if (!id) + return false; + if (!sharding_key_expr->getSampleBlock().has(id->name)) + return false; + } + + reason = "DISTINCT " + backQuote(serializeAST(*select.select(), true)); + } // This can use distributed_group_by_no_merge but in this case limit stage // should be done later (which is not the case right now). @@ -405,21 +418,28 @@ bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const AS const ASTPtr group_by = select.groupBy(); if (!group_by) - return false; + { + if (!select.distinct) + return false; + } + else + { + // injective functions are optimized out in optimizeGroupBy() + // hence all we need to check is that column in GROUP BY matches sharding expression + auto & group_exprs = group_by->children; + if (!group_exprs.size()) + throw Exception("No ASTExpressionList in GROUP BY", ErrorCodes::LOGICAL_ERROR); - // injective functions are optimized out in optimizeGroupBy() - // hence all we need to check is that column in GROUP BY matches sharding expression - auto & group_exprs = group_by->children; - if (!group_exprs.size()) - throw Exception("No ASTExpressionList in GROUP BY", ErrorCodes::LOGICAL_ERROR); + auto id = group_exprs[0]->as(); + if (!id) + return false; + if (!sharding_key_expr->getSampleBlock().has(id->name)) + return false; - auto id = group_exprs[0]->as(); - if (!id) - return false; - if (!sharding_key_expr->getSampleBlock().has(id->name)) - return false; + reason = "GROUP BY " + backQuote(serializeAST(*group_by, true)); + } - LOG_DEBUG(log, "Force distributed_group_by_no_merge for GROUP BY " << backQuote(serializeAST(*group_by, true)) << " (injective)"); + LOG_DEBUG(log, "Force distributed_group_by_no_merge for " << reason << " (injective)"); return true; } diff --git a/tests/queries/0_stateless/01247_distributed_group_by_no_merge_GROUP_BY_injective_sharding_key.reference b/tests/queries/0_stateless/01247_distributed_group_by_no_merge_GROUP_BY_injective_sharding_key.reference index 5af7c8d8473..1807c1b51cc 100644 --- a/tests/queries/0_stateless/01247_distributed_group_by_no_merge_GROUP_BY_injective_sharding_key.reference +++ b/tests/queries/0_stateless/01247_distributed_group_by_no_merge_GROUP_BY_injective_sharding_key.reference @@ -41,6 +41,8 @@ countDistinct GROUP BY number DISTINCT 0 1 +0 +1 HAVING LIMIT 2 0