Fix distributed_group_by_no_merge optimization for Distributed-over-Distributed

This commit is contained in:
Azat Khuzhin 2020-04-19 21:06:21 +03:00
parent 681034f4a3
commit be1dec9239
4 changed files with 31 additions and 4 deletions

View File

@ -380,13 +380,16 @@ StoragePtr StorageDistributed::createWithOwnCluster(
}
bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const ASTPtr & query_ptr) const
bool StorageDistributed::canForceGroupByNoMerge(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const
{
const auto & settings = context.getSettingsRef();
std::string reason;
if (settings.distributed_group_by_no_merge)
return true;
/// Distributed-over-Distributed (see getQueryProcessingStageImpl())
if (to_stage == QueryProcessingStage::WithMergeableState)
return false;
if (!settings.optimize_skip_unused_shards)
return false;
if (!has_sharding_key)
@ -445,7 +448,7 @@ bool StorageDistributed::canForceGroupByNoMerge(const Context &context, const AS
QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const
{
if (canForceGroupByNoMerge(context, query_ptr))
if (canForceGroupByNoMerge(context, to_stage, query_ptr))
return QueryProcessingStage::Complete;
auto cluster = getOptimizedCluster(context, query_ptr);

View File

@ -68,8 +68,8 @@ public:
bool isRemote() const override { return true; }
/// Return true if distributed_group_by_no_merge may be applied.
bool canForceGroupByNoMerge(const Context &, const ASTPtr &) const;
QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override;
bool canForceGroupByNoMerge(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const;
QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const override;
Pipes read(
const Names & column_names,

View File

@ -13,6 +13,11 @@ GROUP BY number
1 1
1 0
1 1
GROUP BY number distributed_group_by_no_merge
1 0
1 1
1 0
1 1
GROUP BY number, 1
1 0
1 1
@ -50,3 +55,15 @@ LIMIT
LIMIT BY
2 0
2 1
GROUP BY (Distributed-over-Distributed)
4 0
4 1
GROUP BY (Distributed-over-Distributed) distributed_group_by_no_merge
1 0
1 1
1 0
1 1
1 0
1 1
1 0
1 1

View File

@ -18,6 +18,8 @@ select * from dist_01247;
select 'GROUP BY number';
select count(), * from dist_01247 group by number;
select 'GROUP BY number distributed_group_by_no_merge';
select count(), * from dist_01247 group by number settings distributed_group_by_no_merge=1;
-- dumb, but should work, since "GROUP BY 1" is optimized out
select 'GROUP BY number, 1';
@ -53,3 +55,8 @@ select count(), * from dist_01247 group by number limit 1 offset 1;
select 'LIMIT BY';
select count(), * from dist_01247 group by number limit 0 by number;
select count(), * from dist_01247 group by number limit 1 by number;
select 'GROUP BY (Distributed-over-Distributed)';
select count(), * from cluster(test_cluster_two_shards, currentDatabase(), dist_01247) group by number;
select 'GROUP BY (Distributed-over-Distributed) distributed_group_by_no_merge';
select count(), * from cluster(test_cluster_two_shards, currentDatabase(), dist_01247) group by number settings distributed_group_by_no_merge=1;