Merge pull request #28105 from azat/enable-optimize_distributed_group_by_sharding_key

Enable optimize_distributed_group_by_sharding_key by default
2024-11-21 23:21:59 +00:00 · 2021-08-25 14:07:33 +03:00 · 2021-08-25 14:07:33 +03:00 · df1fe27791
commit df1fe27791
parent efeefcb437 c3096805ac
3 changed files with 16 additions and 4 deletions
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -125,7 +125,7 @@ class IColumn;
    M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
    M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \
    M(UInt64, distributed_push_down_limit, 1, "If 1, LIMIT will be applied on each shard separatelly. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query SELECT FROM LIMIT.", 0) \
-    M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
+    M(Bool, optimize_distributed_group_by_sharding_key, true, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
    M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \
    M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
    M(Bool, optimize_skip_unused_shards_rewrite_in, true, "Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards)", 0) \
--- a/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.reference
+++ b/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.reference
@@ -3,6 +3,11 @@ distributed_group_by_no_merge
 1
 optimize_skip_unused_shards
 1
-optimize_skip_unused_shards lack of WHERE
+optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=0)
+0
+1
+optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=1)
+0
+1
 0
 1
--- a/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.sql
+++ b/tests/queries/0_stateless/01213_optimize_skip_unused_shards_DISTINCT.sql
@@ -12,8 +12,15 @@ SELECT DISTINCT id FROM dist_01213 WHERE id = 1 SETTINGS distributed_group_by_no
 SELECT 'optimize_skip_unused_shards';
 SELECT DISTINCT id FROM dist_01213 WHERE id = 1 SETTINGS optimize_skip_unused_shards=1;
 -- check that querying all shards is ok
-SELECT 'optimize_skip_unused_shards lack of WHERE';
-SELECT DISTINCT id FROM dist_01213 SETTINGS optimize_skip_unused_shards=1;
+SELECT 'optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=0)';
+SELECT DISTINCT id FROM dist_01213 SETTINGS optimize_skip_unused_shards=1, optimize_distributed_group_by_sharding_key=0;
+-- with optimize_distributed_group_by_sharding_key=1 there will be 4 rows,
+-- since DISTINCT will be done on each shard separately, and the initiator
+-- will not do anything (since we use optimize_skip_unused_shards=1, which must
+-- guarantee that the data has been INSERTed according to the sharding key;
+-- that is not the case here, since we use one local table).
+SELECT 'optimize_skip_unused_shards lack of WHERE (optimize_distributed_group_by_sharding_key=1)';
+SELECT DISTINCT id FROM dist_01213 SETTINGS optimize_skip_unused_shards=1, optimize_distributed_group_by_sharding_key=1;

 DROP TABLE local_01213;
 DROP TABLE dist_01213;