Merge pull request #38371 from CurtizJ/fix-distint-with-limit

Fix `DISTINCT` with `LIMIT` in distributed queries
This commit is contained in:
Anton Popov 2022-06-29 14:02:41 +02:00 committed by GitHub
commit e78814f3bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 2 deletions

View File

@ -1242,8 +1242,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
if (expressions.has_order_by)
executeOrder(query_plan, input_order_info_for_order);
if (expressions.has_order_by && query.limitLength())
executeDistinct(query_plan, false, expressions.selected_columns, true);
/// pre_distinct = false, because if we have both LIMIT and DISTINCT,
/// we need to merge the streams into one and calculate the overall distinct.
/// Otherwise we could take several equal values from different streams
/// according to the limit and skip some distinct values.
if (query.limitLength())
executeDistinct(query_plan, false, expressions.selected_columns, false);
if (expressions.hasLimitBy())
{

View File

@ -0,0 +1,6 @@
-1
1
11
12
13
14

View File

@ -0,0 +1,26 @@
-- Regression test: DISTINCT combined with LIMIT in a distributed query.
-- The table is read through remote() over two loopback addresses that
-- both point at the same database and table.

DROP TABLE IF EXISTS t_distinct_limit;

CREATE TABLE t_distinct_limit
(
    d Date,
    id Int64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d;

SET max_threads = 10;

-- Heavily duplicated ids (-1 and 1), loaded with separate INSERT statements.
-- NOTE(review): presumably kept as separate inserts so the duplicates are
-- spread over several parts/streams - confirm before merging them.
INSERT INTO t_distinct_limit SELECT '2021-12-15', -1 FROM numbers(1e6);
INSERT INTO t_distinct_limit SELECT '2021-12-15', -1 FROM numbers(1e6);
INSERT INTO t_distinct_limit SELECT '2021-12-15', -1 FROM numbers(1e6);
INSERT INTO t_distinct_limit SELECT '2022-12-15', 1 FROM numbers(1e6);
INSERT INTO t_distinct_limit SELECT '2022-12-15', 1 FROM numbers(1e6);

-- Four ids that each occur exactly once.
INSERT INTO t_distinct_limit SELECT '2022-12-16', 11 FROM numbers(1);
INSERT INTO t_distinct_limit SELECT '2023-12-16', 12 FROM numbers(1);
INSERT INTO t_distinct_limit SELECT '2023-12-16', 13 FROM numbers(1);
INSERT INTO t_distinct_limit SELECT '2023-12-16', 14 FROM numbers(1);

-- NOTE(review): the small block size presumably helps expose the case where
-- LIMIT is satisfied by repeated values from different streams - confirm.
SET max_block_size = 1024;

-- Only six distinct ids exist, which is below the LIMIT of 10, so the result
-- should be exactly -1, 1, 11, 12, 13, 14. The outer ORDER BY makes the
-- output order deterministic.
SELECT id
FROM
(
    SELECT DISTINCT id
    FROM remote('127.0.0.1,127.0.0.2', currentDatabase(), t_distinct_limit)
    LIMIT 10
)
ORDER BY id;

DROP TABLE IF EXISTS t_distinct_limit;