Merge pull request #53711 from ClickHouse/sorted-distinct-sparse-columns

Fix: sorted distinct with sparse columns
This commit is contained in:
Alexey Milovidov 2023-08-23 11:34:01 +03:00 committed by GitHub
commit 67c3b7fa65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 39 additions and 0 deletions

View File

@@ -205,6 +205,8 @@ void DistinctSortedChunkTransform::transform(Chunk & chunk)
if (unlikely(0 == chunk_rows))
return;
convertToFullIfSparse(chunk);
Columns input_columns = chunk.detachColumns();
/// split input columns into sorted and other("non-sorted") columns
initChunkProcessing(input_columns);

View File

@@ -0,0 +1,12 @@
-- { echoOn }
SELECT name, column, serialization_kind
FROM system.parts_columns
WHERE table = 't_sparse_distinct' AND database = currentDatabase() AND column = 'v'
ORDER BY name;
all_1_1_0 v Default
all_2_2_0 v Sparse
set optimize_distinct_in_order=1;
set max_threads=1;
select trimLeft(explain) from (explain pipeline SELECT DISTINCT id, v FROM t_sparse_distinct) where explain ilike '%DistinctSortedChunkTransform%';
DistinctSortedChunkTransform
SELECT DISTINCT id, v FROM t_sparse_distinct format Null;

View File

@@ -0,0 +1,25 @@
-- Regression test: SELECT DISTINCT with optimize_distinct_in_order = 1 over a
-- MergeTree table whose column `v` is expected to be stored with Default
-- serialization in one part and Sparse serialization in the other.
--
-- NOTE(review): statements after the `echoOn` marker are echoed into the test
-- output, so their text (and any comment placed there) presumably has to match
-- the reference file exactly. Keep all documentation above the marker.
--
-- Start from a clean state in case an earlier run left the table behind.
DROP TABLE IF EXISTS t_sparse_distinct;
-- `id` is the sorting key, which is what lets DISTINCT be executed "in order".
-- NOTE(review): ratio_of_defaults_for_sparse_serialization = 0.9 presumably means
-- a column is written as Sparse in a part only when more than 90% of its values
-- are defaults -- confirm against the MergeTree settings documentation.
CREATE TABLE t_sparse_distinct (id UInt32, v String)
ENGINE = MergeTree
ORDER BY id
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9;
-- Stop background merges so the two parts created by the inserts below stay
-- separate; the first echoed query expects to see both of them.
SYSTEM STOP MERGES t_sparse_distinct;
-- First part: 100 rows, id = number % 10, and `v` is the comparison result
-- rendered as text, so it is never the empty (default) string. The reference
-- output lists `v` in this part as Default.
INSERT INTO t_sparse_distinct SELECT number % 10, toString(number % 100 = 0) FROM numbers(100);
-- Second part: only `id` is supplied, so every `v` takes the column default.
-- The reference output lists `v` in this part as Sparse.
INSERT INTO t_sparse_distinct(id) SELECT number % 10 FROM numbers(100);
-- { echoOn }
SELECT name, column, serialization_kind
FROM system.parts_columns
WHERE table = 't_sparse_distinct' AND database = currentDatabase() AND column = 'v'
ORDER BY name;
set optimize_distinct_in_order=1;
set max_threads=1;
select trimLeft(explain) from (explain pipeline SELECT DISTINCT id, v FROM t_sparse_distinct) where explain ilike '%DistinctSortedChunkTransform%';
SELECT DISTINCT id, v FROM t_sparse_distinct format Null;
DROP TABLE t_sparse_distinct;