Merge pull request #41576 from ClickHouse/fix-partial-sort-optimisation-bug-from-41182

Fix a bug with missing rows after partial sort optimization
This commit is contained in:
Nikolai Kochetov 2022-09-21 16:47:18 +02:00 committed by GitHub
commit 1204f643f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 2013 additions and 5 deletions

View File

@ -111,7 +111,6 @@ void PartialSortingTransform::transform(Chunk & chunk)
read_rows->add(chunk.getNumRows());
auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
size_t block_rows_before_filter = block.rows();
/** If we've saved columns from previous blocks we could filter all rows from current block
* which are unnecessary for sortBlock(...) because they obviously won't be in the top LIMIT rows.
@ -138,10 +137,8 @@ void PartialSortingTransform::transform(Chunk & chunk)
sortBlock(block, description, limit);
size_t block_rows_after_filter = block.rows();
/// Check if we can use this block for optimization.
if (min_limit_for_partial_sort_optimization <= limit && block_rows_after_filter > 0 && limit <= block_rows_before_filter)
if (min_limit_for_partial_sort_optimization <= limit && limit <= block.rows())
{
/** If we filtered more than limit rows from block take block last row.
* Otherwise take last limit row.
@ -149,7 +146,7 @@ void PartialSortingTransform::transform(Chunk & chunk)
* If current threshold value is empty, update current threshold value.
* If min block value is less than current threshold value, update current threshold value.
*/
size_t min_row_to_compare = limit <= block_rows_after_filter ? (limit - 1) : (block_rows_after_filter - 1);
size_t min_row_to_compare = limit - 1;
auto raw_block_columns = extractRawColumns(block, description_with_positions);
if (sort_description_threshold_columns.empty() ||

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,11 @@
-- Regression test: rows must not go missing from an ORDER BY ... LIMIT result
-- when the partial sort optimization is in effect.

-- Unsorted table (order by tuple()) with index_granularity set to 1000.
create table partial_sort_opt_bug (x UInt64) engine = MergeTree order by tuple() settings index_granularity = 1000;

-- Four inserts with disjoint value ranges, deliberately not in ascending order:
--   100000..103999 (4000 rows)
insert into partial_sort_opt_bug select number + 100000 from numbers(4000);
--   0..999 (1000 rows)
insert into partial_sort_opt_bug select number from numbers(1000);
--   200000..202999 (3000 rows)
insert into partial_sort_opt_bug select number + 200000 from numbers(3000);
--   1000..4999 (4000 rows)
insert into partial_sort_opt_bug select number + 1000 from numbers(4000);

-- Force a merge of the inserted data.
-- NOTE(review): presumably this leaves a single part whose rows keep the insertion order above - confirm.
optimize table partial_sort_opt_bug final;

-- The 2000 smallest values are 0..1999: 0..999 come from the second insert and
-- 1000..1999 from the fourth, so a correct result needs rows from both.
-- max_block_size = 4000 is larger than the limit of 2000.
-- NOTE(review): presumably chosen so that one 4000-row block mixes a few small rows with many large ones - confirm.
select x from partial_sort_opt_bug order by x limit 2000 settings max_block_size = 4000;