Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final

This commit is contained in:
Vasily Nemkov 2023-08-17 12:30:06 +02:00
parent 9aef39a788
commit 322db80cd9
3 changed files with 63 additions and 2 deletions

View File

@ -996,7 +996,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
/// MergeTreeReadPool and MergeTreeThreadSelectProcessor for parallel select.
if (num_streams > 1 && settings.do_not_merge_across_partitions_select_final &&
std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
parts_to_merge_ranges[range_index]->data_part->info.level > 0)
parts_to_merge_ranges[range_index]->data_part->info.level > 0
&& data.merging_params.is_deleted_column.empty())
{
sum_marks_in_lonely_parts += parts_to_merge_ranges[range_index]->getMarksCount();
lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index]));
@ -1052,7 +1053,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
/// with level > 0 then we won't postprocess this part
if (settings.do_not_merge_across_partitions_select_final &&
std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 &&
parts_to_merge_ranges[range_index]->data_part->info.level > 0)
parts_to_merge_ranges[range_index]->data_part->info.level > 0 &&
data.merging_params.is_deleted_column.empty())
{
partition_pipes.emplace_back(Pipe::unitePipes(std::move(pipes)));
continue;

View File

@ -0,0 +1,29 @@
--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685
--- Verify that ReplacingMergeTree properly handles _is_deleted:
--- SELECT FINAL should take _is_deleted in consideration when there is only one partition.
-- { echoOn }
CREATE TABLE t
(
`account_id` UInt64,
`_is_deleted` UInt8,
`_version` UInt64
)
ENGINE = ReplacingMergeTree(_version, _is_deleted)
ORDER BY (account_id);
INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3);
-- Mark first 100 rows as deleted.
INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2);
-- Put everything is in one partition
OPTIMIZE TABLE t FINAL;
SELECT count() FROM t;
1000
SELECT count() FROM t FINAL;
900
-- Both should produce same number of rows.
-- Previously `do_not_merge_across_partitions_select_final = 1` showed more rows,
-- as if no rows were deleted.
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1;
900
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0;
900

View File

@ -0,0 +1,30 @@
--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685
--- Verify that ReplacingMergeTree properly handles _is_deleted:
--- SELECT FINAL should take _is_deleted in consideration when there is only one partition.
-- { echoOn }
CREATE TABLE t
(
`account_id` UInt64,
`_is_deleted` UInt8,
`_version` UInt64
)
ENGINE = ReplacingMergeTree(_version, _is_deleted)
ORDER BY (account_id);
INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3);
-- Mark first 100 rows as deleted.
INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2);
-- Put everything is in one partition
OPTIMIZE TABLE t FINAL;
SELECT count() FROM t;
SELECT count() FROM t FINAL;
-- Both should produce same number of rows.
-- Previously `do_not_merge_across_partitions_select_final = 1` showed more rows,
-- as if no rows were deleted.
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1;
SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0;