From 9e85e930d900914a2f33f74bfac40040f86041a2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 22 Nov 2024 10:08:48 +0000 Subject: [PATCH] Backport #72209 to 24.8: Fix min_age_to_force_merge_on_partition_only --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 13 ++++++++++++- src/Storages/MergeTree/MergeTreeDataMergerMutator.h | 1 + .../0_stateless/02473_optimize_old_parts.reference | 1 + .../queries/0_stateless/02473_optimize_old_parts.sh | 6 +++--- .../02676_optimize_old_parts_replicated.reference | 1 + .../02676_optimize_old_parts_replicated.sh | 6 ++++-- 6 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 140a226f2d1..6a094fd2e5e 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -416,6 +416,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo auto & partition_info = partitions_info[partition_id]; partition_info.min_age = std::min(partition_info.min_age, part_info.age); + ++partition_info.num_parts; ++res.parts_selected_precondition; @@ -557,11 +558,21 @@ String MergeTreeDataMergerMutator::getBestPartitionToOptimizeEntire( auto best_partition_it = std::max_element( partitions_info.begin(), partitions_info.end(), - [](const auto & e1, const auto & e2) { return e1.second.min_age < e2.second.min_age; }); + [](const auto & e1, const auto & e2) + { + // If one partition has only a single part, always select the other partition. + if (e1.second.num_parts == 1) + return true; + if (e2.second.num_parts == 1) + return false; + // If both partitions have more than one part, select the older partition. 
+ return e1.second.min_age < e2.second.min_age; + }); assert(best_partition_it != partitions_info.end()); if (static_cast<size_t>(best_partition_it->second.min_age) < data_settings->min_age_to_force_merge_seconds) + || static_cast<size_t>(best_partition_it->second.num_parts) == 1) return {}; return best_partition_it->first; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index d2852a3a504..89bb7793709 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -65,6 +65,7 @@ public: struct PartitionInfo { time_t min_age{std::numeric_limits<time_t>::max()}; + size_t num_parts = 0; }; using PartitionsInfo = std::unordered_map<std::string, PartitionInfo>; diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.reference b/tests/queries/0_stateless/02473_optimize_old_parts.reference index 7d08cd5bbef..19526022e88 100644 --- a/tests/queries/0_stateless/02473_optimize_old_parts.reference +++ b/tests/queries/0_stateless/02473_optimize_old_parts.reference @@ -3,5 +3,6 @@ Without merge With merge any part range 1 With merge partition only +2 1 1 diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sh b/tests/queries/0_stateless/02473_optimize_old_parts.sh index b563bc31b39..805b36be8e8 100755 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sh +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sh @@ -52,13 +52,13 @@ DROP TABLE test_with_merge; SELECT 'With merge partition only'; -CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i PARTITION BY i SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=true; INSERT INTO test_with_merge SELECT 1; INSERT INTO test_with_merge SELECT 2; -INSERT INTO test_with_merge SELECT 3;" +INSERT INTO test_with_merge SELECT 2 SETTINGS insert_deduplicate = 0;" 
-wait_for_number_of_parts 'test_with_merge' 1 100 +wait_for_number_of_parts 'test_with_merge' 2 100 $CLICKHOUSE_CLIENT -nmq " SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one diff --git a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.reference b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.reference index 0f3c482f188..46f409a3a73 100644 --- a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.reference +++ b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.reference @@ -3,5 +3,6 @@ Without merge With merge replicated any part range 1 With merge replicated partition only +2 1 1 diff --git a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh index c1f28f9f079..90d4997f76f 100755 --- a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh +++ b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh @@ -53,12 +53,14 @@ DROP TABLE test_replicated; SELECT 'With merge replicated partition only'; CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02676_partition_only', 'node') ORDER BY i +PARTITION BY i SETTINGS min_age_to_force_merge_seconds=1, merge_selecting_sleep_ms=1000, min_age_to_force_merge_on_partition_only=true; INSERT INTO test_replicated SELECT 1; INSERT INTO test_replicated SELECT 2; -INSERT INTO test_replicated SELECT 3;" +SELECT sleep(3) FORMAT Null; -- Sleep so the first partition is older +INSERT INTO test_replicated SELECT 2 SETTINGS insert_deduplicate = 0;" -wait_for_number_of_parts 'test_replicated' 1 100 +wait_for_number_of_parts 'test_replicated' 2 100 $CLICKHOUSE_CLIENT -nmq " SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -- 
Sleep for 9 seconds and verify that we keep the old part because it's the only one