From 6e29404a7548c5a4dbdf3f2f91d37e27441afce2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 May 2021 16:21:17 +0300 Subject: [PATCH 1/4] Add max_parts_to_merge_at_once setting --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 3 ++ src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/MergeTree/SimpleMergeSelector.h | 2 +- ...check_max_parts_to_merge_at_once.reference | 1 + ...01882_check_max_parts_to_merge_at_once.sql | 29 +++++++++++++++++++ 5 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference create mode 100644 tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 4e151bfdb91..f5edde01478 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -343,6 +343,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (parts_to_merge.empty()) { SimpleMergeSelector::Settings merge_settings; + /// Override value from table settings + merge_settings.max_parts_to_merge_at_once = data_settings->max_parts_to_merge_at_once; + if (aggressive) merge_settings.base = 1; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 6e43d0fad77..9bda3578906 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -56,6 +56,7 @@ struct Settings; M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Interval in milliseconds after which fsync for WAL is being done.", 0) \ M(Bool, in_memory_parts_insert_sync, false, "If true insert of part with in-memory format will wait for fsync of WAL", 0) \ M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ + M(UInt64, 
max_parts_to_merge_at_once, 100, "Max amout of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/SimpleMergeSelector.h index 4f277ad74cd..2e6ea3513fd 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/SimpleMergeSelector.h @@ -88,7 +88,7 @@ class SimpleMergeSelector final : public IMergeSelector public: struct Settings { - /// Zero means unlimited. + /// Zero means unlimited. Can be overriden by the same merge tree setting. size_t max_parts_to_merge_at_once = 100; /** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases). diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference new file mode 100644 index 00000000000..00750edc07d --- /dev/null +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference @@ -0,0 +1 @@ +3 diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql new file mode 100644 index 00000000000..cf6d76467e3 --- /dev/null +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS limited_merge_table; + +SET max_threads = 1; +SET max_block_size = 1; +SET min_insert_block_size_rows = 1; + +CREATE TABLE limited_merge_table +( + key UInt64 +) +ENGINE = MergeTree() +ORDER BY key +SETTINGS max_parts_to_merge_at_once = 3; + +SYSTEM STOP MERGES limited_merge_table; + +INSERT INTO limited_merge_table SELECT number FROM numbers(250); + + +SYSTEM START MERGES limited_merge_table; + +OPTIMIZE TABLE 
limited_merge_table FINAL; + +SYSTEM FLUSH LOGS; + +-- final optimize FINAL will merge all parts, but all previous merges must merge <= 3 parts +SELECT length(merged_from) FROM system.part_log WHERE event_type = 'MergeParts' and table = 'limited_merge_table' and database = currentDatabase() ORDER BY length(merged_from) DESC LIMIT 1 OFFSET 2; + +DROP TABLE IF EXISTS limited_merge_table; From 7c69695d674df646bde59c1ddb913ae1b47e7c98 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 May 2021 17:52:10 +0300 Subject: [PATCH 2/4] Fix test --- .../01882_check_max_parts_to_merge_at_once.reference | 2 +- .../0_stateless/01882_check_max_parts_to_merge_at_once.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference index 00750edc07d..d00491fd7e5 100644 --- a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference @@ -1 +1 @@ -3 +1 diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql index cf6d76467e3..d19732a6e09 100644 --- a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql @@ -24,6 +24,6 @@ OPTIMIZE TABLE limited_merge_table FINAL; SYSTEM FLUSH LOGS; -- final optimize FINAL will merge all parts, but all previous merges must merge <= 3 parts -SELECT length(merged_from) FROM system.part_log WHERE event_type = 'MergeParts' and table = 'limited_merge_table' and database = currentDatabase() ORDER BY length(merged_from) DESC LIMIT 1 OFFSET 2; +SELECT length(merged_from) <= 3 FROM system.part_log WHERE event_type = 'MergeParts' and table = 'limited_merge_table' and database = currentDatabase() ORDER BY length(merged_from) 
DESC LIMIT 1 OFFSET 2; DROP TABLE IF EXISTS limited_merge_table; From e19cff70b89e417cd9c3690e9c4c34367acd035d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 May 2021 12:47:22 +0300 Subject: [PATCH 3/4] More robust test --- .../01882_check_max_parts_to_merge_at_once.reference | 1 + .../01882_check_max_parts_to_merge_at_once.sql | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference index d00491fd7e5..c155c53432a 100644 --- a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference @@ -1 +1,2 @@ +250 1 diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql index d19732a6e09..1d12f0ef2ec 100644 --- a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql @@ -16,14 +16,17 @@ SYSTEM STOP MERGES limited_merge_table; INSERT INTO limited_merge_table SELECT number FROM numbers(250); - SYSTEM START MERGES limited_merge_table; OPTIMIZE TABLE limited_merge_table FINAL; SYSTEM FLUSH LOGS; --- final optimize FINAL will merge all parts, but all previous merges must merge <= 3 parts -SELECT length(merged_from) <= 3 FROM system.part_log WHERE event_type = 'MergeParts' and table = 'limited_merge_table' and database = currentDatabase() ORDER BY length(merged_from) DESC LIMIT 1 OFFSET 2; +SELECT COUNT() FROM limited_merge_table; + +-- final optimize FINAL will merge all parts, but all previous merges must merge <= 3 parts. +-- During concurrent run only one final merge can happen, that's why we have this `if`.
+SELECT if(length(topK(2)(length(merged_from))) == 2, arrayMin(topK(2)(length(merged_from))) <= 3, 1) +FROM system.part_log WHERE table = 'limited_merge_table' and database = currentDatabase() and event_type = 'MergeParts'; DROP TABLE IF EXISTS limited_merge_table; From 8ba6ed3fc268ba7b24275744a6aeda5089affbc6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 May 2021 16:53:05 +0300 Subject: [PATCH 4/4] Simpler test --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/MergeTree/SimpleMergeSelector.h | 2 +- .../01882_check_max_parts_to_merge_at_once.reference | 2 +- .../0_stateless/01882_check_max_parts_to_merge_at_once.sql | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 9bda3578906..673105b3ed4 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -56,7 +56,7 @@ struct Settings; M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Interval in milliseconds after which fsync for WAL is being done.", 0) \ M(Bool, in_memory_parts_insert_sync, false, "If true insert of part with in-memory format will wait for fsync of WAL", 0) \ M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ - M(UInt64, max_parts_to_merge_at_once, 100, "Max amout of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \ + M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \ \ /** Inserts settings.
*/ \ M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/SimpleMergeSelector.h index 2e6ea3513fd..af339dbfa24 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/SimpleMergeSelector.h @@ -88,7 +88,7 @@ class SimpleMergeSelector final : public IMergeSelector public: struct Settings { - /// Zero means unlimited. Can be overriden by the same merge tree setting. + /// Zero means unlimited. Can be overridden by the same merge tree setting. size_t max_parts_to_merge_at_once = 100; /** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases). diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference index c155c53432a..30a4d52afe1 100644 --- a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.reference @@ -1,2 +1,2 @@ -250 +100 1 diff --git a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql index 1d12f0ef2ec..d6cde1ef7a8 100644 --- a/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql +++ b/tests/queries/0_stateless/01882_check_max_parts_to_merge_at_once.sql @@ -14,7 +14,7 @@ SETTINGS max_parts_to_merge_at_once = 3; SYSTEM STOP MERGES limited_merge_table; -INSERT INTO limited_merge_table SELECT number FROM numbers(250); +INSERT INTO limited_merge_table SELECT number FROM numbers(100); SYSTEM START MERGES limited_merge_table;