Merge pull request #24496 from ClickHouse/configurable_max_parts_to_merge

Add `max_parts_to_merge_at_once` setting
This commit is contained in:
alesapin 2021-05-27 10:56:58 +03:00 committed by GitHub
commit ef17daaaa0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 39 additions and 1 deletion

View File

@ -343,6 +343,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge(
if (parts_to_merge.empty())
{
SimpleMergeSelector::Settings merge_settings;
/// Override value from table settings
merge_settings.max_parts_to_merge_at_once = data_settings->max_parts_to_merge_at_once;
if (aggressive)
merge_settings.base = 1;

View File

@ -56,6 +56,7 @@ struct Settings;
M(UInt64, write_ahead_log_interval_ms_to_fsync, 100, "Interval in milliseconds after which fsync for WAL is being done.", 0) \
M(Bool, in_memory_parts_insert_sync, false, "If true insert of part with in-memory format will wait for fsync of WAL", 0) \
M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \
M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \
\
/** Inserts settings. */ \
M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \

View File

@ -88,7 +88,7 @@ class SimpleMergeSelector final : public IMergeSelector
public:
struct Settings
{
/// Zero means unlimited.
/// Zero means unlimited. Can be overridden by the same merge tree setting.
size_t max_parts_to_merge_at_once = 100;
/** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases).

View File

@ -0,0 +1,32 @@
-- Test for the `max_parts_to_merge_at_once` MergeTree setting: a table is created with the
-- setting equal to 3, filled with 100 rows, and the sizes of its merges recorded in
-- system.part_log are checked.
DROP TABLE IF EXISTS limited_merge_table;
-- Single thread and one-row blocks; presumably this makes the INSERT below produce
-- one part per row, so there are many parts to merge (TODO confirm).
SET max_threads = 1;
SET max_block_size = 1;
SET min_insert_block_size_rows = 1;
CREATE TABLE limited_merge_table
(
key UInt64
)
ENGINE = MergeTree()
ORDER BY key
SETTINGS max_parts_to_merge_at_once = 3;
-- Merges are stopped while the rows are inserted and started again right after the INSERT.
SYSTEM STOP MERGES limited_merge_table;
INSERT INTO limited_merge_table SELECT number FROM numbers(100);
SYSTEM START MERGES limited_merge_table;
OPTIMIZE TABLE limited_merge_table FINAL;
-- Flush logs before system.part_log is queried below.
SYSTEM FLUSH LOGS;
SELECT COUNT() FROM limited_merge_table;
-- The last merge (OPTIMIZE FINAL) merges all parts, but all previous merges must merge <= 3 parts.
-- During a concurrent run only the single final merge may happen; that's why we have this `if`:
-- when topK(2) returns two different merge sizes, the smaller one must be <= 3,
-- otherwise the check trivially yields 1.
SELECT if(length(topK(2)(length(merged_from))) == 2, arrayMin(topK(2)(length(merged_from))) <= 3, 1)
FROM system.part_log WHERE table = 'limited_merge_table' and database = currentDatabase() and event_type = 'MergeParts';
DROP TABLE IF EXISTS limited_merge_table;