From c1e2b2af74910d0fa28c89612c20fd5658bb84ec Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Mon, 31 Oct 2022 08:57:31 +0800 Subject: [PATCH 1/4] Optimize TTL merge, completely expired parts can be removed in time --- src/Interpreters/PartLog.cpp | 2 ++ src/Interpreters/PartLog.h | 2 ++ src/Storages/MergeTree/MergeFromLogEntryTask.cpp | 8 +++++--- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 7 +++++-- src/Storages/MergeTree/MergeType.cpp | 2 +- src/Storages/MergeTree/MergeType.h | 2 ++ src/Storages/StorageMergeTree.cpp | 7 ++++++- 7 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index b35ee50b98e..6797d6b3a41 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -31,6 +31,8 @@ PartLogElement::MergeReasonType PartLogElement::getMergeReasonType(MergeType mer return TTL_DELETE_MERGE; case MergeType::TTLRecompress: return TTL_RECOMPRESS_MERGE; + case MergeType::TTLDrop: + return TTL_DROP_MERGE; } throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast(merge_type)); diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 2ce0dfd76de..6180963908d 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -41,6 +41,8 @@ struct PartLogElement TTL_DELETE_MERGE = 3, /// Merge with recompression TTL_RECOMPRESS_MERGE = 4, + /// Merge assigned to drop parts (with TTLMergeSelector) + TTL_DROP_MERGE = 5, }; String query_id; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 9a9b8a4a6bb..0711ed157a5 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -152,7 +152,9 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() } /// Start to make the main work - size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts); + size_t need_total_size = 0; + if (entry.merge_type != MergeType::TTLDrop) + need_total_size = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts); /// Can throw an exception while reserving space. IMergeTreeDataPart::TTLInfos ttl_infos; @@ -180,7 +182,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() std::optional tagger; ReservationSharedPtr reserved_space = storage.balancedReservation( metadata_snapshot, - estimated_space_for_merge, + need_total_size, max_volume_index, future_merged_part->name, future_merged_part->part_info, @@ -190,7 +192,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (!reserved_space) reserved_space = storage.reserveSpacePreferringTTLRules( - metadata_snapshot, estimated_space_for_merge, ttl_infos, time(nullptr), max_volume_index); + metadata_snapshot, need_total_size, ttl_infos, time(nullptr), max_volume_index); future_merged_part->uuid = entry.new_part_uuid; future_merged_part->updatePath(storage, reserved_space.get()); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 0b5c5285d15..879cd2fa3da 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -309,10 +309,13 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( data_settings->merge_with_ttl_timeout, data_settings->ttl_only_drop_parts); - parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge); + parts_to_merge = delete_ttl_selector.select( + parts_ranges, + data_settings->ttl_only_drop_parts ? data_settings->max_bytes_to_merge_at_max_space_in_pool : max_total_size_to_merge); + if (!parts_to_merge.empty()) { - future_part->merge_type = MergeType::TTLDelete; + future_part->merge_type = data_settings->ttl_only_drop_parts ? MergeType::TTLDrop : MergeType::TTLDelete; } else if (metadata_snapshot->hasAnyRecompressionTTL()) { diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp index 045114578d0..96862b6cca1 100644 --- a/src/Storages/MergeTree/MergeType.cpp +++ b/src/Storages/MergeTree/MergeType.cpp @@ -20,7 +20,7 @@ MergeType checkAndGetMergeType(UInt32 merge_type) bool isTTLMergeType(MergeType merge_type) { - return merge_type == MergeType::TTLDelete || merge_type == MergeType::TTLRecompress; + return merge_type == MergeType::TTLDelete || merge_type == MergeType::TTLRecompress || merge_type == MergeType::TTLDrop; } } diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h index ce9a40c5931..fa5df87b25a 100644 --- a/src/Storages/MergeTree/MergeType.h +++ b/src/Storages/MergeTree/MergeType.h @@ -19,6 +19,8 @@ enum class MergeType TTLDelete = 2, /// Merge with recompression TTLRecompress = 3, + /// Merge assigned to drop parts (with TTLMergeSelector) + TTLDrop = 4, }; /// Check parsed merge_type from raw int and get enum value. diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index a450a9ef3a9..805242fbec9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -929,7 +929,12 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (isTTLMergeType(future_part->merge_type)) getContext()->getMergeList().bookMergeWithTTL(); - merging_tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts), *this, metadata_snapshot, false); + /// If merge_type is TTLDrop, no need to reserve disk space + size_t need_total_size = 0; + if (future_part->merge_type != MergeType::TTLDrop) + need_total_size = MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts); + + merging_tagger = std::make_unique(future_part, need_total_size, *this, metadata_snapshot, false); return std::make_shared(future_part, std::move(merging_tagger), std::make_shared()); } From e773eb2fd17c84620738ef9cd16561e2a1764d80 Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Fri, 4 Nov 2022 17:19:34 +0800 Subject: [PATCH 2/4] batter --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 879cd2fa3da..c7d1786135a 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -303,19 +303,28 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (metadata_snapshot->hasAnyTTL() && merge_with_ttl_allowed && !ttl_merges_blocker.isCancelled()) { /// TTL delete is preferred to recompression - TTLDeleteMergeSelector delete_ttl_selector( + TTLDeleteMergeSelector drop_ttl_selector( next_delete_ttl_merge_times_by_partition, current_time, data_settings->merge_with_ttl_timeout, - data_settings->ttl_only_drop_parts); - - parts_to_merge = delete_ttl_selector.select( - parts_ranges, - data_settings->ttl_only_drop_parts ? data_settings->max_bytes_to_merge_at_max_space_in_pool : max_total_size_to_merge); + true); + parts_to_merge = drop_ttl_selector.select(parts_ranges,data_settings->max_bytes_to_merge_at_max_space_in_pool); if (!parts_to_merge.empty()) { - future_part->merge_type = data_settings->ttl_only_drop_parts ? MergeType::TTLDrop : MergeType::TTLDelete; + future_part->merge_type = MergeType::TTLDrop; + } + else if (!data_settings->ttl_only_drop_parts) + { + TTLDeleteMergeSelector delete_ttl_selector( + next_delete_ttl_merge_times_by_partition, + current_time, + data_settings->merge_with_ttl_timeout, + false); + + parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge); + if (!parts_to_merge.empty()) + future_part->merge_type = MergeType::TTLDelete; } else if (metadata_snapshot->hasAnyRecompressionTTL()) { From ecf45bb2165af6e9691a7c57f1993b2b60b393a5 Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Fri, 4 Nov 2022 20:52:21 +0800 Subject: [PATCH 3/4] fix test --- src/Interpreters/PartLog.cpp | 1 + .../0_stateless/02293_part_log_has_merge_reason.reference | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 6797d6b3a41..f1b29a8a9b2 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -74,6 +74,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"RegularMerge", static_cast(REGULAR_MERGE)}, {"TTLDeleteMerge", static_cast(TTL_DELETE_MERGE)}, {"TTLRecompressMerge", static_cast(TTL_RECOMPRESS_MERGE)}, + {"TTLDropMerge", static_cast(TTL_DROP_MERGE)}, } ); diff --git a/tests/queries/0_stateless/02293_part_log_has_merge_reason.reference b/tests/queries/0_stateless/02293_part_log_has_merge_reason.reference index 220107cf15b..37156ee37e7 100644 --- a/tests/queries/0_stateless/02293_part_log_has_merge_reason.reference +++ b/tests/queries/0_stateless/02293_part_log_has_merge_reason.reference @@ -1 +1 @@ -MergeParts TTLDeleteMerge +MergeParts TTLDropMerge From ef6065b331c19a59ea1e7409e169afa206d697ba Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Sun, 6 Nov 2022 13:40:26 +0800 Subject: [PATCH 4/4] fix test --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index c7d1786135a..c926b30781a 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -326,7 +326,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (!parts_to_merge.empty()) future_part->merge_type = MergeType::TTLDelete; } - else if (metadata_snapshot->hasAnyRecompressionTTL()) + + if (parts_to_merge.empty() && metadata_snapshot->hasAnyRecompressionTTL()) { TTLRecompressMergeSelector recompress_ttl_selector( next_recompress_ttl_merge_times_by_partition,