From 4b65d5469f441bac67e7026ffb9a8f16337fcd65 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Oct 2020 15:41:01 +0300 Subject: [PATCH] Fix some unrelated performance issues in select parts for merge --- src/Disks/StoragePolicy.cpp | 7 +++++ src/Disks/StoragePolicy.h | 3 ++ .../MergeTree/MergeTreeDataMergerMutator.cpp | 5 +++- .../MergeTree/SimpleMergeSelector.cpp | 28 ++++++++++++------- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 8a71f4f7a2f..2215615feda 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -307,6 +307,13 @@ void StoragePolicy::buildVolumeIndices() } } +bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const +{ + for (const auto & volume : volumes) + if (volume->areMergesAvoided()) + return true; + return false; +} StoragePolicySelector::StoragePolicySelector( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Disks/StoragePolicy.h b/src/Disks/StoragePolicy.h index f4a4a0070b8..fc45ed3ed06 100644 --- a/src/Disks/StoragePolicy.h +++ b/src/Disks/StoragePolicy.h @@ -88,6 +88,9 @@ public: /// Checks if storage policy can be replaced by another one. void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const; + /// Check if we have any volume with stopped merges + bool hasAnyVolumeWithDisabledMerges() const; + private: Volumes volumes; const String name; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index df42f164e34..b29966751f9 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -227,6 +227,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( IMergeSelector::PartsRanges parts_ranges; StoragePolicyPtr storage_policy = data.getStoragePolicy(); + /// Volumes with stopped merges are extremely rare situation. + /// Check it once and don't check each part (this is bad for performance). + bool has_volumes_with_disabled_merges = storage_policy->hasAnyVolumeWithDisabledMerges(); const String * prev_partition_id = nullptr; /// Previous part only in boundaries of partition frame @@ -277,7 +280,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( part_info.data = ∂ part_info.ttl_infos = &part->ttl_infos; part_info.compression_codec_desc = part->default_codec->getFullCodecDesc(); - part_info.shall_participate_in_merges = part->shallParticipateInMerges(storage_policy); + part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? part->shallParticipateInMerges(storage_policy) : true; parts_ranges.back().emplace_back(part_info); diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 335833998c8..1156c17835b 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -92,19 +92,21 @@ double mapPiecewiseLinearToUnit(double value, double min, double max) /** Is allowed to merge parts in range with specific properties. */ bool allow( - double sum_size, - double max_size, - double min_age, - double range_size, - double partition_size, + size_t sum_size, + size_t max_size, + size_t min_age, + size_t range_size, + size_t partition_size, + double min_size_to_lower_base_log, + double max_size_to_lower_base_log, const SimpleMergeSelector::Settings & settings) { // std::cerr << "sum_size: " << sum_size << "\n"; /// Map size to 0..1 using logarithmic scale - /// Use log(1 + x) instead of log1p(x) because our x variables (sum_size and settings) are always integer. + /// Use log(1 + x) instead of log1p(x) because our sum_size is always integer. /// Also log1p seems to be slow and significantly affect performance of merges assignment. - double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), log(1 + settings.min_size_to_lower_base), log(1 + settings.max_size_to_lower_base)); + double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), min_size_to_lower_base_log, max_size_to_lower_base_log); // std::cerr << "size_normalized: " << size_normalized << "\n"; @@ -143,7 +145,9 @@ void selectWithinPartition( const SimpleMergeSelector::PartsRange & parts, const size_t max_total_size_to_merge, Estimator & estimator, - const SimpleMergeSelector::Settings & settings) + const SimpleMergeSelector::Settings & settings, + double min_size_to_lower_base_log, + double max_size_to_lower_base_log) { size_t parts_count = parts.size(); if (parts_count <= 1) @@ -180,7 +184,7 @@ void selectWithinPartition( if (max_total_size_to_merge && sum_size > max_total_size_to_merge) break; - if (allow(sum_size, max_size, min_age, end - begin, parts_count, settings)) + if (allow(sum_size, max_size, min_age, end - begin, parts_count, min_size_to_lower_base_log, max_size_to_lower_base_log, settings)) estimator.consider( parts.begin() + begin, parts.begin() + end, @@ -200,8 +204,12 @@ SimpleMergeSelector::PartsRange SimpleMergeSelector::select( { Estimator estimator; + /// Precompute logarithm of settings boundaries, because log function is quite expensive in terms of performance + const double min_size_to_lower_base_log = log(1 + settings.min_size_to_lower_base); + const double max_size_to_lower_base_log = log(1 + settings.max_size_to_lower_base); + for (const auto & part_range : parts_ranges) - selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings); + selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings, min_size_to_lower_base_log, max_size_to_lower_base_log); return estimator.getBest(); }