Fix some unrelated performance issues in select parts for merge

This commit is contained in:
alesapin 2020-10-22 15:41:01 +03:00
parent 945dd3aa59
commit 4b65d5469f
4 changed files with 32 additions and 11 deletions

View File

@ -307,6 +307,13 @@ void StoragePolicy::buildVolumeIndices()
}
}
bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const
{
for (const auto & volume : volumes)
if (volume->areMergesAvoided())
return true;
return false;
}
StoragePolicySelector::StoragePolicySelector(
const Poco::Util::AbstractConfiguration & config,

View File

@ -88,6 +88,9 @@ public:
/// Checks if storage policy can be replaced by another one.
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
/// Check if we have any volume with stopped merges
bool hasAnyVolumeWithDisabledMerges() const;
private:
Volumes volumes;
const String name;

View File

@ -227,6 +227,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
IMergeSelector::PartsRanges parts_ranges;
StoragePolicyPtr storage_policy = data.getStoragePolicy();
/// Volumes with stopped merges are extremely rare situation.
/// Check it once and don't check each part (this is bad for performance).
bool has_volumes_with_disabled_merges = storage_policy->hasAnyVolumeWithDisabledMerges();
const String * prev_partition_id = nullptr;
/// Previous part only in boundaries of partition frame
@ -277,7 +280,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
part_info.data = ∂
part_info.ttl_infos = &part->ttl_infos;
part_info.compression_codec_desc = part->default_codec->getFullCodecDesc();
part_info.shall_participate_in_merges = part->shallParticipateInMerges(storage_policy);
part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? part->shallParticipateInMerges(storage_policy) : true;
parts_ranges.back().emplace_back(part_info);

View File

@ -92,19 +92,21 @@ double mapPiecewiseLinearToUnit(double value, double min, double max)
/** Is allowed to merge parts in range with specific properties.
*/
bool allow(
double sum_size,
double max_size,
double min_age,
double range_size,
double partition_size,
size_t sum_size,
size_t max_size,
size_t min_age,
size_t range_size,
size_t partition_size,
double min_size_to_lower_base_log,
double max_size_to_lower_base_log,
const SimpleMergeSelector::Settings & settings)
{
// std::cerr << "sum_size: " << sum_size << "\n";
/// Map size to 0..1 using logarithmic scale
/// Use log(1 + x) instead of log1p(x) because our x variables (sum_size and settings) are always integer.
/// Use log(1 + x) instead of log1p(x) because our sum_size is always integer.
/// Also log1p seems to be slow and significantly affect performance of merges assignment.
double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), log(1 + settings.min_size_to_lower_base), log(1 + settings.max_size_to_lower_base));
double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), min_size_to_lower_base_log, max_size_to_lower_base_log);
// std::cerr << "size_normalized: " << size_normalized << "\n";
@ -143,7 +145,9 @@ void selectWithinPartition(
const SimpleMergeSelector::PartsRange & parts,
const size_t max_total_size_to_merge,
Estimator & estimator,
const SimpleMergeSelector::Settings & settings)
const SimpleMergeSelector::Settings & settings,
double min_size_to_lower_base_log,
double max_size_to_lower_base_log)
{
size_t parts_count = parts.size();
if (parts_count <= 1)
@ -180,7 +184,7 @@ void selectWithinPartition(
if (max_total_size_to_merge && sum_size > max_total_size_to_merge)
break;
if (allow(sum_size, max_size, min_age, end - begin, parts_count, settings))
if (allow(sum_size, max_size, min_age, end - begin, parts_count, min_size_to_lower_base_log, max_size_to_lower_base_log, settings))
estimator.consider(
parts.begin() + begin,
parts.begin() + end,
@ -200,8 +204,12 @@ SimpleMergeSelector::PartsRange SimpleMergeSelector::select(
{
Estimator estimator;
/// Precompute logarithm of settings boundaries, because log function is quite expensive in terms of performance
const double min_size_to_lower_base_log = log(1 + settings.min_size_to_lower_base);
const double max_size_to_lower_base_log = log(1 + settings.max_size_to_lower_base);
for (const auto & part_range : parts_ranges)
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings);
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings, min_size_to_lower_base_log, max_size_to_lower_base_log);
return estimator.getBest();
}