mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #16191 from ClickHouse/blind_fix_of_read_in_order
Blind performance fix of read_in_order_many_parts
This commit is contained in:
commit
10d2070fbe
@ -307,6 +307,13 @@ void StoragePolicy::buildVolumeIndices()
|
||||
}
|
||||
}
|
||||
|
||||
bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const
|
||||
{
|
||||
for (const auto & volume : volumes)
|
||||
if (volume->areMergesAvoided())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
StoragePolicySelector::StoragePolicySelector(
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
|
@ -88,6 +88,9 @@ public:
|
||||
/// Checks if storage policy can be replaced by another one.
|
||||
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
|
||||
|
||||
/// Check if we have any volume with stopped merges
|
||||
bool hasAnyVolumeWithDisabledMerges() const;
|
||||
|
||||
private:
|
||||
Volumes volumes;
|
||||
const String name;
|
||||
|
@ -227,6 +227,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
||||
IMergeSelector::PartsRanges parts_ranges;
|
||||
|
||||
StoragePolicyPtr storage_policy = data.getStoragePolicy();
|
||||
/// Volumes with stopped merges are extremely rare situation.
|
||||
/// Check it once and don't check each part (this is bad for performance).
|
||||
bool has_volumes_with_disabled_merges = storage_policy->hasAnyVolumeWithDisabledMerges();
|
||||
|
||||
const String * prev_partition_id = nullptr;
|
||||
/// Previous part only in boundaries of partition frame
|
||||
@ -277,7 +280,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
||||
part_info.data = &part;
|
||||
part_info.ttl_infos = &part->ttl_infos;
|
||||
part_info.compression_codec_desc = part->default_codec->getFullCodecDesc();
|
||||
part_info.shall_participate_in_merges = part->shallParticipateInMerges(storage_policy);
|
||||
part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? part->shallParticipateInMerges(storage_policy) : true;
|
||||
|
||||
parts_ranges.back().emplace_back(part_info);
|
||||
|
||||
|
@ -97,12 +97,16 @@ bool allow(
|
||||
double min_age,
|
||||
double range_size,
|
||||
double partition_size,
|
||||
double min_size_to_lower_base_log,
|
||||
double max_size_to_lower_base_log,
|
||||
const SimpleMergeSelector::Settings & settings)
|
||||
{
|
||||
// std::cerr << "sum_size: " << sum_size << "\n";
|
||||
|
||||
/// Map size to 0..1 using logarithmic scale
|
||||
double size_normalized = mapPiecewiseLinearToUnit(log1p(sum_size), log1p(settings.min_size_to_lower_base), log1p(settings.max_size_to_lower_base));
|
||||
/// Use log(1 + x) instead of log1p(x) because our sum_size is always integer.
|
||||
/// Also log1p seems to be slow and significantly affect performance of merges assignment.
|
||||
double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), min_size_to_lower_base_log, max_size_to_lower_base_log);
|
||||
|
||||
// std::cerr << "size_normalized: " << size_normalized << "\n";
|
||||
|
||||
@ -141,7 +145,9 @@ void selectWithinPartition(
|
||||
const SimpleMergeSelector::PartsRange & parts,
|
||||
const size_t max_total_size_to_merge,
|
||||
Estimator & estimator,
|
||||
const SimpleMergeSelector::Settings & settings)
|
||||
const SimpleMergeSelector::Settings & settings,
|
||||
double min_size_to_lower_base_log,
|
||||
double max_size_to_lower_base_log)
|
||||
{
|
||||
size_t parts_count = parts.size();
|
||||
if (parts_count <= 1)
|
||||
@ -178,7 +184,7 @@ void selectWithinPartition(
|
||||
if (max_total_size_to_merge && sum_size > max_total_size_to_merge)
|
||||
break;
|
||||
|
||||
if (allow(sum_size, max_size, min_age, end - begin, parts_count, settings))
|
||||
if (allow(sum_size, max_size, min_age, end - begin, parts_count, min_size_to_lower_base_log, max_size_to_lower_base_log, settings))
|
||||
estimator.consider(
|
||||
parts.begin() + begin,
|
||||
parts.begin() + end,
|
||||
@ -198,8 +204,12 @@ SimpleMergeSelector::PartsRange SimpleMergeSelector::select(
|
||||
{
|
||||
Estimator estimator;
|
||||
|
||||
/// Precompute logarithm of settings boundaries, because log function is quite expensive in terms of performance
|
||||
const double min_size_to_lower_base_log = log(1 + settings.min_size_to_lower_base);
|
||||
const double max_size_to_lower_base_log = log(1 + settings.max_size_to_lower_base);
|
||||
|
||||
for (const auto & part_range : parts_ranges)
|
||||
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings);
|
||||
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings, min_size_to_lower_base_log, max_size_to_lower_base_log);
|
||||
|
||||
return estimator.getBest();
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class SimpleMergeSelector : public IMergeSelector
|
||||
class SimpleMergeSelector final : public IMergeSelector
|
||||
{
|
||||
public:
|
||||
struct Settings
|
||||
|
Loading…
Reference in New Issue
Block a user