Merge pull request #16191 from ClickHouse/blind_fix_of_read_in_order

Blind performance fix of read_in_order_many_parts
This commit is contained in:
alexey-milovidov 2020-10-23 01:51:54 +03:00 committed by GitHub
commit 10d2070fbe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 29 additions and 6 deletions

View File

@ -307,6 +307,13 @@ void StoragePolicy::buildVolumeIndices()
} }
} }
bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const
{
for (const auto & volume : volumes)
if (volume->areMergesAvoided())
return true;
return false;
}
StoragePolicySelector::StoragePolicySelector( StoragePolicySelector::StoragePolicySelector(
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,

View File

@ -88,6 +88,9 @@ public:
/// Checks if storage policy can be replaced by another one. /// Checks if storage policy can be replaced by another one.
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const; void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
/// Check if we have any volume with stopped merges
bool hasAnyVolumeWithDisabledMerges() const;
private: private:
Volumes volumes; Volumes volumes;
const String name; const String name;

View File

@ -227,6 +227,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
IMergeSelector::PartsRanges parts_ranges; IMergeSelector::PartsRanges parts_ranges;
StoragePolicyPtr storage_policy = data.getStoragePolicy(); StoragePolicyPtr storage_policy = data.getStoragePolicy();
/// Volumes with stopped merges are extremely rare situation.
/// Check it once and don't check each part (this is bad for performance).
bool has_volumes_with_disabled_merges = storage_policy->hasAnyVolumeWithDisabledMerges();
const String * prev_partition_id = nullptr; const String * prev_partition_id = nullptr;
/// Previous part only in boundaries of partition frame /// Previous part only in boundaries of partition frame
@ -277,7 +280,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
part_info.data = &amp;part; part_info.data = &amp;part;
part_info.ttl_infos = &part->ttl_infos; part_info.ttl_infos = &part->ttl_infos;
part_info.compression_codec_desc = part->default_codec->getFullCodecDesc(); part_info.compression_codec_desc = part->default_codec->getFullCodecDesc();
part_info.shall_participate_in_merges = part->shallParticipateInMerges(storage_policy); part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? part->shallParticipateInMerges(storage_policy) : true;
parts_ranges.back().emplace_back(part_info); parts_ranges.back().emplace_back(part_info);

View File

@ -97,12 +97,16 @@ bool allow(
double min_age, double min_age,
double range_size, double range_size,
double partition_size, double partition_size,
double min_size_to_lower_base_log,
double max_size_to_lower_base_log,
const SimpleMergeSelector::Settings & settings) const SimpleMergeSelector::Settings & settings)
{ {
// std::cerr << "sum_size: " << sum_size << "\n"; // std::cerr << "sum_size: " << sum_size << "\n";
/// Map size to 0..1 using logarithmic scale /// Map size to 0..1 using logarithmic scale
double size_normalized = mapPiecewiseLinearToUnit(log1p(sum_size), log1p(settings.min_size_to_lower_base), log1p(settings.max_size_to_lower_base)); /// Use log(1 + x) instead of log1p(x) because our sum_size is always integer.
/// Also log1p seems to be slow and significantly affect performance of merges assignment.
double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), min_size_to_lower_base_log, max_size_to_lower_base_log);
// std::cerr << "size_normalized: " << size_normalized << "\n"; // std::cerr << "size_normalized: " << size_normalized << "\n";
@ -141,7 +145,9 @@ void selectWithinPartition(
const SimpleMergeSelector::PartsRange & parts, const SimpleMergeSelector::PartsRange & parts,
const size_t max_total_size_to_merge, const size_t max_total_size_to_merge,
Estimator & estimator, Estimator & estimator,
const SimpleMergeSelector::Settings & settings) const SimpleMergeSelector::Settings & settings,
double min_size_to_lower_base_log,
double max_size_to_lower_base_log)
{ {
size_t parts_count = parts.size(); size_t parts_count = parts.size();
if (parts_count <= 1) if (parts_count <= 1)
@ -178,7 +184,7 @@ void selectWithinPartition(
if (max_total_size_to_merge && sum_size > max_total_size_to_merge) if (max_total_size_to_merge && sum_size > max_total_size_to_merge)
break; break;
if (allow(sum_size, max_size, min_age, end - begin, parts_count, settings)) if (allow(sum_size, max_size, min_age, end - begin, parts_count, min_size_to_lower_base_log, max_size_to_lower_base_log, settings))
estimator.consider( estimator.consider(
parts.begin() + begin, parts.begin() + begin,
parts.begin() + end, parts.begin() + end,
@ -198,8 +204,12 @@ SimpleMergeSelector::PartsRange SimpleMergeSelector::select(
{ {
Estimator estimator; Estimator estimator;
/// Precompute logarithm of settings boundaries, because log function is quite expensive in terms of performance
const double min_size_to_lower_base_log = log(1 + settings.min_size_to_lower_base);
const double max_size_to_lower_base_log = log(1 + settings.max_size_to_lower_base);
for (const auto & part_range : parts_ranges) for (const auto & part_range : parts_ranges)
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings); selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings, min_size_to_lower_base_log, max_size_to_lower_base_log);
return estimator.getBest(); return estimator.getBest();
} }

View File

@ -6,7 +6,7 @@
namespace DB namespace DB
{ {
class SimpleMergeSelector : public IMergeSelector class SimpleMergeSelector final : public IMergeSelector
{ {
public: public:
struct Settings struct Settings