mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Merge pull request #16191 from ClickHouse/blind_fix_of_read_in_order
Blind performance fix of read_in_order_many_parts
This commit is contained in:
commit
10d2070fbe
@ -307,6 +307,13 @@ void StoragePolicy::buildVolumeIndices()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Scans `volumes` and returns true as soon as one of them reports areMergesAvoided();
/// returns false when no volume does (including when `volumes` is empty).
bool StoragePolicy::hasAnyVolumeWithDisabledMerges() const
|
||||||
|
{
|
||||||
|
for (const auto & volume : volumes)
|
||||||
|
if (volume->areMergesAvoided())
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
StoragePolicySelector::StoragePolicySelector(
|
StoragePolicySelector::StoragePolicySelector(
|
||||||
const Poco::Util::AbstractConfiguration & config,
|
const Poco::Util::AbstractConfiguration & config,
|
||||||
|
@ -88,6 +88,9 @@ public:
|
|||||||
/// Checks if storage policy can be replaced by another one.
|
/// Checks if storage policy can be replaced by another one.
|
||||||
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
|
void checkCompatibleWith(const StoragePolicyPtr & new_storage_policy) const;
|
||||||
|
|
||||||
|
/// Check if we have any volume with stopped merges
|
||||||
|
bool hasAnyVolumeWithDisabledMerges() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Volumes volumes;
|
Volumes volumes;
|
||||||
const String name;
|
const String name;
|
||||||
|
@ -227,6 +227,9 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
|||||||
IMergeSelector::PartsRanges parts_ranges;
|
IMergeSelector::PartsRanges parts_ranges;
|
||||||
|
|
||||||
StoragePolicyPtr storage_policy = data.getStoragePolicy();
|
StoragePolicyPtr storage_policy = data.getStoragePolicy();
|
||||||
|
/// Volumes with stopped merges are extremely rare situation.
|
||||||
|
/// Check it once and don't check each part (this is bad for performance).
|
||||||
|
bool has_volumes_with_disabled_merges = storage_policy->hasAnyVolumeWithDisabledMerges();
|
||||||
|
|
||||||
const String * prev_partition_id = nullptr;
|
const String * prev_partition_id = nullptr;
|
||||||
/// Previous part only in boundaries of partition frame
|
/// Previous part only in boundaries of partition frame
|
||||||
@ -277,7 +280,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
|||||||
part_info.data = &part;
|
part_info.data = &part;
|
||||||
part_info.ttl_infos = &part->ttl_infos;
|
part_info.ttl_infos = &part->ttl_infos;
|
||||||
part_info.compression_codec_desc = part->default_codec->getFullCodecDesc();
|
part_info.compression_codec_desc = part->default_codec->getFullCodecDesc();
|
||||||
part_info.shall_participate_in_merges = part->shallParticipateInMerges(storage_policy);
|
part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? part->shallParticipateInMerges(storage_policy) : true;
|
||||||
|
|
||||||
parts_ranges.back().emplace_back(part_info);
|
parts_ranges.back().emplace_back(part_info);
|
||||||
|
|
||||||
|
@ -97,12 +97,16 @@ bool allow(
|
|||||||
double min_age,
|
double min_age,
|
||||||
double range_size,
|
double range_size,
|
||||||
double partition_size,
|
double partition_size,
|
||||||
|
double min_size_to_lower_base_log,
|
||||||
|
double max_size_to_lower_base_log,
|
||||||
const SimpleMergeSelector::Settings & settings)
|
const SimpleMergeSelector::Settings & settings)
|
||||||
{
|
{
|
||||||
// std::cerr << "sum_size: " << sum_size << "\n";
|
// std::cerr << "sum_size: " << sum_size << "\n";
|
||||||
|
|
||||||
/// Map size to 0..1 using logarithmic scale
|
/// Map size to 0..1 using logarithmic scale
|
||||||
double size_normalized = mapPiecewiseLinearToUnit(log1p(sum_size), log1p(settings.min_size_to_lower_base), log1p(settings.max_size_to_lower_base));
|
/// Use log(1 + x) instead of log1p(x) because our sum_size is always integer.
|
||||||
|
/// Also log1p seems to be slow and significantly affect performance of merges assignment.
|
||||||
|
double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), min_size_to_lower_base_log, max_size_to_lower_base_log);
|
||||||
|
|
||||||
// std::cerr << "size_normalized: " << size_normalized << "\n";
|
// std::cerr << "size_normalized: " << size_normalized << "\n";
|
||||||
|
|
||||||
@ -141,7 +145,9 @@ void selectWithinPartition(
|
|||||||
const SimpleMergeSelector::PartsRange & parts,
|
const SimpleMergeSelector::PartsRange & parts,
|
||||||
const size_t max_total_size_to_merge,
|
const size_t max_total_size_to_merge,
|
||||||
Estimator & estimator,
|
Estimator & estimator,
|
||||||
const SimpleMergeSelector::Settings & settings)
|
const SimpleMergeSelector::Settings & settings,
|
||||||
|
double min_size_to_lower_base_log,
|
||||||
|
double max_size_to_lower_base_log)
|
||||||
{
|
{
|
||||||
size_t parts_count = parts.size();
|
size_t parts_count = parts.size();
|
||||||
if (parts_count <= 1)
|
if (parts_count <= 1)
|
||||||
@ -178,7 +184,7 @@ void selectWithinPartition(
|
|||||||
if (max_total_size_to_merge && sum_size > max_total_size_to_merge)
|
if (max_total_size_to_merge && sum_size > max_total_size_to_merge)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (allow(sum_size, max_size, min_age, end - begin, parts_count, settings))
|
if (allow(sum_size, max_size, min_age, end - begin, parts_count, min_size_to_lower_base_log, max_size_to_lower_base_log, settings))
|
||||||
estimator.consider(
|
estimator.consider(
|
||||||
parts.begin() + begin,
|
parts.begin() + begin,
|
||||||
parts.begin() + end,
|
parts.begin() + end,
|
||||||
@ -198,8 +204,12 @@ SimpleMergeSelector::PartsRange SimpleMergeSelector::select(
|
|||||||
{
|
{
|
||||||
Estimator estimator;
|
Estimator estimator;
|
||||||
|
|
||||||
|
/// Precompute logarithm of settings boundaries, because log function is quite expensive in terms of performance
|
||||||
|
const double min_size_to_lower_base_log = log(1 + settings.min_size_to_lower_base);
|
||||||
|
const double max_size_to_lower_base_log = log(1 + settings.max_size_to_lower_base);
|
||||||
|
|
||||||
for (const auto & part_range : parts_ranges)
|
for (const auto & part_range : parts_ranges)
|
||||||
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings);
|
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings, min_size_to_lower_base_log, max_size_to_lower_base_log);
|
||||||
|
|
||||||
return estimator.getBest();
|
return estimator.getBest();
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class SimpleMergeSelector : public IMergeSelector
|
class SimpleMergeSelector final : public IMergeSelector
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
struct Settings
|
struct Settings
|
||||||
|
Loading…
Reference in New Issue
Block a user