2016-10-27 23:28:35 +00:00
|
|
|
#include <DB/Storages/MergeTree/SimpleMergeSelector.h>
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
#include <cmath>
|
2016-10-27 23:28:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
/** Estimates best set of parts to merge within passed alternatives.
|
|
|
|
*/
|
2016-10-27 23:28:35 +00:00
|
|
|
struct Estimator
|
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
using Iterator = SimpleMergeSelector::PartsInPartition::const_iterator;
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
void consider(Iterator begin, Iterator end, size_t sum_size, size_t size_next_at_left, size_t size_next_at_right)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
double current_score = score(end - begin, sum_size);
|
2016-10-30 08:15:55 +00:00
|
|
|
|
|
|
|
if (size_next_at_left > sum_size * 0.9)
|
|
|
|
{
|
|
|
|
double difference = abs(log2(static_cast<double>(sum_size) / size_next_at_left));
|
|
|
|
if (difference < 0.5)
|
|
|
|
current_score *= 0.75 + difference * 0.5;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (size_next_at_right == 0)
|
|
|
|
current_score *= 0.9;
|
|
|
|
|
|
|
|
if (size_next_at_right > sum_size * 0.9)
|
|
|
|
{
|
|
|
|
double difference = abs(log2(static_cast<double>(sum_size) / size_next_at_right));
|
|
|
|
if (difference < 0.5)
|
|
|
|
current_score *= 0.75 + difference * 0.5;
|
|
|
|
}
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
if (!min_score || current_score < min_score)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
2016-10-30 03:12:25 +00:00
|
|
|
min_score = current_score;
|
2016-10-30 03:39:28 +00:00
|
|
|
best_begin = begin;
|
|
|
|
best_end = end;
|
2016-10-27 23:28:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
SimpleMergeSelector::PartsInPartition getBest()
|
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
return SimpleMergeSelector::PartsInPartition(best_begin, best_end);
|
2016-10-27 23:28:35 +00:00
|
|
|
}
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
static double score(double count, double sum_size)
|
|
|
|
{
|
|
|
|
/** Consider we have two alternative ranges of data parts to merge.
|
|
|
|
* Assume time to merge a range is proportional to sum size of its parts.
|
|
|
|
*
|
|
|
|
* Cost of query execution is proportional to total number of data parts in a moment of time.
|
|
|
|
* Let define our target: to minimize average (in time) total number of data parts.
|
|
|
|
*
|
|
|
|
* Let calculate integral of total number of parts, if we are going to do merge of one or another range.
|
|
|
|
* It must be lower, and thus we decide, what range is better to merge.
|
|
|
|
*
|
|
|
|
* The integral is lower iff the following formula is lower:
|
|
|
|
*/
|
|
|
|
return sum_size / (count - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
double min_score = 0;
|
2016-10-30 03:39:28 +00:00
|
|
|
Iterator best_begin;
|
|
|
|
Iterator best_end;
|
2016-10-27 23:28:35 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
void selectWithinPartition(
|
|
|
|
const SimpleMergeSelector::PartsInPartition & parts,
|
2016-10-27 23:28:35 +00:00
|
|
|
const size_t max_total_size_to_merge,
|
2016-10-30 08:15:55 +00:00
|
|
|
Estimator & estimator,
|
|
|
|
const SimpleMergeSelector::Settings & settings)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
|
|
|
if (parts.size() <= 1)
|
|
|
|
return;
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
double actual_base = settings.base;
|
|
|
|
|
|
|
|
if (parts.back().age > settings.lower_base_after)
|
2016-10-30 08:15:55 +00:00
|
|
|
{
|
|
|
|
actual_base -= log2(parts.back().age - settings.lower_base_after);
|
|
|
|
if (actual_base < 1)
|
|
|
|
actual_base = 1;
|
|
|
|
}
|
2016-10-30 03:12:25 +00:00
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
if (parts.size() <= actual_base)
|
2016-10-30 03:39:28 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
size_t parts_count = parts.size();
|
|
|
|
size_t prefix_sum = 0;
|
|
|
|
std::vector<size_t> prefix_sums(parts.size() + 1);
|
2016-10-27 23:28:35 +00:00
|
|
|
|
2016-10-30 03:39:28 +00:00
|
|
|
for (size_t i = 0; i < parts_count; ++i)
|
|
|
|
{
|
|
|
|
prefix_sum += parts[i].size;
|
|
|
|
prefix_sums[i + 1] = prefix_sum;
|
|
|
|
}
|
2016-10-27 23:28:35 +00:00
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
Estimator local_estimator;
|
|
|
|
|
2016-10-30 03:39:28 +00:00
|
|
|
for (size_t begin = 0; begin < parts_count; ++begin)
|
|
|
|
{
|
|
|
|
for (size_t end = begin + 1 + actual_base; end <= parts_count; ++end)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
if (settings.max_parts_to_merge_at_once && end - begin > settings.max_parts_to_merge_at_once)
|
2016-10-27 23:28:35 +00:00
|
|
|
break;
|
|
|
|
|
2016-10-30 03:39:28 +00:00
|
|
|
size_t sum_size = prefix_sums[end] - prefix_sums[begin];
|
2016-10-27 23:28:35 +00:00
|
|
|
|
2016-10-30 03:39:28 +00:00
|
|
|
if (max_total_size_to_merge && sum_size > max_total_size_to_merge)
|
2016-10-27 23:28:35 +00:00
|
|
|
break;
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
local_estimator.consider(
|
|
|
|
parts.begin() + begin,
|
|
|
|
parts.begin() + end,
|
|
|
|
sum_size,
|
|
|
|
begin == 0 ? 0 : parts[begin - 1].size,
|
|
|
|
end == parts_count ? 0 : parts[end].size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
size_t sum_size = 0;
|
|
|
|
size_t max_size = 0;
|
|
|
|
|
|
|
|
for (auto it = local_estimator.best_begin; it != local_estimator.best_end; ++it)
|
|
|
|
{
|
|
|
|
sum_size += it->size;
|
|
|
|
if (it->size > max_size)
|
|
|
|
max_size = it->size;
|
2016-10-27 23:28:35 +00:00
|
|
|
}
|
2016-10-30 08:15:55 +00:00
|
|
|
|
|
|
|
if (static_cast<double>(sum_size) / max_size >= actual_base)
|
|
|
|
estimator.consider(local_estimator.best_begin, local_estimator.best_end, sum_size, 0, 0);
|
2016-10-27 23:28:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
2016-10-30 08:15:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
/// Runs the selection over every partition with one shared estimator and returns the best range found.
SimpleMergeSelector::PartsInPartition SimpleMergeSelector::select(
    const Partitions & partitions,
    const size_t max_total_size_to_merge)
{
    /// Collects the winner across all partitions.
    Estimator best_over_all_partitions;

    for (const auto & parts_in_partition : partitions)
    {
        selectWithinPartition(parts_in_partition, max_total_size_to_merge, best_over_all_partitions, settings);
    }

    return best_over_all_partitions.getBest();
}
|
|
|
|
|
|
|
|
}
|