2016-10-27 23:28:35 +00:00
|
|
|
#include <DB/Storages/MergeTree/SimpleMergeSelector.h>
|
2016-11-05 01:11:51 +00:00
|
|
|
#include <DB/Common/interpolate.h>
|
2016-10-27 23:28:35 +00:00
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
#include <cmath>
|
2016-10-27 23:28:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
/** Estimates best set of parts to merge within passed alternatives.
|
|
|
|
*/
|
2016-10-27 23:28:35 +00:00
|
|
|
struct Estimator
|
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
using Iterator = SimpleMergeSelector::PartsInPartition::const_iterator;
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
void consider(Iterator begin, Iterator end, size_t sum_size, size_t size_next_at_left, size_t size_next_at_right)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
double current_score = score(end - begin, sum_size);
|
2016-10-30 08:15:55 +00:00
|
|
|
|
|
|
|
if (size_next_at_left > sum_size * 0.9)
|
|
|
|
{
|
2016-10-30 08:25:07 +00:00
|
|
|
double difference = std::abs(log2(static_cast<double>(sum_size) / size_next_at_left));
|
2016-10-30 08:15:55 +00:00
|
|
|
if (difference < 0.5)
|
|
|
|
current_score *= 0.75 + difference * 0.5;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (size_next_at_right == 0)
|
|
|
|
current_score *= 0.9;
|
|
|
|
|
|
|
|
if (size_next_at_right > sum_size * 0.9)
|
|
|
|
{
|
2016-10-30 08:25:07 +00:00
|
|
|
double difference = std::abs(log2(static_cast<double>(sum_size) / size_next_at_right));
|
2016-10-30 08:15:55 +00:00
|
|
|
if (difference < 0.5)
|
|
|
|
current_score *= 0.75 + difference * 0.5;
|
|
|
|
}
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
if (!min_score || current_score < min_score)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
2016-10-30 03:12:25 +00:00
|
|
|
min_score = current_score;
|
2016-10-30 03:39:28 +00:00
|
|
|
best_begin = begin;
|
|
|
|
best_end = end;
|
2016-10-27 23:28:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
SimpleMergeSelector::PartsInPartition getBest()
|
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
return SimpleMergeSelector::PartsInPartition(best_begin, best_end);
|
2016-10-27 23:28:35 +00:00
|
|
|
}
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
static double score(double count, double sum_size)
|
|
|
|
{
|
|
|
|
/** Consider we have two alternative ranges of data parts to merge.
|
|
|
|
* Assume time to merge a range is proportional to sum size of its parts.
|
|
|
|
*
|
|
|
|
* Cost of query execution is proportional to total number of data parts in a moment of time.
|
|
|
|
* Let define our target: to minimize average (in time) total number of data parts.
|
|
|
|
*
|
|
|
|
* Let calculate integral of total number of parts, if we are going to do merge of one or another range.
|
|
|
|
* It must be lower, and thus we decide, what range is better to merge.
|
|
|
|
*
|
|
|
|
* The integral is lower iff the following formula is lower:
|
|
|
|
*/
|
|
|
|
return sum_size / (count - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
double min_score = 0;
|
2016-10-30 03:39:28 +00:00
|
|
|
Iterator best_begin;
|
|
|
|
Iterator best_end;
|
2016-10-27 23:28:35 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2016-10-30 08:15:55 +00:00
|
|
|
void selectWithinPartition(
|
|
|
|
const SimpleMergeSelector::PartsInPartition & parts,
|
2016-10-27 23:28:35 +00:00
|
|
|
const size_t max_total_size_to_merge,
|
2016-11-01 19:20:24 +00:00
|
|
|
const time_t current_min_part_age,
|
2016-10-30 08:15:55 +00:00
|
|
|
Estimator & estimator,
|
|
|
|
const SimpleMergeSelector::Settings & settings)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
size_t parts_count = parts.size();
|
2016-11-01 20:30:40 +00:00
|
|
|
if (parts_count <= 1)
|
|
|
|
return;
|
2016-10-27 23:28:35 +00:00
|
|
|
|
2016-11-05 01:11:51 +00:00
|
|
|
double actual_base = std::max(1.0, std::min(
|
|
|
|
settings.base,
|
|
|
|
std::min(
|
|
|
|
interpolateLinear(settings.base, 1.0, (static_cast<double>(parts_count) - settings.lower_base_after_num_parts_start)
|
|
|
|
/ (settings.lower_base_after_num_parts_end - settings.lower_base_after_num_parts_start)),
|
|
|
|
interpolateLinear(settings.base, 1.0, (static_cast<double>(current_min_part_age) - settings.lower_base_after_seconds_start)
|
|
|
|
/ (settings.lower_base_after_seconds_end - settings.lower_base_after_seconds_start)))));
|
2016-10-30 08:15:55 +00:00
|
|
|
|
2016-10-30 03:39:28 +00:00
|
|
|
for (size_t begin = 0; begin < parts_count; ++begin)
|
|
|
|
{
|
2016-11-18 02:50:28 +00:00
|
|
|
/// If too much parts, select only from first, to avoid complexity.
|
|
|
|
if (begin > 1000)
|
|
|
|
break;
|
|
|
|
|
2016-11-01 20:30:40 +00:00
|
|
|
size_t sum_size = parts[begin].size;
|
|
|
|
size_t max_size = parts[begin].size;
|
|
|
|
|
2016-11-01 08:38:55 +00:00
|
|
|
for (size_t end = begin + 2; end <= parts_count; ++end)
|
2016-10-27 23:28:35 +00:00
|
|
|
{
|
2016-10-30 03:39:28 +00:00
|
|
|
if (settings.max_parts_to_merge_at_once && end - begin > settings.max_parts_to_merge_at_once)
|
2016-10-27 23:28:35 +00:00
|
|
|
break;
|
|
|
|
|
2016-11-01 20:30:40 +00:00
|
|
|
sum_size += parts[end - 1].size;
|
|
|
|
if (parts[end - 1].size > max_size)
|
|
|
|
max_size = parts[end - 1].size;
|
2016-10-27 23:28:35 +00:00
|
|
|
|
2016-10-30 03:39:28 +00:00
|
|
|
if (max_total_size_to_merge && sum_size > max_total_size_to_merge)
|
2016-10-27 23:28:35 +00:00
|
|
|
break;
|
|
|
|
|
2016-11-01 20:30:40 +00:00
|
|
|
if (static_cast<double>(sum_size) / max_size >= actual_base)
|
|
|
|
estimator.consider(
|
|
|
|
parts.begin() + begin,
|
|
|
|
parts.begin() + end,
|
|
|
|
sum_size,
|
|
|
|
begin == 0 ? 0 : parts[begin - 1].size,
|
|
|
|
end == parts_count ? 0 : parts[end].size);
|
2016-10-30 08:15:55 +00:00
|
|
|
}
|
|
|
|
}
|
2016-10-27 23:28:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
2016-10-30 08:15:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
/** Chooses the best range of parts to merge among all partitions.
  *
  * First finds the minimal part age over all partitions, then lets selectWithinPartition
  * feed candidate ranges of every partition into a single Estimator, and returns its best range.
  */
SimpleMergeSelector::PartsInPartition SimpleMergeSelector::select(
    const Partitions & partitions,
    const size_t max_total_size_to_merge)
{
    /// -1 means that no part has been seen yet.
    time_t youngest_part_age = -1;

    for (const auto & parts_in_partition : partitions)
    {
        for (const auto & part : parts_in_partition)
        {
            const bool is_new_minimum = youngest_part_age == -1 || part.age < youngest_part_age;
            if (is_new_minimum)
                youngest_part_age = part.age;
        }
    }

    /// One estimator is shared by all partitions, so the result is the best range overall.
    Estimator estimator;

    for (const auto & parts_in_partition : partitions)
    {
        selectWithinPartition(parts_in_partition, max_total_size_to_merge, youngest_part_age, estimator, settings);
    }

    return estimator.getBest();
}
|
|
|
|
|
|
|
|
}
|