mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 18:20:50 +00:00
MergeSelector: development [#METR-21841].
This commit is contained in:
parent
4fd7183049
commit
49602c44a9
@ -6,6 +6,15 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Select parts to merge based on their level.
|
||||
*
|
||||
* Select the first range of at least min_parts_to_merge parts with the minimum level.
|
||||
*
|
||||
* If enough time has passed, lower min_parts_to_merge.
|
||||
* And if there are no ranges of consecutive parts with the same level, and much time has passed,
|
||||
* allow selecting parts of different levels.
|
||||
* This is done to allow further merging when table is not updated.
|
||||
*/
|
||||
class LevelMergeSelector : public IMergeSelector
|
||||
{
|
||||
public:
|
||||
@ -14,6 +23,11 @@ public:
|
||||
size_t min_parts_to_merge = 8;
|
||||
size_t max_parts_to_merge = 100;
|
||||
|
||||
/** min_parts_to_merge will be lowered by 1 after that time.
|
||||
* It will be lowered by 2 after that time * 2^1,
|
||||
* It will be lowered by 3 after that time * 2^2,
|
||||
* and so on, exponentially.
|
||||
*/
|
||||
time_t lower_base_after = 300;
|
||||
};
|
||||
|
||||
|
@ -19,6 +19,11 @@ public:
|
||||
*/
|
||||
double base = 8;
|
||||
|
||||
/** Lower base by 1 after that time.
|
||||
* It will be lowered by 2 after that time * 2^1,
|
||||
* It will be lowered by 3 after that time * 2^2,
|
||||
* and so on, exponentially.
|
||||
*/
|
||||
time_t lower_base_after = 300;
|
||||
|
||||
/// Zero means unlimited.
|
||||
|
@ -10,6 +10,7 @@ namespace
|
||||
{
|
||||
|
||||
/** Estimates best set of parts to merge within passed alternatives.
|
||||
* It is selected simply: by minimal size.
|
||||
*/
|
||||
struct Estimator
|
||||
{
|
||||
@ -49,6 +50,8 @@ void selectWithinPartition(
|
||||
if (parts_size <= 1)
|
||||
return;
|
||||
|
||||
/// Will lower 'min_parts_to_merge' if all parts are old enough.
|
||||
/// NOTE It is called base because it is the base of the logarithm that determines the merge tree depth.
|
||||
double actual_base = settings.min_parts_to_merge;
|
||||
|
||||
if (current_min_part_age > settings.lower_base_after)
|
||||
@ -58,9 +61,11 @@ void selectWithinPartition(
|
||||
actual_base = 2;
|
||||
}
|
||||
|
||||
/// Not enough parts to merge.
|
||||
if (parts.size() < actual_base)
|
||||
return;
|
||||
|
||||
/// To easily calculate sum size in any range.
|
||||
size_t parts_count = parts.size();
|
||||
size_t prefix_sum = 0;
|
||||
std::vector<size_t> prefix_sums(parts.size() + 1);
|
||||
@ -86,9 +91,12 @@ void selectWithinPartition(
|
||||
prev_level = part.level;
|
||||
}
|
||||
|
||||
/// If no ranges of same level - then nothing to merge
|
||||
/// except case when parts are old (much time has passed) and 'base' was lowered to minimum.
|
||||
if (!has_range_of_same_level && actual_base > 2)
|
||||
return;
|
||||
|
||||
/// For each level, try to select range of parts with that level.
|
||||
for (size_t level = 0; level <= max_level; ++level)
|
||||
{
|
||||
bool in_range = false;
|
||||
@ -97,6 +105,7 @@ void selectWithinPartition(
|
||||
|
||||
for (size_t i = 0; i <= parts_size; ++i)
|
||||
{
|
||||
/// But if !has_range_of_same_level - it is allowed to select parts with any different levels.
|
||||
if (i < parts_size && (parts[i].level == level || !has_range_of_same_level))
|
||||
{
|
||||
if (!in_range)
|
||||
@ -114,8 +123,11 @@ void selectWithinPartition(
|
||||
|
||||
size_t range_size = range_end - range_begin;
|
||||
|
||||
/// Size of range is enough.
|
||||
if (range_size >= actual_base)
|
||||
{
|
||||
/// If size of range is larger than 'max_parts_to_merge' - split it into subranges of almost equal sizes.
|
||||
/// For example, if 'max_parts_to_merge' == 100 and 'range_size' = 101, split it into subranges of sizes 50 and 51.
|
||||
size_t num_subranges = (range_size + settings.max_parts_to_merge - 1) / settings.max_parts_to_merge;
|
||||
|
||||
for (size_t subrange_index = 0; subrange_index < num_subranges; ++subrange_index)
|
||||
@ -125,6 +137,7 @@ void selectWithinPartition(
|
||||
|
||||
size_t size_of_subrange = prefix_sums[subrange_end] - prefix_sums[subrange_begin];
|
||||
|
||||
/// Don't consider this range if its size is too large.
|
||||
if (!max_total_size_to_merge || size_of_subrange <= max_total_size_to_merge)
|
||||
estimator.consider(parts.begin() + subrange_begin, parts.begin() + subrange_end, size_of_subrange);
|
||||
}
|
||||
@ -133,6 +146,7 @@ void selectWithinPartition(
|
||||
}
|
||||
}
|
||||
|
||||
/// If we don't care about levels, the first iteration was enough.
|
||||
if (!has_range_of_same_level)
|
||||
break;
|
||||
}
|
||||
|
@ -30,3 +30,6 @@ target_link_libraries (remove_symlink_directory dbms)
|
||||
|
||||
add_executable (merge_selector merge_selector.cpp)
|
||||
target_link_libraries (merge_selector dbms)
|
||||
|
||||
add_executable (merge_selector2 merge_selector2.cpp)
|
||||
target_link_libraries (merge_selector2 dbms)
|
||||
|
45
dbms/src/Storages/tests/merge_selector2.cpp
Normal file
45
dbms/src/Storages/tests/merge_selector2.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <DB/IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <DB/IO/ReadHelpers.h>
|
||||
#include <DB/IO/Operators.h>
|
||||
#include <DB/Storages/MergeTree/SimpleMergeSelector.h>
|
||||
#include <DB/Storages/MergeTree/LevelMergeSelector.h>
|
||||
|
||||
|
||||
/** This program tests merge-selecting algorithm.
|
||||
* Pass parts properties to stdin: (size, age, level, name) in tab separated format,
|
||||
* and it will select and print parts to merge.
|
||||
*/
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
using namespace DB;
|
||||
|
||||
IMergeSelector::Partitions partitions(1);
|
||||
IMergeSelector::PartsInPartition & parts = partitions.back();
|
||||
|
||||
LevelMergeSelector::Settings settings;
|
||||
LevelMergeSelector selector(settings);
|
||||
|
||||
ReadBufferFromFileDescriptor in(STDIN_FILENO);
|
||||
|
||||
std::list<std::string> part_names;
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
part_names.emplace_back();
|
||||
IMergeSelector::Part part;
|
||||
in >> part.size >> "\t" >> part.age >> "\t" >> part.level >> "\t" >> part_names.back() >> "\n";
|
||||
part.data = part_names.back().data();
|
||||
parts.emplace_back(part);
|
||||
}
|
||||
|
||||
IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 0);
|
||||
|
||||
for (const auto & part : selected_parts)
|
||||
std::cout << static_cast<const char *>(part.data) << ' ';
|
||||
std::cout << '\n';
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user