2016-10-27 22:50:02 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <cstddef>
|
2017-01-17 18:03:32 +00:00
|
|
|
#include <ctime>
|
2016-10-27 22:50:02 +00:00
|
|
|
#include <vector>
|
|
|
|
#include <functional>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-10-30 03:12:25 +00:00
|
|
|
/** Interface of an algorithm that selects data parts to merge
  * (merge is also known as "compaction").
  * The following properties depend on it:
  *
  * 1. Number of data parts at some moment in time.
  *    If parts are merged frequently, then data will be represented by a lower number of parts, on average,
  *    but at the cost of higher write amplification.
  *
  * 2. Write amplification ratio: how many times, on average, source data was written
  *    (during initial writes and subsequent merges).
  *
  * The number of parallel merges is controlled outside the scope of this interface.
  */
class IMergeSelector
{
public:
    /// Information about a data part that is relevant to the merge selecting strategy.
    /// Every field has a default initializer, so a default-constructed Part never
    /// carries indeterminate values. Part is still an aggregate (C++14 and later).
    struct Part
    {
        /// Size of data part in bytes.
        size_t size = 0;

        /// Age of this data part, in seconds.
        time_t age = 0;

        /// Depth of the tree of merges by which this part was created. New parts have level zero.
        unsigned level = 0;

        /// Opaque pointer to avoid dependencies (it is not possible to do forward declaration of typedef).
        const void * data = nullptr;

        /// Minimal time, when we need to delete some data from this part.
        time_t min_delete_ttl = 0;

        /// Maximum time, when we will need to drop this part altogether because all rows in it are expired.
        time_t max_delete_ttl = 0;

        /// Minimal time, when we need to recompress this part.
        time_t min_recompress_ttl = 0;

        /// Maximum time, when we need to recompress this part.
        time_t max_recompress_ttl = 0;
    };

    /// Parts belong to partitions. Only parts within the same partition can be merged.
    using PartsInPartition = std::vector<Part>;

    /// Parts are in some specific order. Parts can be merged only in contiguous ranges.
    using Partitions = std::vector<PartsInPartition>;

    /** This function may be called at any frequency, and it must decide whether any merge should be done at all.
      * If it is better not to do any merge, it returns an empty result.
      *
      * partitions - candidate parts, grouped by partition.
      * max_total_size_to_merge - NOTE(review): presumably an upper bound on the total size of the selected parts;
      *  its exact meaning is defined by the implementations, not by this interface.
      */
    virtual PartsInPartition select(
        const Partitions & partitions,
        const size_t max_total_size_to_merge) = 0;

    /// Virtual destructor: implementations may be destroyed through a pointer to the interface.
    virtual ~IMergeSelector() = default;
};
|
|
|
|
|
|
|
|
}
|