ClickHouse/dbms/DataStreams/GraphiteRollupSortedBlockInputStream.h

242 lines
7.9 KiB
C++
Raw Normal View History

2016-04-24 09:44:47 +00:00
#pragma once
#include <common/logger_useful.h>
#include <Core/ColumnNumbers.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Common/OptimizedRegularExpression.h>
#include <Common/AlignedBuffer.h>
2016-04-24 09:44:47 +00:00
namespace DB
{
/** Intended for implementation of "rollup" - aggregation (rounding) of older data
 * for a table with Graphite data (Graphite is a system for time series monitoring).
 *
 * A table with Graphite data has at least the following columns (up to naming: the actual column names are configurable):
 * Path, Time, Value, Version
 *
 * Path - name of the metric (sensor);
 * Time - time of measurement;
 * Value - value of measurement;
 * Version - a number; among rows with equal (Path, Time) pairs, only the row with the maximum version is kept.
 *
 * Each row in the table corresponds to one value of one sensor.
 *
 * A pattern should contain a function, a retention scheme, or both of them. The order of patterns matters as well:
 * * Aggregation OR retention patterns should come first
 * * Then the full patterns (aggregation AND retention) have to be placed
 * * The default pattern without regexp must be the last
 *
 * Rollup rules are specified in the following way:
 *
 * pattern
 *     regexp
 *     function
 * pattern
 *     regexp
 *     age -> precision
 *     age -> precision
 *     ...
 * pattern
 *     regexp
 *     function
 *     age -> precision
 *     age -> precision
 *     ...
 * pattern
 *     ...
 * default
 *     function
 *     age -> precision
 *     ...
 *
 * regexp - pattern for the sensor name
 * default - the rule used if no pattern has matched
 *
 * age - minimal data age (in seconds) at which rounding with the specified precision starts.
 * precision - rounding precision (in seconds)
 *
 * function - name of the aggregate function to be applied to values whose time was rounded to the same value.
 *
 * Example:
 *
 * <graphite_rollup>
 *     <pattern>
 *         <regexp>\.max$</regexp>
 *         <function>max</function>
 *     </pattern>
 *     <pattern>
 *         <regexp>click_cost</regexp>
 *         <function>any</function>
 *         <retention>
 *             <age>0</age>
 *             <precision>5</precision>
 *         </retention>
 *         <retention>
 *             <age>86400</age>
 *             <precision>60</precision>
 *         </retention>
 *     </pattern>
 *     <default>
 *         <function>max</function>
 *         <retention>
 *             <age>0</age>
 *             <precision>60</precision>
 *         </retention>
 *         <retention>
 *             <age>3600</age>
 *             <precision>300</precision>
 *         </retention>
 *         <retention>
 *             <age>86400</age>
 *             <precision>3600</precision>
 *         </retention>
 *     </default>
 * </graphite_rollup>
 */
/// Types describing the rollup configuration for a table with Graphite data.
namespace Graphite
{
    /// One retention step: `age` is the minimal data age (in seconds) at which
    /// rounding of time to `precision` (in seconds) starts.
    struct Retention
    {
        UInt32 age = 0;
        UInt32 precision = 0;
    };

    using Retentions = std::vector<Retention>;

    /// A single rollup pattern: an optional regexp for the metric path together with
    /// an aggregate function, a retention scheme, or both.
    struct Pattern
    {
        /// Regular expression for the metric path; may be null (e.g. for a pattern without regexp).
        std::shared_ptr<OptimizedRegularExpression> regexp;
        /// NOTE(review): presumably the source text of `regexp` - confirm against the code that fills it.
        std::string regexp_str;
        /// Aggregate function applied to values; may be null.
        AggregateFunctionPtr function;
        Retentions retentions;    /// Must be ordered by 'age' descending.
        /// The type of defined pattern, filled automatically.
        /// NOTE(review): presumably TypeRetention / TypeAggregation mean that only retentions / only a function
        /// are defined, and TypeAll means both - confirm against the code that fills it.
        enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll;
    };

    using Patterns = std::vector<Pattern>;

    /// Both aliases name the same struct; they only document which part of a pattern a given use relies on.
    using RetentionPattern = Pattern;
    using AggregationPattern = Pattern;

    /// Rollup parameters: names of the columns involved and the list of patterns.
    struct Params
    {
        String config_name;
        String path_column_name;
        String time_column_name;
        String value_column_name;
        String version_column_name;
        Graphite::Patterns patterns;
    };

    /// Pair of non-owning pointers: (pattern providing retentions, pattern providing the aggregate function).
    using RollupRule = std::pair<const RetentionPattern *, const AggregationPattern *>;
}
/** Merges several sorted streams into one.
 *
 * For each group of consecutive identical values of the `path` column
 * and the same `time` values, rounded to some precision
 * (where the rounding precision depends on the pattern matched by `path`
 * and the amount of time elapsed from `time` to the specified time),
 * keeps one row,
 * performing the rounding of time,
 * merging `value` values using the specified aggregate functions,
 * as well as keeping the maximum value of the `version` column.
 */
class GraphiteRollupSortedBlockInputStream : public MergingSortedBlockInputStream
{
public:
GraphiteRollupSortedBlockInputStream(
const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_,
2019-08-03 11:02:40 +00:00
const Graphite::Params & params_, time_t time_of_merge_);
2016-04-24 09:44:47 +00:00
String getName() const override { return "GraphiteRollupSorted"; }
2016-04-24 09:44:47 +00:00
~GraphiteRollupSortedBlockInputStream() override
{
if (aggregate_state_created)
std::get<1>(current_rule)->function->destroy(place_for_aggregate_state.data());
}
2016-04-24 09:44:47 +00:00
protected:
Block readImpl() override;
2016-04-24 09:44:47 +00:00
private:
Logger * log = &Logger::get("GraphiteRollupSortedBlockInputStream");
2016-04-24 09:44:47 +00:00
const Graphite::Params params;
2016-04-24 09:44:47 +00:00
size_t path_column_num;
size_t time_column_num;
size_t value_column_num;
size_t version_column_num;
2016-04-24 09:44:47 +00:00
/// All columns other than 'time', 'value', 'version'. They are unmodified during rollup.
ColumnNumbers unmodified_column_numbers;
2016-04-24 09:44:47 +00:00
time_t time_of_merge;
2016-04-24 09:44:47 +00:00
/// No data has been read.
bool is_first = true;
/// All data has been read.
bool finished = false;
2016-04-24 09:44:47 +00:00
/* | path | time | rounded_time | version | value | unmodified |
* -----------------------------------------------------------------------------------
* | A | 11 | 10 | 1 | 1 | a | |
* | A | 11 | 10 | 3 | 2 | b |> subgroup(A, 11) |
* | A | 11 | 10 | 2 | 3 | c | |> group(A, 10)
* ----------------------------------------------------------------------------------|>
* | A | 12 | 10 | 0 | 4 | d | |> Outputs (A, 10, avg(2, 5), a)
* | A | 12 | 10 | 1 | 5 | e |> subgroup(A, 12) |
* -----------------------------------------------------------------------------------
* | A | 21 | 20 | 1 | 6 | f |
* | B | 11 | 10 | 1 | 7 | g |
* ...
*/
/// Path name of current bucket
StringRef current_group_path;
/// Last row with maximum version for current primary key (time bucket).
2019-04-19 13:38:25 +00:00
SharedBlockRowRef current_subgroup_newest_row;
/// Time of last read row
time_t current_time = 0;
time_t current_time_rounded = 0;
2016-04-24 09:44:47 +00:00
Graphite::RollupRule current_rule = {nullptr, nullptr};
AlignedBuffer place_for_aggregate_state;
bool aggregate_state_created = false; /// Invariant: if true then current_rule is not NULL.
const Graphite::Pattern undef_pattern =
{ /// temporary empty pattern for selectPatternForPath
nullptr,
2019-03-07 20:17:06 +00:00
"",
nullptr,
DB::Graphite::Retentions(),
undef_pattern.TypeUndef,
};
Graphite::RollupRule selectPatternForPath(StringRef path) const;
UInt32 selectPrecision(const Graphite::Retentions & retentions, time_t time) const;
2016-04-24 09:44:47 +00:00
2019-12-22 00:19:07 +00:00
void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);
2016-04-24 09:44:47 +00:00
2017-05-13 22:19:04 +00:00
/// Insert the values into the resulting columns, which will not be changed in the future.
template <typename TSortCursor>
2020-03-08 22:38:12 +00:00
void startNextGroup(MutableColumns & merged_columns, TSortCursor & cursor, Graphite::RollupRule next_rule);
2016-04-24 09:44:47 +00:00
2017-05-13 22:19:04 +00:00
/// Insert the calculated `time`, `value`, `version` values into the resulting columns by the last group of rows.
void finishCurrentGroup(MutableColumns & merged_columns);
2016-04-24 09:44:47 +00:00
2017-05-13 22:19:04 +00:00
/// Update the state of the aggregate function with the new `value`.
2019-04-19 13:38:25 +00:00
void accumulateRow(SharedBlockRowRef & row);
2016-04-24 09:44:47 +00:00
};
}