ClickHouse/src/Common/ExponentiallySmoothedCounter.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

113 lines
4.8 KiB
C++
Raw Normal View History

2021-09-12 05:28:57 +00:00
#pragma once
#include <cmath>
#include <limits>
namespace DB
{
/** https://en.wikipedia.org/wiki/Exponential_smoothing
  *
  * An exponentially smoothed average over time is a weighted average where the weight of each value
  * decays exponentially with the time passed since that value was observed.
  * For example, the last value is taken with weight 1/2, the value one second ago with weight 1/4, two seconds ago - 1/8, etc.
  * It can be understood as an average over a sliding window, but with a different kernel.
  *
  * As an advantage, it is easy to update. Instead of collecting values and calculating a series of x1 / 2 + x2 / 4 + x3 / 8...
  * just calculate x_old / 2 + x_new / 2.
  *
  * It is often used for resource usage metrics. For example, "load average" in Linux is an exponentially smoothed moving average.
  * We can use exponentially smoothed counters in the query scheduler.
  */
2021-09-19 17:23:06 +00:00
struct ExponentiallySmoothedAverage
{
    /// The sum. It contains the last value and all previous values scaled according to the difference
    /// between their time and the reference time.
    /// Older values are summed with exponentially smaller coefficients.
    /// To obtain the average, you have to divide this value by the sum of all coefficients (see 'sumWeights').
    double value = 0;

    /// The point of reference. You can translate the value to a different point of reference (see 'remap').
    /// You can imagine that the value exponentially decays over time.
    /// But it is also meaningful to treat the whole counter as a constant over time but in another non-linear coordinate system,
    /// that inflates over time, while the counter itself does not change
    /// (it continues to be the same physical quantity, but only changes its representation in the "usual" coordinate system).
    ///
    /// Recap: the whole counter is one dimensional and it can be represented as a curve formed by two dependent coordinates in 2d plane,
    /// the space can be represented by (value, time) coordinates, and the curves will be exponentially decaying over time,
    /// alternatively the space can be represented by (exponentially_adjusted_value, time) and then the curves will be constant over time.
    ///
    /// Also useful analogy is the exponential representation of a number: x = a * exp(b) = a * e (where e = exp(b))
    /// a number x is represented by a curve in 2d plane that can be parametrized by coordinates (a, b) or (a, e).
    double time = 0;

    ExponentiallySmoothedAverage() = default;

    /// Create a counter holding `current_value` with `current_time` as its point of reference.
    ExponentiallySmoothedAverage(double current_value, double current_time)
        : value(current_value), time(current_time)
    {
    }

    /// How much value decays after time_passed: the multiplier is 2^(-time_passed / half_decay_time).
    /// A negative time_passed gives a multiplier greater than one (the value is "inflated" back in time).
    static double scale(double time_passed, double half_decay_time)
    {
        return std::exp2(-time_passed / half_decay_time);
    }

    /// Sum of weights of all values. Divide by it to get the average.
    /// It is the sum of the geometric series 1 + k + k^2 + ..., where k is the decay over one unit of time.
    static double sumWeights(double half_decay_time)
    {
        double k = scale(1.0, half_decay_time);
        return 1 / (1 - k);
    }

    /// Obtain the same counter in another point of reference.
    ExponentiallySmoothedAverage remap(double current_time, double half_decay_time) const
    {
        return ExponentiallySmoothedAverage(value * scale(current_time - time, half_decay_time), current_time);
    }

    /// Merge two counters. It is done by moving to the same point of reference and summing the values.
    /// The later of the two reference times is used, so the older counter is the one being decayed.
    static ExponentiallySmoothedAverage merge(const ExponentiallySmoothedAverage & a, const ExponentiallySmoothedAverage & b, double half_decay_time)
    {
        if (a.time > b.time)
            return ExponentiallySmoothedAverage(a.value + b.remap(a.time, half_decay_time).value, a.time);
        if (a.time < b.time)
            return ExponentiallySmoothedAverage(b.value + a.remap(b.time, half_decay_time).value, b.time);

        return ExponentiallySmoothedAverage(a.value + b.value, a.time);
    }

    /// Merge another counter into this one (see the static 'merge').
    void merge(const ExponentiallySmoothedAverage & other, double half_decay_time)
    {
        *this = merge(*this, other, half_decay_time);
    }

    /// Account a new observation made at current_time.
    void add(double new_value, double current_time, double half_decay_time)
    {
        merge(ExponentiallySmoothedAverage(new_value, current_time), half_decay_time);
    }

    /// Calculate the average from the sum (at the counter's own point of reference).
    double get(double half_decay_time) const
    {
        return value / sumWeights(half_decay_time);
    }

    /// Calculate the average as seen from current_time.
    double get(double current_time, double half_decay_time) const
    {
        return remap(current_time, half_decay_time).get(half_decay_time);
    }

    /// Compare two counters (by moving to the same point of reference and comparing sums).
    /// You can store the counters in container and sort it without changing the stored values over time.
    ///
    /// The counter with the earlier reference time is moved forward to the later one, the same way 'merge' does it.
    /// Moving a counter backwards in time multiplies it by a factor greater than one that can overflow to infinity,
    /// and then a zero value turns into NaN (0 * inf), which compares false against everything
    /// and makes the ordering inconsistent. Moving forward keeps the factor within [0, 1].
    bool less(const ExponentiallySmoothedAverage & other, double half_decay_time) const
    {
        if (time < other.time)
            return remap(other.time, half_decay_time).value < other.value;
        if (time > other.time)
            return value < other.remap(time, half_decay_time).value;

        return value < other.value;
    }
};
}