2017-12-20 07:36:30 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <AggregateFunctions/ReservoirSampler.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2021-05-26 11:32:14 +00:00
|
|
|
/// Forward declaration only; Settings is not defined in the visible part of this header.
struct Settings;
|
2017-12-20 07:36:30 +00:00
|
|
|
|
|
|
|
/// Error codes referenced by this header. They are only declared here (extern);
/// the definitions must be provided elsewhere.
namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
2017-12-20 20:25:22 +00:00
|
|
|
/** Quantile calculation with "reservoir sample" algorithm.
|
|
|
|
* It collects pseudorandom subset of limited size from a stream of values,
|
|
|
|
* and approximate quantile from it.
|
|
|
|
* The result is non-deterministic. Also look at QuantileReservoirSamplerDeterministic.
|
|
|
|
*
|
|
|
|
* This algorithm is quite inefficient in terms of precision for memory usage,
|
|
|
|
* but very efficient in CPU (though less efficient than QuantileTiming and than QuantileExact for small sets).
|
|
|
|
*/
|
2017-12-20 07:36:30 +00:00
|
|
|
template <typename Value>
|
|
|
|
struct QuantileReservoirSampler
|
|
|
|
{
|
|
|
|
using Data = ReservoirSampler<Value, ReservoirSamplerOnEmpty::RETURN_NAN_OR_ZERO>;
|
|
|
|
Data data;
|
|
|
|
|
|
|
|
void add(const Value & x)
|
|
|
|
{
|
|
|
|
data.insert(x);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Weight>
|
2017-12-20 08:39:21 +00:00
|
|
|
void add(const Value &, const Weight &)
|
2017-12-20 07:36:30 +00:00
|
|
|
{
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method add with weight is not implemented for ReservoirSampler");
|
2017-12-20 07:36:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void merge(const QuantileReservoirSampler & rhs)
|
|
|
|
{
|
2017-12-20 08:49:56 +00:00
|
|
|
data.merge(rhs.data);
|
2017-12-20 07:36:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(WriteBuffer & buf) const
|
|
|
|
{
|
|
|
|
data.write(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
void deserialize(ReadBuffer & buf)
|
|
|
|
{
|
|
|
|
data.read(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the value of the `level` quantile. The level must be between 0 and 1.
|
2017-12-20 08:49:56 +00:00
|
|
|
Value get(Float64 level)
|
2017-12-20 07:36:30 +00:00
|
|
|
{
|
2022-12-11 03:29:06 +00:00
|
|
|
if (data.empty())
|
|
|
|
return {};
|
|
|
|
|
2022-09-11 04:05:33 +00:00
|
|
|
if constexpr (is_decimal<Value>)
|
|
|
|
return Value(static_cast<typename Value::NativeType>(data.quantileInterpolated(level)));
|
|
|
|
else
|
|
|
|
return static_cast<Value>(data.quantileInterpolated(level));
|
2017-12-20 07:36:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
|
|
|
|
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
|
2017-12-20 08:49:56 +00:00
|
|
|
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
|
2017-12-20 07:36:30 +00:00
|
|
|
{
|
2022-12-09 03:17:54 +00:00
|
|
|
bool is_empty = data.empty();
|
|
|
|
|
2017-12-20 07:36:30 +00:00
|
|
|
for (size_t i = 0; i < size; ++i)
|
2022-12-09 03:17:54 +00:00
|
|
|
{
|
|
|
|
if (is_empty)
|
|
|
|
{
|
|
|
|
result[i] = Value{};
|
|
|
|
}
|
2022-09-11 04:05:33 +00:00
|
|
|
else
|
2022-12-09 03:17:54 +00:00
|
|
|
{
|
|
|
|
if constexpr (is_decimal<Value>)
|
|
|
|
result[indices[i]] = Value(static_cast<typename Value::NativeType>(data.quantileInterpolated(levels[indices[i]])));
|
|
|
|
else
|
|
|
|
result[indices[i]] = Value(data.quantileInterpolated(levels[indices[i]]));
|
|
|
|
}
|
|
|
|
}
|
2017-12-20 07:36:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// The same, but in the case of an empty state, NaN is returned.
|
2017-12-21 01:19:25 +00:00
|
|
|
Float64 getFloat(Float64 level)
|
2017-12-20 07:36:30 +00:00
|
|
|
{
|
|
|
|
return data.quantileInterpolated(level);
|
|
|
|
}
|
|
|
|
|
2017-12-21 01:19:25 +00:00
|
|
|
void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result)
|
2017-12-20 07:36:30 +00:00
|
|
|
{
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
result[indices[i]] = data.quantileInterpolated(levels[indices[i]]);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|