2013-07-27 19:57:45 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
2013-09-24 19:56:33 +00:00
|
|
|
|
#include <limits>
|
|
|
|
|
|
2014-05-03 22:57:43 +00:00
|
|
|
|
#include <DB/Common/MemoryTracker.h>
|
2015-11-15 09:14:22 +00:00
|
|
|
|
#include <DB/Common/HashTable/Hash.h>
|
2014-05-03 22:57:43 +00:00
|
|
|
|
|
2013-07-27 19:57:45 +00:00
|
|
|
|
#include <DB/IO/WriteHelpers.h>
|
|
|
|
|
#include <DB/IO/ReadHelpers.h>
|
|
|
|
|
|
|
|
|
|
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
|
|
|
|
#include <DB/DataTypes/DataTypeArray.h>
|
|
|
|
|
|
|
|
|
|
#include <DB/AggregateFunctions/IUnaryAggregateFunction.h>
|
2015-11-15 05:52:41 +00:00
|
|
|
|
#include <DB/AggregateFunctions/IBinaryAggregateFunction.h>
|
2015-11-23 21:33:43 +00:00
|
|
|
|
#include <DB/AggregateFunctions/QuantilesCommon.h>
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
|
|
|
|
#include <DB/Columns/ColumnArray.h>
|
|
|
|
|
|
2015-10-05 00:33:43 +00:00
|
|
|
|
#include <ext/range.hpp>
|
2013-10-20 03:57:28 +00:00
|
|
|
|
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/** Вычисляет квантиль для времени в миллисекундах, меньшего 30 сек.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
* Если значение больше 30 сек, то значение приравнивается к 30 сек.
|
|
|
|
|
*
|
2016-03-12 07:22:58 +00:00
|
|
|
|
* Если всего значений не больше примерно 5670, то вычисление точное.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
*
|
|
|
|
|
* Иначе:
|
|
|
|
|
* Если время меньше 1024 мс., то вычисление точное.
|
|
|
|
|
* Иначе вычисление идёт с округлением до числа, кратного 16 мс.
|
2016-03-12 07:22:58 +00:00
|
|
|
|
*
|
|
|
|
|
* Используется три разные структуры данных:
|
|
|
|
|
* - плоский массив (всех встреченных значений) фиксированной длины, выделяемый inplace, размер 64 байта; хранит 0..31 значений;
|
|
|
|
|
* - плоский массив (всех встреченных значений), выделяемый отдельно, увеличивающейся длины;
|
|
|
|
|
* - гистограмма (то есть, отображение значение -> количество), состоящая из двух частей:
|
|
|
|
|
* -- для значений от 0 до 1023 - с шагом 1;
|
|
|
|
|
* -- для значений от 1024 до 30000 - с шагом 16;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
*/
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Capacity (number of UInt16 values) of the fixed-size in-place array used by QuantileTimingTiny.
#define TINY_MAX_ELEMS 31
|
2013-07-27 19:57:45 +00:00
|
|
|
|
/// Upper bound for tracked timings. The flat-array structures clamp larger inserted values to this number;
/// it is also the value returned when a quantile falls beyond the histogram, and the end-of-data marker
/// of the sparse histogram serialization.
#define BIG_THRESHOLD 30000
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
namespace detail
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
/** Вспомогательная структура для оптимизации в случае маленького количества значений
|
|
|
|
|
* - плоский массив фиксированного размера "на стеке", в который кладутся все встреченные значения подряд.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
* Размер - 64 байта. Должна быть POD-типом (используется в union).
|
|
|
|
|
*/
|
|
|
|
|
struct QuantileTimingTiny
|
|
|
|
|
{
|
|
|
|
|
mutable UInt16 elems[TINY_MAX_ELEMS]; /// mutable потому что сортировка массива не считается изменением состояния.
|
2016-03-12 07:22:58 +00:00
|
|
|
|
/// Важно, чтобы count был в конце структуры, так как начало структуры будет впоследствии перезатёрто другими объектами.
|
|
|
|
|
/// Вы должны сами инициализировать его нулём.
|
|
|
|
|
/// Почему? Поле count переиспользуется и в тех случаях, когда в union-е лежат другие структуры
|
|
|
|
|
/// (размер которых не дотягивает до этого поля.)
|
|
|
|
|
UInt16 count;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Можно использовать только пока count < TINY_MAX_ELEMS.
|
|
|
|
|
void insert(UInt64 x)
|
|
|
|
|
{
|
|
|
|
|
if (unlikely(x > BIG_THRESHOLD))
|
|
|
|
|
x = BIG_THRESHOLD;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
elems[count] = x;
|
|
|
|
|
++count;
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Можно использовать только пока count + rhs.count <= TINY_MAX_ELEMS.
|
|
|
|
|
void merge(const QuantileTimingTiny & rhs)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < rhs.count; ++i)
|
|
|
|
|
{
|
|
|
|
|
elems[count] = rhs.elems[i];
|
|
|
|
|
++count;
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
void serialize(WriteBuffer & buf) const
|
|
|
|
|
{
|
|
|
|
|
writeBinary(count, buf);
|
|
|
|
|
buf.write(reinterpret_cast<const char *>(elems), count * sizeof(elems[0]));
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
void deserialize(ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
readBinary(count, buf);
|
|
|
|
|
buf.readStrict(reinterpret_cast<char *>(elems), count * sizeof(elems[0]));
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/** Эту функцию обязательно нужно позвать перед get-функциями. */
|
|
|
|
|
void prepare() const
|
|
|
|
|
{
|
|
|
|
|
std::sort(elems, elems + count);
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
UInt16 get(double level) const
|
|
|
|
|
{
|
|
|
|
|
return level != 1
|
|
|
|
|
? elems[static_cast<size_t>(count * level)]
|
|
|
|
|
: elems[count - 1];
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
template <typename ResultType>
|
|
|
|
|
void getMany(const double * levels, size_t size, ResultType * result) const
|
|
|
|
|
{
|
|
|
|
|
const double * levels_end = levels + size;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
while (levels != levels_end)
|
|
|
|
|
{
|
|
|
|
|
*result = get(*levels);
|
|
|
|
|
++levels;
|
|
|
|
|
++result;
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// То же самое, но в случае пустого состояния возвращается NaN.
|
|
|
|
|
float getFloat(double level) const
|
|
|
|
|
{
|
|
|
|
|
return count
|
|
|
|
|
? get(level)
|
|
|
|
|
: std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
void getManyFloat(const double * levels, size_t size, float * result) const
|
|
|
|
|
{
|
|
|
|
|
if (count)
|
|
|
|
|
getMany(levels, size, result);
|
|
|
|
|
else
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
result[i] = std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
|
|
|
|
};
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
/** Вспомогательная структура для оптимизации в случае среднего количества значений
|
|
|
|
|
* - плоский массив, выделенный отдельно, в который кладутся все встреченные значения подряд.
|
|
|
|
|
*/
|
|
|
|
|
struct QuantileTimingMedium
|
|
|
|
|
{
|
|
|
|
|
/// sizeof - 24 байта.
|
2016-04-15 16:07:58 +00:00
|
|
|
|
using Array = PODArray<UInt16, 128>;
|
2016-03-11 21:43:59 +00:00
|
|
|
|
mutable Array elems; /// mutable потому что сортировка массива не считается изменением состояния.
|
|
|
|
|
|
|
|
|
|
QuantileTimingMedium() {}
|
|
|
|
|
QuantileTimingMedium(const UInt16 * begin, const UInt16 * end) : elems(begin, end) {}
|
|
|
|
|
|
|
|
|
|
void insert(UInt64 x)
|
|
|
|
|
{
|
|
|
|
|
if (unlikely(x > BIG_THRESHOLD))
|
|
|
|
|
x = BIG_THRESHOLD;
|
|
|
|
|
|
|
|
|
|
elems.emplace_back(x);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void merge(const QuantileTimingMedium & rhs)
|
|
|
|
|
{
|
|
|
|
|
elems.insert(rhs.elems.begin(), rhs.elems.end());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void serialize(WriteBuffer & buf) const
|
|
|
|
|
{
|
|
|
|
|
writeBinary(elems.size(), buf);
|
|
|
|
|
buf.write(reinterpret_cast<const char *>(&elems[0]), elems.size() * sizeof(elems[0]));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void deserialize(ReadBuffer & buf)
|
|
|
|
|
{
|
|
|
|
|
size_t size = 0;
|
|
|
|
|
readBinary(size, buf);
|
|
|
|
|
elems.resize(size);
|
|
|
|
|
buf.readStrict(reinterpret_cast<char *>(&elems[0]), size * sizeof(elems[0]));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
UInt16 get(double level) const
|
|
|
|
|
{
|
|
|
|
|
UInt16 quantile = 0;
|
|
|
|
|
|
|
|
|
|
if (!elems.empty())
|
|
|
|
|
{
|
|
|
|
|
size_t n = level < 1
|
|
|
|
|
? level * elems.size()
|
|
|
|
|
: (elems.size() - 1);
|
|
|
|
|
|
|
|
|
|
/// Сортировка массива не будет считаться нарушением константности.
|
|
|
|
|
auto & array = const_cast<Array &>(elems);
|
|
|
|
|
std::nth_element(array.begin(), array.begin() + n, array.end());
|
|
|
|
|
quantile = array[n];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return quantile;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename ResultType>
|
|
|
|
|
void getMany(const double * levels, const size_t * levels_permutation, size_t size, ResultType * result) const
|
|
|
|
|
{
|
|
|
|
|
size_t prev_n = 0;
|
|
|
|
|
auto & array = const_cast<Array &>(elems);
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
{
|
|
|
|
|
auto level_index = levels_permutation[i];
|
|
|
|
|
auto level = levels[level_index];
|
|
|
|
|
|
|
|
|
|
size_t n = level < 1
|
|
|
|
|
? level * elems.size()
|
|
|
|
|
: (elems.size() - 1);
|
|
|
|
|
|
|
|
|
|
std::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
|
|
|
|
|
|
|
|
|
result[level_index] = array[n];
|
|
|
|
|
prev_n = n;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// То же самое, но в случае пустого состояния возвращается NaN.
|
|
|
|
|
float getFloat(double level) const
|
|
|
|
|
{
|
|
|
|
|
return !elems.empty()
|
|
|
|
|
? get(level)
|
|
|
|
|
: std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void getManyFloat(const double * levels, const size_t * levels_permutation, size_t size, float * result) const
|
|
|
|
|
{
|
|
|
|
|
if (!elems.empty())
|
|
|
|
|
getMany(levels, levels_permutation, size, result);
|
|
|
|
|
else
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
result[i] = std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Values below this threshold get one histogram bucket each (QuantileTimingLarge::count_small).
#define SMALL_THRESHOLD 1024
|
|
|
|
|
/// Number of histogram buckets covering values from SMALL_THRESHOLD up to BIG_THRESHOLD.
/// BIG_PRECISION is defined after this macro; that is fine because macros are expanded at the point of use.
#define BIG_SIZE ((BIG_THRESHOLD - SMALL_THRESHOLD) / BIG_PRECISION)
|
|
|
|
|
/// Width of one histogram bucket for values at or above SMALL_THRESHOLD.
#define BIG_PRECISION 16
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2016-03-11 03:43:16 +00:00
|
|
|
|
/// Size in bytes of the two histogram arrays (SMALL_THRESHOLD + BIG_SIZE counters of type UInt64),
/// i.e. what the dense serialization reads/writes right after the leading total count.
#define SIZE_OF_LARGE_WITHOUT_COUNT ((SMALL_THRESHOLD + BIG_SIZE) * sizeof(UInt64))
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-12 07:22:58 +00:00
|
|
|
|
/** Для большого количества значений. Размер около 22 680 байт.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
*/
|
|
|
|
|
class QuantileTimingLarge
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2013-09-30 04:51:57 +00:00
|
|
|
|
private:
|
|
|
|
|
/// Общее число значений.
|
|
|
|
|
UInt64 count;
|
2016-03-12 07:22:58 +00:00
|
|
|
|
/// Использование UInt64 весьма расточительно.
|
|
|
|
|
/// Но UInt32 точно не хватает, а изобретать 6-байтные значения слишком сложно.
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Число значений для каждого значения меньше small_threshold.
|
|
|
|
|
UInt64 count_small[SMALL_THRESHOLD];
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Число значений для каждого значения от small_threshold до big_threshold, округлённого до big_precision.
|
|
|
|
|
UInt64 count_big[BIG_SIZE];
|
|
|
|
|
|
2013-10-20 03:57:28 +00:00
|
|
|
|
/// Получить значение квантиля по индексу в массиве count_big.
|
|
|
|
|
static inline UInt16 indexInBigToValue(size_t i)
|
|
|
|
|
{
|
|
|
|
|
return (i * BIG_PRECISION) + SMALL_THRESHOLD
|
|
|
|
|
+ (intHash32<0>(i) % BIG_PRECISION - (BIG_PRECISION / 2)); /// Небольшая рандомизация, чтобы не было заметно, что все значения чётные.
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
/// Позволяет перебрать значения гистограммы, пропуская нули.
|
|
|
|
|
class Iterator
|
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
const UInt64 * begin;
|
|
|
|
|
const UInt64 * pos;
|
|
|
|
|
const UInt64 * end;
|
|
|
|
|
|
|
|
|
|
void adjust()
|
|
|
|
|
{
|
|
|
|
|
while (isValid() && 0 == *pos)
|
|
|
|
|
++pos;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
Iterator(const QuantileTimingLarge & parent)
|
|
|
|
|
: begin(parent.count_small), pos(begin), end(&parent.count_big[BIG_SIZE])
|
|
|
|
|
{
|
|
|
|
|
adjust();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool isValid() const { return pos < end; }
|
|
|
|
|
|
|
|
|
|
void next()
|
|
|
|
|
{
|
|
|
|
|
++pos;
|
|
|
|
|
adjust();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
UInt64 count() const { return *pos; }
|
|
|
|
|
|
|
|
|
|
UInt16 key() const
|
|
|
|
|
{
|
|
|
|
|
return pos - begin < SMALL_THRESHOLD
|
|
|
|
|
? pos - begin
|
|
|
|
|
: indexInBigToValue(pos - begin - SMALL_THRESHOLD);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
public:
|
|
|
|
|
QuantileTimingLarge()
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2013-09-30 04:51:57 +00:00
|
|
|
|
memset(this, 0, sizeof(*this));
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
void insert(UInt64 x)
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2015-02-27 17:38:21 +00:00
|
|
|
|
insertWeighted(x, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void insertWeighted(UInt64 x, size_t weight)
|
|
|
|
|
{
|
|
|
|
|
count += weight;
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
|
|
|
|
if (x < SMALL_THRESHOLD)
|
2015-02-27 17:38:21 +00:00
|
|
|
|
count_small[x] += weight;
|
2013-09-30 04:51:57 +00:00
|
|
|
|
else if (x < BIG_THRESHOLD)
|
2015-02-27 17:38:21 +00:00
|
|
|
|
count_big[(x - SMALL_THRESHOLD) / BIG_PRECISION] += weight;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
void merge(const QuantileTimingLarge & rhs)
|
|
|
|
|
{
|
|
|
|
|
count += rhs.count;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
for (size_t i = 0; i < SMALL_THRESHOLD; ++i)
|
|
|
|
|
count_small[i] += rhs.count_small[i];
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
for (size_t i = 0; i < BIG_SIZE; ++i)
|
|
|
|
|
count_big[i] += rhs.count_big[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void serialize(WriteBuffer & buf) const
|
|
|
|
|
{
|
2016-03-11 03:43:16 +00:00
|
|
|
|
writeBinary(count, buf);
|
|
|
|
|
|
|
|
|
|
if (count * 2 > SMALL_THRESHOLD + BIG_SIZE)
|
|
|
|
|
{
|
|
|
|
|
/// Простая сериализация для сильно заполненного случая.
|
|
|
|
|
buf.write(reinterpret_cast<const char *>(this) + sizeof(count), SIZE_OF_LARGE_WITHOUT_COUNT);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/// Более компактная сериализация для разреженного случая.
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < SMALL_THRESHOLD; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (count_small[i])
|
|
|
|
|
{
|
|
|
|
|
writeBinary(UInt16(i), buf);
|
|
|
|
|
writeBinary(count_small[i], buf);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < BIG_SIZE; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (count_big[i])
|
|
|
|
|
{
|
|
|
|
|
writeBinary(UInt16(i + SMALL_THRESHOLD), buf);
|
|
|
|
|
writeBinary(count_big[i], buf);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Символизирует конец данных.
|
|
|
|
|
writeBinary(UInt16(BIG_THRESHOLD), buf);
|
|
|
|
|
}
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-03-12 07:22:58 +00:00
|
|
|
|
void deserialize(ReadBuffer & buf)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2016-03-11 03:43:16 +00:00
|
|
|
|
readBinary(count, buf);
|
|
|
|
|
|
|
|
|
|
if (count * 2 > SMALL_THRESHOLD + BIG_SIZE)
|
|
|
|
|
{
|
|
|
|
|
buf.readStrict(reinterpret_cast<char *>(this) + sizeof(count), SIZE_OF_LARGE_WITHOUT_COUNT);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
while (true)
|
|
|
|
|
{
|
|
|
|
|
UInt16 index = 0;
|
|
|
|
|
readBinary(index, buf);
|
|
|
|
|
if (index == BIG_THRESHOLD)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
UInt64 count = 0;
|
|
|
|
|
readBinary(count, buf);
|
|
|
|
|
|
|
|
|
|
if (index < SMALL_THRESHOLD)
|
|
|
|
|
count_small[index] = count;
|
|
|
|
|
else
|
|
|
|
|
count_big[index - SMALL_THRESHOLD] = count;
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Получить значение квантиля уровня level. Уровень должен быть от 0 до 1.
|
|
|
|
|
UInt16 get(double level) const
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-03-13 19:00:59 +00:00
|
|
|
|
UInt64 pos = std::ceil(count * level);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
|
|
|
|
UInt64 accumulated = 0;
|
2016-03-13 14:40:27 +00:00
|
|
|
|
Iterator it(*this);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
while (it.isValid())
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-03-13 14:40:27 +00:00
|
|
|
|
accumulated += it.count();
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
if (accumulated >= pos)
|
|
|
|
|
break;
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
it.next();
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
return it.isValid() ? it.key() : BIG_THRESHOLD;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
/// Получить значения size квантилей уровней levels. Записать size результатов начиная с адреса result.
|
2015-11-23 21:33:43 +00:00
|
|
|
|
/// indices - массив индексов levels такой, что соответствующие элементы будут идти в порядке по возрастанию.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
template <typename ResultType>
|
2015-11-23 21:33:43 +00:00
|
|
|
|
void getMany(const double * levels, const size_t * indices, size_t size, ResultType * result) const
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2015-08-20 16:47:02 +00:00
|
|
|
|
const auto indices_end = indices + size;
|
|
|
|
|
auto index = indices;
|
|
|
|
|
|
2016-03-13 19:00:59 +00:00
|
|
|
|
UInt64 pos = std::ceil(count * levels[*index]);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
|
|
|
|
UInt64 accumulated = 0;
|
2016-03-13 14:40:27 +00:00
|
|
|
|
Iterator it(*this);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
while (it.isValid())
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-03-13 14:40:27 +00:00
|
|
|
|
accumulated += it.count();
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
while (accumulated >= pos)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2016-03-13 14:40:27 +00:00
|
|
|
|
result[*index] = it.key();
|
2015-08-20 16:47:02 +00:00
|
|
|
|
++index;
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2015-08-20 16:47:02 +00:00
|
|
|
|
if (index == indices_end)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
return;
|
|
|
|
|
|
2016-03-13 19:00:59 +00:00
|
|
|
|
pos = std::ceil(count * levels[*index]);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-03-13 14:40:27 +00:00
|
|
|
|
it.next();
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-04-11 17:52:20 +00:00
|
|
|
|
while (index != indices_end)
|
|
|
|
|
{
|
|
|
|
|
result[*index] = BIG_THRESHOLD;
|
|
|
|
|
++index;
|
|
|
|
|
}
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// То же самое, но в случае пустого состояния возвращается NaN.
|
|
|
|
|
float getFloat(double level) const
|
|
|
|
|
{
|
|
|
|
|
return count
|
|
|
|
|
? get(level)
|
|
|
|
|
: std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 21:33:43 +00:00
|
|
|
|
void getManyFloat(const double * levels, const size_t * levels_permutation, size_t size, float * result) const
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
|
|
|
|
if (count)
|
2015-11-23 21:33:43 +00:00
|
|
|
|
getMany(levels, levels_permutation, size, result);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
else
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
result[i] = std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** sizeof - 64 байта.
|
2016-06-11 14:27:54 +00:00
|
|
|
|
* Если их не хватает - выделяет дополнительно до 20 КБ памяти.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
*/
|
|
|
|
|
class QuantileTiming : private boost::noncopyable
|
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
union
|
|
|
|
|
{
|
|
|
|
|
detail::QuantileTimingTiny tiny;
|
2016-03-11 21:43:59 +00:00
|
|
|
|
detail::QuantileTimingMedium medium;
|
2013-09-30 04:51:57 +00:00
|
|
|
|
detail::QuantileTimingLarge * large;
|
|
|
|
|
};
|
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
enum class Kind : UInt8
|
|
|
|
|
{
|
|
|
|
|
Tiny = 1,
|
|
|
|
|
Medium = 2,
|
|
|
|
|
Large = 3
|
|
|
|
|
};
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
Kind which() const
|
|
|
|
|
{
|
|
|
|
|
if (tiny.count <= TINY_MAX_ELEMS)
|
|
|
|
|
return Kind::Tiny;
|
|
|
|
|
if (tiny.count == TINY_MAX_ELEMS + 1)
|
|
|
|
|
return Kind::Medium;
|
|
|
|
|
return Kind::Large;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void tinyToMedium()
|
|
|
|
|
{
|
|
|
|
|
detail::QuantileTimingTiny tiny_copy = tiny;
|
|
|
|
|
new (&medium) detail::QuantileTimingMedium(tiny_copy.elems, tiny_copy.elems + tiny_copy.count);
|
|
|
|
|
tiny.count = TINY_MAX_ELEMS + 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void mediumToLarge()
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2014-05-03 22:57:43 +00:00
|
|
|
|
if (current_memory_tracker)
|
|
|
|
|
current_memory_tracker->alloc(sizeof(detail::QuantileTimingLarge));
|
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
/// На время копирования данных из medium, устанавливать значение large ещё нельзя (иначе оно перезатрёт часть данных).
|
|
|
|
|
detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge;
|
|
|
|
|
|
|
|
|
|
for (const auto & elem : medium.elems)
|
|
|
|
|
tmp_large->insert(elem);
|
|
|
|
|
|
2016-06-11 14:27:54 +00:00
|
|
|
|
medium.~QuantileTimingMedium();
|
2016-03-11 21:43:59 +00:00
|
|
|
|
large = tmp_large;
|
|
|
|
|
tiny.count = TINY_MAX_ELEMS + 2;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void tinyToLarge()
|
|
|
|
|
{
|
|
|
|
|
if (current_memory_tracker)
|
|
|
|
|
current_memory_tracker->alloc(sizeof(detail::QuantileTimingLarge));
|
|
|
|
|
|
|
|
|
|
/// На время копирования данных из medium, устанавливать значение large ещё нельзя (иначе оно перезатрёт часть данных).
|
2014-03-26 00:34:00 +00:00
|
|
|
|
detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge;
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < tiny.count; ++i)
|
2014-03-26 00:34:00 +00:00
|
|
|
|
tmp_large->insert(tiny.elems[i]);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2014-03-26 00:34:00 +00:00
|
|
|
|
large = tmp_large;
|
2016-03-11 21:43:59 +00:00
|
|
|
|
tiny.count = TINY_MAX_ELEMS + 2;
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-03-12 07:22:58 +00:00
|
|
|
|
bool mediumIsWorthToConvertToLarge() const
|
|
|
|
|
{
|
|
|
|
|
return medium.elems.size() >= sizeof(detail::QuantileTimingLarge) / sizeof(medium.elems[0]) / 2;
|
|
|
|
|
}
|
|
|
|
|
|
2013-09-30 04:51:57 +00:00
|
|
|
|
public:
|
|
|
|
|
QuantileTiming()
|
|
|
|
|
{
|
|
|
|
|
tiny.count = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
~QuantileTiming()
|
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
Kind kind = which();
|
|
|
|
|
|
|
|
|
|
if (kind == Kind::Medium)
|
|
|
|
|
{
|
|
|
|
|
medium.~QuantileTimingMedium();
|
|
|
|
|
}
|
|
|
|
|
else if (kind == Kind::Large)
|
2014-05-03 22:57:43 +00:00
|
|
|
|
{
|
2013-09-30 04:51:57 +00:00
|
|
|
|
delete large;
|
2014-05-03 22:57:43 +00:00
|
|
|
|
|
|
|
|
|
if (current_memory_tracker)
|
|
|
|
|
current_memory_tracker->free(sizeof(detail::QuantileTimingLarge));
|
|
|
|
|
}
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void insert(UInt64 x)
|
|
|
|
|
{
|
|
|
|
|
if (tiny.count < TINY_MAX_ELEMS)
|
|
|
|
|
{
|
|
|
|
|
tiny.insert(x);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (unlikely(tiny.count == TINY_MAX_ELEMS))
|
2016-03-11 21:43:59 +00:00
|
|
|
|
tinyToMedium();
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
if (which() == Kind::Medium)
|
|
|
|
|
{
|
2016-03-12 07:22:58 +00:00
|
|
|
|
if (unlikely(mediumIsWorthToConvertToLarge()))
|
2016-03-11 21:43:59 +00:00
|
|
|
|
{
|
|
|
|
|
mediumToLarge();
|
|
|
|
|
large->insert(x);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
medium.insert(x);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
large->insert(x);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-27 17:38:21 +00:00
|
|
|
|
void insertWeighted(UInt64 x, size_t weight)
|
|
|
|
|
{
|
|
|
|
|
/// NOTE: Первое условие - для того, чтобы избежать переполнения.
|
|
|
|
|
if (weight < TINY_MAX_ELEMS && tiny.count + weight <= TINY_MAX_ELEMS)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < weight; ++i)
|
|
|
|
|
tiny.insert(x);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (unlikely(tiny.count <= TINY_MAX_ELEMS))
|
2016-03-11 21:43:59 +00:00
|
|
|
|
tinyToLarge(); /// Для weighted варианта medium не используем - предположительно, нецелесообразно.
|
2015-02-27 17:38:21 +00:00
|
|
|
|
|
|
|
|
|
large->insertWeighted(x, weight);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-12 07:22:58 +00:00
|
|
|
|
/// NOTE Слишком сложный код.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
void merge(const QuantileTiming & rhs)
|
|
|
|
|
{
|
|
|
|
|
if (tiny.count + rhs.tiny.count <= TINY_MAX_ELEMS)
|
|
|
|
|
{
|
|
|
|
|
tiny.merge(rhs.tiny);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
auto kind = which();
|
|
|
|
|
auto rhs_kind = rhs.which();
|
|
|
|
|
|
2016-03-12 07:22:58 +00:00
|
|
|
|
/// Если то, с чем сливаем, имеет бОльшую структуру данных, то приводим текущую структуру к такой же.
|
2016-03-11 21:43:59 +00:00
|
|
|
|
if (kind == Kind::Tiny && rhs_kind == Kind::Medium)
|
2016-03-12 07:22:58 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
tinyToMedium();
|
2016-03-12 07:22:58 +00:00
|
|
|
|
kind = Kind::Medium;
|
|
|
|
|
}
|
2016-03-11 21:43:59 +00:00
|
|
|
|
else if (kind == Kind::Tiny && rhs_kind == Kind::Large)
|
2016-03-12 07:22:58 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
tinyToLarge();
|
2016-03-12 07:22:58 +00:00
|
|
|
|
kind = Kind::Large;
|
|
|
|
|
}
|
2016-03-11 21:43:59 +00:00
|
|
|
|
else if (kind == Kind::Medium && rhs_kind == Kind::Large)
|
2016-03-12 07:22:58 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
mediumToLarge();
|
2016-03-12 07:22:58 +00:00
|
|
|
|
kind = Kind::Large;
|
|
|
|
|
}
|
|
|
|
|
/// Случай, когда два состояния маленькие, но при их слиянии, они превратятся в средние.
|
|
|
|
|
else if (kind == Kind::Tiny && rhs_kind == Kind::Tiny)
|
|
|
|
|
{
|
|
|
|
|
tinyToMedium();
|
|
|
|
|
kind = Kind::Medium;
|
|
|
|
|
}
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
if (kind == Kind::Medium && rhs_kind == Kind::Medium)
|
|
|
|
|
{
|
|
|
|
|
medium.merge(rhs.medium);
|
|
|
|
|
}
|
|
|
|
|
else if (kind == Kind::Large && rhs_kind == Kind::Large)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
|
|
|
|
large->merge(*rhs.large);
|
|
|
|
|
}
|
2016-03-11 21:43:59 +00:00
|
|
|
|
else if (kind == Kind::Medium && rhs_kind == Kind::Tiny)
|
|
|
|
|
{
|
|
|
|
|
medium.elems.insert(rhs.tiny.elems, rhs.tiny.elems + rhs.tiny.count);
|
|
|
|
|
}
|
|
|
|
|
else if (kind == Kind::Large && rhs_kind == Kind::Tiny)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < rhs.tiny.count; ++i)
|
|
|
|
|
large->insert(rhs.tiny.elems[i]);
|
|
|
|
|
}
|
2016-03-11 21:43:59 +00:00
|
|
|
|
else if (kind == Kind::Large && rhs_kind == Kind::Medium)
|
|
|
|
|
{
|
|
|
|
|
for (const auto & elem : rhs.medium.elems)
|
|
|
|
|
large->insert(elem);
|
|
|
|
|
}
|
2016-03-12 07:22:58 +00:00
|
|
|
|
else
|
|
|
|
|
throw Exception("Logical error in QuantileTiming::merge function: not all cases are covered", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
|
|
|
|
/// Для детерминированности, мы должны всегда переводить в large при достижении условия на размер
|
|
|
|
|
/// - независимо от порядка мерджей.
|
|
|
|
|
if (kind == Kind::Medium && unlikely(mediumIsWorthToConvertToLarge()))
|
|
|
|
|
{
|
|
|
|
|
mediumToLarge();
|
|
|
|
|
}
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void serialize(WriteBuffer & buf) const
|
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
auto kind = which();
|
|
|
|
|
DB::writePODBinary(kind, buf);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
if (kind == Kind::Tiny)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
tiny.serialize(buf);
|
2016-03-11 21:43:59 +00:00
|
|
|
|
else if (kind == Kind::Medium)
|
|
|
|
|
medium.serialize(buf);
|
|
|
|
|
else
|
|
|
|
|
large->serialize(buf);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
/// Вызывается для пустого объекта.
|
2013-09-30 04:51:57 +00:00
|
|
|
|
void deserialize(ReadBuffer & buf)
|
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
Kind kind;
|
|
|
|
|
DB::readPODBinary(kind, buf);
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2016-03-11 21:43:59 +00:00
|
|
|
|
if (kind == Kind::Tiny)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
tiny.deserialize(buf);
|
|
|
|
|
}
|
|
|
|
|
else if (kind == Kind::Medium)
|
|
|
|
|
{
|
|
|
|
|
tinyToMedium();
|
|
|
|
|
medium.deserialize(buf);
|
|
|
|
|
}
|
|
|
|
|
else if (kind == Kind::Large)
|
|
|
|
|
{
|
|
|
|
|
tinyToLarge();
|
2013-09-30 04:51:57 +00:00
|
|
|
|
large->deserialize(buf);
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Получить значение квантиля уровня level. Уровень должен быть от 0 до 1.
|
|
|
|
|
UInt16 get(double level) const
|
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
Kind kind = which();
|
|
|
|
|
|
|
|
|
|
if (kind == Kind::Tiny)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
tiny.prepare();
|
|
|
|
|
return tiny.get(level);
|
|
|
|
|
}
|
|
|
|
|
else if (kind == Kind::Medium)
|
|
|
|
|
{
|
|
|
|
|
return medium.get(level);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
return large->get(level);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Получить значения size квантилей уровней levels. Записать size результатов начиная с адреса result.
|
|
|
|
|
template <typename ResultType>
|
2015-11-23 21:33:43 +00:00
|
|
|
|
void getMany(const double * levels, const size_t * levels_permutation, size_t size, ResultType * result) const
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
Kind kind = which();
|
|
|
|
|
|
|
|
|
|
if (kind == Kind::Tiny)
|
|
|
|
|
{
|
|
|
|
|
tiny.prepare();
|
|
|
|
|
tiny.getMany(levels, size, result);
|
|
|
|
|
}
|
|
|
|
|
else if (kind == Kind::Medium)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
medium.getMany(levels, levels_permutation, size, result);
|
2013-09-30 04:51:57 +00:00
|
|
|
|
}
|
2016-03-11 21:43:59 +00:00
|
|
|
|
else if (kind == Kind::Large)
|
2013-09-30 04:51:57 +00:00
|
|
|
|
{
|
2016-03-11 21:43:59 +00:00
|
|
|
|
large->getMany(levels, levels_permutation, size, result);
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-09-24 19:56:33 +00:00
|
|
|
|
|
|
|
|
|
/// То же самое, но в случае пустого состояния возвращается NaN.
|
|
|
|
|
float getFloat(double level) const
|
|
|
|
|
{
|
2013-09-30 04:51:57 +00:00
|
|
|
|
return tiny.count
|
2013-09-24 19:56:33 +00:00
|
|
|
|
? get(level)
|
|
|
|
|
: std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-23 21:33:43 +00:00
|
|
|
|
void getManyFloat(const double * levels, const size_t * levels_permutation, size_t size, float * result) const
|
2013-09-24 19:56:33 +00:00
|
|
|
|
{
|
2013-09-30 04:51:57 +00:00
|
|
|
|
if (tiny.count)
|
2015-11-23 21:33:43 +00:00
|
|
|
|
getMany(levels, levels_permutation, size, result);
|
2013-09-24 19:56:33 +00:00
|
|
|
|
else
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
result[i] = std::numeric_limits<float>::quiet_NaN();
|
|
|
|
|
}
|
2013-07-27 19:57:45 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#undef SMALL_THRESHOLD
|
|
|
|
|
#undef BIG_THRESHOLD
|
|
|
|
|
#undef BIG_SIZE
|
|
|
|
|
#undef BIG_PRECISION
|
2013-09-30 04:51:57 +00:00
|
|
|
|
#undef TINY_MAX_ELEMS
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <typename ArgumentFieldType>
|
2014-06-04 01:00:09 +00:00
|
|
|
|
class AggregateFunctionQuantileTiming final : public IUnaryAggregateFunction<QuantileTiming, AggregateFunctionQuantileTiming<ArgumentFieldType> >
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
double level;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
AggregateFunctionQuantileTiming(double level_ = 0.5) : level(level_) {}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
String getName() const override { return "quantileTiming"; }
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
DataTypePtr getReturnType() const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-05-28 07:48:40 +00:00
|
|
|
|
return std::make_shared<DataTypeFloat32>();
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-15 08:31:08 +00:00
|
|
|
|
void setArgument(const DataTypePtr & argument)
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void setParameters(const Array & params) override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
if (params.size() != 1)
|
|
|
|
|
throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
|
|
|
|
|
level = apply_visitor(FieldVisitorConvertToNumber<Float64>(), params[0]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-09-19 22:30:40 +00:00
|
|
|
|
void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).insert(static_cast<const ColumnVector<ArgumentFieldType> &>(column).getData()[row_num]);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-23 23:33:17 +00:00
|
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).merge(this->data(rhs));
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).serialize(buf);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-22 23:26:08 +00:00
|
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-03-12 07:22:58 +00:00
|
|
|
|
this->data(place).deserialize(buf);
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2013-09-24 19:56:33 +00:00
|
|
|
|
static_cast<ColumnFloat32 &>(to).getData().push_back(this->data(place).getFloat(level));
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2015-02-27 17:38:21 +00:00
|
|
|
|
/** То же самое, но с двумя аргументами. Второй аргумент - "вес" (целое число) - сколько раз учитывать значение.
|
|
|
|
|
*/
|
|
|
|
|
template <typename ArgumentFieldType, typename WeightFieldType>
|
2015-11-21 18:56:54 +00:00
|
|
|
|
class AggregateFunctionQuantileTimingWeighted final
|
|
|
|
|
: public IBinaryAggregateFunction<QuantileTiming, AggregateFunctionQuantileTimingWeighted<ArgumentFieldType, WeightFieldType>>
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
double level;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
AggregateFunctionQuantileTimingWeighted(double level_ = 0.5) : level(level_) {}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
String getName() const override { return "quantileTimingWeighted"; }
|
2015-02-27 17:38:21 +00:00
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
DataTypePtr getReturnType() const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
2016-05-28 07:48:40 +00:00
|
|
|
|
return std::make_shared<DataTypeFloat32>();
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-21 18:56:54 +00:00
|
|
|
|
void setArgumentsImpl(const DataTypes & arguments)
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void setParameters(const Array & params) override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
if (params.size() != 1)
|
|
|
|
|
throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
|
|
|
|
|
level = apply_visitor(FieldVisitorConvertToNumber<Float64>(), params[0]);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-19 22:30:40 +00:00
|
|
|
|
void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num, Arena *) const
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).insertWeighted(
|
2015-11-21 18:56:54 +00:00
|
|
|
|
static_cast<const ColumnVector<ArgumentFieldType> &>(column_value).getData()[row_num],
|
|
|
|
|
static_cast<const ColumnVector<WeightFieldType> &>(column_weight).getData()[row_num]);
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-09-23 23:33:17 +00:00
|
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).merge(this->data(rhs));
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).serialize(buf);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-22 23:26:08 +00:00
|
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
2016-03-12 07:22:58 +00:00
|
|
|
|
this->data(place).deserialize(buf);
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
static_cast<ColumnFloat32 &>(to).getData().push_back(this->data(place).getFloat(level));
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2013-07-27 19:57:45 +00:00
|
|
|
|
/** То же самое, но позволяет вычислить сразу несколько квантилей.
|
|
|
|
|
* Для этого, принимает в качестве параметров несколько уровней. Пример: quantilesTiming(0.5, 0.8, 0.9, 0.95)(ConnectTiming).
|
|
|
|
|
* Возвращает массив результатов.
|
|
|
|
|
*/
|
|
|
|
|
template <typename ArgumentFieldType>
|
2014-06-04 01:00:09 +00:00
|
|
|
|
class AggregateFunctionQuantilesTiming final : public IUnaryAggregateFunction<QuantileTiming, AggregateFunctionQuantilesTiming<ArgumentFieldType> >
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
private:
|
2015-11-23 21:33:43 +00:00
|
|
|
|
QuantileLevels<double> levels;
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
|
|
|
|
public:
|
2015-11-11 02:04:23 +00:00
|
|
|
|
String getName() const override { return "quantilesTiming"; }
|
2013-07-27 19:57:45 +00:00
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
DataTypePtr getReturnType() const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-05-28 07:48:40 +00:00
|
|
|
|
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat32>());
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-15 08:31:08 +00:00
|
|
|
|
void setArgument(const DataTypePtr & argument)
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void setParameters(const Array & params) override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2015-11-23 21:33:43 +00:00
|
|
|
|
levels.set(params);
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-09-19 22:30:40 +00:00
|
|
|
|
void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).insert(static_cast<const ColumnVector<ArgumentFieldType> &>(column).getData()[row_num]);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-23 23:33:17 +00:00
|
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).merge(this->data(rhs));
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).serialize(buf);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-22 23:26:08 +00:00
|
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
2016-03-12 07:22:58 +00:00
|
|
|
|
this->data(place).deserialize(buf);
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
2013-07-27 19:57:45 +00:00
|
|
|
|
{
|
|
|
|
|
ColumnArray & arr_to = static_cast<ColumnArray &>(to);
|
|
|
|
|
ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();
|
|
|
|
|
|
|
|
|
|
size_t size = levels.size();
|
|
|
|
|
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + size);
|
2015-08-20 16:47:02 +00:00
|
|
|
|
|
2016-10-27 14:51:43 +00:00
|
|
|
|
if (!size)
|
|
|
|
|
return;
|
|
|
|
|
|
2013-09-24 19:56:33 +00:00
|
|
|
|
typename ColumnFloat32::Container_t & data_to = static_cast<ColumnFloat32 &>(arr_to.getData()).getData();
|
2013-07-27 19:57:45 +00:00
|
|
|
|
size_t old_size = data_to.size();
|
|
|
|
|
data_to.resize(data_to.size() + size);
|
2015-08-20 16:47:02 +00:00
|
|
|
|
|
2015-11-23 21:33:43 +00:00
|
|
|
|
this->data(place).getManyFloat(&levels.levels[0], &levels.permutation[0], size, &data_to[old_size]);
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2015-02-27 17:38:21 +00:00
|
|
|
|
|
|
|
|
|
template <typename ArgumentFieldType, typename WeightFieldType>
|
2015-11-15 05:52:41 +00:00
|
|
|
|
class AggregateFunctionQuantilesTimingWeighted final
|
|
|
|
|
: public IBinaryAggregateFunction<QuantileTiming, AggregateFunctionQuantilesTimingWeighted<ArgumentFieldType, WeightFieldType>>
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
private:
|
2015-11-23 21:33:43 +00:00
|
|
|
|
QuantileLevels<double> levels;
|
2015-02-27 17:38:21 +00:00
|
|
|
|
|
|
|
|
|
public:
|
2015-11-11 02:04:23 +00:00
|
|
|
|
String getName() const override { return "quantilesTimingWeighted"; }
|
2015-02-27 17:38:21 +00:00
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
DataTypePtr getReturnType() const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
2016-05-28 07:48:40 +00:00
|
|
|
|
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat32>());
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-15 05:52:41 +00:00
|
|
|
|
void setArgumentsImpl(const DataTypes & arguments)
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void setParameters(const Array & params) override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
2015-11-23 21:33:43 +00:00
|
|
|
|
levels.set(params);
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-09-19 22:30:40 +00:00
|
|
|
|
void addImpl(AggregateDataPtr place, const IColumn & column_value, const IColumn & column_weight, size_t row_num, Arena *) const
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).insertWeighted(
|
2015-11-15 05:52:41 +00:00
|
|
|
|
static_cast<const ColumnVector<ArgumentFieldType> &>(column_value).getData()[row_num],
|
|
|
|
|
static_cast<const ColumnVector<WeightFieldType> &>(column_weight).getData()[row_num]);
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-09-23 23:33:17 +00:00
|
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).merge(this->data(rhs));
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
this->data(place).serialize(buf);
|
|
|
|
|
}
|
|
|
|
|
|
2016-09-22 23:26:08 +00:00
|
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
2016-03-12 07:22:58 +00:00
|
|
|
|
this->data(place).deserialize(buf);
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 02:04:23 +00:00
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
2015-02-27 17:38:21 +00:00
|
|
|
|
{
|
|
|
|
|
ColumnArray & arr_to = static_cast<ColumnArray &>(to);
|
|
|
|
|
ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();
|
|
|
|
|
|
|
|
|
|
size_t size = levels.size();
|
|
|
|
|
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + size);
|
|
|
|
|
|
2016-10-27 14:51:43 +00:00
|
|
|
|
if (!size)
|
|
|
|
|
return;
|
|
|
|
|
|
2015-02-27 17:38:21 +00:00
|
|
|
|
typename ColumnFloat32::Container_t & data_to = static_cast<ColumnFloat32 &>(arr_to.getData()).getData();
|
|
|
|
|
size_t old_size = data_to.size();
|
|
|
|
|
data_to.resize(data_to.size() + size);
|
|
|
|
|
|
2015-11-23 21:33:43 +00:00
|
|
|
|
this->data(place).getManyFloat(&levels.levels[0], &levels.permutation[0], size, &data_to[old_size]);
|
2015-02-27 17:38:21 +00:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2013-07-27 19:57:45 +00:00
|
|
|
|
}
|