2015-11-15 03:11:24 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <DB/Common/PODArray.h>
|
|
|
|
|
|
|
|
#include <DB/IO/WriteHelpers.h>
|
|
|
|
#include <DB/IO/ReadHelpers.h>
|
|
|
|
|
2017-03-12 10:13:45 +00:00
|
|
|
#include <DB/DataTypes/DataTypesNumber.h>
|
2015-11-15 03:11:24 +00:00
|
|
|
#include <DB/DataTypes/DataTypeArray.h>
|
|
|
|
|
|
|
|
#include <DB/AggregateFunctions/IUnaryAggregateFunction.h>
|
2015-11-23 21:33:43 +00:00
|
|
|
#include <DB/AggregateFunctions/QuantilesCommon.h>
|
2015-11-15 03:11:24 +00:00
|
|
|
|
|
|
|
#include <DB/Columns/ColumnArray.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
|
2017-03-09 04:18:41 +00:00
|
|
|
/** The state is an array, into which all values are added.
|
2017-03-09 00:56:38 +00:00
|
|
|
* NOTE If there are few different values then this is not optimal.
|
|
|
|
* For 8 and 16-bit values it might be better to use a lookup table.
|
2015-11-15 03:11:24 +00:00
|
|
|
*/
|
|
|
|
template <typename T>
|
|
|
|
struct AggregateFunctionQuantileExactData
|
|
|
|
{
|
2017-03-09 04:26:17 +00:00
|
|
|
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
|
2015-11-15 03:20:29 +00:00
|
|
|
static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<T>);
|
|
|
|
|
2016-04-15 16:07:58 +00:00
|
|
|
using Array = PODArray<T, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>>;
|
2015-11-15 03:11:24 +00:00
|
|
|
Array array;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-03-09 00:56:38 +00:00
|
|
|
/** Exactly calculates the quantile.
|
|
|
|
* The argument type can only be a numeric type (including date and date-time).
|
|
|
|
* The result type is the same as the argument type.
|
2015-11-15 03:11:24 +00:00
|
|
|
*/
|
|
|
|
template <typename T>
|
|
|
|
class AggregateFunctionQuantileExact final
|
|
|
|
: public IUnaryAggregateFunction<AggregateFunctionQuantileExactData<T>, AggregateFunctionQuantileExact<T>>
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
double level;
|
|
|
|
DataTypePtr type;
|
|
|
|
|
|
|
|
public:
|
|
|
|
AggregateFunctionQuantileExact(double level_ = 0.5) : level(level_) {}
|
|
|
|
|
|
|
|
String getName() const override { return "quantileExact"; }
|
|
|
|
|
|
|
|
DataTypePtr getReturnType() const override
|
|
|
|
{
|
|
|
|
return type;
|
|
|
|
}
|
|
|
|
|
2015-11-15 08:31:08 +00:00
|
|
|
void setArgument(const DataTypePtr & argument)
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
type = argument;
|
|
|
|
}
|
|
|
|
|
|
|
|
void setParameters(const Array & params) override
|
|
|
|
{
|
|
|
|
if (params.size() != 1)
|
|
|
|
throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
2017-01-06 17:41:19 +00:00
|
|
|
level = applyVisitor(FieldVisitorConvertToNumber<Float64>(), params[0]);
|
2015-11-15 03:11:24 +00:00
|
|
|
}
|
|
|
|
|
2016-09-19 22:30:40 +00:00
|
|
|
void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
this->data(place).array.push_back(static_cast<const ColumnVector<T> &>(column).getData()[row_num]);
|
|
|
|
}
|
|
|
|
|
2016-09-23 23:33:17 +00:00
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
this->data(place).array.insert(this->data(rhs).array.begin(), this->data(rhs).array.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
|
|
|
{
|
|
|
|
const auto & array = this->data(place).array;
|
|
|
|
|
|
|
|
size_t size = array.size();
|
|
|
|
writeVarUInt(size, buf);
|
|
|
|
buf.write(reinterpret_cast<const char *>(&array[0]), size * sizeof(array[0]));
|
|
|
|
}
|
|
|
|
|
2016-09-22 23:26:08 +00:00
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
auto & array = this->data(place).array;
|
|
|
|
|
|
|
|
size_t size = 0;
|
|
|
|
readVarUInt(size, buf);
|
2016-03-12 04:01:03 +00:00
|
|
|
array.resize(size);
|
|
|
|
buf.read(reinterpret_cast<char *>(&array[0]), size * sizeof(array[0]));
|
2015-11-15 03:11:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
|
|
|
{
|
2017-03-09 04:26:17 +00:00
|
|
|
/// Sorting an array will not be considered a violation of constancy.
|
2015-11-15 03:11:24 +00:00
|
|
|
auto & array = const_cast<typename AggregateFunctionQuantileExactData<T>::Array &>(this->data(place).array);
|
|
|
|
|
|
|
|
T quantile = T();
|
|
|
|
|
|
|
|
if (!array.empty())
|
|
|
|
{
|
|
|
|
size_t n = level < 1
|
|
|
|
? level * array.size()
|
|
|
|
: (array.size() - 1);
|
|
|
|
|
2017-03-09 04:26:17 +00:00
|
|
|
std::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm.
|
2015-11-15 03:11:24 +00:00
|
|
|
|
|
|
|
quantile = array[n];
|
|
|
|
}
|
|
|
|
|
|
|
|
static_cast<ColumnVector<T> &>(to).getData().push_back(quantile);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-03-09 00:56:38 +00:00
|
|
|
/** The same, but allows you to calculate several quantiles at once.
|
|
|
|
* To do this, takes several levels as parameters. Example: quantilesExact(0.5, 0.8, 0.9, 0.95)(ConnectTiming).
|
|
|
|
* Returns an array of results.
|
2015-11-15 03:11:24 +00:00
|
|
|
*/
|
|
|
|
template <typename T>
|
|
|
|
class AggregateFunctionQuantilesExact final
|
|
|
|
: public IUnaryAggregateFunction<AggregateFunctionQuantileExactData<T>, AggregateFunctionQuantilesExact<T>>
|
|
|
|
{
|
|
|
|
private:
|
2015-11-23 21:33:43 +00:00
|
|
|
QuantileLevels<double> levels;
|
2015-11-15 03:11:24 +00:00
|
|
|
DataTypePtr type;
|
|
|
|
|
|
|
|
public:
|
|
|
|
String getName() const override { return "quantilesExact"; }
|
|
|
|
|
|
|
|
DataTypePtr getReturnType() const override
|
|
|
|
{
|
2016-05-28 07:48:40 +00:00
|
|
|
return std::make_shared<DataTypeArray>(type);
|
2015-11-15 03:11:24 +00:00
|
|
|
}
|
|
|
|
|
2015-11-15 08:31:08 +00:00
|
|
|
void setArgument(const DataTypePtr & argument)
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
type = argument;
|
|
|
|
}
|
|
|
|
|
|
|
|
void setParameters(const Array & params) override
|
|
|
|
{
|
2015-11-23 21:33:43 +00:00
|
|
|
levels.set(params);
|
2015-11-15 03:11:24 +00:00
|
|
|
}
|
|
|
|
|
2016-09-19 22:30:40 +00:00
|
|
|
void addImpl(AggregateDataPtr place, const IColumn & column, size_t row_num, Arena *) const
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
this->data(place).array.push_back(static_cast<const ColumnVector<T> &>(column).getData()[row_num]);
|
|
|
|
}
|
|
|
|
|
2016-09-23 23:33:17 +00:00
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
this->data(place).array.insert(this->data(rhs).array.begin(), this->data(rhs).array.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
|
|
|
{
|
|
|
|
const auto & array = this->data(place).array;
|
|
|
|
|
|
|
|
size_t size = array.size();
|
|
|
|
writeVarUInt(size, buf);
|
|
|
|
buf.write(reinterpret_cast<const char *>(&array[0]), size * sizeof(array[0]));
|
|
|
|
}
|
|
|
|
|
2016-09-22 23:26:08 +00:00
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
|
|
|
auto & array = this->data(place).array;
|
|
|
|
|
|
|
|
size_t size = 0;
|
|
|
|
readVarUInt(size, buf);
|
2016-03-12 04:01:03 +00:00
|
|
|
array.resize(size);
|
|
|
|
buf.read(reinterpret_cast<char *>(&array[0]), size * sizeof(array[0]));
|
2015-11-15 03:11:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
|
|
|
{
|
2017-03-09 04:26:17 +00:00
|
|
|
/// Sorting an array will not be considered a violation of constancy.
|
2015-11-15 03:11:24 +00:00
|
|
|
auto & array = const_cast<typename AggregateFunctionQuantileExactData<T>::Array &>(this->data(place).array);
|
|
|
|
|
|
|
|
ColumnArray & arr_to = static_cast<ColumnArray &>(to);
|
|
|
|
ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();
|
|
|
|
|
|
|
|
size_t num_levels = levels.size();
|
|
|
|
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + num_levels);
|
|
|
|
|
|
|
|
typename ColumnVector<T>::Container_t & data_to = static_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
2015-11-23 21:33:43 +00:00
|
|
|
size_t old_size = data_to.size();
|
|
|
|
data_to.resize(old_size + num_levels);
|
2015-11-15 03:11:24 +00:00
|
|
|
|
|
|
|
if (!array.empty())
|
|
|
|
{
|
|
|
|
size_t prev_n = 0;
|
2015-11-23 21:33:43 +00:00
|
|
|
for (auto level_index : levels.permutation)
|
2015-11-15 03:11:24 +00:00
|
|
|
{
|
2015-11-23 21:33:43 +00:00
|
|
|
auto level = levels.levels[level_index];
|
|
|
|
|
2015-11-15 03:11:24 +00:00
|
|
|
size_t n = level < 1
|
|
|
|
? level * array.size()
|
|
|
|
: (array.size() - 1);
|
|
|
|
|
|
|
|
std::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
|
|
|
|
|
2015-11-23 21:33:43 +00:00
|
|
|
data_to[old_size + level_index] = array[n];
|
2015-11-15 03:11:24 +00:00
|
|
|
prev_n = n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < num_levels; ++i)
|
2015-11-23 21:33:43 +00:00
|
|
|
data_to[old_size + i] = T();
|
2015-11-15 03:11:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|