ClickHouse/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantileDeterministic.h

212 lines
6.7 KiB
C++
Raw Normal View History

#pragma once
#include <DB/AggregateFunctions/ReservoirSamplerDeterministic.h>
#include <DB/Core/FieldVisitors.h>
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/ReadHelpers.h>
#include <DB/DataTypes/DataTypesNumber.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/AggregateFunctions/IBinaryAggregateFunction.h>
#include <DB/Columns/ColumnArray.h>
#include <DB/Columns/ColumnsNumber.h>
namespace DB
{
template <typename ArgumentFieldType>
struct AggregateFunctionQuantileDeterministicData
{
using Sample = ReservoirSamplerDeterministic<ArgumentFieldType, ReservoirSamplerDeterministicOnEmpty::RETURN_NAN_OR_ZERO>;
2017-03-09 04:26:17 +00:00
Sample sample; /// TODO Add MemoryTracker
};
2017-03-09 00:56:38 +00:00
/** Approximately calculates the quantile.
* The argument type can only be a numeric type (including date and date-time).
* If returns_float = true, the result type is Float64, otherwise - the result type is the same as the argument type.
* For dates and date-time, returns_float should be set to false.
*/
template <typename ArgumentFieldType, bool returns_float = true>
class AggregateFunctionQuantileDeterministic final
: public IBinaryAggregateFunction<
AggregateFunctionQuantileDeterministicData<ArgumentFieldType>,
AggregateFunctionQuantileDeterministic<ArgumentFieldType, returns_float>>
{
private:
using Sample = typename AggregateFunctionQuantileDeterministicData<ArgumentFieldType>::Sample;
double level;
DataTypePtr type;
public:
AggregateFunctionQuantileDeterministic(double level_ = 0.5) : level(level_) {}
2015-11-11 02:04:23 +00:00
String getName() const override { return "quantileDeterministic"; }
2015-11-11 02:04:23 +00:00
DataTypePtr getReturnType() const override
{
return type;
}
void setArgumentsImpl(const DataTypes & arguments)
{
type = returns_float ? std::make_shared<DataTypeFloat64>() : arguments[0];
if (!arguments[1]->isNumeric())
throw Exception{
"Invalid type of second argument to function " + getName() +
", got " + arguments[1]->getName() + ", expected numeric",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
};
}
2015-11-11 02:04:23 +00:00
void setParameters(const Array & params) override
{
if (params.size() != 1)
throw Exception("Aggregate function " + getName() + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
Squashed commit of the following: commit c567d4e1fe8d54e6363e47548f1e3927cc5ee78f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:35:01 2017 +0300 Style [#METR-2944]. commit 26bf3e1228e03f46c29b13edb0e3770bd453e3f1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:33:11 2017 +0300 Miscellaneous [#METR-2944]. commit eb946f4c6fd4bb0e9e5c7fb1468d36be3dfca5a5 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:30:19 2017 +0300 Miscellaneous [#METR-2944]. commit 78c867a14744b5af2db8d37caf7804fc2057ea51 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:11:41 2017 +0300 Miscellaneous [#METR-2944]. commit 6604c5c83cfcedc81c8da4da026711920d5963b4 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:56:15 2017 +0300 Miscellaneous [#METR-2944]. commit 23fbf05c1d4bead636458ec21b05a101b1152e33 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:47:52 2017 +0300 Miscellaneous [#METR-2944]. commit 98772faf11a7d450d473f7fa84f8a9ae24f7b59b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:46:05 2017 +0300 Miscellaneous [#METR-2944]. commit 3dc636ab9f9359dbeac2e8d997ae563d4ca147e2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:39:46 2017 +0300 Miscellaneous [#METR-2944]. commit 3e16aee95482f374ee3eda1a4dbe9ba5cdce02e8 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:38:03 2017 +0300 Miscellaneous [#METR-2944]. commit ae7e7e90eb1f82bd0fe0f887708d08b9e7755612 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:34:15 2017 +0300 Miscellaneous [#METR-2944].
2017-01-06 17:41:19 +00:00
level = applyVisitor(FieldVisitorConvertToNumber<Float64>(), params[0]);
}
void addImpl(AggregateDataPtr place, const IColumn & column, const IColumn & determinator, size_t row_num, Arena *) const
{
this->data(place).sample.insert(static_cast<const ColumnVector<ArgumentFieldType> &>(column).getData()[row_num],
determinator.get64(row_num));
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).sample.merge(this->data(rhs).sample);
}
2015-11-11 02:04:23 +00:00
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).sample.write(buf);
}
2016-09-22 23:26:08 +00:00
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
2016-03-12 04:01:03 +00:00
this->data(place).sample.read(buf);
}
2015-11-11 02:04:23 +00:00
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
2017-03-09 04:26:17 +00:00
/// `Sample` can be sorted when a quantile is received, but in this context, you can not think of this as a violation of constancy.
Sample & sample = const_cast<Sample &>(this->data(place).sample);
if (returns_float)
static_cast<ColumnFloat64 &>(to).getData().push_back(sample.quantileInterpolated(level));
else
static_cast<ColumnVector<ArgumentFieldType> &>(to).getData().push_back(sample.quantileInterpolated(level));
}
};
2017-03-09 00:56:38 +00:00
/** The same, but allows you to calculate several quantiles at once.
* To do this, takes several levels as parameters. Example: quantiles(0.5, 0.8, 0.9, 0.95)(ConnectTiming).
* Returns an array of results.
*/
template <typename ArgumentFieldType, bool returns_float = true>
class AggregateFunctionQuantilesDeterministic final
: public IBinaryAggregateFunction<
AggregateFunctionQuantileDeterministicData<ArgumentFieldType>,
AggregateFunctionQuantilesDeterministic<ArgumentFieldType, returns_float>>
{
private:
using Sample = typename AggregateFunctionQuantileDeterministicData<ArgumentFieldType>::Sample;
using Levels = std::vector<double>;
Levels levels;
DataTypePtr type;
public:
2015-11-11 02:04:23 +00:00
String getName() const override { return "quantilesDeterministic"; }
2015-11-11 02:04:23 +00:00
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeArray>(type);
}
void setArgumentsImpl(const DataTypes & arguments)
{
type = returns_float ? std::make_shared<DataTypeFloat64>() : arguments[0];
if (!arguments[1]->isNumeric())
throw Exception{
"Invalid type of second argument to function " + getName() +
", got " + arguments[1]->getName() + ", expected numeric",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
};
}
2015-11-11 02:04:23 +00:00
void setParameters(const Array & params) override
{
if (params.empty())
throw Exception("Aggregate function " + getName() + " requires at least one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
size_t size = params.size();
levels.resize(size);
for (size_t i = 0; i < size; ++i)
Squashed commit of the following: commit c567d4e1fe8d54e6363e47548f1e3927cc5ee78f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:35:01 2017 +0300 Style [#METR-2944]. commit 26bf3e1228e03f46c29b13edb0e3770bd453e3f1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:33:11 2017 +0300 Miscellaneous [#METR-2944]. commit eb946f4c6fd4bb0e9e5c7fb1468d36be3dfca5a5 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:30:19 2017 +0300 Miscellaneous [#METR-2944]. commit 78c867a14744b5af2db8d37caf7804fc2057ea51 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 20:11:41 2017 +0300 Miscellaneous [#METR-2944]. commit 6604c5c83cfcedc81c8da4da026711920d5963b4 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:56:15 2017 +0300 Miscellaneous [#METR-2944]. commit 23fbf05c1d4bead636458ec21b05a101b1152e33 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:47:52 2017 +0300 Miscellaneous [#METR-2944]. commit 98772faf11a7d450d473f7fa84f8a9ae24f7b59b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:46:05 2017 +0300 Miscellaneous [#METR-2944]. commit 3dc636ab9f9359dbeac2e8d997ae563d4ca147e2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:39:46 2017 +0300 Miscellaneous [#METR-2944]. commit 3e16aee95482f374ee3eda1a4dbe9ba5cdce02e8 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:38:03 2017 +0300 Miscellaneous [#METR-2944]. commit ae7e7e90eb1f82bd0fe0f887708d08b9e7755612 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Jan 6 19:34:15 2017 +0300 Miscellaneous [#METR-2944].
2017-01-06 17:41:19 +00:00
levels[i] = applyVisitor(FieldVisitorConvertToNumber<Float64>(), params[i]);
}
void addImpl(AggregateDataPtr place, const IColumn & column, const IColumn & determinator, size_t row_num, Arena *) const
{
this->data(place).sample.insert(static_cast<const ColumnVector<ArgumentFieldType> &>(column).getData()[row_num],
determinator.get64(row_num));
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
this->data(place).sample.merge(this->data(rhs).sample);
}
2015-11-11 02:04:23 +00:00
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).sample.write(buf);
}
2016-09-22 23:26:08 +00:00
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
2016-03-12 04:01:03 +00:00
this->data(place).sample.read(buf);
}
2015-11-11 02:04:23 +00:00
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
2017-03-09 04:26:17 +00:00
/// `Sample` can be sorted when a quantile is received, but in this context, you can not think of this as a violation of constancy.
Sample & sample = const_cast<Sample &>(this->data(place).sample);
ColumnArray & arr_to = static_cast<ColumnArray &>(to);
ColumnArray::Offsets_t & offsets_to = arr_to.getOffsets();
size_t size = levels.size();
offsets_to.push_back((offsets_to.size() == 0 ? 0 : offsets_to.back()) + size);
if (returns_float)
{
ColumnFloat64::Container_t & data_to = static_cast<ColumnFloat64 &>(arr_to.getData()).getData();
for (size_t i = 0; i < size; ++i)
data_to.push_back(sample.quantileInterpolated(levels[i]));
}
else
{
typename ColumnVector<ArgumentFieldType>::Container_t & data_to = static_cast<ColumnVector<ArgumentFieldType> &>(arr_to.getData()).getData();
for (size_t i = 0; i < size; ++i)
data_to.push_back(sample.quantileInterpolated(levels[i]));
}
}
};
}