decimal quantileExact [CLICKHOUSE-3765]

This commit is contained in:
chertus 2018-09-12 19:36:13 +03:00
parent 1c4825138a
commit d301fcd004
4 changed files with 114 additions and 76 deletions

View File

@ -19,78 +19,96 @@ namespace ErrorCodes
namespace
{
template <template <typename> class Data, typename Name, bool have_second_arg, typename FloatReturnType, bool returns_many>
template <typename T> using FuncQuantile = AggregateFunctionQuantile<T, QuantileReservoirSampler<T>, NameQuantile, false, Float64, false>;
template <typename T> using FuncQuantiles = AggregateFunctionQuantile<T, QuantileReservoirSampler<T>, NameQuantile, false, Float64, true>;
template <typename T> using FuncQuantileDeterministic = AggregateFunctionQuantile<T, QuantileReservoirSamplerDeterministic<T>, NameQuantileDeterministic, true, Float64, false>;
template <typename T> using FuncQuantilesDeterministic = AggregateFunctionQuantile<T, QuantileReservoirSamplerDeterministic<T>, NameQuantilesDeterministic, true, Float64, true>;
template <typename T> using FuncQuantileExact = AggregateFunctionQuantile<T, QuantileExact<T>, NameQuantileExact, false, void, false>;
template <typename T> using FuncQuantilesExact = AggregateFunctionQuantile<T, QuantileExact<T>, NameQuantilesExact, false, void, true>;
template <typename T> using FuncQuantileExactWeighted = AggregateFunctionQuantile<T, QuantileExactWeighted<T>, NameQuantileExactWeighted, true, void, false>;
template <typename T> using FuncQuantilesExactWeighted = AggregateFunctionQuantile<T, QuantileExactWeighted<T>, NameQuantilesExactWeighted, true, void, true>;
template <typename T> using FuncQuantileTiming = AggregateFunctionQuantile<T, QuantileTiming<T>, NameQuantileTiming, false, Float32, false>;
template <typename T> using FuncQuantilesTiming = AggregateFunctionQuantile<T, QuantileTiming<T>, NameQuantilesTiming, false, Float32, true>;
template <typename T> using FuncQuantileTimingWeighted = AggregateFunctionQuantile<T, QuantileTiming<T>, NameQuantileTimingWeighted, true, Float32, false>;
template <typename T> using FuncQuantilesTimingWeighted = AggregateFunctionQuantile<T, QuantileTiming<T>, NameQuantilesTimingWeighted, true, Float32, true>;
template <typename T> using FuncQuantileTDigest = AggregateFunctionQuantile<T, QuantileTDigest<T>, NameQuantileTDigest, false, Float32, false>;
template <typename T> using FuncQuantilesTDigest = AggregateFunctionQuantile<T, QuantileTDigest<T>, NameQuantilesTDigest, false, Float32, true>;
template <typename T> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<T, QuantileTDigest<T>, NameQuantileTDigestWeighted, true, Float32, false>;
template <typename T> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<T, QuantileTDigest<T>, NameQuantilesTDigestWeighted, true, Float32, true>;
template <template <typename> class Function>
static constexpr bool SupportDecimal()
{
return std::is_same_v<Function<Float32>, FuncQuantileExact<Float32>> ||
std::is_same_v<Function<Float32>, FuncQuantilesExact<Float32>>;
}
template <template <typename> class Function, bool have_second_arg>
AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, const DataTypes & argument_types, const Array & params)
{
if (have_second_arg)
if constexpr (have_second_arg)
assertBinary(name, argument_types);
else
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define CREATE(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return std::make_shared<AggregateFunctionQuantile<TYPE, Data<TYPE>, Name, have_second_arg, FloatReturnType, returns_many>>(argument_type, params);
FOR_NUMERIC_TYPES(CREATE)
#undef CREATE
if (which.idx == TypeIndex::Date)
return std::make_shared<AggregateFunctionQuantile<
DataTypeDate::FieldType, Data<DataTypeDate::FieldType>, Name, have_second_arg, void, returns_many>>(argument_type, params);
if (which.idx == TypeIndex::DateTime)
return std::make_shared<AggregateFunctionQuantile<
DataTypeDateTime::FieldType, Data<DataTypeDateTime::FieldType>, Name, have_second_arg, void, returns_many>>(argument_type, params);
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE>>(argument_type, params);
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
#undef FOR_NUMERIC_TYPES
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType>>(argument_type, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType>>(argument_type, params);
if constexpr (SupportDecimal<Function>())
{
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32>>(argument_type, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64>>(argument_type, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128>>(argument_type, params);
}
throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
{
factory.registerFunction(NameQuantile::name,
createAggregateFunctionQuantile<QuantileReservoirSampler, NameQuantile, false, Float64, false>);
factory.registerFunction(NameQuantiles::name,
createAggregateFunctionQuantile<QuantileReservoirSampler, NameQuantiles, false, Float64, true>);
factory.registerFunction(NameQuantile::name, createAggregateFunctionQuantile<FuncQuantile, false>);
factory.registerFunction(NameQuantiles::name, createAggregateFunctionQuantile<FuncQuantiles, false>);
factory.registerFunction(NameQuantileDeterministic::name,
createAggregateFunctionQuantile<QuantileReservoirSamplerDeterministic, NameQuantileDeterministic, true, Float64, false>);
factory.registerFunction(NameQuantilesDeterministic::name,
createAggregateFunctionQuantile<QuantileReservoirSamplerDeterministic, NameQuantilesDeterministic, true, Float64, true>);
factory.registerFunction(NameQuantileDeterministic::name, createAggregateFunctionQuantile<FuncQuantileDeterministic, true>);
factory.registerFunction(NameQuantilesDeterministic::name, createAggregateFunctionQuantile<FuncQuantilesDeterministic, true>);
factory.registerFunction(NameQuantileExact::name,
createAggregateFunctionQuantile<QuantileExact, NameQuantileExact, false, void, false>);
factory.registerFunction(NameQuantilesExact::name,
createAggregateFunctionQuantile<QuantileExact, NameQuantilesExact, false, void, true>);
factory.registerFunction(NameQuantileExact::name, createAggregateFunctionQuantile<FuncQuantileExact, false>);
factory.registerFunction(NameQuantilesExact::name, createAggregateFunctionQuantile<FuncQuantilesExact, false>);
factory.registerFunction(NameQuantileExactWeighted::name,
createAggregateFunctionQuantile<QuantileExactWeighted, NameQuantileExactWeighted, true, void, false>);
factory.registerFunction(NameQuantilesExactWeighted::name,
createAggregateFunctionQuantile<QuantileExactWeighted, NameQuantilesExactWeighted, true, void, true>);
factory.registerFunction(NameQuantileExactWeighted::name, createAggregateFunctionQuantile<FuncQuantileExactWeighted, true>);
factory.registerFunction(NameQuantilesExactWeighted::name, createAggregateFunctionQuantile<FuncQuantilesExactWeighted, true>);
factory.registerFunction(NameQuantileTiming::name,
createAggregateFunctionQuantile<QuantileTiming, NameQuantileTiming, false, Float32, false>);
factory.registerFunction(NameQuantilesTiming::name,
createAggregateFunctionQuantile<QuantileTiming, NameQuantilesTiming, false, Float32, true>);
factory.registerFunction(NameQuantileTiming::name, createAggregateFunctionQuantile<FuncQuantileTiming, false>);
factory.registerFunction(NameQuantilesTiming::name, createAggregateFunctionQuantile<FuncQuantilesTiming, false>);
factory.registerFunction(NameQuantileTimingWeighted::name,
createAggregateFunctionQuantile<QuantileTiming, NameQuantileTimingWeighted, true, Float32, false>);
factory.registerFunction(NameQuantilesTimingWeighted::name,
createAggregateFunctionQuantile<QuantileTiming, NameQuantilesTimingWeighted, true, Float32, true>);
factory.registerFunction(NameQuantileTimingWeighted::name, createAggregateFunctionQuantile<FuncQuantileTimingWeighted, true>);
factory.registerFunction(NameQuantilesTimingWeighted::name, createAggregateFunctionQuantile<FuncQuantilesTimingWeighted, true>);
factory.registerFunction(NameQuantileTDigest::name,
createAggregateFunctionQuantile<QuantileTDigest, NameQuantileTDigest, false, Float32, false>);
factory.registerFunction(NameQuantilesTDigest::name,
createAggregateFunctionQuantile<QuantileTDigest, NameQuantilesTDigest, false, Float32, true>);
factory.registerFunction(NameQuantileTDigest::name, createAggregateFunctionQuantile<FuncQuantileTDigest, false>);
factory.registerFunction(NameQuantilesTDigest::name, createAggregateFunctionQuantile<FuncQuantilesTDigest, false>);
factory.registerFunction(NameQuantileTDigestWeighted::name,
createAggregateFunctionQuantile<QuantileTDigest, NameQuantileTDigestWeighted, true, Float32, false>);
factory.registerFunction(NameQuantilesTDigestWeighted::name,
createAggregateFunctionQuantile<QuantileTDigest, NameQuantilesTDigestWeighted, true, Float32, true>);
factory.registerFunction(NameQuantileTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantileTDigestWeighted, true>);
factory.registerFunction(NameQuantilesTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantilesTDigestWeighted, true>);
/// 'median' is an alias for 'quantile'
factory.registerAlias("median", NameQuantile::name);

View File

@ -13,6 +13,7 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
namespace DB
@ -51,7 +52,10 @@ class AggregateFunctionQuantile final : public IAggregateFunctionDataHelper<Data
AggregateFunctionQuantile<Value, Data, Name, have_second_arg, FloatReturnType, returns_many>>
{
private:
using ColVecType = std::conditional_t<IsDecimalNumber<Value>, ColumnDecimal<Value>, ColumnVector<Value>>;
static constexpr bool returns_float = !std::is_same_v<FloatReturnType, void>;
static_assert(!IsDecimalNumber<Value> || !returns_float);
QuantileLevels<Float64> levels;
@ -87,13 +91,13 @@ public:
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
if constexpr (have_second_arg)
this->data(place).add(
static_cast<const ColumnVector<Value> &>(*columns[0]).getData()[row_num],
column.getData()[row_num],
columns[1]->getUInt(row_num));
else
this->data(place).add(
static_cast<const ColumnVector<Value> &>(*columns[0]).getData()[row_num]);
this->data(place).add(column.getData()[row_num]);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
@ -138,7 +142,7 @@ public:
}
else
{
auto & data_to = static_cast<ColumnVector<Value> &>(arr_to.getData()).getData();
auto & data_to = static_cast<ColVecType &>(arr_to.getData()).getData();
size_t old_size = data_to.size();
data_to.resize(data_to.size() + size);
@ -150,7 +154,7 @@ public:
if constexpr (returns_float)
static_cast<ColumnVector<FloatReturnType> &>(to).getData().push_back(data.getFloat(level));
else
static_cast<ColumnVector<Value> &>(to).getData().push_back(data.get(level));
static_cast<ColVecType &>(to).getData().push_back(data.get(level));
}
}

View File

@ -1,6 +1,5 @@
101 101 101
-50.0000 -16.66666666 -10.00000000
50.0000 16.66666666 10.00000000
[-50.0000,50.0000] [-16.66666666,16.66666666] [-10.00000000,10.00000000]
0.0000 0.00000000 0.00000000 0.0000 0.00000000 0.00000000
1275.0000 424.99999983 255.00000000 1275.0000 424.99999983 255.00000000
-1275.0000 -424.99999983 -255.00000000 -1275.0000 -424.99999983 -255.00000000
@ -9,11 +8,21 @@
0.0000 0.00000000 0.00000000
25.5000 8.49999999 5.10000000
-25.5000 -8.49999999 -5.10000000
101 101 101
101 101 101
101 101 101
102 100 101
(101,101,101) (101,101,101) (101,101,101) (102,100,101)
5 5 5
10 10 10
-50.0000 -50.0000 -16.66666666 -16.66666666 -10.00000000 -10.00000000
1.0000 1.0000 0.33333333 0.33333333 0.20000000 0.20000000
50.0000 50.0000 16.66666666 16.66666666 10.00000000 10.00000000
-1.0000 -1.0000 -0.33333333 -0.33333333 -0.20000000 -0.20000000
0.0000 0.00000000 0.00000000
-25.0000 -8.33333333 -5.00000000
0.0000 0.00000000 0.00000000
10.0000 3.33333333 2.00000000
20.0000 6.66666666 4.00000000
30.0000 10.00000000 6.00000000
40.0000 13.33333333 8.00000000
50.0000 16.66666666 10.00000000
[-50.0000,-40.0000,-30.0000,-20.0000,-10.0000,0.0000,10.0000,20.0000,30.0000,40.0000,50.0000]
[-16.66666666,-13.33333333,-10.00000000,-6.66666666,-3.33333333,0.00000000,3.33333333,6.66666666,10.00000000,13.33333333,16.66666666]
[-10.00000000,-8.00000000,-6.00000000,-4.00000000,-2.00000000,0.00000000,2.00000000,4.00000000,6.00000000,8.00000000,10.00000000]

View File

@ -16,8 +16,7 @@ SELECT toDecimal32(number - 50, 4), toDecimal64(number - 50, 8) / 3, toDecimal12
FROM system.numbers LIMIT 101;
SELECT count(a), count(b), count(c) FROM test.decimal;
SELECT min(a), min(b), min(c) FROM test.decimal;
SELECT max(a), max(b), max(c) FROM test.decimal;
SELECT [min(a), max(a)], [min(b), max(b)], [min(c), max(c)] FROM test.decimal;
SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOverflow(c) FROM test.decimal;
SELECT sum(a), sum(b), sum(c), sumWithOverflow(a), sumWithOverflow(b), sumWithOverflow(c) FROM test.decimal WHERE a > 0;
@ -29,25 +28,33 @@ SELECT avg(a), avg(b), avg(c) FROM test.decimal;
SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a > 0;
SELECT avg(a), avg(b), avg(c) FROM test.decimal WHERE a < 0;
SELECT uniq(a), uniq(b), uniq(c) FROM (SELECT * FROM test.decimal ORDER BY a);
SELECT uniqCombined(a), uniqCombined(b), uniqCombined(c) FROM (SELECT * FROM test.decimal ORDER BY a);
SELECT uniqExact(a), uniqExact(b), uniqExact(c) FROM (SELECT * FROM test.decimal ORDER BY a);
SELECT uniqHLL12(a), uniqHLL12(b), uniqHLL12(c) FROM (SELECT * FROM test.decimal ORDER BY a);
SELECT (uniq(a), uniq(b), uniq(c)),
(uniqCombined(a), uniqCombined(b), uniqCombined(c)),
(uniqExact(a), uniqExact(b), uniqExact(c)),
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c))
FROM (SELECT * FROM test.decimal ORDER BY a);
SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM test.decimal WHERE a >= 0 AND a < 5;
SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM test.decimal WHERE a >= 0 AND a < 10;
SELECT argMin(a, b), argMin(a, c), argMin(b, a), argMin(b, c), argMin(c, a), argMin(c, b) FROM test.decimal;
SELECT argMin(a, b), argMin(a, c), argMin(b, a), argMin(b, c), argMin(c, a), argMin(c, b) FROM test.decimal WHERE a > 0;
SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM test.decimal;
SELECT argMax(a, b), argMax(a, c), argMax(b, a), argMax(b, c), argMax(c, a), argMax(c, b) FROM test.decimal WHERE a < 0;
--SELECT median(a), median(b), median(c) FROM test.decimal;
--SELECT quantile(a), quantile(b), quantile(c) FROM test.decimal;
--SELECT quantile(0.9)(a), quantile(0.9)(b), quantile(0.9)(c) FROM test.decimal;
-- TODO: quantileDeterministic, quantileTiming
--SELECT quantileExact(a), quantileExact(b), quantileExact(c) FROM test.decimal;
--SELECT quantileExact(0.9)(a), quantileExact(0.9)(b), quantileExact(0.9)(c) FROM test.decimal;
--SELECT quantileTDigest(a), quantileTDigest(b), quantileTDigest(c) FROM test.decimal;
--SELECT quantileTDigest(0.9)(a), quantileTDigest(0.9)(b), quantileTDigest(0.9)(c) FROM test.decimal;
--SELECT quantiles(0.5, 0.9)(a), quantiles(0.5, 0.9)(b), quantiles(0.5, 0.9)(c) FROM test.decimal;
SELECT medianExact(a), medianExact(b), medianExact(c) FROM test.decimal;
SELECT quantileExact(a), quantileExact(b), quantileExact(c) FROM test.decimal WHERE a < 0;
SELECT quantileExact(0.0)(a), quantileExact(0.0)(b), quantileExact(0.0)(c) FROM test.decimal WHERE a >= 0;
SELECT quantileExact(0.2)(a), quantileExact(0.2)(b), quantileExact(0.2)(c) FROM test.decimal WHERE a >= 0;
SELECT quantileExact(0.4)(a), quantileExact(0.4)(b), quantileExact(0.4)(c) FROM test.decimal WHERE a >= 0;
SELECT quantileExact(0.6)(a), quantileExact(0.6)(b), quantileExact(0.6)(c) FROM test.decimal WHERE a >= 0;
SELECT quantileExact(0.8)(a), quantileExact(0.8)(b), quantileExact(0.8)(c) FROM test.decimal WHERE a >= 0;
SELECT quantileExact(1.0)(a), quantileExact(1.0)(b), quantileExact(1.0)(c) FROM test.decimal WHERE a >= 0;
SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(a) FROM test.decimal;
SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(b) FROM test.decimal;
SELECT quantilesExact(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)(c) FROM test.decimal;
-- TODO: sumMap
-- TODO: other quantile(s)
-- TODO: groupArray, groupArrayInsertAt, groupUniqArray
-- TODO: topK