2017-05-02 21:08:37 +00:00
|
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
|
|
|
#include <AggregateFunctions/AggregateFunctionTopK.h>
|
|
|
|
#include <AggregateFunctions/Helpers.h>
|
2017-12-20 20:58:43 +00:00
|
|
|
#include <AggregateFunctions/FactoryHelpers.h>
|
2017-12-20 21:22:04 +00:00
|
|
|
#include <DataTypes/DataTypeDate.h>
|
|
|
|
#include <DataTypes/DataTypeDateTime.h>
|
2017-12-20 20:58:43 +00:00
|
|
|
|
|
|
|
/// Upper bound for the parameter of the aggregate functions created in this file;
/// createAggregateFunctionTopK rejects larger values with ARGUMENT_OUT_OF_BOUND.
#define TOP_K_MAX_SIZE 0xFFFFFF
|
|
|
|
|
2017-05-02 21:08:37 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-12-20 20:25:22 +00:00
|
|
|
/// Error codes thrown by the factory code below; they are only declared here.
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
|
|
|
|
|
|
|
|
|
2017-05-02 21:08:37 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Substitute return type for Date and DateTime
|
2019-02-10 21:01:26 +00:00
|
|
|
template <bool is_weighted>
|
|
|
|
class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>
|
2017-05-02 21:08:37 +00:00
|
|
|
{
|
2019-02-10 21:01:26 +00:00
|
|
|
using AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>::AggregateFunctionTopK;
|
2017-05-02 21:08:37 +00:00
|
|
|
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
|
|
|
|
};
|
|
|
|
|
2019-02-10 21:01:26 +00:00
|
|
|
template <bool is_weighted>
|
|
|
|
class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>
|
2017-05-02 21:08:37 +00:00
|
|
|
{
|
2019-02-10 21:01:26 +00:00
|
|
|
using AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>::AggregateFunctionTopK;
|
2017-05-02 21:08:37 +00:00
|
|
|
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()); }
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-02-10 21:01:26 +00:00
|
|
|
template <bool is_weighted>
|
2019-02-11 19:26:32 +00:00
|
|
|
static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold, const Array & params)
|
2017-05-02 21:08:37 +00:00
|
|
|
{
|
2018-09-10 17:09:07 +00:00
|
|
|
WhichDataType which(argument_type);
|
2019-02-10 21:01:26 +00:00
|
|
|
if (which.idx == TypeIndex::Date)
|
2019-02-11 19:26:32 +00:00
|
|
|
return new AggregateFunctionTopKDate<is_weighted>(threshold, {argument_type}, params);
|
2019-02-10 21:01:26 +00:00
|
|
|
if (which.idx == TypeIndex::DateTime)
|
2019-02-11 19:26:32 +00:00
|
|
|
return new AggregateFunctionTopKDateTime<is_weighted>(threshold, {argument_type}, params);
|
2017-12-20 20:58:43 +00:00
|
|
|
|
|
|
|
/// Check that we can use plain version of AggregateFunctionTopKGeneric
|
|
|
|
if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
2019-02-11 19:26:32 +00:00
|
|
|
return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, argument_type, params);
|
2017-05-02 21:08:37 +00:00
|
|
|
else
|
2019-02-11 19:26:32 +00:00
|
|
|
return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, argument_type, params);
|
2017-05-02 21:08:37 +00:00
|
|
|
}
|
|
|
|
|
2019-02-10 21:01:26 +00:00
|
|
|
|
|
|
|
template <bool is_weighted>
|
2017-12-20 20:58:43 +00:00
|
|
|
AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const DataTypes & argument_types, const Array & params)
|
2017-05-02 21:08:37 +00:00
|
|
|
{
|
2019-02-10 21:01:26 +00:00
|
|
|
if (!is_weighted)
|
|
|
|
{
|
2019-02-03 10:59:27 +00:00
|
|
|
assertUnary(name, argument_types);
|
2019-02-10 21:01:26 +00:00
|
|
|
}
|
2019-02-03 10:59:27 +00:00
|
|
|
else
|
2019-02-10 21:01:26 +00:00
|
|
|
{
|
2019-02-03 10:59:27 +00:00
|
|
|
assertBinary(name, argument_types);
|
2019-02-10 21:01:26 +00:00
|
|
|
if (!isNumber(argument_types[1]))
|
|
|
|
throw Exception("The second argument for aggregate function 'topKWeighted' must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
}
|
2017-12-20 20:58:43 +00:00
|
|
|
|
|
|
|
UInt64 threshold = 10; /// default value
|
|
|
|
|
|
|
|
if (!params.empty())
|
|
|
|
{
|
|
|
|
if (params.size() != 1)
|
2019-02-03 10:59:27 +00:00
|
|
|
throw Exception("Aggregate function " + name + " requires one parameter or less.",
|
|
|
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
2017-12-20 20:58:43 +00:00
|
|
|
|
|
|
|
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
|
|
|
|
|
|
|
|
if (k > TOP_K_MAX_SIZE)
|
|
|
|
throw Exception("Too large parameter for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE),
|
|
|
|
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
|
|
|
|
|
|
|
if (k == 0)
|
|
|
|
throw Exception("Parameter 0 is illegal for aggregate function " + name,
|
|
|
|
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
|
|
|
|
|
|
|
threshold = k;
|
|
|
|
}
|
2017-05-02 21:08:37 +00:00
|
|
|
|
2019-02-11 19:26:32 +00:00
|
|
|
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK, is_weighted>(*argument_types[0], threshold, argument_types, params));
|
2017-05-02 21:08:37 +00:00
|
|
|
|
|
|
|
if (!res)
|
2019-02-11 19:26:32 +00:00
|
|
|
res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types[0], threshold, params));
|
2017-05-02 21:08:37 +00:00
|
|
|
|
|
|
|
if (!res)
|
|
|
|
throw Exception("Illegal type " + argument_types[0]->getName() +
|
|
|
|
" of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Registers both variants in the given factory: "topK" uses the unweighted creator,
/// "topKWeighted" the weighted one.
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
{
    factory.registerFunction("topK", &createAggregateFunctionTopK<false>);
    factory.registerFunction("topKWeighted", &createAggregateFunctionTopK<true>);
}
|
|
|
|
|
|
|
|
}
|