2017-04-01 09:19:00 +00:00
|
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
|
|
|
#include <AggregateFunctions/AggregateFunctionUniq.h>
|
|
|
|
#include <AggregateFunctions/Helpers.h>
|
2017-12-20 20:25:22 +00:00
|
|
|
#include <AggregateFunctions/FactoryHelpers.h>
|
2015-09-24 12:40:36 +00:00
|
|
|
|
2017-12-07 23:07:41 +00:00
|
|
|
#include <DataTypes/DataTypeDate.h>
|
2021-07-15 11:41:52 +00:00
|
|
|
#include <DataTypes/DataTypeDate32.h>
|
2017-12-07 23:07:41 +00:00
|
|
|
#include <DataTypes/DataTypeDateTime.h>
|
|
|
|
#include <DataTypes/DataTypeTuple.h>
|
|
|
|
#include <DataTypes/DataTypeUUID.h>
|
|
|
|
|
|
|
|
|
2015-09-24 12:40:36 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-05-26 11:32:14 +00:00
|
|
|
struct Settings;
|
2015-09-24 12:40:36 +00:00
|
|
|
|
2017-12-20 20:25:22 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-09-24 12:40:36 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2018-07-16 03:12:01 +00:00
|
|
|
|
2017-03-25 20:12:56 +00:00
|
|
|
/** `DataForVariadic` is a data structure that will be used for `uniq` aggregate function of multiple arguments.
|
|
|
|
* It differs, for example, in that it uses a trivial hash function, since `uniq` of many arguments first hashes them out itself.
|
2015-10-29 04:02:22 +00:00
|
|
|
*/
|
|
|
|
template <typename Data, typename DataForVariadic>
|
2021-05-26 11:32:14 +00:00
|
|
|
AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
2015-09-24 12:40:36 +00:00
|
|
|
{
|
2017-12-20 20:25:22 +00:00
|
|
|
assertNoParameters(name, params);
|
|
|
|
|
2018-03-17 18:14:05 +00:00
|
|
|
if (argument_types.empty())
|
|
|
|
throw Exception("Incorrect number of arguments for aggregate function " + name,
|
|
|
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
2018-07-16 03:12:01 +00:00
|
|
|
bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types);
|
|
|
|
|
2015-10-29 02:13:37 +00:00
|
|
|
if (argument_types.size() == 1)
|
|
|
|
{
|
|
|
|
const IDataType & argument_type = *argument_types[0];
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-11 19:26:32 +00:00
|
|
|
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionUniq, Data>(*argument_types[0], argument_types));
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-10 17:09:07 +00:00
|
|
|
WhichDataType which(argument_type);
|
2015-10-29 02:13:37 +00:00
|
|
|
if (res)
|
|
|
|
return res;
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isDate())
|
2019-02-11 19:26:32 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeDate::FieldType, Data>>(argument_types);
|
2021-07-15 11:41:52 +00:00
|
|
|
else if (which.isDate32())
|
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeDate32::FieldType, Data>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isDateTime())
|
2019-02-11 19:26:32 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeDateTime::FieldType, Data>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isStringOrFixedString())
|
2019-02-11 19:26:32 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<String, Data>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isUUID())
|
2019-02-11 19:26:32 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeUUID::FieldType, Data>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isTuple())
|
2018-07-16 03:12:01 +00:00
|
|
|
{
|
|
|
|
if (use_exact_hash_function)
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, true>>(argument_types);
|
|
|
|
else
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, true>>(argument_types);
|
|
|
|
}
|
2015-10-29 02:13:37 +00:00
|
|
|
}
|
2018-03-17 18:14:05 +00:00
|
|
|
|
|
|
|
/// "Variadic" method also works as a fallback generic case for single argument.
|
2018-07-16 03:12:01 +00:00
|
|
|
if (use_exact_hash_function)
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, false>>(argument_types);
|
|
|
|
else
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, false>>(argument_types);
|
2015-09-24 12:40:36 +00:00
|
|
|
}
|
|
|
|
|
2018-07-16 03:12:01 +00:00
|
|
|
template <bool is_exact, template <typename> class Data, typename DataForVariadic>
|
2021-05-26 11:32:14 +00:00
|
|
|
AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
2015-09-24 12:40:36 +00:00
|
|
|
{
|
2017-12-20 20:25:22 +00:00
|
|
|
assertNoParameters(name, params);
|
|
|
|
|
2018-03-17 18:14:05 +00:00
|
|
|
if (argument_types.empty())
|
|
|
|
throw Exception("Incorrect number of arguments for aggregate function " + name,
|
|
|
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
2018-07-16 03:12:01 +00:00
|
|
|
/// We use exact hash function if the user wants it;
|
|
|
|
/// or if the arguments are not contiguous in memory, because only exact hash function have support for this case.
|
|
|
|
bool use_exact_hash_function = is_exact || !isAllArgumentsContiguousInMemory(argument_types);
|
|
|
|
|
2015-10-29 02:13:37 +00:00
|
|
|
if (argument_types.size() == 1)
|
|
|
|
{
|
|
|
|
const IDataType & argument_type = *argument_types[0];
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-11 19:26:32 +00:00
|
|
|
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionUniq, Data>(*argument_types[0], argument_types));
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-10 17:09:07 +00:00
|
|
|
WhichDataType which(argument_type);
|
2015-10-29 02:13:37 +00:00
|
|
|
if (res)
|
|
|
|
return res;
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isDate())
|
2019-02-11 19:26:32 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeDate::FieldType, Data<DataTypeDate::FieldType>>>(argument_types);
|
2021-07-15 11:41:52 +00:00
|
|
|
else if (which.isDate32())
|
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeDate32::FieldType, Data<DataTypeDate32::FieldType>>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isDateTime())
|
2019-02-11 19:26:32 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeDateTime::FieldType, Data<DataTypeDateTime::FieldType>>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isStringOrFixedString())
|
2019-02-12 09:31:20 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<String, Data<String>>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isUUID())
|
2019-02-11 19:26:32 +00:00
|
|
|
return std::make_shared<AggregateFunctionUniq<DataTypeUUID::FieldType, Data<DataTypeUUID::FieldType>>>(argument_types);
|
2018-09-10 17:09:07 +00:00
|
|
|
else if (which.isTuple())
|
2018-07-16 03:12:01 +00:00
|
|
|
{
|
|
|
|
if (use_exact_hash_function)
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, true>>(argument_types);
|
|
|
|
else
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, true>>(argument_types);
|
|
|
|
}
|
2015-10-29 02:13:37 +00:00
|
|
|
}
|
2018-03-17 18:14:05 +00:00
|
|
|
|
|
|
|
/// "Variadic" method also works as a fallback generic case for single argument.
|
2018-07-16 03:12:01 +00:00
|
|
|
if (use_exact_hash_function)
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, false>>(argument_types);
|
|
|
|
else
|
|
|
|
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, false>>(argument_types);
|
2015-09-24 12:40:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Registers the `uniq` family of aggregate functions in `factory`.
/// Every function is registered with the same properties object.
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties uniq_properties =
    {
        .returns_default_when_only_null = true,
        .is_order_dependent = false
    };

    /// Uses the creator overload whose single-argument state type is fixed.
    factory.registerFunction(
        "uniq",
        {createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>, uniq_properties});

    /// Uses the creator overload whose state is a template over the argument field type; exact hashing is not forced.
    factory.registerFunction(
        "uniqHLL12",
        {createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>, uniq_properties});

    /// Same overload, with exact hashing forced.
    factory.registerFunction(
        "uniqExact",
        {createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, uniq_properties});

#if USE_DATASKETCHES
    /// Registered only when USE_DATASKETCHES is enabled.
    factory.registerFunction(
        "uniqTheta",
        {createAggregateFunctionUniq<AggregateFunctionUniqThetaData, AggregateFunctionUniqThetaData>, uniq_properties});
#endif
}
|
|
|
|
|
|
|
|
}
|