ClickHouse/src/AggregateFunctions/AggregateFunctionUniq.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

148 lines
6.3 KiB
C++
Raw Normal View History

#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionUniq.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
2015-09-24 12:40:36 +00:00
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeUUID.h>
2015-09-24 12:40:36 +00:00
namespace DB
{
struct Settings;
2015-09-24 12:40:36 +00:00
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
2015-09-24 12:40:36 +00:00
namespace
{
2017-03-25 20:12:56 +00:00
/** `DataForVariadic` is a data structure that will be used for `uniq` aggregate function of multiple arguments.
* It differs, for example, in that it uses a trivial hash function, since `uniq` of many arguments first hashes them out itself.
*/
template <typename Data, typename DataForVariadic>
AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
2015-09-24 12:40:36 +00:00
{
assertNoParameters(name, params);
if (argument_types.empty())
throw Exception("Incorrect number of arguments for aggregate function " + name,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
bool use_exact_hash_function = !isAllArgumentsContiguousInMemory(argument_types);
if (argument_types.size() == 1)
{
const IDataType & argument_type = *argument_types[0];
2019-02-11 19:26:32 +00:00
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionUniq, Data>(*argument_types[0], argument_types));
2018-09-10 17:09:07 +00:00
WhichDataType which(argument_type);
if (res)
return res;
2018-09-10 17:09:07 +00:00
else if (which.isDate())
2019-02-11 19:26:32 +00:00
return std::make_shared<AggregateFunctionUniq<DataTypeDate::FieldType, Data>>(argument_types);
else if (which.isDate32())
return std::make_shared<AggregateFunctionUniq<DataTypeDate32::FieldType, Data>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isDateTime())
2019-02-11 19:26:32 +00:00
return std::make_shared<AggregateFunctionUniq<DataTypeDateTime::FieldType, Data>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isStringOrFixedString())
2019-02-11 19:26:32 +00:00
return std::make_shared<AggregateFunctionUniq<String, Data>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isUUID())
2019-02-11 19:26:32 +00:00
return std::make_shared<AggregateFunctionUniq<DataTypeUUID::FieldType, Data>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isTuple())
{
if (use_exact_hash_function)
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, true>>(argument_types);
else
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, true>>(argument_types);
}
}
/// "Variadic" method also works as a fallback generic case for single argument.
if (use_exact_hash_function)
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, false>>(argument_types);
else
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, false>>(argument_types);
2015-09-24 12:40:36 +00:00
}
template <bool is_exact, template <typename> class Data, typename DataForVariadic>
AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
2015-09-24 12:40:36 +00:00
{
assertNoParameters(name, params);
if (argument_types.empty())
throw Exception("Incorrect number of arguments for aggregate function " + name,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
/// We use exact hash function if the user wants it;
/// or if the arguments are not contiguous in memory, because only exact hash function have support for this case.
bool use_exact_hash_function = is_exact || !isAllArgumentsContiguousInMemory(argument_types);
if (argument_types.size() == 1)
{
const IDataType & argument_type = *argument_types[0];
2019-02-11 19:26:32 +00:00
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionUniq, Data>(*argument_types[0], argument_types));
2018-09-10 17:09:07 +00:00
WhichDataType which(argument_type);
if (res)
return res;
2018-09-10 17:09:07 +00:00
else if (which.isDate())
2019-02-11 19:26:32 +00:00
return std::make_shared<AggregateFunctionUniq<DataTypeDate::FieldType, Data<DataTypeDate::FieldType>>>(argument_types);
else if (which.isDate32())
return std::make_shared<AggregateFunctionUniq<DataTypeDate32::FieldType, Data<DataTypeDate32::FieldType>>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isDateTime())
2019-02-11 19:26:32 +00:00
return std::make_shared<AggregateFunctionUniq<DataTypeDateTime::FieldType, Data<DataTypeDateTime::FieldType>>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isStringOrFixedString())
2019-02-12 09:31:20 +00:00
return std::make_shared<AggregateFunctionUniq<String, Data<String>>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isUUID())
2019-02-11 19:26:32 +00:00
return std::make_shared<AggregateFunctionUniq<DataTypeUUID::FieldType, Data<DataTypeUUID::FieldType>>>(argument_types);
2018-09-10 17:09:07 +00:00
else if (which.isTuple())
{
if (use_exact_hash_function)
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, true>>(argument_types);
else
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, true>>(argument_types);
}
}
/// "Variadic" method also works as a fallback generic case for single argument.
if (use_exact_hash_function)
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, true, false>>(argument_types);
else
return std::make_shared<AggregateFunctionUniqVariadic<DataForVariadic, false, false>>(argument_types);
2015-09-24 12:40:36 +00:00
}
}
void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
factory.registerFunction("uniq",
{createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>, properties});
factory.registerFunction("uniqHLL12",
{createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>, properties});
factory.registerFunction("uniqExact",
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, properties});
#if USE_DATASKETCHES
2021-05-11 14:36:26 +00:00
factory.registerFunction("uniqTheta",
{createAggregateFunctionUniq<AggregateFunctionUniqThetaData, AggregateFunctionUniqThetaData>, properties});
#endif
2015-09-24 12:40:36 +00:00
}
}