2018-10-17 11:45:14 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Common/CombinedCardinalityEstimator.h>
|
2019-02-10 17:40:52 +00:00
|
|
|
#include <Common/FieldVisitors.h>
|
|
|
|
#include <Common/SipHash.h>
|
|
|
|
#include <Common/typeid_cast.h>
|
2019-08-21 02:28:04 +00:00
|
|
|
#include <Common/assert_cast.h>
|
2018-10-17 11:45:14 +00:00
|
|
|
|
|
|
|
#include <DataTypes/DataTypeTuple.h>
|
|
|
|
#include <DataTypes/DataTypeUUID.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
|
|
|
|
#include <AggregateFunctions/IAggregateFunction.h>
|
|
|
|
#include <AggregateFunctions/UniqCombinedBiasData.h>
|
|
|
|
#include <AggregateFunctions/UniqVariadicHash.h>
|
|
|
|
|
|
|
|
#include <ext/bit_cast.h>
|
|
|
|
|
|
|
|
#include <Columns/ColumnVector.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace detail
|
|
|
|
{
|
2019-10-07 21:44:37 +00:00
|
|
|
/** Hashing policy for uniqCombined/uniqCombined64.
  * The generic version passes the value through intHash64 and converts the result to Ret,
  * the hash value type selected by the caller.
  */
template <typename T, typename Ret>
struct AggregateFunctionUniqCombinedTraits
{
    static Ret hash(T x)
    {
        const auto hashed = intHash64(x);
        return static_cast<Ret>(hashed);
    }
};
|
|
|
|
|
2019-10-07 21:44:37 +00:00
|
|
|
template <typename Ret>
|
|
|
|
struct AggregateFunctionUniqCombinedTraits<UInt128, Ret>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
2019-10-07 21:44:37 +00:00
|
|
|
static Ret hash(UInt128 x)
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
return sipHash64(x);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-10-07 21:44:37 +00:00
|
|
|
template <typename Ret>
|
|
|
|
struct AggregateFunctionUniqCombinedTraits<Float32, Ret>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
2019-10-07 21:44:37 +00:00
|
|
|
static Ret hash(Float32 x)
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
UInt64 res = ext::bit_cast<UInt64>(x);
|
2019-10-07 21:44:37 +00:00
|
|
|
return static_cast<Ret>(intHash64(res));
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-10-07 21:44:37 +00:00
|
|
|
template <typename Ret>
|
|
|
|
struct AggregateFunctionUniqCombinedTraits<Float64, Ret>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
2019-10-07 21:44:37 +00:00
|
|
|
static Ret hash(Float64 x)
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
UInt64 res = ext::bit_cast<UInt64>(x);
|
2019-10-07 21:44:37 +00:00
|
|
|
return static_cast<Ret>(intHash64(res));
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-06-13 10:37:13 +00:00
|
|
|
}
|
2018-10-17 11:45:14 +00:00
|
|
|
|
2019-10-08 20:26:31 +00:00
|
|
|
// Unlike HashTableGrower always grows to power of 2.
|
|
|
|
struct UniqCombinedHashTableGrower : public HashTableGrower<>
|
|
|
|
{
|
|
|
|
void increaseSize() { ++size_degree; }
|
|
|
|
};
|
2018-10-17 11:45:14 +00:00
|
|
|
|
2018-10-22 17:18:08 +00:00
|
|
|
template <typename Key, UInt8 K>
|
2018-10-18 15:23:42 +00:00
|
|
|
struct AggregateFunctionUniqCombinedDataWithKey
|
2018-10-23 14:59:24 +00:00
|
|
|
{
|
2018-10-24 14:28:23 +00:00
|
|
|
// TODO(ilezhankin): pre-generate values for |UniqCombinedBiasData|,
|
|
|
|
// at the moment gen-bias-data.py script doesn't work.
|
2018-10-25 13:17:29 +00:00
|
|
|
|
|
|
|
// We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal.
|
|
|
|
// The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits.
|
|
|
|
// For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4.
|
2019-10-08 20:26:31 +00:00
|
|
|
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>, 16, K - 5 + (sizeof(Key) == sizeof(UInt32)), K, TrivialHash, Key>;
|
2018-10-23 14:59:24 +00:00
|
|
|
|
|
|
|
Set set;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <typename Key>
|
|
|
|
struct AggregateFunctionUniqCombinedDataWithKey<Key, 17>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
using Set = CombinedCardinalityEstimator<Key,
|
2019-10-08 20:26:31 +00:00
|
|
|
HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>,
|
2018-10-17 11:45:14 +00:00
|
|
|
16,
|
2019-10-08 20:26:31 +00:00
|
|
|
12 + (sizeof(Key) == sizeof(UInt32)),
|
2018-10-23 14:59:24 +00:00
|
|
|
17,
|
2018-10-17 11:45:14 +00:00
|
|
|
TrivialHash,
|
|
|
|
Key,
|
|
|
|
HyperLogLogBiasEstimator<UniqCombinedBiasData>,
|
|
|
|
HyperLogLogMode::FullFeatured>;
|
|
|
|
|
2018-10-22 17:18:08 +00:00
|
|
|
Set set;
|
2018-10-17 11:45:14 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-10-07 21:44:37 +00:00
|
|
|
template <typename T, UInt8 K, typename HashValueType>
|
|
|
|
struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey<HashValueType, K>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-12-24 09:06:44 +00:00
|
|
|
/// For String keys, 64 bit hash is always used (both for uniqCombined and uniqCombined64),
|
2019-10-09 17:26:01 +00:00
|
|
|
/// because of backwards compatibility (64 bit hash was already used for uniqCombined).
|
2019-10-07 21:44:37 +00:00
|
|
|
template <UInt8 K, typename HashValueType>
|
|
|
|
struct AggregateFunctionUniqCombinedData<String, K, HashValueType> : public AggregateFunctionUniqCombinedDataWithKey<UInt64 /*always*/, K>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-10-07 21:44:37 +00:00
|
|
|
template <typename T, UInt8 K, typename HashValueType>
|
2018-10-17 11:45:14 +00:00
|
|
|
class AggregateFunctionUniqCombined final
|
2019-10-07 21:44:37 +00:00
|
|
|
: public IAggregateFunctionDataHelper<AggregateFunctionUniqCombinedData<T, K, HashValueType>, AggregateFunctionUniqCombined<T, K, HashValueType>>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
public:
|
2019-02-13 11:50:41 +00:00
|
|
|
AggregateFunctionUniqCombined(const DataTypes & argument_types_, const Array & params_)
|
2019-10-07 21:44:37 +00:00
|
|
|
: IAggregateFunctionDataHelper<AggregateFunctionUniqCombinedData<T, K, HashValueType>, AggregateFunctionUniqCombined<T, K, HashValueType>>(argument_types_, params_) {}
|
2019-02-11 19:26:32 +00:00
|
|
|
|
2018-10-17 11:45:14 +00:00
|
|
|
String getName() const override
|
|
|
|
{
|
2019-10-07 21:44:37 +00:00
|
|
|
if constexpr (std::is_same_v<HashValueType, UInt64>)
|
|
|
|
return "uniqCombined64";
|
|
|
|
else
|
|
|
|
return "uniqCombined";
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
DataTypePtr getReturnType() const override
|
|
|
|
{
|
|
|
|
return std::make_shared<DataTypeUInt64>();
|
|
|
|
}
|
|
|
|
|
|
|
|
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
|
|
|
{
|
|
|
|
if constexpr (!std::is_same_v<T, String>)
|
|
|
|
{
|
2019-09-30 15:45:53 +00:00
|
|
|
const auto & value = assert_cast<const ColumnVector<T> &>(*columns[0]).getElement(row_num);
|
2019-10-07 21:44:37 +00:00
|
|
|
this->data(place).set.insert(detail::AggregateFunctionUniqCombinedTraits<T, HashValueType>::hash(value));
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
StringRef value = columns[0]->getDataAt(row_num);
|
2018-10-22 17:18:08 +00:00
|
|
|
this->data(place).set.insert(CityHash_v1_0_2::CityHash64(value.data, value.size));
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
|
|
|
{
|
2018-10-22 17:18:08 +00:00
|
|
|
this->data(place).set.merge(this->data(rhs).set);
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
|
|
|
{
|
2018-10-22 17:18:08 +00:00
|
|
|
this->data(place).set.write(buf);
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
|
|
|
{
|
2018-10-22 17:18:08 +00:00
|
|
|
this->data(place).set.read(buf);
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/** For multiple arguments. To compute, hashes them.
|
|
|
|
* You can pass multiple arguments as is; You can also pass one argument - a tuple.
|
|
|
|
* But (for the possibility of efficient implementation), you can not pass several arguments, among which there are tuples.
|
|
|
|
*/
|
2019-10-07 21:44:37 +00:00
|
|
|
template <bool is_exact, bool argument_is_tuple, UInt8 K, typename HashValueType>
|
|
|
|
class AggregateFunctionUniqCombinedVariadic final : public IAggregateFunctionDataHelper<AggregateFunctionUniqCombinedData<UInt64, K, HashValueType>,
|
|
|
|
AggregateFunctionUniqCombinedVariadic<is_exact, argument_is_tuple, K, HashValueType>>
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
private:
|
|
|
|
size_t num_args = 0;
|
|
|
|
|
|
|
|
public:
|
2019-02-11 19:26:32 +00:00
|
|
|
explicit AggregateFunctionUniqCombinedVariadic(const DataTypes & arguments, const Array & params)
|
2019-10-07 21:44:37 +00:00
|
|
|
: IAggregateFunctionDataHelper<AggregateFunctionUniqCombinedData<UInt64, K, HashValueType>,
|
|
|
|
AggregateFunctionUniqCombinedVariadic<is_exact, argument_is_tuple, K, HashValueType>>(arguments, params)
|
2018-10-17 11:45:14 +00:00
|
|
|
{
|
|
|
|
if (argument_is_tuple)
|
|
|
|
num_args = typeid_cast<const DataTypeTuple &>(*arguments[0]).getElements().size();
|
|
|
|
else
|
|
|
|
num_args = arguments.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override
|
|
|
|
{
|
2019-10-07 21:44:37 +00:00
|
|
|
if constexpr (std::is_same_v<HashValueType, UInt64>)
|
|
|
|
return "uniqCombined64";
|
|
|
|
else
|
|
|
|
return "uniqCombined";
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
DataTypePtr getReturnType() const override
|
|
|
|
{
|
|
|
|
return std::make_shared<DataTypeUInt64>();
|
|
|
|
}
|
|
|
|
|
|
|
|
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
|
|
|
{
|
2019-10-07 21:44:37 +00:00
|
|
|
this->data(place).set.insert(typename AggregateFunctionUniqCombinedData<UInt64, K, HashValueType>::Set::value_type(
|
2018-10-22 10:00:37 +00:00
|
|
|
UniqVariadicHash<is_exact, argument_is_tuple>::apply(num_args, columns, row_num)));
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
|
|
|
{
|
2018-10-22 17:18:08 +00:00
|
|
|
this->data(place).set.merge(this->data(rhs).set);
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
|
|
|
{
|
2018-10-22 17:18:08 +00:00
|
|
|
this->data(place).set.write(buf);
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
|
|
|
{
|
2018-10-22 17:18:08 +00:00
|
|
|
this->data(place).set.read(buf);
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
|
2018-10-17 11:45:14 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2019-06-13 10:37:13 +00:00
|
|
|
}
|