mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 13:42:02 +00:00
Simplify uniqCombined
This commit is contained in:
parent
b0df9e2c97
commit
0e9b418341
@ -42,104 +42,44 @@ namespace ErrorCodes
|
|||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
|
||||||
/** Hash function for uniqCombined/uniqCombined64 (based on Ret).
|
|
||||||
*/
|
|
||||||
template <typename T, typename Ret>
|
|
||||||
struct AggregateFunctionUniqCombinedTraits
|
|
||||||
{
|
|
||||||
static Ret hash(T x)
|
|
||||||
{
|
|
||||||
if constexpr (sizeof(T) > sizeof(UInt64))
|
|
||||||
return static_cast<Ret>(DefaultHash64<T>(x));
|
|
||||||
else
|
|
||||||
return static_cast<Ret>(intHash64(x));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Ret>
|
|
||||||
struct AggregateFunctionUniqCombinedTraits<UInt128, Ret>
|
|
||||||
{
|
|
||||||
static Ret hash(UInt128 x)
|
|
||||||
{
|
|
||||||
return static_cast<Ret>(sipHash64(x));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Ret>
|
|
||||||
struct AggregateFunctionUniqCombinedTraits<Float32, Ret>
|
|
||||||
{
|
|
||||||
static Ret hash(Float32 x)
|
|
||||||
{
|
|
||||||
UInt64 res = bit_cast<UInt64>(x);
|
|
||||||
return static_cast<Ret>(intHash64(res));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Ret>
|
|
||||||
struct AggregateFunctionUniqCombinedTraits<Float64, Ret>
|
|
||||||
{
|
|
||||||
static Ret hash(Float64 x)
|
|
||||||
{
|
|
||||||
UInt64 res = bit_cast<UInt64>(x);
|
|
||||||
return static_cast<Ret>(intHash64(res));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Unlike HashTableGrower always grows to power of 2.
|
// Unlike HashTableGrower always grows to power of 2.
|
||||||
struct UniqCombinedHashTableGrower : public HashTableGrowerWithPrecalculation<>
|
struct UniqCombinedHashTableGrower : public HashTableGrowerWithPrecalculation<>
|
||||||
{
|
{
|
||||||
void increaseSize() { increaseSizeDegree(1); }
|
void increaseSize() { increaseSizeDegree(1); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Key, UInt8 K>
|
|
||||||
struct AggregateFunctionUniqCombinedDataWithKey
|
template <typename T, UInt8 K, typename HashValueType>
|
||||||
|
struct AggregateFunctionUniqCombinedData
|
||||||
{
|
{
|
||||||
|
using Key = std::conditional_t<
|
||||||
|
std::is_same_v<T, String> || std::is_same_v<T, IPv6>,
|
||||||
|
UInt64,
|
||||||
|
HashValueType>;
|
||||||
|
|
||||||
// TODO(ilezhankin): pre-generate values for |UniqCombinedBiasData|,
|
// TODO(ilezhankin): pre-generate values for |UniqCombinedBiasData|,
|
||||||
// at the moment gen-bias-data.py script doesn't work.
|
// at the moment gen-bias-data.py script doesn't work.
|
||||||
|
|
||||||
// We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal.
|
// We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal.
|
||||||
// The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits.
|
// The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits.
|
||||||
// For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4.
|
// For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4.
|
||||||
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>, 16, K - 5 + (sizeof(Key) == sizeof(UInt32)), K, TrivialHash, Key>;
|
|
||||||
|
|
||||||
Set set;
|
/// Note: I don't recall what is special with '17' - probably it is one of the original functions that has to be compatible.
|
||||||
};
|
using Set = CombinedCardinalityEstimator<
|
||||||
|
Key,
|
||||||
template <typename Key>
|
|
||||||
struct AggregateFunctionUniqCombinedDataWithKey<Key, 17>
|
|
||||||
{
|
|
||||||
using Set = CombinedCardinalityEstimator<Key,
|
|
||||||
HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>,
|
HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>,
|
||||||
16,
|
16,
|
||||||
12 + (sizeof(Key) == sizeof(UInt32)),
|
K - 5 + (sizeof(Key) == sizeof(UInt32)),
|
||||||
17,
|
K,
|
||||||
TrivialHash,
|
TrivialHash,
|
||||||
Key,
|
Key,
|
||||||
HyperLogLogBiasEstimator<UniqCombinedBiasData>,
|
std::conditional_t<K == 17, HyperLogLogBiasEstimator<UniqCombinedBiasData>, TrivialBiasEstimator>,
|
||||||
HyperLogLogMode::FullFeatured>;
|
HyperLogLogMode::FullFeatured>;
|
||||||
|
|
||||||
Set set;
|
Set set;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T, UInt8 K, typename HashValueType>
|
|
||||||
struct AggregateFunctionUniqCombinedData : public AggregateFunctionUniqCombinedDataWithKey<HashValueType, K>
|
|
||||||
{
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/// For String keys, 64 bit hash is always used (both for uniqCombined and uniqCombined64),
|
|
||||||
/// because of backwards compatibility (64 bit hash was already used for uniqCombined).
|
|
||||||
template <UInt8 K, typename HashValueType>
|
|
||||||
struct AggregateFunctionUniqCombinedData<String, K, HashValueType> : public AggregateFunctionUniqCombinedDataWithKey<UInt64 /*always*/, K>
|
|
||||||
{
|
|
||||||
};
|
|
||||||
|
|
||||||
template <UInt8 K, typename HashValueType>
|
|
||||||
struct AggregateFunctionUniqCombinedData<IPv6, K, HashValueType> : public AggregateFunctionUniqCombinedDataWithKey<UInt64 /*always*/, K>
|
|
||||||
{
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T, UInt8 K, typename HashValueType>
|
template <typename T, UInt8 K, typename HashValueType>
|
||||||
class AggregateFunctionUniqCombined final
|
class AggregateFunctionUniqCombined final
|
||||||
: public IAggregateFunctionDataHelper<AggregateFunctionUniqCombinedData<T, K, HashValueType>, AggregateFunctionUniqCombined<T, K, HashValueType>>
|
: public IAggregateFunctionDataHelper<AggregateFunctionUniqCombinedData<T, K, HashValueType>, AggregateFunctionUniqCombined<T, K, HashValueType>>
|
||||||
@ -169,7 +109,30 @@ public:
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
const auto & value = assert_cast<const ColumnVector<T> &>(*columns[0]).getElement(row_num);
|
const auto & value = assert_cast<const ColumnVector<T> &>(*columns[0]).getElement(row_num);
|
||||||
this->data(place).set.insert(AggregateFunctionUniqCombinedTraits<T, HashValueType>::hash(value));
|
|
||||||
|
HashValueType hash;
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<T, UInt128>)
|
||||||
|
{
|
||||||
|
/// This specialization exists due to historical circumstances.
|
||||||
|
/// Initially UInt128 was introduced only for UUID, and then the other big-integer types were added.
|
||||||
|
hash = static_cast<HashValueType>(sipHash64(value));
|
||||||
|
}
|
||||||
|
else if constexpr (std::is_floating_point_v<T>)
|
||||||
|
{
|
||||||
|
hash = static_cast<HashValueType>(intHash64(bit_cast<UInt64>(value)));
|
||||||
|
}
|
||||||
|
else if constexpr (sizeof(T) > sizeof(UInt64))
|
||||||
|
{
|
||||||
|
hash = static_cast<HashValueType>(DefaultHash64<T>(value));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// This specialization exists also for compatibility with the initial implementation.
|
||||||
|
hash = static_cast<HashValueType>(intHash64(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
this->data(place).set.insert(hash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user