mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
add ClearableSetVariant
This commit is contained in:
parent
ad79394799
commit
23e824d7a9
@ -4,13 +4,17 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template class SetVariantsTemplate<NonClearableSet>;
|
||||
template class SetVariantsTemplate<ClearableSet>;
|
||||
|
||||
/// Error codes declared `extern` here, so their definitions live in another translation unit.
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_SET_DATA_VARIANT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void SetVariants::init(Type type_)
|
||||
template <typename Variant>
|
||||
void SetVariantsTemplate<Variant>::init(Type type_)
|
||||
{
|
||||
type = type_;
|
||||
|
||||
@ -19,7 +23,7 @@ void SetVariants::init(Type type_)
|
||||
case Type::EMPTY: break;
|
||||
|
||||
#define M(NAME) \
|
||||
case Type::NAME: NAME = std::make_unique<decltype(NAME)::element_type>(); break;
|
||||
case Type::NAME: NAME = std::make_unique<typename decltype(NAME)::element_type>(); break;
|
||||
APPLY_FOR_SET_VARIANTS(M)
|
||||
#undef M
|
||||
|
||||
@ -28,7 +32,8 @@ void SetVariants::init(Type type_)
|
||||
}
|
||||
}
|
||||
|
||||
size_t SetVariants::getTotalRowCount() const
|
||||
template <typename Variant>
|
||||
size_t SetVariantsTemplate<Variant>::getTotalRowCount() const
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
@ -44,7 +49,8 @@ size_t SetVariants::getTotalRowCount() const
|
||||
}
|
||||
}
|
||||
|
||||
size_t SetVariants::getTotalByteCount() const
|
||||
template <typename Variant>
|
||||
size_t SetVariantsTemplate<Variant>::getTotalByteCount() const
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
@ -60,7 +66,8 @@ size_t SetVariants::getTotalByteCount() const
|
||||
}
|
||||
}
|
||||
|
||||
SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
|
||||
template <typename Variant>
|
||||
typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
|
||||
{
|
||||
/// Check if at least one of the specified keys is nullable.
|
||||
/// Create a set of nested key columns from the corresponding key columns.
|
||||
@ -108,7 +115,7 @@ SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_col
|
||||
/// which specifies whether its value is null or not.
|
||||
size_t size_of_field = nested_key_columns[0]->sizeOfField();
|
||||
if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8))
|
||||
return SetVariants::Type::nullable_keys128;
|
||||
return Type::nullable_keys128;
|
||||
else
|
||||
throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.",
|
||||
ErrorCodes::LOGICAL_ERROR};
|
||||
@ -121,13 +128,13 @@ SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_col
|
||||
if (keys_bytes > (std::numeric_limits<size_t>::max() - std::tuple_size<KeysNullMap<UInt128>>::value))
|
||||
throw Exception{"Aggregator: keys sizes overflow", ErrorCodes::LOGICAL_ERROR};
|
||||
if ((std::tuple_size<KeysNullMap<UInt128>>::value + keys_bytes) <= 16)
|
||||
return SetVariants::Type::nullable_keys128;
|
||||
return Type::nullable_keys128;
|
||||
if ((std::tuple_size<KeysNullMap<UInt256>>::value + keys_bytes) <= 32)
|
||||
return SetVariants::Type::nullable_keys256;
|
||||
return Type::nullable_keys256;
|
||||
}
|
||||
|
||||
/// Fallback case.
|
||||
return SetVariants::Type::hashed;
|
||||
return Type::hashed;
|
||||
}
|
||||
|
||||
/// If there is one numeric key that fits into 64 bits
|
||||
@ -135,31 +142,31 @@ SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_col
|
||||
{
|
||||
size_t size_of_field = nested_key_columns[0]->sizeOfField();
|
||||
if (size_of_field == 1)
|
||||
return SetVariants::Type::key8;
|
||||
return Type::key8;
|
||||
if (size_of_field == 2)
|
||||
return SetVariants::Type::key16;
|
||||
return Type::key16;
|
||||
if (size_of_field == 4)
|
||||
return SetVariants::Type::key32;
|
||||
return Type::key32;
|
||||
if (size_of_field == 8)
|
||||
return SetVariants::Type::key64;
|
||||
return Type::key64;
|
||||
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
/// If the keys fit in N bits, we will use a hash table for N-bit-packed keys
|
||||
if (all_fixed && keys_bytes <= 16)
|
||||
return SetVariants::Type::keys128;
|
||||
return Type::keys128;
|
||||
if (all_fixed && keys_bytes <= 32)
|
||||
return SetVariants::Type::keys256;
|
||||
return Type::keys256;
|
||||
|
||||
/// If there is a single string key, use a hash table of its values.
|
||||
if (keys_size == 1 && (typeid_cast<const ColumnString *>(nested_key_columns[0]) || typeid_cast<const ColumnConstString *>(nested_key_columns[0])))
|
||||
return SetVariants::Type::key_string;
|
||||
return Type::key_string;
|
||||
|
||||
if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(nested_key_columns[0]))
|
||||
return SetVariants::Type::key_fixed_string;
|
||||
return Type::key_fixed_string;
|
||||
|
||||
/// Otherwise, will use set of cryptographic hashes of unambiguously serialized values.
|
||||
return SetVariants::Type::hashed;
|
||||
return Type::hashed;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/ClearableHashSet.h>
|
||||
#include <Common/UInt128.h>
|
||||
|
||||
|
||||
@ -301,48 +302,78 @@ struct SetMethodHashed
|
||||
|
||||
/** Different variants of the set implementation.
|
||||
*/
|
||||
struct SetVariants
|
||||
/// Set-method variants whose underlying tables are HashSet / HashSetWithSavedHash.
struct NonClearableSet
|
||||
{
|
||||
/// TODO Use a bit- or byte- set for these two variants.
|
||||
std::unique_ptr<SetMethodOneNumber<UInt8, HashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt16, HashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt8, HashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt16, HashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
|
||||
|
||||
/** As an experiment, the possibility of using SmallSet was also tested,
|
||||
* while the number of elements in the set is small (and, if necessary, converting to a full-fledged HashSet).
|
||||
* But this experiment showed that there is an advantage only in rare cases.
|
||||
*/
|
||||
std::unique_ptr<SetMethodOneNumber<UInt32, HashSet<UInt32, HashCRC32<UInt32>>>> key32;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt64, HashSet<UInt64, HashCRC32<UInt64>>>> key64;
|
||||
std::unique_ptr<SetMethodString<HashSetWithSavedHash<StringRef>>> key_string;
|
||||
std::unique_ptr<SetMethodFixedString<HashSetWithSavedHash<StringRef>>> key_fixed_string;
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>>> keys128;
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>>> keys256;
|
||||
std::unique_ptr<SetMethodHashed<HashSet<UInt128, UInt128TrivialHash>>> hashed;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt32, HashSet<UInt32, HashCRC32<UInt32>>>> key32;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt64, HashSet<UInt64, HashCRC32<UInt64>>>> key64;
|
||||
std::unique_ptr<SetMethodString<HashSetWithSavedHash<StringRef>>> key_string;
|
||||
std::unique_ptr<SetMethodFixedString<HashSetWithSavedHash<StringRef>>> key_fixed_string;
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>>> keys128;
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>>> keys256;
|
||||
std::unique_ptr<SetMethodHashed<HashSet<UInt128, UInt128TrivialHash>>> hashed;
|
||||
|
||||
/// Support for nullable keys (for DISTINCT implementation).
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
|
||||
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
|
||||
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
|
||||
/** Unlike Aggregator, the concat method is not used here.
|
||||
* This is because the hashed method, although slower, uses less RAM in this case,
|
||||
* since the key values themselves are not stored when it is used.
|
||||
*/
|
||||
};
|
||||
|
||||
/// Set-method variants whose underlying tables are ClearableHashSet / ClearableHashSetWithSavedHash.
struct ClearableSet
|
||||
{
|
||||
/// TODO Use a bit- or byte- set for these two variants.
|
||||
std::unique_ptr<SetMethodOneNumber<UInt8, ClearableHashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt16, ClearableHashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
|
||||
|
||||
std::unique_ptr<SetMethodOneNumber<UInt32, ClearableHashSet<UInt32, HashCRC32<UInt32>>>> key32;
|
||||
std::unique_ptr<SetMethodOneNumber<UInt64, ClearableHashSet<UInt64, HashCRC32<UInt64>>>> key64;
|
||||
std::unique_ptr<SetMethodString<ClearableHashSetWithSavedHash<StringRef>>> key_string;
|
||||
std::unique_ptr<SetMethodFixedString<ClearableHashSetWithSavedHash<StringRef>>> key_fixed_string;
|
||||
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt128, UInt128HashCRC32>>> keys128;
|
||||
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt256, UInt256HashCRC32>>> keys256;
|
||||
std::unique_ptr<SetMethodHashed<ClearableHashSet<UInt128, UInt128TrivialHash>>> hashed;
|
||||
|
||||
/// Support for nullable keys (for DISTINCT implementation).
|
||||
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
|
||||
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
|
||||
/** Unlike Aggregator, the concat method is not used here.
|
||||
* This is because the hashed method, although slower, uses less RAM in this case,
|
||||
* since the key values themselves are not stored when it is used.
|
||||
*/
|
||||
};
|
||||
|
||||
template <typename Variant>
|
||||
struct SetVariantsTemplate: public Variant
|
||||
{
|
||||
Arena string_pool;
|
||||
|
||||
#define APPLY_FOR_SET_VARIANTS(M) \
|
||||
M(key8) \
|
||||
M(key16) \
|
||||
M(key32) \
|
||||
M(key64) \
|
||||
M(key_string) \
|
||||
M(key_fixed_string) \
|
||||
M(keys128) \
|
||||
M(keys256) \
|
||||
M(nullable_keys128) \
|
||||
M(nullable_keys256) \
|
||||
M(key8) \
|
||||
M(key16) \
|
||||
M(key32) \
|
||||
M(key64) \
|
||||
M(key_string) \
|
||||
M(key_fixed_string) \
|
||||
M(keys128) \
|
||||
M(keys256) \
|
||||
M(nullable_keys128) \
|
||||
M(nullable_keys256) \
|
||||
M(hashed)
|
||||
|
||||
#define M(NAME) using Variant::NAME;
|
||||
APPLY_FOR_SET_VARIANTS(M)
|
||||
#undef M
|
||||
|
||||
enum class Type
|
||||
{
|
||||
EMPTY,
|
||||
@ -365,4 +396,7 @@ struct SetVariants
|
||||
size_t getTotalByteCount() const;
|
||||
};
|
||||
|
||||
using SetVariants = SetVariantsTemplate<NonClearableSet>;
|
||||
using ClearableSetVariants = SetVariantsTemplate<ClearableSet>;
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user