add ClearableSetVariant

This commit is contained in:
Yuri Dyachenko 2017-04-07 19:13:25 +03:00 committed by alexey-milovidov
parent ad79394799
commit 23e824d7a9
2 changed files with 82 additions and 41 deletions

View File

@ -4,13 +4,17 @@
namespace DB
{
template class SetVariantsTemplate<NonClearableSet>;
template class SetVariantsTemplate<ClearableSet>;
namespace ErrorCodes
{
extern const int UNKNOWN_SET_DATA_VARIANT;
extern const int LOGICAL_ERROR;
}
void SetVariants::init(Type type_)
template <typename Variant>
void SetVariantsTemplate<Variant>::init(Type type_)
{
type = type_;
@ -19,7 +23,7 @@ void SetVariants::init(Type type_)
case Type::EMPTY: break;
#define M(NAME) \
case Type::NAME: NAME = std::make_unique<decltype(NAME)::element_type>(); break;
case Type::NAME: NAME = std::make_unique<typename decltype(NAME)::element_type>(); break;
APPLY_FOR_SET_VARIANTS(M)
#undef M
@ -28,7 +32,8 @@ void SetVariants::init(Type type_)
}
}
size_t SetVariants::getTotalRowCount() const
template <typename Variant>
size_t SetVariantsTemplate<Variant>::getTotalRowCount() const
{
switch (type)
{
@ -44,7 +49,8 @@ size_t SetVariants::getTotalRowCount() const
}
}
size_t SetVariants::getTotalByteCount() const
template <typename Variant>
size_t SetVariantsTemplate<Variant>::getTotalByteCount() const
{
switch (type)
{
@ -60,7 +66,8 @@ size_t SetVariants::getTotalByteCount() const
}
}
SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
template <typename Variant>
typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
{
/// Check if at least one of the specified keys is nullable.
/// Create a set of nested key columns from the corresponding key columns.
@ -108,7 +115,7 @@ SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_col
/// which specifies whether its value is null or not.
size_t size_of_field = nested_key_columns[0]->sizeOfField();
if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8))
return SetVariants::Type::nullable_keys128;
return Type::nullable_keys128;
else
throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.",
ErrorCodes::LOGICAL_ERROR};
@ -121,13 +128,13 @@ SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_col
if (keys_bytes > (std::numeric_limits<size_t>::max() - std::tuple_size<KeysNullMap<UInt128>>::value))
throw Exception{"Aggregator: keys sizes overflow", ErrorCodes::LOGICAL_ERROR};
if ((std::tuple_size<KeysNullMap<UInt128>>::value + keys_bytes) <= 16)
return SetVariants::Type::nullable_keys128;
return Type::nullable_keys128;
if ((std::tuple_size<KeysNullMap<UInt256>>::value + keys_bytes) <= 32)
return SetVariants::Type::nullable_keys256;
return Type::nullable_keys256;
}
/// Fallback case.
return SetVariants::Type::hashed;
return Type::hashed;
}
/// If there is one numeric key that fits into 64 bits
@ -135,31 +142,31 @@ SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_col
{
size_t size_of_field = nested_key_columns[0]->sizeOfField();
if (size_of_field == 1)
return SetVariants::Type::key8;
return Type::key8;
if (size_of_field == 2)
return SetVariants::Type::key16;
return Type::key16;
if (size_of_field == 4)
return SetVariants::Type::key32;
return Type::key32;
if (size_of_field == 8)
return SetVariants::Type::key64;
return Type::key64;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.", ErrorCodes::LOGICAL_ERROR);
}
/// If the keys fit in N bits, we will use a hash table for N-bit-packed keys
if (all_fixed && keys_bytes <= 16)
return SetVariants::Type::keys128;
return Type::keys128;
if (all_fixed && keys_bytes <= 32)
return SetVariants::Type::keys256;
return Type::keys256;
/// If there is a single string key, use a hash table of its values.
if (keys_size == 1 && (typeid_cast<const ColumnString *>(nested_key_columns[0]) || typeid_cast<const ColumnConstString *>(nested_key_columns[0])))
return SetVariants::Type::key_string;
return Type::key_string;
if (keys_size == 1 && typeid_cast<const ColumnFixedString *>(nested_key_columns[0]))
return SetVariants::Type::key_fixed_string;
return Type::key_fixed_string;
/// Otherwise, will use set of cryptographic hashes of unambiguously serialized values.
return SetVariants::Type::hashed;
return Type::hashed;
}
}

View File

@ -6,6 +6,7 @@
#include <Common/Arena.h>
#include <Common/HashTable/HashSet.h>
#include <Common/HashTable/ClearableHashSet.h>
#include <Common/UInt128.h>
@ -301,48 +302,78 @@ struct SetMethodHashed
/** Different variants of the set implementation.
*/
struct SetVariants
struct NonClearableSet
{
/// TODO Use a bit- or byte- set for these two variants.
std::unique_ptr<SetMethodOneNumber<UInt8, HashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
std::unique_ptr<SetMethodOneNumber<UInt16, HashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
std::unique_ptr<SetMethodOneNumber<UInt8, HashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
std::unique_ptr<SetMethodOneNumber<UInt16, HashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
/** As an experiment, the possibility of using SmallSet while the number of elements in the set is small
* (converting to a full-fledged HashSet when necessary) was also tested.
* But this experiment showed that there is an advantage only in rare cases.
*/
std::unique_ptr<SetMethodOneNumber<UInt32, HashSet<UInt32, HashCRC32<UInt32>>>> key32;
std::unique_ptr<SetMethodOneNumber<UInt64, HashSet<UInt64, HashCRC32<UInt64>>>> key64;
std::unique_ptr<SetMethodString<HashSetWithSavedHash<StringRef>>> key_string;
std::unique_ptr<SetMethodFixedString<HashSetWithSavedHash<StringRef>>> key_fixed_string;
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>>> keys128;
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>>> keys256;
std::unique_ptr<SetMethodHashed<HashSet<UInt128, UInt128TrivialHash>>> hashed;
std::unique_ptr<SetMethodOneNumber<UInt32, HashSet<UInt32, HashCRC32<UInt32>>>> key32;
std::unique_ptr<SetMethodOneNumber<UInt64, HashSet<UInt64, HashCRC32<UInt64>>>> key64;
std::unique_ptr<SetMethodString<HashSetWithSavedHash<StringRef>>> key_string;
std::unique_ptr<SetMethodFixedString<HashSetWithSavedHash<StringRef>>> key_fixed_string;
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>>> keys128;
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>>> keys256;
std::unique_ptr<SetMethodHashed<HashSet<UInt128, UInt128TrivialHash>>> hashed;
/// Support for nullable keys (for DISTINCT implementation).
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
/** Unlike Aggregator, the concat method is not used here.
* This is because the hashed method, although slower, uses less RAM in this case,
* since the key values themselves are not stored when it is used.
*/
};
/// Holder of set-method members (key8, key16, ..., hashed) whose underlying tables are
/// ClearableHashSet / ClearableHashSetWithSavedHash instantiations.
/// Every member is an owning std::unique_ptr to the corresponding SetMethod* object.
struct ClearableSet
{
/// TODO Use a bit- or byte- set for these two variants.
std::unique_ptr<SetMethodOneNumber<UInt8, ClearableHashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
std::unique_ptr<SetMethodOneNumber<UInt16, ClearableHashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
std::unique_ptr<SetMethodOneNumber<UInt32, ClearableHashSet<UInt32, HashCRC32<UInt32>>>> key32;
std::unique_ptr<SetMethodOneNumber<UInt64, ClearableHashSet<UInt64, HashCRC32<UInt64>>>> key64;
std::unique_ptr<SetMethodString<ClearableHashSetWithSavedHash<StringRef>>> key_string;
std::unique_ptr<SetMethodFixedString<ClearableHashSetWithSavedHash<StringRef>>> key_fixed_string;
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt128, UInt128HashCRC32>>> keys128;
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt256, UInt256HashCRC32>>> keys256;
std::unique_ptr<SetMethodHashed<ClearableHashSet<UInt128, UInt128TrivialHash>>> hashed;
/// Support for nullable keys (for DISTINCT implementation).
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
/** Unlike Aggregator, the concat method is not used here.
* This is because the hashed method, although slower, uses less RAM in this case,
* since the key values themselves are not stored when it is used.
*/
};
template <typename Variant>
struct SetVariantsTemplate: public Variant
{
Arena string_pool;
#define APPLY_FOR_SET_VARIANTS(M) \
M(key8) \
M(key16) \
M(key32) \
M(key64) \
M(key_string) \
M(key_fixed_string) \
M(keys128) \
M(keys256) \
M(nullable_keys128) \
M(nullable_keys256) \
M(key8) \
M(key16) \
M(key32) \
M(key64) \
M(key_string) \
M(key_fixed_string) \
M(keys128) \
M(keys256) \
M(nullable_keys128) \
M(nullable_keys256) \
M(hashed)
#define M(NAME) using Variant::NAME;
APPLY_FOR_SET_VARIANTS(M)
#undef M
enum class Type
{
EMPTY,
@ -365,4 +396,7 @@ struct SetVariants
size_t getTotalByteCount() const;
};
using SetVariants = SetVariantsTemplate<NonClearableSet>;
using ClearableSetVariants = SetVariantsTemplate<ClearableSet>;
}