mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Fix sparse_hashed dict performance with sequential keys (wrong hash function)
In #27152 the hash function for sparse_hash_map had been changed to std::hash<> switch it back to DefaultHash<> (ClickHouse builtin), since std::hash<> for numeric keys returns itself and this does not works great with sparse_hash_map. I've tried the example from #32480 and using some hash fixes the performance of sparse_hashed layout. Fixes: #32480 v2: Add comments for SparseHashMap
This commit is contained in:
parent
bd3a5bd4e2
commit
e16891713d
@ -4,6 +4,16 @@
|
||||
|
||||
#include <sparsehash/sparse_hash_map>
|
||||
|
||||
/// Hash function for sparse hash table is very important, for example:
|
||||
///
|
||||
/// - DefaultHash<> (from HashTable/Hash.h> works better then std::hash<>
|
||||
/// in case of sequential set of keys, but with random access to this set, i.e.
|
||||
///
|
||||
/// SELECT number FROM numbers(3000000) ORDER BY rand()
|
||||
///
|
||||
/// - but std::hash<> works good enough (and sometimes better) for generic cases
|
||||
///
|
||||
/// So std::hash<> by default is preferred.
|
||||
template <class Key, class T, class HashFcn = std::hash<Key>,
|
||||
class EqualKey = std::equal_to<Key>,
|
||||
class Alloc = google::libc_allocator_with_realloc<std::pair<const Key, T>>>
|
||||
|
@ -124,11 +124,22 @@ private:
|
||||
HashMap<UInt64, Value>,
|
||||
HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>>>;
|
||||
|
||||
/// Here we use SparseHashMap with DefaultHash<> for the following reasons:
|
||||
///
|
||||
/// - DefaultHash<> is used for HashMap
|
||||
/// - DefaultHash<> (from HashTable/Hash.h> works better then std::hash<>
|
||||
/// in case of sequential set of keys, but with random access to this set, i.e.
|
||||
///
|
||||
/// SELECT number FROM numbers(3000000) ORDER BY rand()
|
||||
///
|
||||
/// And even though std::hash<> works better in some other cases,
|
||||
/// DefaultHash<> is preferred since the difference for this particular
|
||||
/// case is significant, i.e. it can be 10x+.
|
||||
template <typename Value>
|
||||
using CollectionTypeSparse = std::conditional_t<
|
||||
dictionary_key_type == DictionaryKeyType::Simple,
|
||||
SparseHashMap<UInt64, Value>,
|
||||
SparseHashMap<StringRef, Value>>;
|
||||
SparseHashMap<UInt64, Value, DefaultHash<KeyType>>,
|
||||
SparseHashMap<StringRef, Value, DefaultHash<KeyType>>>;
|
||||
|
||||
template <typename Value>
|
||||
using CollectionType = std::conditional_t<sparse, CollectionTypeSparse<Value>, CollectionTypeNonSparse<Value>>;
|
||||
|
Loading…
Reference in New Issue
Block a user