Merge pull request #32536 from azat/sparse_hashed-dict-fix

Fix sparse_hashed dict performance with sequential keys (wrong hash function)
This commit is contained in:
Maksim Kita 2021-12-14 17:44:47 +03:00 committed by GitHub
commit 233505b665
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 14 additions and 16 deletions

View File

@ -1,10 +0,0 @@
#pragma once
/// SparseHashMap is a wrapper for google::sparse_hash_map.
#include <sparsehash/sparse_hash_map>
/// Convenience alias over google::sparse_hash_map: it takes the same five
/// template parameters in the same order. Unless the caller overrides them,
/// keys are hashed with std::hash<Key>, compared with std::equal_to<Key>, and
/// storage comes from google::libc_allocator_with_realloc.
template <
    class Key,
    class Mapped,
    class Hasher = std::hash<Key>,
    class KeyEqual = std::equal_to<Key>,
    class Allocator = google::libc_allocator_with_realloc<std::pair<const Key, Mapped>>>
using SparseHashMap = google::sparse_hash_map<Key, Mapped, Hasher, KeyEqual, Allocator>;

View File

@ -5,8 +5,6 @@
#include <variant>
#include <optional>
#include <Common/SparseHashMap.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
#include <Core/Block.h>

View File

@ -4,8 +4,7 @@
#include <memory>
#include <variant>
#include <optional>
#include <Common/SparseHashMap.h>
#include <sparsehash/sparse_hash_map>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashSet.h>
@ -124,11 +123,22 @@ private:
HashMap<UInt64, Value>,
HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>>>;
/// Here we use sparse_hash_map with DefaultHash<> for the following reasons:
///
/// - DefaultHash<> is used for HashMap
/// - DefaultHash<> (from HashTable/Hash.h) works better than std::hash<>
/// in case of sequential set of keys, but with random access to this set, i.e.
///
/// SELECT number FROM numbers(3000000) ORDER BY rand()
///
/// And even though std::hash<> works better in some other cases,
/// DefaultHash<> is preferred since the difference for this particular
/// case is significant, i.e. it can be 10x+.
///
/// Selects the sparse map type by key kind: the UInt64-keyed map when
/// dictionary_key_type == DictionaryKeyType::Simple, the StringRef-keyed map
/// otherwise.
///
/// NOTE(review): this span reads like a diff rendering with the +/- markers
/// stripped — it lists four alternatives where std::conditional_t takes two.
/// Presumably the two SparseHashMap<...> lines are the removed (old) form and
/// the two google::sparse_hash_map<..., DefaultHash<KeyType>> lines are the
/// added (new) form; confirm against the repository before relying on it.
template <typename Value>
using CollectionTypeSparse = std::conditional_t<
dictionary_key_type == DictionaryKeyType::Simple,
SparseHashMap<UInt64, Value>,
SparseHashMap<StringRef, Value>>;
google::sparse_hash_map<UInt64, Value, DefaultHash<KeyType>>,
google::sparse_hash_map<StringRef, Value, DefaultHash<KeyType>>>;
/// Container type used to store values of type Value: the sparse variant
/// when the `sparse` flag is set, the non-sparse variant otherwise.
template <typename Value>
using CollectionType = std::conditional_t<
    sparse,
    CollectionTypeSparse<Value>,
    CollectionTypeNonSparse<Value>>;