#pragma once #include #include #include #include #include #include #include #include namespace DB::HashedDictionaryImpl { /// Return true if the type is POD [1] for the purpose of layout (this is not /// the same as what the STL type traits report). /// /// [1]: https://stackoverflow.com/questions/4178175/what-are-aggregates-and-pods-and-how-why-are-they-special/4178176#4178176 /// /// The behaviour was changed in clang-16, see the following for more details: /// - https://github.com/llvm/llvm-project/commit/a8b0c6fa28acced71db33e80bd0b51d00422035b /// - https://github.com/llvm/llvm-project/commit/277123376ce08c98b07c154bf83e4092a5d4d3c6 /// - https://github.com/llvm/llvm-project/issues/62422 /// - https://github.com/llvm/llvm-project/issues/62353 /// - https://github.com/llvm/llvm-project/issues/62358 template constexpr bool isPodLayout() { if constexpr (std::is_same_v) return false; if constexpr (std::is_same_v) return false; if constexpr (std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v) return false; if constexpr (std::is_same_v) return false; if constexpr (std::is_same_v || std::is_same_v) return false; return true; } /// HashMap with packed structure is better than google::sparse_hash_map if the /// pair is small: for sizeof(std::pair) == 16, the RSS for a hash /// table with 1e9 elements will be: /// /// - google::sparse_hash_map : 26GiB /// - HashMap : 35GiB /// - PackedHashMap : 22GiB /// - google::sparse_hash_map: 17GiB /// /// Also note here sizeof(std::pair<>) was used since google::sparse_hash_map /// uses it to store the key/value pair; yes, we can modify google::sparse_hash_map to work /// with packed analog of std::pair, but the allocator overhead is still /// significant, because of tons of reallocations (and those cannot be solved /// with reserve() due to some internals of google::sparse_hash_map) and poor /// jemalloc support of such pattern, which results in 33% fragmentation (in /// comparison with glibc). 
/// /// Plus, since google::sparse_hash_map cannot use a packed structure, it will /// have the same memory footprint for everything from UInt8 to UInt64 values /// and so on. /// /// Returns true when google::sparse_hash_map should be used, otherwise /// PackedHashMap should be used instead. template constexpr bool useSparseHashForHashedDictionary() { if constexpr (!isPodLayout()) return true; if constexpr (!isPodLayout()) return true; /// NOTE: One should not use PackedPairNoInit here since this will /// create an instantiation of this type, and it could be ill-formed. return sizeof(V) > 8; } /// Grower with custom fill limit/load factor (instead of default 50%). /// /// Based on HashTableGrowerWithPrecalculation template class alignas(64) HashTableGrowerWithPrecalculationAndMaxLoadFactor { UInt8 size_degree = initial_size_degree; size_t precalculated_mask = (1ULL << initial_size_degree) - 1; size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1); float max_load_factor = 0.5; /// HashTableGrowerWithPrecalculation has 23, but to decrease memory usage /// at least slightly 19 is used here. Also note that for dictionaries it /// is not that important since they are not that frequently loaded. static constexpr size_t max_size_degree_quadratic = 19; public: static constexpr auto initial_count = 1ULL << initial_size_degree; /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements. 
static constexpr auto performs_linear_probing_with_single_step = true; HashTableGrowerWithPrecalculationAndMaxLoadFactor() = default; explicit HashTableGrowerWithPrecalculationAndMaxLoadFactor(float max_load_factor_) : max_load_factor(max_load_factor_) { increaseSizeDegree(0); } UInt8 sizeDegree() const { return size_degree; } void increaseSizeDegree(UInt8 delta) { size_degree += delta; precalculated_mask = (1ULL << size_degree) - 1; precalculated_max_fill = static_cast((1ULL << size_degree) * max_load_factor); } /// The size of the hash table in the cells. size_t bufSize() const { return 1ULL << size_degree; } /// From the hash value, get the cell number in the hash table. size_t place(size_t x) const { return x & precalculated_mask; } /// The next cell in the collision resolution chain. size_t next(size_t pos) const { return (pos + 1) & precalculated_mask; } /// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it. bool overflow(size_t elems) const { return elems > precalculated_max_fill; } /// Increase the size of the hash table. void increaseSize() { increaseSizeDegree(size_degree >= max_size_degree_quadratic ? 1 : 2); } /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table. void set(size_t num_elems) { if (num_elems <= 1) size_degree = initial_size_degree; else if (initial_size_degree > static_cast(log2(num_elems - 1)) + 2) size_degree = initial_size_degree; else { /// Slightly more optimal than HashTableGrowerWithPrecalculation /// and takes into account max_load_factor. 
size_degree = static_cast(log2(num_elems - 1)) + 1; if ((1ULL << size_degree) * max_load_factor < num_elems) ++size_degree; } increaseSizeDegree(0); } void setBufSize(size_t buf_size_) { size_degree = static_cast(log2(buf_size_ - 1) + 1); increaseSizeDegree(0); } }; static_assert(sizeof(HashTableGrowerWithPrecalculationAndMaxLoadFactor<>) == 64); /// Below are the various specialisations of the hash table that will be used for /// HASHED/SPARSE_HASHED dictionary; it could use one of the following depending /// on the layout of the dictionary and types of key/value (for more info see /// comments in this file): /// - HashMap /// - HashSet /// - HashMapWithSavedHash /// - HashSetWithSavedHash /// - PackedHashMap /// - google::sparse_hash_map /// /// Map (dictionary with attributes) /// /// Type of the hash table for the dictionary. template struct HashedDictionaryMapType; /// Default implementation using builtin HashMap (for HASHED layout). template struct HashedDictionaryMapType { using Type = std::conditional_t< dictionary_key_type == DictionaryKeyType::Simple, HashMap, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>, HashMapWithSavedHash, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>>; }; /// Implementations for SPARSE_HASHED layout. template struct HashedDictionarySparseMapType; /// Implementation based on google::sparse_hash_map for SPARSE_HASHED. template struct HashedDictionarySparseMapType { /// Here we use sparse_hash_map with DefaultHash<> for the following reasons: /// /// - DefaultHash<> is used for HashMap /// - DefaultHash<> (from HashTable/Hash.h) works better than std::hash<> /// in case of a sequential set of keys, but with random access to this set, i.e. /// /// SELECT number FROM numbers(3000000) ORDER BY rand() /// /// And even though std::hash<> works better in some other cases, /// DefaultHash<> is preferred since the difference for this particular /// case is significant, i.e. it can be 10x+. 
using Type = std::conditional_t< dictionary_key_type == DictionaryKeyType::Simple, google::sparse_hash_map>, google::sparse_hash_map>>; }; /// Implementation based on PackedHashMap for SPARSE_HASHED. template struct HashedDictionarySparseMapType { using Type = std::conditional_t< dictionary_key_type == DictionaryKeyType::Simple, PackedHashMap, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>, PackedHashMap, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>>; }; template struct HashedDictionaryMapType : public HashedDictionarySparseMapType< dictionary_key_type, Key, Value, /* use_sparse_hash= */ useSparseHashForHashedDictionary()> {}; /// /// Set (dictionary without attributes) /// /// Type of the hash table for the dictionary. template struct HashedDictionarySetType; /// Default implementation using builtin HashSet (for HASHED layout). template struct HashedDictionarySetType { using Type = std::conditional_t< dictionary_key_type == DictionaryKeyType::Simple, HashSet, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>, HashSetWithSavedHash, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>>; }; /// Implementation for SPARSE_HASHED. /// /// NOTE: There is no implementation based on google::sparse_hash_set since /// PackedHashMap is more optimal anyway (see comments for /// useSparseHashForHashedDictionary()). template struct HashedDictionarySetType { using Type = std::conditional_t< dictionary_key_type == DictionaryKeyType::Simple, HashSet, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>, HashSet, HashTableGrowerWithPrecalculationAndMaxLoadFactor<>>>; }; }