#pragma once #include #include #include /** NOTE HashMap could only be used for memmoveable (position independent) types. * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. * Also, key in hash table must be of type, that zero bytes is compared equals to zero key. */ struct NoInitTag {}; /// A pair that does not initialize the elements, if not needed. template struct PairNoInit { First first; Second second; PairNoInit() {} template PairNoInit(First_ && first_, NoInitTag) : first(std::forward(first_)) {} template PairNoInit(First_ && first_, Second_ && second_) : first(std::forward(first_)), second(std::forward(second_)) {} }; template struct HashMapCell { using Mapped = TMapped; using State = TState; using value_type = PairNoInit; value_type value; HashMapCell() {} HashMapCell(const Key & key_, const State & state) : value(key_, NoInitTag()) {} HashMapCell(const value_type & value_, const State & state) : value(value_) {} value_type & getValue() { return value; } const value_type & getValue() const { return value; } static Key & getKey(value_type & value) { return value.first; } static const Key & getKey(const value_type & value) { return value.first; } bool keyEquals(const Key & key_) const { return value.first == key_; } bool keyEquals(const Key & key_, size_t hash_) const { return value.first == key_; } void setHash(size_t hash_value) {} size_t getHash(const Hash & hash) const { return hash(value.first); } bool isZero(const State & state) const { return isZero(value.first, state); } static bool isZero(const Key & key, const State & state) { return ZeroTraits::check(key); } /// Set the key value to zero. void setZero() { ZeroTraits::set(value.first); } /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table). static constexpr bool need_zero_value_storage = true; /// Whether the cell was deleted. bool isDeleted() const { return false; } void setMapped(const value_type & value_) { value.second = value_.second; } /// Serialization, in binary and text form. void write(DB::WriteBuffer & wb) const { DB::writeBinary(value.first, wb); DB::writeBinary(value.second, wb); } void writeText(DB::WriteBuffer & wb) const { DB::writeDoubleQuoted(value.first, wb); DB::writeChar(',', wb); DB::writeDoubleQuoted(value.second, wb); } /// Deserialization, in binary and text form. void read(DB::ReadBuffer & rb) { DB::readBinary(value.first, rb); DB::readBinary(value.second, rb); } void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(value.first, rb); DB::assertChar(',', rb); DB::readDoubleQuoted(value.second, rb); } }; template struct HashMapCellWithSavedHash : public HashMapCell { using Base = HashMapCell; size_t saved_hash; using Base::Base; bool keyEquals(const Key & key_) const { return this->value.first == key_; } bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; } void setHash(size_t hash_value) { saved_hash = hash_value; } size_t getHash(const Hash & hash) const { return saved_hash; } }; template < typename Key, typename Cell, typename Hash = DefaultHash, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator > class HashMapTable : public HashTable { public: using key_type = Key; using mapped_type = typename Cell::Mapped; using value_type = typename Cell::value_type; using HashTable::HashTable; mapped_type & ALWAYS_INLINE operator[](Key x) { typename HashMapTable::iterator it; bool inserted; this->emplace(x, it, inserted); /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor), * since the hash table memory is initially initialized with zeros. * But, in fact, an empty cell may not be initialized with zeros in the following cases: * - ZeroValueStorage (it only zeros the key); * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero. * * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately * after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization. * * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`. * When we do the initialization, for new cells, it's enough to make `store 1` right away. * And if we did not initialize, then even though there was zero in the cell, * the compiler can not guess about this, and generates the `load`, `increment`, `store` code. */ if (inserted) new(&it->second) mapped_type(); return it->second; } }; template < typename Key, typename Mapped, typename Hash = DefaultHash, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator > using HashMap = HashMapTable, Hash, Grower, Allocator>; template < typename Key, typename Mapped, typename Hash = DefaultHash, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator > using HashMapWithSavedHash = HashMapTable, Hash, Grower, Allocator>;