#pragma once #include #include #include /** NOTE HashMap could only be used for memmoveable (position independent) types. * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. * Also, key in hash table must be of type, that zero bytes is compared equals to zero key. */ struct NoInitTag { }; /// A pair that does not initialize the elements, if not needed. template struct PairNoInit { First first; Second second; PairNoInit() {} template PairNoInit(First_ && first_, NoInitTag) : first(std::forward(first_)) { } template PairNoInit(First_ && first_, Second_ && second_) : first(std::forward(first_)), second(std::forward(second_)) { } }; template struct HashMapCell { using Mapped = TMapped; using State = TState; using value_type = PairNoInit; using mapped_type = Mapped; using key_type = Key; value_type value; HashMapCell() {} HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {} HashMapCell(const value_type & value_, const State &) : value(value_) {} /// Get the key (externally). const Key & getKey() const { return value.first; } Mapped & getMapped() { return value.second; } const Mapped & getMapped() const { return value.second; } const value_type & getValue() const { return value; } /// Get the key (internally). static const Key & getKey(const value_type & value) { return value.first; } bool keyEquals(const Key & key_) const { return value.first == key_; } bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; } bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; } void setHash(size_t /*hash_value*/) {} size_t getHash(const Hash & hash) const { return hash(value.first); } bool isZero(const State & state) const { return isZero(value.first, state); } static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); } /// Set the key value to zero. void setZero() { ZeroTraits::set(value.first); } /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table). static constexpr bool need_zero_value_storage = true; /// Whether the cell was deleted. bool isDeleted() const { return false; } void setMapped(const value_type & value_) { value.second = value_.second; } /// Serialization, in binary and text form. void write(DB::WriteBuffer & wb) const { DB::writeBinary(value.first, wb); DB::writeBinary(value.second, wb); } void writeText(DB::WriteBuffer & wb) const { DB::writeDoubleQuoted(value.first, wb); DB::writeChar(',', wb); DB::writeDoubleQuoted(value.second, wb); } /// Deserialization, in binary and text form. void read(DB::ReadBuffer & rb) { DB::readBinary(value.first, rb); DB::readBinary(value.second, rb); } void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(value.first, rb); DB::assertChar(',', rb); DB::readDoubleQuoted(value.second, rb); } }; template struct HashMapCellWithSavedHash : public HashMapCell { using Base = HashMapCell; size_t saved_hash; using Base::Base; bool keyEquals(const Key & key_) const { return this->value.first == key_; } bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; } bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); } void setHash(size_t hash_value) { saved_hash = hash_value; } size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } }; template < typename Key, typename Cell, typename Hash = DefaultHash, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> class HashMapTable : public HashTable { public: using Self = HashMapTable; using Base = HashTable; using LookupResult = typename Base::LookupResult; using Base::Base; /// Merge every cell's value of current map into the destination map via emplace. /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). /// Each filled cell in current map will invoke func once. If that map doesn't /// have a key equals to the given cell, a new cell gets emplaced into that map, /// and func is invoked with the third argument emplaced set to true. Otherwise /// emplaced is set to false. template void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { for (auto it = this->begin(), end = this->end(); it != end; ++it) { typename Self::LookupResult res_it; bool inserted; that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash()); func(res_it->getMapped(), it->getMapped(), inserted); } } /// Merge every cell's value of current map into the destination map via find. /// Func should have signature void(Mapped & dst, Mapped & src, bool exist). /// Each filled cell in current map will invoke func once. If that map doesn't /// have a key equals to the given cell, func is invoked with the third argument /// exist set to false. Otherwise exist is set to true. template void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) { for (auto it = this->begin(), end = this->end(); it != end; ++it) { auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash()); if (!res_it) func(it->getMapped(), it->getMapped(), false); else func(res_it->getMapped(), it->getMapped(), true); } } /// Call func(const Key &, Mapped &) for each hash map element. template void forEachValue(Func && func) { for (auto & v : *this) func(v.getKey(), v.getMapped()); } /// Call func(Mapped &) for each hash map element. template void forEachMapped(Func && func) { for (auto & v : *this) func(v.getMapped()); } typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) { LookupResult it; bool inserted; this->emplace(x, it, inserted); /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor), * since the hash table memory is initially initialized with zeros. * But, in fact, an empty cell may not be initialized with zeros in the following cases: * - ZeroValueStorage (it only zeros the key); * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero. * * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately * after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization. * * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`. * When we do the initialization, for new cells, it's enough to make `store 1` right away. * And if we did not initialize, then even though there was zero in the cell, * the compiler can not guess about this, and generates the `load`, `increment`, `store` code. */ if (inserted) new (&it->getMapped()) typename Cell::Mapped(); return it->getMapped(); } }; template < typename Key, typename Mapped, typename Hash = DefaultHash, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> using HashMap = HashMapTable, Hash, Grower, Allocator>; template < typename Key, typename Mapped, typename Hash = DefaultHash, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> using HashMapWithSavedHash = HashMapTable, Hash, Grower, Allocator>;