#pragma once #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; } } /** NOTE HashSet can only be used for memmoveable (position independent) types. * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++. * Also, the key type must be such that an all-zero-bytes value compares equal to the zero key. */ /* NOTE(review): this copy looks damaged - line breaks are gone, the include directives have no targets, and template argument lists that started with an identifier seem to be missing (see the bare template keyword before HashSetCellWithSavedHash and the unmatched closing angle bracket after TwoLevelHashTable). Restore from the upstream file before building. */ /* Hash table used as a set: adds merge() and readAndMerge() on top of the HashTable base. */ template < typename Key, typename TCell, typename Hash = DefaultHash, typename Grower = HashTableGrowerWithPrecalculation<>, typename Allocator = HashTableAllocator> class HashSetTable : public HashTable { public: using Self = HashSetTable; using Cell = TCell; using Base = HashTable; using typename Base::LookupResult; /* Inserts the contents of rhs into this set. The zero key is handled first: when rhs has it and this set does not, the has-zero flag is set and m_size is incremented. After that, each of the bufSize() slots of rhs.buf for which isZero() is false is passed to insert(). */ void merge(const Self & rhs) { if (!this->hasZero() && rhs.hasZero()) { this->setHasZero(); ++this->m_size; } for (size_t i = 0; i < rhs.grower.bufSize(); ++i) if (!rhs.buf[i].isZero(*this)) this->insert(rhs.buf[i].getValue()); } /* Reads a serialized set from rb and inserts its elements into this one. Read order: Cell::State::read, a VarUInt element count, then that many cells. resize(count) is called before the inserts. */ void readAndMerge(DB::ReadBuffer & rb) { Cell::State::read(rb); size_t new_size = 0; DB::readVarUInt(new_size, rb); this->resize(new_size); for (size_t i = 0; i < new_size; ++i) { Cell x; x.read(rb); this->insert(x.getValue()); } } }; template < typename Key, typename TCell, /// Supposed to have no state (HashTableNoState) typename Hash = DefaultHash, typename Grower = TwoLevelHashTableGrower<>, typename Allocator = HashTableAllocator> class TwoLevelHashSetTable : public TwoLevelHashTable> { public: using Self = TwoLevelHashSetTable; using Base = TwoLevelHashTable>; using Base::Base; /// Writes its content in a way that it will be correctly read by HashSetTable. /// Used by uniqExact to preserve backward compatibility. 
/* Output order: a VarUInt holding size(); then the zero-value cell, taken from whichever of the NUM_BUCKETS sub-tables reports hasZero() (a second such sub-table raises LOGICAL_ERROR); then every cell reached by begin()..end() for which isZero(state) is false. No cell state is written here; the class is supposed to be used with stateless cells, as noted on TCell. */ void writeAsSingleLevel(DB::WriteBuffer & wb) const { DB::writeVarUInt(this->size(), wb); bool zero_written = false; for (size_t i = 0; i < Base::NUM_BUCKETS; ++i) { if (this->impls[i].hasZero()) { if (zero_written) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "No more than one zero value expected"); this->impls[i].zeroValue()->write(wb); zero_written = true; } } static constexpr HashTableNoState state; for (auto ptr = this->begin(); ptr != this->end(); ++ptr) if (!ptr.getPtr()->isZero(state)) ptr.getPtr()->write(wb); } }; /* Set cell that keeps a hash value in saved_hash next to the key. keyEquals(key, hash) compares saved_hash before comparing key bits, and getHash() returns saved_hash without calling the hash function. saved_hash is left unset by both constructors and is assigned only through setHash(); the V730 markers next to the constructors are presumably static-analyzer suppressions for that - confirm. */ template struct HashSetCellWithSavedHash : public HashTableCell { using Base = HashTableCell; size_t saved_hash; HashSetCellWithSavedHash() : Base() {} //-V730 HashSetCellWithSavedHash(const Key & key_, const typename Base::State & state) : Base(key_, state) {} //-V730 bool keyEquals(const Key & key_) const { return bitEquals(this->key, key_); } bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && bitEquals(this->key, key_); } bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); } void setHash(size_t hash_value) { saved_hash = hash_value; } size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } }; template < typename Key, typename Hash = DefaultHash, typename Grower = HashTableGrowerWithPrecalculation<>, typename Allocator = HashTableAllocator> using HashSet = HashSetTable, Hash, Grower, Allocator>; template < typename Key, typename Hash = DefaultHash, typename Grower = TwoLevelHashTableGrower<>, typename Allocator = HashTableAllocator> using TwoLevelHashSet = TwoLevelHashSetTable, Hash, Grower, Allocator>; /* HashSet variant using HashTableGrower and HashTableAllocatorWithStackMemory. The allocator argument is (1ULL << initial_size_degree) * sizeof(cell), i.e. the byte size of 2^initial_size_degree cells. */ template using HashSetWithStackMemory = HashSet< Key, Hash, HashTableGrower, HashTableAllocatorWithStackMemory< (1ULL << initial_size_degree) * sizeof(HashTableCell)>>; template < typename Key, typename Hash = DefaultHash, typename Grower = HashTableGrowerWithPrecalculation<>, typename Allocator = HashTableAllocator> using 
HashSetWithSavedHash = HashSetTable, Hash, Grower, Allocator>; /* Same stack-memory arrangement as HashSetWithStackMemory, sized with sizeof(HashSetCellWithSavedHash) instead: (1ULL << initial_size_degree) cells of that type. */ template using HashSetWithSavedHashWithStackMemory = HashSetWithSavedHash< Key, Hash, HashTableGrower, HashTableAllocatorWithStackMemory< (1ULL << initial_size_degree) * sizeof(HashSetCellWithSavedHash)>>;