diff --git a/src/Common/HashTable/FixedClearableHashSet.h b/src/Common/HashTable/FixedClearableHashSet.h index 7e9c900f3ff..19d4669831f 100644 --- a/src/Common/HashTable/FixedClearableHashSet.h +++ b/src/Common/HashTable/FixedClearableHashSet.h @@ -33,12 +33,12 @@ struct FixedClearableHashTableCell }; -template +template class FixedClearableHashSet : public FixedHashTable< - Key, FixedClearableHashTableCell, FixedHashTableStoredSize>> + Key, FixedClearableHashTableCell, FixedHashTableStoredSize>, Allocator> { public: - using Base = FixedHashTable, FixedHashTableStoredSize>>; + using Base = FixedHashTable, FixedHashTableStoredSize>, Allocator>; using LookupResult = typename Base::LookupResult; void clear() diff --git a/src/Common/HashTable/FixedHashMap.h b/src/Common/HashTable/FixedHashMap.h index 77aacc0373f..267ff2f0515 100644 --- a/src/Common/HashTable/FixedHashMap.h +++ b/src/Common/HashTable/FixedHashMap.h @@ -16,7 +16,7 @@ struct FixedHashMapCell bool full; Mapped mapped; - FixedHashMapCell() {} /// Not default to avoid unnecessary zero-initialization. + FixedHashMapCell() {} FixedHashMapCell(const Key &, const State &) : full(true) {} FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {} @@ -61,7 +61,7 @@ struct FixedHashMapImplicitZeroCell Mapped mapped; - FixedHashMapImplicitZeroCell() {} /// Not default to avoid unnecessary zero-initialization. + FixedHashMapImplicitZeroCell() {} FixedHashMapImplicitZeroCell(const Key &, const State &) {} FixedHashMapImplicitZeroCell(const value_type & value_, const State &) : mapped(value_.second) {} @@ -98,11 +98,12 @@ template < typename Key, typename Mapped, typename Cell = FixedHashMapCell, - typename Size = FixedHashTableStoredSize> -class FixedHashMap : public FixedHashTable + typename Size = FixedHashTableStoredSize, + typename Allocator = HashTableAllocator> +class FixedHashMap : public FixedHashTable { public: - using Base = FixedHashTable; + using Base = FixedHashTable; using Self = FixedHashMap; using LookupResult = typename Base::LookupResult; @@ -159,9 +160,10 @@ public: } }; -template +template using FixedImplicitZeroHashMapWithCalculatedSize = FixedHashMap< Key, Mapped, FixedHashMapImplicitZeroCell, - FixedHashTableCalculatedSize>>; + FixedHashTableCalculatedSize>, + Allocator>; diff --git a/src/Common/HashTable/FixedHashSet.h b/src/Common/HashTable/FixedHashSet.h index ed2065d5cac..e764038e6c3 100644 --- a/src/Common/HashTable/FixedHashSet.h +++ b/src/Common/HashTable/FixedHashSet.h @@ -2,12 +2,12 @@ #include -template -class FixedHashSet : public FixedHashTable, FixedHashTableStoredSize>> +template +class FixedHashSet : public FixedHashTable, FixedHashTableStoredSize>, Allocator> { public: using Cell = FixedHashTableCell; - using Base = FixedHashTable>; + using Base = FixedHashTable, Allocator>; using Self = FixedHashSet; void merge(const Self & rhs) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index b8bd757d08b..9d18f03a30b 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -19,7 +19,7 @@ struct FixedHashTableCell using mapped_type = VoidMapped; bool full; - FixedHashTableCell() {} /// Not default to avoid unnecessary zero-initialization. + FixedHashTableCell() {} FixedHashTableCell(const Key &, const State &) : full(true) {} const VoidKey getKey() const { return {}; } @@ -95,8 +95,6 @@ struct FixedHashTableCalculatedSize * comparision; c) The number of cycles for checking cell empty is halved; d) * Memory layout is tighter, especially the Clearable variants. * - * The sizeof is large - not meant to be allocated on stack. Use unique_ptr. - * * NOTE: For Set variants this should always be better. For Map variants * however, as we need to assemble the real cell inside each iterator, there * might be some cases we fall short. @@ -106,8 +104,8 @@ struct FixedHashTableCalculatedSize * transfer, key updates (f.g. StringRef) and serde. This will allow * TwoLevelHashSet(Map) to contain different type of sets(maps). */ -template -class FixedHashTable : private boost::noncopyable, protected Cell::State, protected Size +template +class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size { static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); @@ -118,7 +116,18 @@ protected: using Self = FixedHashTable; - Cell buf[NUM_CELLS]; /// A piece of memory for all elements. + Cell * buf; /// A piece of memory for all elements. + + void alloc() { buf = reinterpret_cast(Allocator::alloc(NUM_CELLS * sizeof(Cell))); } + + void free() + { + if (buf) + { + Allocator::free(buf, getBufferSizeInBytes()); + buf = nullptr; + } + } void destroyElements() { @@ -193,29 +202,25 @@ public: size_t hash(const Key & x) const { return x; } - FixedHashTable() - { - /// Must zero-initialize. - /// Note: Cell buf[NUM_CELLS]{} does not zero-initialize if the Cell constructor is not default for POD type. - memset(buf, 0, NUM_CELLS * sizeof(Cell)); - } + FixedHashTable() { alloc(); } - FixedHashTable(FixedHashTable && rhs) { *this = std::move(rhs); } + FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); } ~FixedHashTable() { destroyElements(); + free(); } FixedHashTable & operator=(FixedHashTable && rhs) { destroyElements(); + free(); - memcpy(buf, rhs.buf, NUM_CELLS * sizeof(Cell)); - memset(rhs.buf, 0, NUM_CELLS * sizeof(Cell)); - + std::swap(buf, rhs.buf); this->setSize(rhs.size()); + Allocator::operator=(std::move(rhs)); Cell::State::operator=(std::move(rhs)); return *this; @@ -283,6 +288,9 @@ public: const_iterator begin() const { + if (!buf) + return end(); + const Cell * ptr = buf; auto buf_end = buf + NUM_CELLS; while (ptr < buf_end && ptr->isZero(*this)) @@ -295,6 +303,9 @@ public: iterator begin() { + if (!buf) + return end(); + Cell * ptr = buf; auto buf_end = buf + NUM_CELLS; while (ptr < buf_end && ptr->isZero(*this)) @@ -306,7 +317,7 @@ public: const_iterator end() const { /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - return const_iterator(this, buf + NUM_CELLS); + return const_iterator(this, buf ? buf + NUM_CELLS : buf); } const_iterator cend() const @@ -316,7 +327,7 @@ public: iterator end() { - return iterator(this, buf + NUM_CELLS); + return iterator(this, buf ? buf + NUM_CELLS : buf); } @@ -366,6 +377,9 @@ public: Cell::State::write(wb); DB::writeVarUInt(size(), wb); + if (!buf) + return; + for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) { if (!ptr->isZero(*this)) @@ -381,6 +395,9 @@ public: Cell::State::writeText(wb); DB::writeText(size(), wb); + if (!buf) + return; + for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) { if (!ptr->isZero(*this)) @@ -395,18 +412,13 @@ public: void read(DB::ReadBuffer & rb) { - size_t old_size = size(); Cell::State::read(rb); - - if (old_size) - destroyElements(); - + destroyElements(); size_t m_size; DB::readVarUInt(m_size, rb); this->setSize(m_size); - - if (old_size) - memset(buf, 0, NUM_CELLS * sizeof(Cell)); + free(); + alloc(); for (size_t i = 0; i < m_size; ++i) { @@ -420,18 +432,13 @@ public: void readText(DB::ReadBuffer & rb) { - size_t old_size = size(); Cell::State::readText(rb); - - if (old_size) - destroyElements(); - + destroyElements(); size_t m_size; DB::readText(m_size, rb); this->setSize(m_size); - - if (old_size) - memset(buf, 0, NUM_CELLS * sizeof(Cell)); + free(); + alloc(); for (size_t i = 0; i < m_size; ++i) { @@ -453,7 +460,7 @@ public: destroyElements(); this->clearSize(); - memset(buf, 0, NUM_CELLS * sizeof(Cell)); + memset(static_cast(buf), 0, NUM_CELLS * sizeof(*buf)); } /// After executing this function, the table can only be destroyed, @@ -462,6 +469,7 @@ public: { destroyElements(); this->clearSize(); + free(); } size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); }