// // LinearHashTable.h // // Library: Foundation // Package: Hashing // Module: LinearHashTable // // Definition of the LinearHashTable class. // // Copyright (c) 2006, Applied Informatics Software Engineering GmbH. // and Contributors. // // SPDX-License-Identifier: BSL-1.0 // #ifndef Foundation_LinearHashTable_INCLUDED #define Foundation_LinearHashTable_INCLUDED #include #include #include #include #include #include "Poco/Foundation.h" #include "Poco/Hash.h" namespace Poco { template > class LinearHashTable /// This class implements a linear hash table. /// /// In a linear hash table, the available address space /// grows or shrinks dynamically. A linear hash table thus /// supports any number of insertions or deletions without /// lookup or insertion performance deterioration. /// /// Linear hashing was discovered by Witold Litwin in 1980 /// and described in the paper LINEAR HASHING: A NEW TOOL FOR FILE AND TABLE ADDRESSING. /// /// For more information on linear hashing, see . /// /// The LinearHashTable is not thread safe. /// /// Value must support comparison for equality. /// /// Find, insert and delete operations are basically O(1) with regard /// to the total number of elements in the table, and O(N) with regard /// to the number of elements in the bucket where the element is stored. /// On average, every bucket stores one element; the exact number depends /// on the quality of the hash function. In most cases, the maximum number of /// elements in a bucket should not exceed 3. { public: typedef Value ValueType; typedef Value & Reference; typedef const Value & ConstReference; typedef Value * Pointer; typedef const Value * ConstPointer; typedef HashFunc Hash; typedef std::vector Bucket; typedef std::vector BucketVec; typedef typename Bucket::iterator BucketIterator; typedef typename BucketVec::iterator BucketVecIterator; class ConstIterator : public std::iterator { public: ConstIterator() : _initialized(false) { } ConstIterator(const BucketVecIterator & vecIt, const BucketVecIterator & endIt, const BucketIterator & buckIt) : _vecIt(vecIt), _endIt(endIt), _buckIt(buckIt), _initialized(true) { } ConstIterator(const ConstIterator & it) : _vecIt(it._vecIt), _endIt(it._endIt), _buckIt(it._buckIt), _initialized(it._initialized) { } ConstIterator & operator=(const ConstIterator & it) { ConstIterator tmp(it); swap(tmp); return *this; } void swap(ConstIterator & it) { using std::swap; // uninitialized iterators crash when swapped if (_initialized) { swap(_vecIt, it._vecIt); swap(_endIt, it._endIt); swap(_buckIt, it._buckIt); swap(_initialized, it._initialized); } else { _vecIt = it._vecIt; _endIt = it._endIt; _buckIt = it._buckIt; _initialized = it._initialized; } } bool operator==(const ConstIterator & it) const { return _vecIt == it._vecIt && (_vecIt == _endIt || _buckIt == it._buckIt); } bool operator!=(const ConstIterator & it) const { return _vecIt != it._vecIt || (_vecIt != _endIt && _buckIt != it._buckIt); } const typename Bucket::value_type & operator*() const { return *_buckIt; } const typename Bucket::value_type * operator->() const { return &*_buckIt; } ConstIterator & operator++() // prefix { if (_vecIt != _endIt) { ++_buckIt; while (_vecIt != _endIt && _buckIt == _vecIt->end()) { ++_vecIt; if (_vecIt != _endIt) _buckIt = _vecIt->begin(); } } return *this; } ConstIterator operator++(int) // postfix { ConstIterator tmp(*this); ++*this; return tmp; } protected: BucketVecIterator _vecIt; BucketVecIterator _endIt; BucketIterator _buckIt; bool _initialized; friend class LinearHashTable; }; class Iterator : public ConstIterator { public: Iterator() { } Iterator(const BucketVecIterator & vecIt, const BucketVecIterator & endIt, const BucketIterator & buckIt) : ConstIterator(vecIt, endIt, buckIt) { } Iterator(const Iterator & it) : ConstIterator(it) { } Iterator & operator=(const Iterator & it) { Iterator tmp(it); ConstIterator::swap(tmp); return *this; } void swap(Iterator & it) { ConstIterator::swap(it); } typename Bucket::value_type & operator*() { return *this->_buckIt; } const typename Bucket::value_type & operator*() const { return *this->_buckIt; } typename Bucket::value_type * operator->() { return &*this->_buckIt; } const typename Bucket::value_type * operator->() const { return &*this->_buckIt; } Iterator & operator++() // prefix { ConstIterator::operator++(); return *this; } Iterator operator++(int) // postfix { Iterator tmp(*this); ++*this; return tmp; } friend class LinearHashTable; }; LinearHashTable(std::size_t initialReserve = 64) : _split(0), _front(1), _size(0) /// Creates the LinearHashTable, using the given initialReserve. { _buckets.reserve(calcSize(initialReserve)); _buckets.push_back(Bucket()); } LinearHashTable(const LinearHashTable & table) : _buckets(table._buckets), _split(table._split), _front(table._front), _size(table._size) /// Creates the LinearHashTable by copying another one. { } ~LinearHashTable() /// Destroys the LinearHashTable. { } LinearHashTable & operator=(const LinearHashTable & table) /// Assigns another LinearHashTable. { LinearHashTable tmp(table); swap(tmp); return *this; } void swap(LinearHashTable & table) /// Swaps the LinearHashTable with another one. { using std::swap; swap(_buckets, table._buckets); swap(_split, table._split); swap(_front, table._front); swap(_size, table._size); } ConstIterator begin() const /// Returns an iterator pointing to the first entry, if one exists. { BucketVecIterator it(_buckets.begin()); BucketVecIterator end(_buckets.end()); while (it != end && it->empty()) { ++it; } if (it == end) return this->end(); else return ConstIterator(it, end, it->begin()); } ConstIterator end() const /// Returns an iterator pointing to the end of the table. { return ConstIterator(_buckets.end(), _buckets.end(), _buckets.front().end()); } Iterator begin() /// Returns an iterator pointing to the first entry, if one exists. { BucketVecIterator it(_buckets.begin()); BucketVecIterator end(_buckets.end()); while (it != end && it->empty()) { ++it; } if (it == end) return this->end(); else return Iterator(it, end, it->begin()); } Iterator end() /// Returns an iterator pointing to the end of the table. { return Iterator(_buckets.end(), _buckets.end(), _buckets.front().end()); } ConstIterator find(const Value & value) const /// Finds an entry in the table. { std::size_t addr = bucketAddress(value); BucketVecIterator it(_buckets.begin() + addr); BucketIterator buckIt(std::find(it->begin(), it->end(), value)); if (buckIt != it->end()) return ConstIterator(it, _buckets.end(), buckIt); else return end(); } Iterator find(const Value & value) /// Finds an entry in the table. { std::size_t addr = bucketAddress(value); BucketVecIterator it(_buckets.begin() + addr); BucketIterator buckIt(std::find(it->begin(), it->end(), value)); if (buckIt != it->end()) return Iterator(it, _buckets.end(), buckIt); else return end(); } std::size_t count(const Value & value) const /// Returns the number of elements with the given /// value, with is either 1 or 0. { return find(value) != end() ? 1 : 0; } std::pair insert(const Value & value) /// Inserts an element into the table. /// /// If the element already exists in the table, /// a pair(iterator, false) with iterator pointing to the /// existing element is returned. /// Otherwise, the element is inserted an a /// pair(iterator, true) with iterator /// pointing to the new element is returned. { std::size_t hash = _hash(value); std::size_t addr = bucketAddressForHash(hash); BucketVecIterator it(_buckets.begin() + addr); BucketIterator buckIt(std::find(it->begin(), it->end(), value)); if (buckIt == it->end()) { split(); addr = bucketAddressForHash(hash); it = _buckets.begin() + addr; buckIt = it->insert(it->end(), value); ++_size; return std::make_pair(Iterator(it, _buckets.end(), buckIt), true); } else { return std::make_pair(Iterator(it, _buckets.end(), buckIt), false); } } void erase(Iterator it) /// Erases the element pointed to by it. { if (it != end()) { it._vecIt->erase(it._buckIt); --_size; merge(); } } void erase(const Value & value) /// Erases the element with the given value, if it exists. { Iterator it = find(value); erase(it); } void clear() /// Erases all elements. { LinearHashTable empty; swap(empty); } std::size_t size() const /// Returns the number of elements in the table. { return _size; } bool empty() const /// Returns true iff the table is empty. { return _size == 0; } std::size_t buckets() const /// Returns the number of allocated buckets. { return _buckets.size(); } protected: std::size_t bucketAddress(const Value & value) const { std::size_t n = _hash(value); if (n % _front >= _split) return n % _front; else return n % (2 * _front); } std::size_t bucketAddressForHash(std::size_t hash) { if (hash % _front >= _split) return hash % _front; else return hash % (2 * _front); } void split() { if (_split == _front) { _split = 0; _front *= 2; _buckets.reserve(_front * 2); } Bucket tmp; _buckets.push_back(tmp); _buckets[_split].swap(tmp); ++_split; for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) { using std::swap; std::size_t addr = bucketAddress(*it); _buckets[addr].push_back(Value()); swap(*it, _buckets[addr].back()); } } void merge() { if (_split == 0) { _front /= 2; _split = _front; } --_split; Bucket tmp; tmp.swap(_buckets.back()); _buckets.pop_back(); for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) { using std::swap; std::size_t addr = bucketAddress(*it); _buckets[addr].push_back(Value()); swap(*it, _buckets[addr].back()); } } static std::size_t calcSize(std::size_t initialSize) { std::size_t size = 32; while (size < initialSize) size *= 2; return size; } private: // Evil hack: _buckets must be mutable because both ConstIterator and Iterator hold // ordinary iterator's (not const_iterator's). mutable BucketVec _buckets; std::size_t _split; std::size_t _front; std::size_t _size; HashFunc _hash; }; } // namespace Poco #endif // Foundation_LinearHashTable_INCLUDED