// ClickHouse/dbms/src/Common/HashTable/HashMap.h

#pragma once
#include <Common/HashTable/Hash.h>
#include <Common/HashTable/HashTable.h>
#include <Common/HashTable/HashTableAllocator.h>

/** NOTE HashMap can only be used for memmoveable (position independent) types.
  * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
  * Also, the key type must be such that a key consisting of all zero bytes compares equal to the zero key.
  */

/// Tag type: passing it as the second constructor argument of PairNoInit
/// selects the overload that initializes only `first`.
struct NoInitTag
{
};

/// A pair that does not initialize the elements, if not needed.
///
/// Unlike std::pair, the default constructor and the NoInitTag constructor
/// leave members out of the mem-initializer list, so they are only
/// default-initialized (i.e. left untouched for trivial types).
template <typename First, typename Second>
struct PairNoInit
{
    First first;
    Second second;

    /// Intentionally user-provided rather than `= default`: with a defaulted
    /// constructor, value-initialization `PairNoInit()` would zero the members.
    PairNoInit() {}

    /// Initialize only `first`; `second` is default-initialized.
    template <typename First_>
    PairNoInit(First_ && first_, NoInitTag) : first(std::forward<First_>(first_))
    {
    }

    /// Initialize both elements, perfectly forwarding the arguments.
    template <typename First_, typename Second_>
    PairNoInit(First_ && first_, Second_ && second_) : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_))
    {
    }
};

template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
2014-03-17 02:01:03 +00:00
struct HashMapCell
{
using Mapped = TMapped;
using State = TState;
using value_type = PairNoInit<Key, Mapped>;
value_type value;
2014-03-17 02:01:03 +00:00
HashMapCell() {}
2017-12-01 18:36:55 +00:00
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
HashMapCell(const value_type & value_, const State &) : value(value_) {}
2014-03-17 02:01:03 +00:00
A Proper lookup table that uses HashTable's API This is the first step of allowing heterogeneous cells in hash tables. performance test results are ``` 1. HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>; 2. NewLookupMap<UInt16, UInt8> ResolutionWidth 30000 1 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................223550276.46 ResolutionWidth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................248772721.24 Best: 2 - 
24877272124 ResolutionWidth 100000 1 ..........................................................................................................................................................................................................................................................238498413.99 ResolutionWidth 100000 2 ..........................................................................................................................................................................................................................................................261808889.98 Best: 2 - 26180888998 ResolutionWidth 300000 1 ...................................................................................239307348.81 ResolutionWidth 300000 2 ...................................................................................257592761.30 Best: 2 - 25759276130 ResolutionWidth 1000000 1 .........................240144759.26 ResolutionWidth 1000000 2 .........................257093531.91 Best: 2 - 25709353191 ResolutionWidth 5000000 1 .....241573260.35 ResolutionWidth 5000000 2 .....259314162.79 Best: 2 - 25931416279 ResolutionDepth 30000 1 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................217108119.84 ResolutionDepth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................249459504.41 Best: 2 - 24945950441 ResolutionDepth 100000 1 
..........................................................................................................................................................................................................................................................229065162.17 ResolutionDepth 100000 2 ..........................................................................................................................................................................................................................................................253769105.64 Best: 2 - 25376910564 ResolutionDepth 300000 1 ...................................................................................233079225.18 ResolutionDepth 300000 2 ...................................................................................256316273.78 Best: 2 - 25631627378 ResolutionDepth 1000000 1 .........................234184633.51 ResolutionDepth 1000000 2 .........................261100491.57 Best: 2 - 26110049157 ResolutionDepth 5000000 1 .....233118795.66 ResolutionDepth 5000000 2 .....252436160.41 Best: 2 - 25243616041 ```
2019-02-28 09:35:38 +00:00
const Key & getFirst() const { return value.first; }
Mapped & getSecond() { return value.second; }
const Mapped & getSecond() const { return value.second; }
const value_type & getValue() const { return value; }
2014-03-17 02:01:03 +00:00
static const Key & getKey(const value_type & value) { return value.first; }
2014-03-17 02:01:03 +00:00
bool keyEquals(const Key & key_) const { return value.first == key_; }
2017-12-01 18:36:55 +00:00
bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; }
bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; }
2014-03-17 02:01:03 +00:00
2017-12-01 18:36:55 +00:00
void setHash(size_t /*hash_value*/) {}
size_t getHash(const Hash & hash) const { return hash(value.first); }
2014-03-17 02:01:03 +00:00
bool isZero(const State & state) const { return isZero(value.first, state); }
2017-12-01 18:36:55 +00:00
static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); }
2017-05-07 20:25:26 +00:00
/// Set the key value to zero.
void setZero() { ZeroTraits::set(value.first); }
2014-03-17 02:01:03 +00:00
2017-05-07 20:25:26 +00:00
/// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
static constexpr bool need_zero_value_storage = true;
/// Whether the cell was deleted.
bool isDeleted() const { return false; }
2014-03-17 02:01:03 +00:00
void setMapped(const value_type & value_) { value.second = value_.second; }
2014-03-17 02:01:03 +00:00
2017-05-07 20:25:26 +00:00
/// Serialization, in binary and text form.
void write(DB::WriteBuffer & wb) const
{
DB::writeBinary(value.first, wb);
DB::writeBinary(value.second, wb);
}
2014-03-17 02:01:03 +00:00
void writeText(DB::WriteBuffer & wb) const
{
DB::writeDoubleQuoted(value.first, wb);
DB::writeChar(',', wb);
DB::writeDoubleQuoted(value.second, wb);
}
2014-03-17 02:01:03 +00:00
2017-05-07 20:25:26 +00:00
/// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb)
{
DB::readBinary(value.first, rb);
DB::readBinary(value.second, rb);
}
2014-03-17 02:01:03 +00:00
void readText(DB::ReadBuffer & rb)
{
DB::readDoubleQuoted(value.first, rb);
DB::assertChar(',', rb);
DB::readDoubleQuoted(value.second, rb);
}
2014-03-17 02:01:03 +00:00
};
/** Variant of HashMapCell that keeps the hash value next to the key.
  *
  * getHash() returns the stored value instead of recomputing it, and the
  * hash-aware keyEquals() overloads compare the stored hash before the keys.
  *
  * NOTE: constructors are inherited from HashMapCell and do not touch
  * `saved_hash`; within this header it is assigned only by setHash().
  */
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
{
    using Base = HashMapCell<Key, TMapped, Hash, TState>;

    size_t saved_hash;

    using Base::Base;

    /// Without a hash to compare against, only the keys are compared.
    bool keyEquals(const Key & key_) const { return this->value.first == key_; }

    /// Compare the saved hash first; keys are compared only when the hashes match.
    bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; }
    bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }

    void setHash(size_t hash_value) { saved_hash = hash_value; }
    size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
};

template <
typename Key,
typename Cell,
typename Hash = DefaultHash<Key>,
typename Grower = HashTableGrower<>,
typename Allocator = HashTableAllocator>
class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
2014-03-17 02:01:03 +00:00
{
public:
using Self = HashMapTable;
using key_type = Key;
using mapped_type = typename Cell::Mapped;
using value_type = typename Cell::value_type;
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
/// Merge every cell's value of current map into the destination map via emplace.
/// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
/// Each filled cell in current map will invoke func once. If that map doesn't
/// have a key equals to the given cell, a new cell gets emplaced into that map,
/// and func is invoked with the third argument emplaced set to true. Otherwise
/// emplaced is set to false.
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
decltype(it) res_it;
bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash());
func(res_it->getSecond(), it->getSecond(), inserted);
}
}
/// Merge every cell's value of current map into the destination map via find.
/// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
/// Each filled cell in current map will invoke func once. If that map doesn't
/// have a key equals to the given cell, func is invoked with the third argument
/// exist set to false. Otherwise exist is set to true.
template <typename Func>
void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
decltype(it) res_it = that.find(it->getFirst(), it.getHash());
if (res_it == that.end())
func(it->getSecond(), it->getSecond(), false);
else
func(res_it->getSecond(), it->getSecond(), true);
}
}
/// Call func(const Key &, Mapped &) for each hash map element.
template <typename Func>
void forEachValue(Func && func)
{
for (auto & v : *this)
func(v.getFirst(), v.getSecond());
}
/// Call func(Mapped &) for each hash map element.
template <typename Func>
void forEachMapped(Func && func)
{
for (auto & v : *this)
func(v.getSecond());
}
mapped_type & ALWAYS_INLINE operator[](Key x)
{
typename HashMapTable::iterator it;
bool inserted;
this->emplace(x, it, inserted);
2017-05-07 20:25:26 +00:00
/** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
* since the hash table memory is initially initialized with zeros.
* But, in fact, an empty cell may not be initialized with zeros in the following cases:
* - ZeroValueStorage (it only zeros the key);
* - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero.
*
2017-05-07 20:25:26 +00:00
* On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
* after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
*
2017-05-07 20:25:26 +00:00
* Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
* When we do the initialization, for new cells, it's enough to make `store 1` right away.
* And if we did not initialize, then even though there was zero in the cell,
* the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
*/
if (inserted)
A Proper lookup table that uses HashTable's API This is the first step of allowing heterogeneous cells in hash tables. performance test results are ``` 1. HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>; 2. NewLookupMap<UInt16, UInt8> ResolutionWidth 30000 1 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................223550276.46 ResolutionWidth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................248772721.24 Best: 2 - 
24877272124 ResolutionWidth 100000 1 ..........................................................................................................................................................................................................................................................238498413.99 ResolutionWidth 100000 2 ..........................................................................................................................................................................................................................................................261808889.98 Best: 2 - 26180888998 ResolutionWidth 300000 1 ...................................................................................239307348.81 ResolutionWidth 300000 2 ...................................................................................257592761.30 Best: 2 - 25759276130 ResolutionWidth 1000000 1 .........................240144759.26 ResolutionWidth 1000000 2 .........................257093531.91 Best: 2 - 25709353191 ResolutionWidth 5000000 1 .....241573260.35 ResolutionWidth 5000000 2 .....259314162.79 Best: 2 - 25931416279 ResolutionDepth 30000 1 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................217108119.84 ResolutionDepth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................249459504.41 Best: 2 - 24945950441 ResolutionDepth 100000 1 
..........................................................................................................................................................................................................................................................229065162.17 ResolutionDepth 100000 2 ..........................................................................................................................................................................................................................................................253769105.64 Best: 2 - 25376910564 ResolutionDepth 300000 1 ...................................................................................233079225.18 ResolutionDepth 300000 2 ...................................................................................256316273.78 Best: 2 - 25631627378 ResolutionDepth 1000000 1 .........................234184633.51 ResolutionDepth 1000000 2 .........................261100491.57 Best: 2 - 26110049157 ResolutionDepth 5000000 1 .....233118795.66 ResolutionDepth 5000000 2 .....252436160.41 Best: 2 - 25243616041 ```
2019-02-28 09:35:38 +00:00
new(&it->getSecond()) mapped_type();
A Proper lookup table that uses HashTable's API This is the first step of allowing heterogeneous cells in hash tables. performance test results are ``` 1. HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>; 2. NewLookupMap<UInt16, UInt8> ResolutionWidth 30000 1 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................223550276.46 ResolutionWidth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................248772721.24 Best: 2 - 
24877272124 ResolutionWidth 100000 1 ..........................................................................................................................................................................................................................................................238498413.99 ResolutionWidth 100000 2 ..........................................................................................................................................................................................................................................................261808889.98 Best: 2 - 26180888998 ResolutionWidth 300000 1 ...................................................................................239307348.81 ResolutionWidth 300000 2 ...................................................................................257592761.30 Best: 2 - 25759276130 ResolutionWidth 1000000 1 .........................240144759.26 ResolutionWidth 1000000 2 .........................257093531.91 Best: 2 - 25709353191 ResolutionWidth 5000000 1 .....241573260.35 ResolutionWidth 5000000 2 .....259314162.79 Best: 2 - 25931416279 ResolutionDepth 30000 1 
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................217108119.84 ResolutionDepth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................249459504.41 Best: 2 - 24945950441 ResolutionDepth 100000 1 
..........................................................................................................................................................................................................................................................229065162.17 ResolutionDepth 100000 2 ..........................................................................................................................................................................................................................................................253769105.64 Best: 2 - 25376910564 ResolutionDepth 300000 1 ...................................................................................233079225.18 ResolutionDepth 300000 2 ...................................................................................256316273.78 Best: 2 - 25631627378 ResolutionDepth 1000000 1 .........................234184633.51 ResolutionDepth 1000000 2 .........................261100491.57 Best: 2 - 26110049157 ResolutionDepth 5000000 1 .....233118795.66 ResolutionDepth 5000000 2 .....252436160.41 Best: 2 - 25243616041 ```
2019-02-28 09:35:38 +00:00
return it->getSecond();
}
2014-03-17 02:01:03 +00:00
};
/// Hash map whose cells are HashMapCell: the hash is recomputed from the key when needed.
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
using HashMap = HashMapTable<
    Key,
    HashMapCell<Key, Mapped, Hash>,
    Hash,
    Grower,
    Allocator>;

/// Hash map whose cells are HashMapCellWithSavedHash: each cell keeps its hash value alongside the key.
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>, typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
using HashMapWithSavedHash = HashMapTable<
    Key,
    HashMapCellWithSavedHash<Key, Mapped, Hash>,
    Hash,
    Grower,
    Allocator>;