2014-03-17 02:01:03 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/HashTable/Hash.h>
|
|
|
|
#include <Common/HashTable/HashTable.h>
|
|
|
|
#include <Common/HashTable/HashTableAllocator.h>
|
2014-03-17 02:01:03 +00:00
|
|
|
|
|
|
|
|
2016-12-08 04:56:52 +00:00
|
|
|
/** NOTE HashMap can only be used with memmoveable (position-independent) types.
  * Example: std::string is not position-independent in libstdc++ with the C++11 ABI or in libc++.
  * Also, the key type must be such that a key consisting of all zero bytes compares equal to the zero key.
  */
|
|
|
|
|
|
|
|
|
2019-08-01 15:57:02 +00:00
|
|
|
/// Empty tag type. Passing it as the second constructor argument of PairNoInit
/// selects the overload that constructs only `first`.
struct NoInitTag {};
|
2014-05-19 04:54:54 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// A pair that does not initialize the elements, if not needed.
|
2014-05-19 04:54:54 +00:00
|
|
|
template <typename First, typename Second>
|
2019-08-01 15:57:02 +00:00
|
|
|
struct PairNoInit
|
2014-05-19 04:54:54 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
First first;
|
|
|
|
Second second;
|
2014-05-19 04:54:54 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
PairNoInit() {}
|
2014-05-19 04:54:54 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
template <typename First_>
|
2019-08-01 15:57:02 +00:00
|
|
|
PairNoInit(First_ && first_, NoInitTag) : first(std::forward<First_>(first_))
|
|
|
|
{
|
|
|
|
}
|
2014-05-19 04:54:54 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
template <typename First_, typename Second_>
|
2019-08-01 15:57:02 +00:00
|
|
|
PairNoInit(First_ && first_, Second_ && second_) : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_))
|
|
|
|
{
|
|
|
|
}
|
2014-05-19 04:54:54 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2014-04-28 02:47:56 +00:00
|
|
|
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
|
2014-03-17 02:01:03 +00:00
|
|
|
struct HashMapCell
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
using Mapped = TMapped;
|
|
|
|
using State = TState;
|
2014-04-28 01:48:24 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
using value_type = PairNoInit<Key, Mapped>;
|
|
|
|
value_type value;
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
HashMapCell() {}
|
2017-12-01 18:36:55 +00:00
|
|
|
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
|
|
|
|
HashMapCell(const value_type & value_, const State &) : value(value_) {}
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2019-02-28 09:35:38 +00:00
|
|
|
const Key & getFirst() const { return value.first; }
|
|
|
|
Mapped & getSecond() { return value.second; }
|
|
|
|
const Mapped & getSecond() const { return value.second; }
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
const value_type & getValue() const { return value; }
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
static const Key & getKey(const value_type & value) { return value.first; }
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool keyEquals(const Key & key_) const { return value.first == key_; }
|
2017-12-01 18:36:55 +00:00
|
|
|
bool keyEquals(const Key & key_, size_t /*hash_*/) const { return value.first == key_; }
|
2018-08-02 15:16:40 +00:00
|
|
|
bool keyEquals(const Key & key_, size_t /*hash_*/, const State & /*state*/) const { return value.first == key_; }
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-12-01 18:36:55 +00:00
|
|
|
void setHash(size_t /*hash_value*/) {}
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t getHash(const Hash & hash) const { return hash(value.first); }
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool isZero(const State & state) const { return isZero(value.first, state); }
|
2017-12-01 18:36:55 +00:00
|
|
|
static bool isZero(const Key & key, const State & /*state*/) { return ZeroTraits::check(key); }
|
2014-04-28 01:48:24 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Set the key value to zero.
|
2017-04-01 07:20:54 +00:00
|
|
|
void setZero() { ZeroTraits::set(value.first); }
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
|
2017-04-01 07:20:54 +00:00
|
|
|
static constexpr bool need_zero_value_storage = true;
|
2014-04-28 01:48:24 +00:00
|
|
|
|
2017-05-09 19:07:35 +00:00
|
|
|
/// Whether the cell was deleted.
|
2017-04-01 07:20:54 +00:00
|
|
|
bool isDeleted() const { return false; }
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void setMapped(const value_type & value_) { value.second = value_.second; }
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Serialization, in binary and text form.
|
2017-04-01 07:20:54 +00:00
|
|
|
void write(DB::WriteBuffer & wb) const
|
|
|
|
{
|
|
|
|
DB::writeBinary(value.first, wb);
|
|
|
|
DB::writeBinary(value.second, wb);
|
|
|
|
}
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void writeText(DB::WriteBuffer & wb) const
|
|
|
|
{
|
|
|
|
DB::writeDoubleQuoted(value.first, wb);
|
|
|
|
DB::writeChar(',', wb);
|
|
|
|
DB::writeDoubleQuoted(value.second, wb);
|
|
|
|
}
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Deserialization, in binary and text form.
|
2017-04-01 07:20:54 +00:00
|
|
|
void read(DB::ReadBuffer & rb)
|
|
|
|
{
|
|
|
|
DB::readBinary(value.first, rb);
|
|
|
|
DB::readBinary(value.second, rb);
|
|
|
|
}
|
2014-03-17 02:01:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void readText(DB::ReadBuffer & rb)
|
|
|
|
{
|
|
|
|
DB::readDoubleQuoted(value.first, rb);
|
|
|
|
DB::assertChar(',', rb);
|
|
|
|
DB::readDoubleQuoted(value.second, rb);
|
|
|
|
}
|
2014-03-17 02:01:03 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2014-05-10 02:42:45 +00:00
|
|
|
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
|
|
|
|
struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
using Base = HashMapCell<Key, TMapped, Hash, TState>;
|
2014-05-10 02:42:45 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t saved_hash;
|
2014-05-10 02:42:45 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
using Base::Base;
|
2014-05-10 02:42:45 +00:00
|
|
|
|
2019-08-01 15:57:02 +00:00
|
|
|
bool keyEquals(const Key & key_) const { return this->value.first == key_; }
|
|
|
|
bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; }
|
2018-08-02 15:16:40 +00:00
|
|
|
bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }
|
2014-05-10 02:42:45 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void setHash(size_t hash_value) { saved_hash = hash_value; }
|
2017-12-01 18:36:55 +00:00
|
|
|
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
|
2014-05-10 02:42:45 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-02 16:30:09 +00:00
|
|
|
template <
|
2017-04-01 07:20:54 +00:00
|
|
|
typename Key,
|
|
|
|
typename Cell,
|
|
|
|
typename Hash = DefaultHash<Key>,
|
|
|
|
typename Grower = HashTableGrower<>,
|
2019-08-02 16:30:09 +00:00
|
|
|
typename Allocator = HashTableAllocator>
|
2014-04-28 01:48:24 +00:00
|
|
|
class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
|
2014-03-17 02:01:03 +00:00
|
|
|
{
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
using key_type = Key;
|
|
|
|
using mapped_type = typename Cell::Mapped;
|
|
|
|
using value_type = typename Cell::value_type;
|
|
|
|
|
|
|
|
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
|
|
|
|
|
|
|
|
mapped_type & ALWAYS_INLINE operator[](Key x)
|
|
|
|
{
|
|
|
|
typename HashMapTable::iterator it;
|
|
|
|
bool inserted;
|
|
|
|
this->emplace(x, it, inserted);
|
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
|
|
|
|
* since the hash table memory is initially initialized with zeros.
|
|
|
|
* But, in fact, an empty cell may not be initialized with zeros in the following cases:
|
|
|
|
* - ZeroValueStorage (it only zeros the key);
|
|
|
|
* - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key to zero.
|
2017-04-01 07:20:54 +00:00
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
|
|
|
|
* after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
|
2017-04-01 07:20:54 +00:00
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
|
|
|
|
* When we do the initialization, for new cells, it's enough to make `store 1` right away.
|
|
|
|
* And if we did not initialize, then even though there was zero in the cell,
|
|
|
|
* the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
if (inserted)
|
2019-02-28 09:35:38 +00:00
|
|
|
new(&it->getSecond()) mapped_type();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-28 09:35:38 +00:00
|
|
|
return it->getSecond();
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2014-03-17 02:01:03 +00:00
|
|
|
};
|
2014-04-28 01:48:24 +00:00
|
|
|
|
|
|
|
|
2019-08-02 16:30:09 +00:00
|
|
|
template <
|
2017-04-01 07:20:54 +00:00
|
|
|
typename Key,
|
|
|
|
typename Mapped,
|
|
|
|
typename Hash = DefaultHash<Key>,
|
|
|
|
typename Grower = HashTableGrower<>,
|
2019-08-02 16:30:09 +00:00
|
|
|
typename Allocator = HashTableAllocator>
|
2014-04-28 01:48:24 +00:00
|
|
|
using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
|
2014-05-10 02:42:45 +00:00
|
|
|
|
|
|
|
|
2019-08-02 16:30:09 +00:00
|
|
|
template <
|
2017-04-01 07:20:54 +00:00
|
|
|
typename Key,
|
|
|
|
typename Mapped,
|
|
|
|
typename Hash = DefaultHash<Key>,
|
|
|
|
typename Grower = HashTableGrower<>,
|
2019-08-02 16:30:09 +00:00
|
|
|
typename Allocator = HashTableAllocator>
|
2014-05-10 02:42:45 +00:00
|
|
|
using HashMapWithSavedHash = HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>;
|