Unify hash tables interface.

This commit is contained in:
Amos Bird 2019-10-29 23:16:51 +08:00
parent 4d27e8aa5b
commit fbfbe161ec
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
55 changed files with 439 additions and 483 deletions

View File

@ -579,7 +579,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (histogram.buckets.size() < params.num_buckets_cutoff)
{
@ -593,7 +593,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (!histogram.total)
continue;
@ -625,7 +625,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (!histogram.total)
continue;
@ -641,7 +641,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (!histogram.total)
continue;
@ -676,7 +676,7 @@ public:
while (true)
{
it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()));
if (it && lookupResultGetMapped(it)->total + lookupResultGetMapped(it)->count_end != 0)
if (it && it->getMapped().total + it->getMapped().count_end != 0)
break;
if (context_size == 0)
@ -710,7 +710,7 @@ public:
if (num_bytes_after_desired_size > 0)
end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
CodePoint code = lookupResultGetMapped(it)->sample(determinator, end_probability_multiplier);
CodePoint code = it->getMapped().sample(determinator, end_probability_multiplier);
if (code == END)
break;

View File

@ -55,7 +55,7 @@ struct EntropyData
void merge(const EntropyData & rhs)
{
for (const auto & pair : rhs.map)
map[pair.getFirst()] += pair.getSecond();
map[pair.getKey()] += pair.getMapped();
}
void serialize(WriteBuffer & buf) const
@ -77,12 +77,12 @@ struct EntropyData
{
UInt64 total_value = 0;
for (const auto & pair : map)
total_value += pair.getSecond();
total_value += pair.getMapped();
Float64 shannon_entropy = 0;
for (const auto & pair : map)
{
Float64 frequency = Float64(pair.getSecond()) / total_value;
Float64 frequency = Float64(pair.getMapped()) / total_value;
shannon_entropy -= frequency * log2(frequency);
}

View File

@ -58,7 +58,7 @@ struct QuantileExactWeighted
void merge(const QuantileExactWeighted & rhs)
{
for (const auto & pair : rhs.map)
map[pair.getFirst()] += pair.getSecond();
map[pair.getKey()] += pair.getMapped();
}
void serialize(WriteBuffer & buf) const
@ -93,7 +93,7 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0;
for (const auto & pair : map)
{
sum_weight += pair.getSecond();
sum_weight += pair.getMapped();
array[i] = pair.getValue();
++i;
}
@ -143,7 +143,7 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0;
for (const auto & pair : map)
{
sum_weight += pair.getSecond();
sum_weight += pair.getMapped();
array[i] = pair.getValue();
++i;
}

View File

@ -35,7 +35,7 @@ namespace
data.resize(hash_map.size());
for (const auto & val : hash_map)
data[val.getSecond()] = val.getFirst();
data[val.getMapped()] = val.getKey();
for (auto & ind : index)
ind = hash_map[ind];

View File

@ -359,7 +359,7 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
if constexpr (has_mapped)
{
auto & mapped = *lookupResultGetMapped(it);
auto & mapped = it->getMapped();
if (inserted)
{
new (&mapped) Mapped();

View File

@ -174,13 +174,13 @@ protected:
[[maybe_unused]] Mapped * cached = nullptr;
if constexpr (has_mapped)
cached = lookupResultGetMapped(it);
cached = &it->getMapped();
if (inserted)
{
if constexpr (has_mapped)
{
new(lookupResultGetMapped(it)) Mapped();
new (&it->getMapped()) Mapped();
}
}
@ -191,18 +191,18 @@ protected:
if constexpr (has_mapped)
{
cache.value.first = *lookupResultGetKey(it);
cache.value.second = *lookupResultGetMapped(it);
cache.value.first = it->getKey();
cache.value.second = it->getMapped();
cached = &cache.value.second;
}
else
{
cache.value = *lookupResultGetKey(it);
cache.value = it->getKey();
}
}
if constexpr (has_mapped)
return EmplaceResult(*lookupResultGetMapped(it), *cached, inserted);
return EmplaceResult(it->getMapped(), *cached, inserted);
else
return EmplaceResult(inserted);
}
@ -233,7 +233,7 @@ protected:
cache.value.first = key;
if (it)
{
cache.value.second = *lookupResultGetMapped(it);
cache.value.second = it->getMapped();
}
}
else
@ -243,7 +243,7 @@ protected:
}
if constexpr (has_mapped)
return FindResult(it ? lookupResultGetMapped(it) : nullptr, it != nullptr);
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr);
else
return FindResult(it != nullptr);
}

View File

@ -14,12 +14,6 @@ struct ClearableHashMapCell : public ClearableHashTableCell<Key, HashMapCell<Key
: Base::BaseCell(value_, state), Base::version(state.version) {}
};
template<typename Key, typename Mapped, typename Hash>
ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashMapCell<Key, Mapped, Hash> * cell) { return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash>
ALWAYS_INLINE inline auto lookupResultGetMapped(ClearableHashMapCell<Key, Mapped, Hash> * cell) { return &cell->getSecond(); }
template
<
typename Key,
@ -31,20 +25,16 @@ template
class ClearableHashMap : public HashTable<Key, ClearableHashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>
{
public:
using key_type = Key;
using mapped_type = Mapped;
using value_type = typename ClearableHashMap::cell_type::value_type;
mapped_type & operator[](Key x)
Mapped & operator[](const Key & x)
{
typename ClearableHashMap::LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new(lookupResultGetMapped(it)) mapped_type();
new (&it->getMapped()) Mapped();
return *lookupResultGetMapped(it);
return it->getMapped();
}
void clear()

View File

@ -48,12 +48,6 @@ struct ClearableHashTableCell : public BaseCell
ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {}
};
template<typename Key, typename BaseCell>
ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashTableCell<Key, BaseCell> * cell) { return &cell->key; }
template<typename Key, typename BaseCell>
ALWAYS_INLINE inline void * lookupResultGetMapped(ClearableHashTableCell<Key, BaseCell> *) { return nullptr; }
template
<
typename Key,
@ -64,9 +58,6 @@ template
class ClearableHashSet : public HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>
{
public:
using key_type = Key;
using value_type = typename ClearableHashSet::cell_type::value_type;
using Base = HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>;
using typename Base::LookupResult;
@ -87,9 +78,6 @@ template
class ClearableHashSetWithSavedHash: public HashTable<Key, ClearableHashTableCell<Key, HashSetCellWithSavedHash<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>
{
public:
using key_type = Key;
using value_type = typename ClearableHashSetWithSavedHash::cell_type::value_type;
void clear()
{
++this->version;

View File

@ -11,6 +11,8 @@ struct FixedClearableHashMapCell
using State = ClearableHashSetState;
using value_type = PairNoInit<Key, Mapped>;
using mapped_type = Mapped;
UInt32 version;
Mapped mapped;
@ -18,11 +20,12 @@ struct FixedClearableHashMapCell
FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {}
FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {}
Mapped & getSecond() { return mapped; }
const Mapped & getSecond() const { return mapped; }
const VoidKey getKey() const { return {}; }
Mapped & getMapped() { return mapped; }
const Mapped & getMapped() const { return mapped; }
bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
struct CellExt
{
@ -35,32 +38,33 @@ struct FixedClearableHashMapCell
}
Key key;
FixedClearableHashMapCell * ptr;
const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return *ptr->mapped; }
const Key & getKey() const { return key; }
Mapped & getMapped() { return ptr->mapped; }
/// Read-only access to the mapped value of the cell this view points at.
/// `mapped` is a plain `Mapped` member of the cell, so it is returned as-is:
/// `*ptr->mapped` would parse as `*(ptr->mapped)` and dereference the value itself.
const Mapped & getMapped() const { return ptr->mapped; }
/// Assembles a (key, mapped) pair by value on each call.
/// The mapped part is read straight from the pointed-to cell; `mapped` is a plain
/// `Mapped` member, so it must not be dereferenced (`*ptr->mapped` is `*(ptr->mapped)`).
const value_type getValue() const { return {key, ptr->mapped}; }
};
};
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedClearableHashMap : public FixedHashMap<Key, FixedClearableHashMapCell<Key, Mapped>, Allocator>
class FixedClearableHashMap : public FixedHashMap<Key, Mapped, FixedClearableHashMapCell<Key, Mapped>, Allocator>
{
public:
using key_type = Key;
using mapped_type = Mapped;
using value_type = typename FixedClearableHashMap::cell_type::value_type;
using Base = FixedHashMap<Key, Mapped, FixedClearableHashMapCell<Key, Mapped>, Allocator>;
using Self = FixedClearableHashMap;
using LookupResult = typename Base::LookupResult;
mapped_type & operator[](Key x)
using Base::Base;
Mapped & operator[](const Key & x)
{
typename FixedClearableHashMap::iterator it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new (&it->second) mapped_type();
new (&it->getMapped()) Mapped();
return it->second;
return it->getMapped();
}
void clear()

View File

@ -10,19 +10,23 @@ struct FixedClearableHashTableCell
using State = ClearableHashSetState;
using value_type = Key;
using mapped_type = void;
using mapped_type = VoidMapped;
UInt32 version;
FixedClearableHashTableCell() {}
FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {}
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
struct CellExt
{
Key key;
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; }
void update(Key && key_, FixedClearableHashTableCell *) { key = key_; }
};
@ -34,8 +38,6 @@ class FixedClearableHashSet : public FixedHashTable<Key, FixedClearableHashTable
{
public:
using Base = FixedHashTable<Key, FixedClearableHashTableCell<Key>, Allocator>;
using key_type = Key;
using value_type = typename FixedClearableHashSet::cell_type::value_type;
using LookupResult = typename Base::LookupResult;
void clear()

View File

@ -13,18 +13,19 @@ struct FixedHashMapCell
using value_type = PairNoInit<Key, Mapped>;
using mapped_type = TMapped;
Mapped mapped;
bool full;
Mapped mapped;
FixedHashMapCell() {}
FixedHashMapCell(const Key &, const State &) : full(true) {}
FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
Mapped & getSecond() { return mapped; }
const Mapped & getSecond() const { return mapped; }
const VoidKey getKey() const { return {}; }
Mapped & getMapped() { return mapped; }
const Mapped & getMapped() const { return mapped; }
bool isZero(const State &) const { return !full; }
void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false;
/// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
/// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
@ -40,36 +41,23 @@ struct FixedHashMapCell
Key key;
FixedHashMapCell * ptr;
const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return ptr->mapped; }
const Key & getKey() const { return key; }
Mapped & getMapped() { return ptr->mapped; }
const Mapped & getMapped() const { return ptr->mapped; }
const value_type getValue() const { return {key, ptr->mapped}; }
};
};
template<typename Key, typename Mapped, typename State>
ALWAYS_INLINE inline void * lookupResultGetKey(FixedHashMapCell<Key, Mapped, State> *)
{ return nullptr; }
template<typename Key, typename Mapped, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(FixedHashMapCell<Key, Mapped, State> * cell)
{ return &cell->getSecond(); }
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>
template <typename Key, typename Mapped, typename Cell = FixedHashMapCell<Key, Mapped>, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, Cell, Allocator>
{
public:
using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>;
using Base = FixedHashTable<Key, Cell, Allocator>;
using Self = FixedHashMap;
using key_type = Key;
using Cell = typename Base::cell_type;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::Mapped;
using LookupResult = typename Base::LookupResult;
using Base::Base;
using LookupResult = typename Base::LookupResult;
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
@ -77,8 +65,8 @@ public:
{
typename Self::LookupResult res_it;
bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash());
func(*lookupResultGetMapped(res_it), it->getSecond(), inserted);
that.emplace(it->getKey(), res_it, inserted, it.getHash());
func(res_it->getMapped(), it->getMapped(), inserted);
}
}
@ -87,11 +75,11 @@ public:
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
auto res_it = that.find(it->getFirst(), it.getHash());
auto res_it = that.find(it->getKey(), it.getHash());
if (!res_it)
func(it->getSecond(), it->getSecond(), false);
func(it->getMapped(), it->getMapped(), false);
else
func(*lookupResultGetMapped(res_it), it->getSecond(), true);
func(res_it->getMapped(), it->getMapped(), true);
}
}
@ -99,24 +87,24 @@ public:
void forEachValue(Func && func)
{
for (auto & v : *this)
func(v.getFirst(), v.getSecond());
func(v.getKey(), v.getMapped());
}
template <typename Func>
void forEachMapped(Func && func)
{
for (auto & v : *this)
func(v.getSecond());
func(v.getMapped());
}
mapped_type & ALWAYS_INLINE operator[](Key x)
Mapped & ALWAYS_INLINE operator[](const Key & x)
{
typename Base::LookupResult it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new (it) mapped_type();
new (&it->getMapped()) Mapped();
return it;
return it->getMapped();
}
};

View File

@ -6,14 +6,15 @@ template <typename Key, typename Allocator = HashTableAllocator>
class FixedHashSet : public FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>
{
public:
using Base = FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>;
using Cell = FixedHashTableCell<Key>;
using Base = FixedHashTable<Key, Cell, Allocator>;
using Self = FixedHashSet;
void merge(const Self & rhs)
{
for (size_t i = 0; i < Base::BUFFER_SIZE; ++i)
if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this))
Base::buf[i] = rhs.buf[i];
new (&Base::buf[i]) Cell(rhs.buf[i]);
}
/// NOTE: Currently this method isn't used. When it does, the ReadBuffer should

View File

@ -8,12 +8,15 @@ struct FixedHashTableCell
using State = TState;
using value_type = Key;
using mapped_type = void;
using mapped_type = VoidMapped;
bool full;
FixedHashTableCell() {}
FixedHashTableCell(const Key &, const State &) : full(true) {}
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
bool isZero(const State &) const { return !full; }
void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false;
@ -28,6 +31,8 @@ struct FixedHashTableCell
{
Key key;
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; }
void update(Key && key_, FixedHashTableCell *) { key = key_; }
};
@ -53,7 +58,7 @@ struct FixedHashTableCell
template <typename Key, typename Cell, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State
{
static constexpr size_t BUFFER_SIZE = 1ULL << (sizeof(Key) * 8);
static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
protected:
friend class const_iterator;
@ -61,12 +66,11 @@ protected:
friend class Reader;
using Self = FixedHashTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key.
Cell * buf; /// A piece of memory for all elements.
void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(BUFFER_SIZE * sizeof(Cell))); }
void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); }
void free()
{
@ -111,7 +115,7 @@ protected:
++ptr;
/// Skip empty cells in the main buffer.
auto buf_end = container->buf + container->BUFFER_SIZE;
auto buf_end = container->buf + container->NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*container))
++ptr;
@ -140,8 +144,9 @@ protected:
public:
using key_type = Key;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;
@ -239,7 +244,7 @@ public:
return end();
const Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE;
auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
@ -254,21 +259,21 @@ public:
return end();
Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE;
auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
return iterator(this, ptr);
}
const_iterator end() const { return const_iterator(this, buf + BUFFER_SIZE); }
const_iterator end() const { return const_iterator(this, buf + NUM_CELLS); }
const_iterator cend() const { return end(); }
iterator end() { return iterator(this, buf + BUFFER_SIZE); }
iterator end() { return iterator(this, buf + NUM_CELLS); }
public:
/// The last parameter is unused but exists for compatibility with HashTable interface.
void ALWAYS_INLINE emplace(Key x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
{
it = &buf[x];
@ -288,40 +293,31 @@ public:
std::pair<LookupResult, bool> res;
emplace(Cell::getKey(x), res.first, res.second);
if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x);
insertSetMapped(res.first->getMapped(), x);
return res;
}
LookupResult ALWAYS_INLINE find(Key x)
{
return !buf[x].isZero(*this) ? &buf[x] : nullptr;
}
LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; }
ConstLookupResult ALWAYS_INLINE find(Key x) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x);
}
ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); }
LookupResult ALWAYS_INLINE find(Key, size_t hash_value)
{
return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr;
}
LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; }
ConstLookupResult ALWAYS_INLINE find(Key key, size_t hash_value) const
ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value);
}
bool ALWAYS_INLINE has(Key x) const { return !buf[x].isZero(*this); }
bool ALWAYS_INLINE has(Key, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); }
bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
void write(DB::WriteBuffer & wb) const
{
Cell::State::write(wb);
DB::writeVarUInt(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
if (!ptr->isZero(*this))
{
DB::writeVarUInt(ptr - buf);
@ -334,7 +330,7 @@ public:
Cell::State::writeText(wb);
DB::writeText(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
{
if (!ptr->isZero(*this))
{
@ -393,7 +389,7 @@ public:
destroyElements();
m_size = 0;
memset(static_cast<void *>(buf), 0, BUFFER_SIZE * sizeof(*buf));
memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf));
}
/// After executing this function, the table can only be destroyed,
@ -405,9 +401,9 @@ public:
free();
}
size_t getBufferSizeInBytes() const { return BUFFER_SIZE * sizeof(Cell); }
size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); }
size_t getBufferSizeInCells() const { return BUFFER_SIZE; }
size_t getBufferSizeInCells() const { return NUM_CELLS; }
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
size_t getCollisions() const { return 0; }

View File

@ -52,12 +52,13 @@ struct HashMapCell
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
HashMapCell(const value_type & value_, const State &) : value(value_) {}
const Key & getFirst() const { return value.first; }
Mapped & getSecond() { return value.second; }
const Mapped & getSecond() const { return value.second; }
/// Get the key (externally).
const Key & getKey() const { return value.first; }
Mapped & getMapped() { return value.second; }
const Mapped & getMapped() const { return value.second; }
const value_type & getValue() const { return value; }
/// Get the key (internally).
static const Key & getKey(const value_type & value) { return value.first; }
bool keyEquals(const Key & key_) const { return value.first == key_; }
@ -110,15 +111,6 @@ struct HashMapCell
}
};
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCell<Key, Mapped, Hash, State> * cell)
{ return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCell<Key, Mapped, Hash, State> * cell)
{ return &cell->getSecond(); }
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
{
@ -136,15 +128,6 @@ struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
};
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCellWithSavedHash<Key, Mapped, Hash, State> * cell)
{ return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCellWithSavedHash<Key, Mapped, Hash, State> * cell)
{ return &cell->getSecond(); }
template <
typename Key,
typename Cell,
@ -156,14 +139,9 @@ class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
public:
using Self = HashMapTable;
using Base = HashTable<Key, Cell, Hash, Grower, Allocator>;
using key_type = Key;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::Mapped;
using LookupResult = typename Base::LookupResult;
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
using Base::Base;
/// Merge every cell's value of current map into the destination map via emplace.
/// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
@ -178,8 +156,8 @@ public:
{
typename Self::LookupResult res_it;
bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash());
func(*lookupResultGetMapped(res_it), it->getSecond(), inserted);
that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash());
func(res_it->getMapped(), it->getMapped(), inserted);
}
}
@ -193,11 +171,11 @@ public:
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
auto res_it = that.find(it->getFirst(), it.getHash());
auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash());
if (!res_it)
func(it->getSecond(), it->getSecond(), false);
func(it->getMapped(), it->getMapped(), false);
else
func(*lookupResultGetMapped(res_it), it->getSecond(), true);
func(res_it->getMapped(), it->getMapped(), true);
}
}
@ -206,7 +184,7 @@ public:
void forEachValue(Func && func)
{
for (auto & v : *this)
func(v.getFirst(), v.getSecond());
func(v.getKey(), v.getMapped());
}
/// Call func(Mapped &) for each hash map element.
@ -214,12 +192,12 @@ public:
void forEachMapped(Func && func)
{
for (auto & v : *this)
func(v.getSecond());
func(v.getMapped());
}
mapped_type & ALWAYS_INLINE operator[](Key x)
typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
{
typename HashMapTable::LookupResult it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
@ -238,9 +216,9 @@ public:
* the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
*/
if (inserted)
new(lookupResultGetMapped(it)) mapped_type();
new (&it->getMapped()) typename Cell::Mapped();
return *lookupResultGetMapped(it);
return it->getMapped();
}
};

View File

@ -84,14 +84,6 @@ struct HashSetCellWithSavedHash : public HashTableCell<Key, Hash, TState>
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
};
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashSetCellWithSavedHash<Key, Hash, State> * cell)
{ return &cell->key; }
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline void * lookupResultGetMapped(HashSetCellWithSavedHash<Key, Hash, State> *)
{ return nullptr; }
template
<
typename Key,

View File

@ -78,66 +78,48 @@ void set(T & x) { x = 0; }
}
/**
* lookupResultGetKey/Mapped -- functions to get key/"mapped" values from the
* LookupResult returned by find() and emplace() methods of HashTable.
* Must not be called for a null LookupResult.
* getKey/Mapped -- methods to get key/"mapped" values from the LookupResult returned by find() and
* emplace() methods of HashTable. Must not be called for a null LookupResult.
*
* We don't use iterators for lookup result to avoid creating temporary
* objects. Instead, LookupResult is a pointer of some kind. There are global
* functions lookupResultGetKey/Mapped, overloaded for this pointer type, that
* return pointers to key/"mapped" values. They are implemented as global
* functions and not as methods, because they have to be overloaded for POD
* types, e.g. in StringHashTable where different components have different
* Cell format.
* We don't use iterators for lookup result. Instead, LookupResult is a pointer to a cell. The cell
* provides the methods getKey/getMapped, which return the key/"mapped" value by reference or by value.
*
* Different hash table implementations support this interface to a varying
* degree:
* Different hash table implementations support this interface to a varying degree:
*
* 1) Hash tables that store neither the key in its original form, nor a
* "mapped" value: FixedHashTable or StringHashTable.
* Neither GetKey nor GetMapped are supported, the only valid operation is
* checking LookupResult for null.
* 1) Hash tables that store neither the key in its original form, nor a "mapped" value:
* FixedHashTable or StringHashTable. getKey and getMapped return only the empty VoidKey and
* VoidMapped placeholders; the only meaningful operation is checking LookupResult for null.
*
* 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap.
* Only GetMapped is supported.
* 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap. Only GetMapped is
* supported.
*
* 3) Hash tables that store the key and do not have a "mapped" value, e.g. the
* normal HashTable.
* GetKey returns the key, and GetMapped returns a zero void pointer. This
* simplifies generic code that works with mapped values: it can overload
* on the return type of GetMapped(), and doesn't need other parameters. One
* example is insertSetMapped() function.
* 3) Hash tables that store the key and do not have a "mapped" value, e.g. the normal HashTable.
* getKey returns the key, and getMapped returns an empty VoidMapped placeholder. This simplifies
* generic code that works with mapped values: it can overload on the return type of getMapped(),
* and doesn't need other parameters. One example is the insertSetMapped() function.
*
* 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap.
* Both GetKey and GetMapped are supported.
* 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. Both GetKey and
* GetMapped are supported.
*
* The implementation side goes as follows:
* for (1), LookupResult = void *, no getters;
* for (2), LookupResult = Mapped *, GetMapped is a default implementation that
* takes any pointer-like object;
* for (3) and (4), LookupResult = Cell *, and both getters are implemented.
* They have to be specialized for each particular Cell class to supersede the
* default verision that takes a generic pointer-like object.
*
* for (1), LookupResult->getKey = const VoidKey, LookupResult->getMapped = VoidMapped;
*
* for (2), LookupResult->getKey = const VoidKey, LookupResult->getMapped = Mapped &;
*
* for (3), LookupResult->getKey = const Key [&], LookupResult->getMapped = VoidMapped;
*
* for (4), LookupResult->getKey = const Key [&], LookupResult->getMapped = Mapped &;
* VoidKey and VoidMapped may have specialized function overloads for generic code.
*/
/**
* The default implementation of GetMapped that is used for the above case (2).
*/
template<typename PointerLike>
ALWAYS_INLINE inline auto lookupResultGetMapped(PointerLike && ptr) { return &*ptr; }
/**
* Generic const wrapper for lookupResultGetMapped, that calls a non-const
* version. Should be safe, given that these functions only do pointer
* arithmetics.
*/
template<typename T>
ALWAYS_INLINE inline auto lookupResultGetMapped(const T * obj)
struct VoidKey {};
struct VoidMapped
{
auto mapped_ptr = lookupResultGetMapped(const_cast<T *>(obj));
const auto const_mapped_ptr = mapped_ptr;
return const_mapped_ptr;
}
template <typename T>
auto & operator=(const T &)
{
return *this;
}
};
/** Compile-time interface for cell of the hash table.
* Different cell types are used to implement different hash tables.
@ -152,7 +134,7 @@ struct HashTableCell
using key_type = Key;
using value_type = Key;
using mapped_type = void;
using mapped_type = VoidMapped;
Key key;
@ -161,10 +143,12 @@ struct HashTableCell
/// Create a cell with the given key / key and value.
HashTableCell(const Key & key_, const State &) : key(key_) {}
/// Get what the value_type of the container will be.
/// Get the key (externally).
const Key & getKey() const { return key; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; }
/// Get the key.
/// Get the key (internally).
static const Key & getKey(const value_type & value) { return value; }
/// Are the keys at the cells equal?
@ -207,23 +191,15 @@ struct HashTableCell
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
};
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashTableCell<Key, Hash, State> * cell)
{ return &cell->key; }
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline void * lookupResultGetMapped(HashTableCell<Key, Hash, State> *)
{ return nullptr; }
/**
* A helper function for HashTable::insert() to set the "mapped" value.
* Overloaded on the mapped type, does nothing if it's void.
* Overloaded on the mapped type, does nothing if it's VoidMapped.
*/
template <typename ValueType>
void insertSetMapped(void * /* dest */, const ValueType & /* src */) {}
void insertSetMapped(VoidMapped /* dest */, const ValueType & /* src */) {}
template <typename MappedType, typename ValueType>
void insertSetMapped(MappedType * dest, const ValueType & src) { *dest = src.second; }
void insertSetMapped(MappedType & dest, const ValueType & src) { dest = src.second; }
/** Determines the size of the hash table, and when and how much it should be resized.
@ -276,7 +252,7 @@ struct HashTableGrower
/** When used as a Grower, it turns a hash table into something like a lookup table.
* It remains non-optimal - the cells store the keys.
* Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
* TODO Make a proper lookup table.
* NOTE: Better to use FixedHashTable instead.
*/
template <size_t key_bits>
struct HashTableFixedGrower
@ -366,7 +342,6 @@ protected:
using HashValue = size_t;
using Self = HashTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key.
@ -586,9 +561,10 @@ protected:
public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
// Use lookupResultGetMapped/Key to work with these values.
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;
@ -751,7 +727,7 @@ protected:
/// If the key is zero, insert it into a special place and return true.
/// We don't have to persist a zero key, because it's not actually inserted.
/// That's why we just take a plain Key, and not a key holder.
bool ALWAYS_INLINE emplaceIfZero(Key x, LookupResult & it, bool & inserted, size_t hash_value)
bool ALWAYS_INLINE emplaceIfZero(const Key & x, LookupResult & it, bool & inserted, size_t hash_value)
{
/// If it is claimed that the zero key can not be inserted into the table.
if (!Cell::need_zero_value_storage)
@ -793,7 +769,7 @@ protected:
keyHolderPersistKey(key_holder);
const auto & key = keyHolderGetKey(key_holder);
new(&buf[place_value]) Cell(key, *this);
new (&buf[place_value]) Cell(key, *this);
buf[place_value].setHash(hash_value);
inserted = true;
++m_size;
@ -846,7 +822,7 @@ public:
}
if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x);
insertSetMapped(res.first->getMapped(), x);
return res;
}
@ -869,11 +845,11 @@ public:
*
* Example usage:
*
* Map::iterator it;
* Map::LookupResult it;
* bool inserted;
* map.emplace(key, it, inserted);
* if (inserted)
* new(&it->second) Mapped(value);
* new (&it->getMapped()) Mapped(value);
*/
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
@ -903,7 +879,7 @@ public:
resize();
}
LookupResult ALWAYS_INLINE find(Key x)
LookupResult ALWAYS_INLINE find(const Key & x)
{
if (Cell::isZero(x, *this))
return this->hasZero() ? this->zeroValue() : nullptr;
@ -913,12 +889,12 @@ public:
return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
}
ConstLookupResult ALWAYS_INLINE find(Key x) const
ConstLookupResult ALWAYS_INLINE find(const Key & x) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x);
}
LookupResult ALWAYS_INLINE find(Key x, size_t hash_value)
LookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value)
{
if (Cell::isZero(x, *this))
return this->hasZero() ? this->zeroValue() : nullptr;
@ -927,7 +903,12 @@ public:
return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
}
bool ALWAYS_INLINE has(Key x) const
/// Const lookup of key `x` with an already computed `hash_value`.
/// Reuses the non-const overload and returns its result as a pointer to a const cell.
ConstLookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value) const
{
    using MutableSelf = std::decay_t<decltype(*this)>;
    auto * mutable_this = const_cast<MutableSelf *>(this);
    return mutable_this->find(x, hash_value);
}
bool ALWAYS_INLINE has(const Key & x) const
{
if (Cell::isZero(x, *this))
return this->hasZero();
@ -938,7 +919,7 @@ public:
}
bool ALWAYS_INLINE has(Key x, size_t hash_value) const
bool ALWAYS_INLINE has(const Key & x, size_t hash_value) const
{
if (Cell::isZero(x, *this))
return this->hasZero();

View File

@ -38,7 +38,6 @@ protected:
friend class Reader;
using Self = SmallTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements.
Cell buf[capacity]; /// A piece of memory for all elements.
@ -72,8 +71,9 @@ protected:
public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
class Reader final : private Cell::State
{
@ -391,16 +391,17 @@ class SmallMapTable : public SmallTable<Key, Cell, capacity>
{
public:
using key_type = Key;
using mapped_type = typename Cell::Mapped;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
/// Returns a reference to the value mapped to key `x`, emplacing the key first if needed.
/// The mapped value is default-constructed only when `emplace` reports a new insertion.
mapped_type & ALWAYS_INLINE operator[](Key x)
{
    typename SmallMapTable::iterator it;
    bool inserted;
    this->emplace(x, it, inserted);

    /// Construct the mapped value only for a freshly inserted key. Running the constructor
    /// again for a key that is already present would overwrite its current value, so
    /// an expression like `++map[key]` would never accumulate.
    if (inserted)
        new (&it->getMapped()) mapped_type();

    return it->getMapped();
}
};

View File

@ -8,43 +8,60 @@ template <typename Key, typename TMapped>
struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
{
    /// Generic map cell for a string hash map sub-table: reuses HashMapCell and
    /// exposes the stored key to the outside as a StringRef.
    using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;

    /// This cell type does not ask the table for separate zero-key storage.
    static constexpr bool need_zero_value_storage = false;

    /// Key as seen by users of the table (external form).
    const StringRef getKey() const
    {
        const auto & stored_key = this->value.first;
        return toStringRef(stored_key);
    }

    /// Key as stored inside the cell's value (internal form).
    static const Key & getKey(const value_type & value_)
    {
        return value_.first;
    }
};
template<typename Key, typename Mapped>
auto lookupResultGetMapped(StringHashMapCell<Key, Mapped> * cell) { return &cell->getSecond(); }
/// Map cell specialization for StringKey16 keys.
/// The zero (empty) cell is recognised by the `low` part of the key being 0.
template <typename TMapped>
struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
{
    using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;

    /// This cell type does not ask the table for separate zero-key storage.
    static constexpr bool need_zero_value_storage = false;

    /// Is this cell the zero cell? Delegates to the key-based check below.
    bool isZero(const HashTableNoState & state) const
    {
        return StringHashMapCell::isZero(this->value.first, state);
    }

    /// Relies on strings not containing zero bytes.
    /// NOTE: for that reason it cannot be used in the serialized method.
    static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/)
    {
        return key.low == 0;
    }

    /// Mark the cell as zero by clearing the `low` part of the key.
    void setZero()
    {
        this->value.first.low = 0;
    }

    /// Key as seen by users of the table (external form).
    const StringRef getKey() const
    {
        const auto & stored_key = this->value.first;
        return toStringRef(stored_key);
    }

    /// Key as stored inside the cell's value (internal form).
    static const StringKey16 & getKey(const value_type & value_)
    {
        return value_.first;
    }
};
/// Map cell specialization for StringKey24 keys.
/// The zero (empty) cell is recognised by the `a` part of the key being 0.
template <typename TMapped>
struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
{
    using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;

    /// This cell type does not ask the table for separate zero-key storage.
    static constexpr bool need_zero_value_storage = false;

    /// Is this cell the zero cell? Delegates to the key-based check below.
    bool isZero(const HashTableNoState & state) const
    {
        return StringHashMapCell::isZero(this->value.first, state);
    }

    /// Relies on strings not containing zero bytes.
    /// NOTE: for that reason it cannot be used in the serialized method.
    static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/)
    {
        return key.a == 0;
    }

    /// Mark the cell as zero by clearing the `a` part of the key.
    void setZero()
    {
        this->value.first.a = 0;
    }

    /// Key as seen by users of the table (external form).
    const StringRef getKey() const
    {
        const auto & stored_key = this->value.first;
        return toStringRef(stored_key);
    }

    /// Key as stored inside the cell's value (internal form).
    static const StringKey24 & getKey(const value_type & value_)
    {
        return value_.first;
    }
};
/// Map cell specialization for StringRef keys, built on HashMapCellWithSavedHash.
template <typename TMapped>
struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
{
    using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
    using value_type = typename Base::value_type;
    using Base::Base;

    /// This cell type does not ask the table for separate zero-key storage.
    static constexpr bool need_zero_value_storage = false;

    /// External form of the key: the accessor inherited from the base cell is used as is.
    using Base::getKey;

    /// Key as stored inside the cell's value (internal form).
    static const StringRef & getKey(const value_type & value_)
    {
        return value_.first;
    }
};
template <typename TMapped, typename Allocator>
@ -61,13 +78,10 @@ template <typename TMapped, typename Allocator = HashTableAllocator>
class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
{
public:
using Key = StringRef;
using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
using Self = StringHashMap;
using Key = StringRef;
using key_type = StringRef;
using mapped_type = TMapped;
using value_type = typename Base::Ts::value_type;
using LookupResult = mapped_type *;
using LookupResult = typename Base::LookupResult;
using Base::Base;
@ -80,18 +94,13 @@ public:
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
if (this->m0.hasZero())
if (this->m0.hasZero() && that.m0.hasZero())
func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
else if (this->m0.hasZero())
{
const bool emplace_new_zero = !that.m0.hasZero();
if (emplace_new_zero)
{
that.m0.setHasZero();
}
func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(),
emplace_new_zero);
that.m0.setHasZero();
func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
}
this->m1.mergeToViaEmplace(that.m1, func);
this->m2.mergeToViaEmplace(that.m2, func);
this->m3.mergeToViaEmplace(that.m3, func);
@ -106,32 +115,25 @@ public:
template <typename Func>
void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
{
if (this->m0.hasZero())
{
if (that.m0.hasZero())
{
func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), true);
}
else
{
func(this->m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), false);
}
}
if (this->m0.size() && that.m0.size())
func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
else if (this->m0.size())
func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
this->m1.mergeToViaFind(that.m1, func);
this->m2.mergeToViaFind(that.m2, func);
this->m3.mergeToViaFind(that.m3, func);
this->ms.mergeToViaFind(that.ms, func);
}
mapped_type & ALWAYS_INLINE operator[](Key x)
TMapped & ALWAYS_INLINE operator[](const Key & x)
{
LookupResult it;
bool inserted;
LookupResult it = nullptr;
emplace(x, it, inserted);
this->emplace(x, it, inserted);
if (inserted)
new (it) mapped_type();
return *it;
new (&it->getMapped()) TMapped();
return it->getMapped();
}
template <typename Func>
@ -139,27 +141,27 @@ public:
{
if (this->m0.size())
{
func(StringRef{}, this->m0.zeroValue()->getSecond());
func(StringRef{}, this->m0.zeroValue()->getMapped());
}
for (auto & v : this->m1)
{
func(toStringRef(v.getFirst()), v.getSecond());
func(v.getKey(), v.getMapped());
}
for (auto & v : this->m2)
{
func(toStringRef(v.getFirst()), v.getSecond());
func(v.getKey(), v.getMapped());
}
for (auto & v : this->m3)
{
func(toStringRef(v.getFirst()), v.getSecond());
func(v.getKey(), v.getMapped());
}
for (auto & v : this->ms)
{
func(v.getFirst(), v.getSecond());
func(v.getKey(), v.getMapped());
}
}
@ -167,14 +169,14 @@ public:
void ALWAYS_INLINE forEachMapped(Func && func)
{
if (this->m0.size())
func(this->m0.zeroValue()->getSecond());
func(this->m0.zeroValue()->getMapped());
for (auto & v : this->m1)
func(v.getSecond());
func(v.getMapped());
for (auto & v : this->m2)
func(v.getSecond());
func(v.getMapped());
for (auto & v : this->m3)
func(v.getSecond());
func(v.getMapped());
for (auto & v : this->ms)
func(v.getSecond());
func(v.getMapped());
}
};

View File

@ -3,9 +3,7 @@
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h>
struct StringKey0
{
};
#include <variant>
using StringKey8 = UInt64;
using StringKey16 = DB::UInt128;
@ -112,7 +110,7 @@ public:
using ConstLookupResult = const Cell *;
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t /* hash */)
void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0)
{
if (!hasZero())
{
@ -125,11 +123,16 @@ public:
}
template <typename Key>
LookupResult ALWAYS_INLINE find(Key, size_t /* hash */)
LookupResult ALWAYS_INLINE find(const Key &, size_t = 0)
{
return hasZero() ? zeroValue() : nullptr;
}
/// Const lookup. Both the key and the hash are ignored: the result is the zero-value
/// cell when it is present, otherwise a null pointer.
template <typename Key>
ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const
{
    if (!hasZero())
        return nullptr;

    return zeroValue();
}
void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
@ -148,6 +151,26 @@ struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
void increaseSize() { this->size_degree += 1; }
};
/// Pointer-like lookup result used by StringHashTable.
/// It wraps a pointer to the mapped value and mimics the `it->getMapped()` / `if (it)`
/// interface of cell-pointer lookup results. The key is not available here, so getKey()
/// returns an empty VoidKey placeholder.
template <typename Mapped>
struct StringHashTableLookupResult
{
    /// Null means "nothing found". It is initialised explicitly so that a default-constructed
    /// or nullptr-constructed result really tests as null instead of holding an
    /// indeterminate pointer that operator bool and the comparisons below would read.
    Mapped * mapped_ptr = nullptr;

    StringHashTableLookupResult() = default;

    /// Intentionally implicit: a raw pointer to the mapped value converts to a lookup result.
    StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {}

    /// Intentionally implicit: allows `LookupResult it = nullptr;` and `return nullptr;`.
    StringHashTableLookupResult(std::nullptr_t) {}

    const VoidKey getKey() const { return {}; }

    /// Pointer semantics: constness of the handle does not affect the pointee, so this is
    /// callable through the const operator-> as well. Must not be called on a null result.
    auto & getMapped() const { return *mapped_ptr; }

    /// Dereferencing yields the result object itself, so `it->getMapped()` works.
    auto & operator*() { return *this; }
    auto & operator*() const { return *this; }
    auto * operator->() { return this; }
    auto * operator->() const { return this; }

    /// True when something was found.
    operator bool() const { return mapped_ptr != nullptr; }

    friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr == nullptr; }
    friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr == nullptr; }
    friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr != nullptr; }
    friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr != nullptr; }
};
template <typename SubMaps>
class StringHashTable : private boost::noncopyable
{
@ -177,8 +200,12 @@ protected:
public:
using Key = StringRef;
using key_type = Key;
using mapped_type = typename Ts::mapped_type;
using value_type = typename Ts::value_type;
using LookupResult = typename Ts::mapped_type *;
using cell_type = typename Ts::cell_type;
using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>;
using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>;
StringHashTable() {}
@ -199,16 +226,15 @@ public:
// 2. Use switch case extension to generate fast dispatching table
// 3. Funcs are named callables that can be force_inlined
// NOTE: It relies on Little Endianness
template <typename KeyHolder, typename Func>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
template <typename Self, typename KeyHolder, typename Func>
static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{
const StringRef & x = keyHolderGetKey(key_holder);
const size_t sz = x.size;
if (sz == 0)
{
static constexpr StringKey0 key0{};
keyHolderDiscardKey(key_holder);
return func(m0, key0, 0);
return func(self.m0, VoidKey{}, 0);
}
const char * p = x.data;
@ -239,7 +265,7 @@ public:
n[0] >>= s;
}
keyHolderDiscardKey(key_holder);
return func(m1, k8, hash(k8));
return func(self.m1, k8, hash(k8));
}
case 1: // 9..16 bytes
{
@ -248,7 +274,7 @@ public:
memcpy(&n[1], lp, 8);
n[1] >>= s;
keyHolderDiscardKey(key_holder);
return func(m2, k16, hash(k16));
return func(self.m2, k16, hash(k16));
}
case 2: // 17..24 bytes
{
@ -257,11 +283,11 @@ public:
memcpy(&n[2], lp, 8);
n[2] >>= s;
keyHolderDiscardKey(key_holder);
return func(m3, k24, hash(k24));
return func(self.m3, k24, hash(k24));
}
default: // >= 25 bytes
{
return func(ms, std::forward<KeyHolder>(key_holder), hash(x));
return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
}
}
}
@ -279,14 +305,14 @@ public:
{
typename Map::LookupResult result;
map.emplace(key_holder, result, inserted, hash);
mapped = lookupResultGetMapped(result);
mapped = &result->getMapped();
}
};
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{
this->dispatch(key_holder, EmplaceCallable(it, inserted));
this->dispatch(*this, key_holder, EmplaceCallable(it, inserted));
}
struct FindCallable
@ -295,15 +321,25 @@ public:
// any key holders here, only with normal keys. The key type is still
// different for every subtable, this is why it is a template parameter.
template <typename Submap, typename SubmapKey>
LookupResult ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash)
auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash)
{
return lookupResultGetMapped(map.find(key, hash));
return &map.find(key, hash)->getMapped();
}
};
LookupResult ALWAYS_INLINE find(Key x)
LookupResult ALWAYS_INLINE find(const Key & x)
{
return dispatch(x, FindCallable{});
return dispatch(*this, x, FindCallable{});
}
/// Const lookup of key `x`: dispatches to the matching submap with a FindCallable
/// and wraps whatever it returns into a ConstLookupResult.
ConstLookupResult ALWAYS_INLINE find(const Key & x) const
{
    FindCallable lookup{};
    return dispatch(*this, x, lookup);
}
/// Checks whether key `x` is present. The second (hash) argument is ignored.
bool ALWAYS_INLINE has(const Key & x, size_t = 0) const
{
    const auto found = dispatch(*this, x, FindCallable{});
    return found != nullptr;
}
void write(DB::WriteBuffer & wb) const

View File

@ -16,10 +16,6 @@ template
class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>
{
public:
using key_type = Key;
using mapped_type = typename Cell::Mapped;
using value_type = typename Cell::value_type;
using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>;
using LookupResult = typename Impl::LookupResult;
@ -32,16 +28,16 @@ public:
this->impls[i].forEachMapped(func);
}
mapped_type & ALWAYS_INLINE operator[](Key x)
typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
{
typename TwoLevelHashMapTable::LookupResult it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new(lookupResultGetMapped(it)) mapped_type();
new (&it->getMapped()) typename Cell::Mapped();
return *lookupResultGetMapped(it);
return it->getMapped();
}
};

View File

@ -82,7 +82,9 @@ protected:
public:
using key_type = typename Impl::key_type;
using mapped_type = typename Impl::mapped_type;
using value_type = typename Impl::value_type;
using cell_type = typename Impl::cell_type;
using LookupResult = typename Impl::LookupResult;
using ConstLookupResult = typename Impl::ConstLookupResult;
@ -217,7 +219,7 @@ public:
emplace(Cell::getKey(x), res.first, res.second, hash_value);
if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x);
insertSetMapped(res.first->getMapped(), x);
return res;
}

View File

@ -8,16 +8,12 @@ class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMap
{
public:
using Key = StringRef;
using key_type = Key;
using Self = TwoLevelStringHashMap;
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
using Base::Base;
using typename Base::Impl;
using mapped_type = TMapped;
using value_type = typename Base::value_type;
using LookupResult = typename Base::LookupResult;
using Base::Base;
template <typename Func>
void ALWAYS_INLINE forEachMapped(Func && func)
{
@ -25,13 +21,13 @@ public:
return this->impls[i].forEachMapped(func);
}
mapped_type & ALWAYS_INLINE operator[](Key x)
TMapped & ALWAYS_INLINE operator[](const Key & x)
{
bool inserted;
LookupResult it;
emplace(x, it, inserted);
this->emplace(x, it, inserted);
if (inserted)
new (lookupResultGetMapped(it)) mapped_type();
return *lookupResultGetMapped(it);
new (&it->getMapped()) TMapped();
return it->getMapped();
}
};

View File

@ -19,8 +19,7 @@ public:
// TODO: currently hashing contains redundant computations when doing distributed or external aggregations
size_t hash(const Key & x) const
{
return const_cast<Self &>(*this).dispatch(x,
[&](const auto &, const auto &, size_t hash) { return hash; });
return const_cast<Self &>(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; });
}
size_t operator()(const Key & x) const { return hash(x); }
@ -30,8 +29,12 @@ public:
public:
using key_type = typename Impl::key_type;
using mapped_type = typename Impl::mapped_type;
using value_type = typename Impl::value_type;
using cell_type = typename Impl::cell_type;
using LookupResult = typename Impl::LookupResult;
using ConstLookupResult = typename Impl::ConstLookupResult;
Impl impls[NUM_BUCKETS];
@ -71,16 +74,15 @@ public:
// This function is mostly the same as StringHashTable::dispatch, but with
// added bucket computation. See the comments there.
template <typename Func, typename KeyHolder>
decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func)
template <typename Self, typename Func, typename KeyHolder>
static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{
const StringRef & x = keyHolderGetKey(key_holder);
const size_t sz = x.size;
if (sz == 0)
{
static constexpr StringKey0 key0{};
keyHolderDiscardKey(key_holder);
return func(impls[0].m0, key0, 0);
return func(self.impls[0].m0, VoidKey{}, 0);
}
const char * p = x.data;
@ -113,7 +115,7 @@ public:
auto res = hash(k8);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(impls[buck].m1, k8, res);
return func(self.impls[buck].m1, k8, res);
}
case 1:
{
@ -124,7 +126,7 @@ public:
auto res = hash(k16);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(impls[buck].m2, k16, res);
return func(self.impls[buck].m2, k16, res);
}
case 2:
{
@ -135,13 +137,13 @@ public:
auto res = hash(k24);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(impls[buck].m3, k24, res);
return func(self.impls[buck].m3, k24, res);
}
default:
{
auto res = hash(x);
auto buck = getBucketFromHash(res);
return func(impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
}
}
}
@ -149,12 +151,17 @@ public:
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{
dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted});
dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted});
}
LookupResult ALWAYS_INLINE find(Key x)
LookupResult ALWAYS_INLINE find(const Key x)
{
return dispatch(x, typename Impl::FindCallable{});
return dispatch(*this, x, typename Impl::FindCallable{});
}
/// Const lookup of key `x`: dispatches to the matching bucket and submap using the
/// implementation table's FindCallable.
ConstLookupResult ALWAYS_INLINE find(const Key x) const
{
    typename Impl::FindCallable lookup{};
    return dispatch(*this, x, lookup);
}
void write(DB::WriteBuffer & wb) const

View File

@ -369,7 +369,7 @@ private:
if (!it)
return nullptr;
return *lookupResultGetMapped(it);
return it->getMapped();
}
void rebuildCounterMap()

View File

@ -155,10 +155,10 @@ int main(int argc, char ** argv)
map.emplace(rand(), it, inserted);
if (inserted)
{
new(lookupResultGetMapped(it)) Arr(n);
new (&it->getMapped()) Arr(n);
for (size_t j = 0; j < n; ++j)
(*lookupResultGetMapped(it))[j] = field;
(it->getMapped())[j] = field;
}
}

View File

@ -82,14 +82,14 @@ void aggregate12(Map & map, Source::const_iterator begin, Source::const_iterator
{
if (prev_it != end && *it == *prev_it)
{
++*lookupResultGetMapped(found);
++found->getMapped();
continue;
}
prev_it = it;
bool inserted;
map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found);
++found->getMapped();
}
}
@ -107,14 +107,14 @@ void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source::const_
{
if (*it == *prev_it)
{
++*lookupResultGetMapped(found);
++found->getMapped();
continue;
}
prev_it = it;
bool inserted;
map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found);
++found->getMapped();
}
}
@ -126,7 +126,7 @@ void merge2(MapTwoLevel * maps, size_t num_threads, size_t bucket)
{
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it)
maps[0].impls[bucket][it->getFirst()] += it->getSecond();
maps[0].impls[bucket][it->getKey()] += it->getMapped();
}
void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end)
@ -138,7 +138,7 @@ void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_
auto found = local_map.find(*it);
if (found)
++*lookupResultGetMapped(found);
++found->getMapped();
else if (local_map.size() < threshold)
++local_map[*it]; /// TODO You could do one lookup, not two.
else
@ -163,13 +163,13 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const
Map::LookupResult found;
bool inserted;
local_map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found);
++found->getMapped();
if (inserted && local_map.size() == threshold)
{
std::lock_guard<Mutex> lock(mutex);
for (auto & value_type : local_map)
global_map[value_type.getFirst()] += value_type.getSecond();
global_map[value_type.getKey()] += value_type.getMapped();
local_map.clear();
}
@ -198,7 +198,7 @@ void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexes, Sour
auto found = local_map.find(*it);
if (found)
++*lookupResultGetMapped(found);
++found->getMapped();
else
{
size_t hash_value = global_map.hash(*it);
@ -311,7 +311,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->getFirst()] += it->getSecond();
maps[0][it->getKey()] += it->getMapped();
watch.stop();
double time_merged = watch.elapsedSeconds();
@ -365,7 +365,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->getFirst()] += it->getSecond();
maps[0][it->getKey()] += it->getMapped();
watch.stop();
@ -435,7 +435,7 @@ int main(int argc, char ** argv)
continue;
finish = false;
maps[0][iterators[i]->getFirst()] += iterators[i]->getSecond();
maps[0][iterators[i]->getKey()] += iterators[i]->getMapped();
++iterators[i];
}
@ -623,7 +623,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond();
global_map[it->getKey()] += it->getMapped();
pool.wait();
@ -689,7 +689,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond();
global_map[it->getKey()] += it->getMapped();
pool.wait();
@ -760,7 +760,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond();
global_map[it->getKey()] += it->getMapped();
pool.wait();

View File

@ -51,9 +51,9 @@ struct AggregateIndependent
map.emplace(*it, place, inserted);
if (inserted)
creator(*lookupResultGetMapped(place));
creator(place->getMapped());
else
updater(*lookupResultGetMapped(place));
updater(place->getMapped());
}
});
}
@ -93,7 +93,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
{
if (it != begin && *it == prev_key)
{
updater(*lookupResultGetMapped(place));
updater(place->getMapped());
continue;
}
prev_key = *it;
@ -102,9 +102,9 @@ struct AggregateIndependentWithSequentialKeysOptimization
map.emplace(*it, place, inserted);
if (inserted)
creator(*lookupResultGetMapped(place));
creator(place->getMapped());
else
updater(*lookupResultGetMapped(place));
updater(place->getMapped());
}
});
}
@ -131,7 +131,7 @@ struct MergeSequential
auto begin = source_maps[i]->begin();
auto end = source_maps[i]->end();
for (auto it = begin; it != end; ++it)
merger((*source_maps[0])[it->getFirst()], it->getSecond());
merger((*source_maps[0])[it->getKey()], it->getMapped());
}
result_map = source_maps[0];
@ -161,7 +161,7 @@ struct MergeSequentialTransposed /// In practice not better than usual.
continue;
finish = false;
merger((*result_map)[iterators[i]->getFirst()], iterators[i]->getSecond());
merger((*result_map)[iterators[i]->getKey()], iterators[i]->getMapped());
++iterators[i];
}

View File

@ -42,7 +42,7 @@ int main(int, char **)
cont[1] = "Goodbye.";
for (auto x : cont)
std::cerr << x.getFirst() << " -> " << x.getSecond() << std::endl;
std::cerr << x.getKey() << " -> " << x.getMapped() << std::endl;
DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

View File

@ -211,7 +211,7 @@ int main(int argc, char ** argv)
{
RefsHashMap::LookupResult inserted_it;
bool inserted;
set.emplace(StringRef(*lookupResultGetMapped(it)), inserted_it, inserted);
set.emplace(StringRef(*it), inserted_it, inserted);
}
std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, "
@ -222,7 +222,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->getFirst().data, it->getFirst().size);
devnull.write(it->getKey().data, it->getKey().size);
devnull << std::endl;
}
@ -249,7 +249,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->getFirst().data, it->getFirst().size);
devnull.write(it->getKey().data, it->getKey().size);
devnull << std::endl;
}
}

View File

@ -75,7 +75,7 @@ void DataTypeEnum<Type>::fillMaps()
if (!inserted_value.second)
throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and " + toString(*lookupResultGetMapped(inserted_value.first)),
+ " and " + toString(inserted_value.first->getMapped()),
ErrorCodes::SYNTAX_ERROR};
const auto inserted_name = value_to_name_map.insert(

View File

@ -81,7 +81,7 @@ public:
if (!it)
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::LOGICAL_ERROR};
return *lookupResultGetMapped(it);
return it->getMapped();
}
Field castToName(const Field & value_or_name) const override;

View File

@ -216,7 +216,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
/// request new values
update(

View File

@ -311,7 +311,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); });
/// request new values
update(
@ -437,7 +437,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
{
return pair.getSecond().front();
return pair.getMapped().front();
});
update(
@ -469,7 +469,7 @@ private:
{
const StringRef key = keys_array[row];
const auto it = map.find(key);
const auto string_ref = it ? *lookupResultGetMapped(it) : get_default(row);
const auto string_ref = it ? it->getMapped() : get_default(row);
out->insertData(string_ref.data, string_ref.size);
}
}
@ -576,7 +576,7 @@ private:
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.getSecond())
if (key_found_pair.getMapped())
{
++found_num;
continue;
@ -584,7 +584,7 @@ private:
++not_found_num;
auto key = key_found_pair.getFirst();
auto key = key_found_pair.getKey();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;

View File

@ -561,7 +561,7 @@ void ComplexKeyHashedDictionary::getItemsImpl(
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
const auto it = attr.find(key);
set_value(i, it ? static_cast<OutputType>(*lookupResultGetMapped(it)) : get_default(i));
set_value(i, it ? static_cast<OutputType>(it->getMapped()) : get_default(i));
/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
@ -729,7 +729,7 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
std::vector<StringRef> keys;
keys.reserve(attr.size());
for (const auto & key : attr)
keys.push_back(key.getFirst());
keys.push_back(key.getKey());
return keys;
}

View File

@ -13,8 +13,8 @@ template <typename T> auto first(const T & value) -> decltype(value.first) { ret
template <typename T> auto second(const T & value) -> decltype(value.second) { return value.second; }
/// HashMap
template <typename T> auto first(const T & value) -> decltype(value.getFirst()) { return value.getFirst(); }
template <typename T> auto second(const T & value) -> decltype(value.getSecond()) { return value.getSecond(); }
template <typename T> auto first(const T & value) -> decltype(value.getKey()) { return value.getKey(); }
template <typename T> auto second(const T & value) -> decltype(value.getMapped()) { return value.getMapped(); }
}

View File

@ -127,7 +127,7 @@ void RangeHashedDictionary::getString(
if (it)
{
const auto date = dates[i];
const auto & ranges_and_values = *lookupResultGetMapped(it);
const auto & ranges_and_values = it->getMapped();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{
@ -398,7 +398,7 @@ void RangeHashedDictionary::getItemsImpl(
if (it)
{
const auto date = dates[i];
const auto & ranges_and_values = *lookupResultGetMapped(it);
const auto & ranges_and_values = it->getMapped();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{
@ -425,7 +425,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
if (it)
{
auto & values = *lookupResultGetMapped(it);
auto & values = it->getMapped();
const auto insert_it
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
@ -498,7 +498,7 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i
if (it)
{
auto & values = *lookupResultGetMapped(it);
auto & values = it->getMapped();
const auto insert_it = std::lower_bound(
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
@ -610,9 +610,9 @@ void RangeHashedDictionary::getIdsAndDates(
for (const auto & key : attr)
{
for (const auto & value : key.getSecond())
for (const auto & value : key.getMapped())
{
ids.push_back(key.getFirst());
ids.push_back(key.getKey());
start_dates.push_back(value.range.left);
end_dates.push_back(value.range.right);

View File

@ -140,8 +140,8 @@ private:
std::lock_guard lock(mutex);
map.emplace(addr, it, inserted);
if (inserted)
*lookupResultGetMapped(it) = impl(addr);
return *lookupResultGetMapped(it);
it->getMapped() = impl(addr);
return it->getMapped();
}
};

View File

@ -467,15 +467,15 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable
for (const auto & pair : map)
{
if (pair.getSecond() == args)
if (pair.getMapped() == args)
{
++result_offset;
if constexpr (is_numeric_column)
result_data.insertValue(pair.getFirst());
result_data.insertValue(pair.getKey());
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
result_data.insertData(pair.getFirst().data, pair.getFirst().size);
result_data.insertData(pair.getKey().data, pair.getKey().size);
else
result_data.deserializeAndInsertFromArena(pair.getFirst().data);
result_data.deserializeAndInsertFromArena(pair.getKey().data);
if (all_nullable)
null_map.push_back(0);

View File

@ -508,7 +508,7 @@ private:
{
auto it = table.find(src[i]);
if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default;
}
@ -524,7 +524,7 @@ private:
{
auto it = table.find(src[i]);
if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default[i];
}
@ -540,7 +540,7 @@ private:
{
auto it = table.find(src[i]);
if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i]));
memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
else
dst[i] = src[i];
}
@ -557,7 +557,7 @@ private:
for (size_t i = 0; i < size; ++i)
{
auto it = table.find(src[i]);
StringRef ref = it ? *lookupResultGetMapped(it) : dst_default;
StringRef ref = it ? it->getMapped() : dst_default;
dst_data.resize(current_dst_offset + ref.size);
memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
current_dst_offset += ref.size;
@ -581,7 +581,7 @@ private:
StringRef ref;
if (it)
ref = *lookupResultGetMapped(it);
ref = it->getMapped();
else
{
ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);
@ -611,7 +611,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i]));
memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
else
dst[i] = dst_default;
}
@ -632,7 +632,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it)
memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i]));
memcpy(&dst[i], &it->getMapped(), sizeof(dst[i]));
else
dst[i] = dst_default[i];
}
@ -655,7 +655,7 @@ private:
auto it = table.find(src_ref);
StringRef dst_ref = it ? *lookupResultGetMapped(it) : (with_default ? dst_default : src_ref);
StringRef dst_ref = it ? it->getMapped() : (with_default ? dst_default : src_ref);
dst_data.resize(current_dst_offset + dst_ref.size);
memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
current_dst_offset += dst_ref.size;
@ -697,7 +697,7 @@ private:
StringRef dst_ref;
if (it)
dst_ref = *lookupResultGetMapped(it);
dst_ref = it->getMapped();
else
{
dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);

View File

@ -180,8 +180,6 @@ struct AggregationMethodOneNumber
using Data = TData;
using Key = typename Data::key_type;
using Mapped = typename Data::mapped_type;
using iterator = typename Data::iterator;
using const_iterator = typename Data::const_iterator;
Data data;
@ -356,8 +354,6 @@ struct AggregationMethodKeysFixed
using Data = TData;
using Key = typename Data::key_type;
using Mapped = typename Data::mapped_type;
using iterator = typename Data::iterator;
using const_iterator = typename Data::const_iterator;
static constexpr bool has_nullable_keys = has_nullable_keys_;
static constexpr bool has_low_cardinality = has_low_cardinality_;

View File

@ -1237,7 +1237,7 @@ private:
for (; it != end; ++it)
{
const Mapped & mapped = it->getSecond();
const Mapped & mapped = it->getMapped();
if (mapped.getUsed())
continue;

View File

@ -162,8 +162,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(lookupResultGetMapped(it)) Value;
std::swap(*lookupResultGetMapped(it), value);
new (&it->getMapped()) Value;
std::swap(it->getMapped(), value);
INIT
}
}
@ -193,8 +193,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(lookupResultGetMapped(it)) Value;
std::swap(*lookupResultGetMapped(it), value);
new (&it->getMapped()) Value;
std::swap(it->getMapped(), value);
INIT
}
}
@ -225,8 +225,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(lookupResultGetMapped(it)) Value;
std::swap(*lookupResultGetMapped(it), value);
new (&it->getMapped()) Value;
std::swap(it->getMapped(), value);
INIT
}
}

View File

@ -85,7 +85,7 @@ int main(int, char **)
std::cerr << "Collisions: " << map.getCollisions() << std::endl;
for (auto x : map)
std::cerr << x.getFirst().toString() << " -> " << x.getSecond() << std::endl;
std::cerr << x.getKey().toString() << " -> " << x.getMapped() << std::endl;
return 0;
}

View File

@ -55,15 +55,15 @@ void NO_INLINE bench(const std::vector<UInt16> & data, const char * name)
map.emplace(data[i], it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 1;
it->getMapped() = 1;
else
++*lookupResultGetMapped(it);
++it->getMapped();
}
for (size_t i = 0, size = data.size(); i < size; ++i)
{
auto it = map.find(data[i]);
++*lookupResultGetMapped(it);
++it->getMapped();
}
watch.stop();
std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size()
@ -81,10 +81,10 @@ void insert(Map & map, StringRef & k)
typename Map::LookupResult it;
map.emplace(k, it, inserted, nullptr);
if (inserted)
*lookupResultGetMapped(it) = 1;
it->getMapped() = 1;
else
++*lookupResultGetMapped(it);
std::cout << *lookupResultGetMapped(map.find(k))<< std::endl;
++it->getMapped();
std::cout << map.find(k)->getMapped() << std::endl;
}
int main(int argc, char ** argv)

View File

@ -337,8 +337,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();
@ -366,8 +366,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();
@ -396,8 +396,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();
@ -426,8 +426,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();

View File

@ -595,8 +595,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
{
map.emplace(static_cast<const Key &>(data[i]), it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();

View File

@ -442,8 +442,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
{
map.emplace(static_cast<const Key &>(data[i]), it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();

View File

@ -144,8 +144,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();
@ -173,8 +173,8 @@ int main(int argc, char ** argv)
{
map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();

View File

@ -151,8 +151,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, DB::Arena &, const cha
{
map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();

View File

@ -67,8 +67,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();
@ -82,7 +82,7 @@ int main(int argc, char ** argv)
size_t elems = 0;
for (const auto & kv : map)
{
sum_counts += kv.getSecond();
sum_counts += kv.getMapped();
++elems;
}
@ -103,8 +103,8 @@ int main(int argc, char ** argv)
{
map.emplace(i, it, inserted);
if (inserted)
*lookupResultGetMapped(it) = 0;
++*lookupResultGetMapped(it);
it->getMapped() = 0;
++it->getMapped();
}
watch.stop();
@ -118,11 +118,11 @@ int main(int argc, char ** argv)
size_t elems = 0;
for (const auto & kv : map)
{
sum_counts += kv.getSecond();
sum_counts += kv.getMapped();
++elems;
if (kv.getFirst() > n)
std::cerr << kv.getFirst() << std::endl;
if (kv.getKey() > n)
std::cerr << kv.getKey() << std::endl;
}
std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;

View File

@ -65,9 +65,9 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz
if (prev_positions.size() > key_index
&& prev_positions[key_index]
&& name == *lookupResultGetKey(prev_positions[key_index]))
&& name == prev_positions[key_index]->getKey())
{
return *lookupResultGetMapped(prev_positions[key_index]);
return prev_positions[key_index]->getMapped();
}
else
{
@ -78,7 +78,7 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz
if (key_index < prev_positions.size())
prev_positions[key_index] = it;
return *lookupResultGetMapped(it);
return it->getMapped();
}
else
return UNKNOWN_FIELD;

View File

@ -131,7 +131,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex
}
else
{
index = *lookupResultGetMapped(it);
index = it->getMapped();
if (seen_columns[index])
throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);

View File

@ -57,7 +57,7 @@ void buildScatterSelector(
throw Exception("Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).", ErrorCodes::TOO_MANY_PARTS);
partition_num_to_first_row.push_back(i);
*lookupResultGetMapped(it) = partitions_count;
it->getMapped() = partitions_count;
++partitions_count;
@ -70,7 +70,7 @@ void buildScatterSelector(
}
if (partitions_count > 1)
selector[i] = *lookupResultGetMapped(it);
selector[i] = it->getMapped();
}
}

View File

@ -333,9 +333,9 @@ private:
{
for (size_t j = 0; j < columns.size(); ++j)
if (j == key_pos)
columns[j]->insertData(rawData(it->getFirst()), rawSize(it->getFirst()));
columns[j]->insertData(rawData(it->getKey()), rawSize(it->getKey()));
else
columns[j]->insertFrom(*it->getSecond().block->getByPosition(column_indices[j]).column.get(), it->getSecond().row_num);
columns[j]->insertFrom(*it->getMapped().block->getByPosition(column_indices[j]).column.get(), it->getMapped().row_num);
++rows_added;
}
else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof)
@ -343,11 +343,11 @@ private:
throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED);
}
else
for (auto ref_it = it->getSecond().begin(); ref_it.ok(); ++ref_it)
for (auto ref_it = it->getMapped().begin(); ref_it.ok(); ++ref_it)
{
for (size_t j = 0; j < columns.size(); ++j)
if (j == key_pos)
columns[j]->insertData(rawData(it->getFirst()), rawSize(it->getFirst()));
columns[j]->insertData(rawData(it->getKey()), rawSize(it->getKey()));
else
columns[j]->insertFrom(*ref_it->block->getByPosition(column_indices[j]).column.get(), ref_it->row_num);
++rows_added;

View File

@ -105,7 +105,7 @@ public:
if (table.end() == it)
return pos - data;
*pos = it->getSecond().sample(random());
*pos = it->getMapped().sample(random());
/// Zero byte marks end of string.
if (0 == *pos)
@ -125,12 +125,12 @@ public:
for (auto & elem : table)
{
UInt32 new_total = 0;
for (auto & frequency : elem.getSecond().data)
for (auto & frequency : elem.getMapped().data)
{
frequency.count = transform(frequency.count);
new_total += frequency.count;
}
elem.getSecond().total = new_total;
elem.getMapped().total = new_total;
}
}
@ -142,10 +142,10 @@ public:
for (const auto & elem : table)
{
writeBinary(elem.getFirst(), out);
writeBinary(UInt8(elem.getSecond().data.size()), out);
writeBinary(elem.getKey(), out);
writeBinary(UInt8(elem.getMapped().data.size()), out);
for (const auto & frequency : elem.getSecond().data)
for (const auto & frequency : elem.getMapped().data)
{
writeBinary(frequency.byte, out);
writeVarUInt(frequency.count, out);