mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge pull request #6729 from yandex/aku/key-holder
Key memory management for compound hash tables.
This commit is contained in:
commit
b7cbd33886
@ -10,6 +10,7 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
@ -132,11 +133,6 @@ struct AggregateFunctionGroupUniqArrayGenericData
|
||||
Set value;
|
||||
};
|
||||
|
||||
|
||||
/// Helper function for deserialize and insert for the class AggregateFunctionGroupUniqArrayGeneric
|
||||
template <bool is_plain_column>
|
||||
static StringRef getSerializationImpl(const IColumn & column, size_t row_num, Arena & arena);
|
||||
|
||||
template <bool is_plain_column>
|
||||
static void deserializeAndInsertImpl(StringRef str, IColumn & data_to);
|
||||
|
||||
@ -154,9 +150,18 @@ class AggregateFunctionGroupUniqArrayGeneric
|
||||
|
||||
using State = AggregateFunctionGroupUniqArrayGenericData;
|
||||
|
||||
static StringRef getSerialization(const IColumn & column, size_t row_num, Arena & arena)
|
||||
static auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena)
|
||||
{
|
||||
return getSerializationImpl<is_plain_column>(column, row_num, arena);
|
||||
if constexpr (is_plain_column)
|
||||
{
|
||||
return ArenaKeyHolder{column.getDataAt(row_num), arena};
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * begin = nullptr;
|
||||
StringRef serialized = column.serializeValueIntoArena(row_num, arena, begin);
|
||||
return SerializedKeyHolder{serialized, arena};
|
||||
}
|
||||
}
|
||||
|
||||
static void deserializeAndInsert(StringRef str, IColumn & data_to)
|
||||
@ -209,26 +214,13 @@ public:
|
||||
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
if (limit_num_elems && set.size() >= max_elems)
|
||||
return;
|
||||
|
||||
bool inserted;
|
||||
State::Set::iterator it;
|
||||
|
||||
if (limit_num_elems && set.size() >= max_elems)
|
||||
return;
|
||||
StringRef str_serialized = getSerialization(*columns[0], row_num, *arena);
|
||||
|
||||
set.emplace(str_serialized, it, inserted);
|
||||
|
||||
if constexpr (!is_plain_column)
|
||||
{
|
||||
if (!inserted)
|
||||
arena->rollback(str_serialized.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (inserted)
|
||||
it->getValueMutable().data = arena->insert(str_serialized.data, str_serialized.size);
|
||||
}
|
||||
auto key_holder = getKeyHolder(*columns[0], row_num, *arena);
|
||||
set.emplace(key_holder, it, inserted);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
@ -241,15 +233,11 @@ public:
|
||||
for (auto & rhs_elem : rhs_set)
|
||||
{
|
||||
if (limit_num_elems && cur_set.size() >= max_elems)
|
||||
return ;
|
||||
cur_set.emplace(rhs_elem.getValue(), it, inserted);
|
||||
if (inserted)
|
||||
{
|
||||
if (it->getValue().size)
|
||||
it->getValueMutable().data = arena->insert(it->getValue().data, it->getValue().size);
|
||||
else
|
||||
it->getValueMutable().data = nullptr;
|
||||
}
|
||||
return;
|
||||
|
||||
// We have to copy the keys to our arena.
|
||||
assert(arena != nullptr);
|
||||
cur_set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
|
||||
}
|
||||
}
|
||||
|
||||
@ -271,20 +259,6 @@ public:
|
||||
const char * getHeaderFilePath() const override { return __FILE__; }
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
inline StringRef getSerializationImpl<false>(const IColumn & column, size_t row_num, Arena & arena)
|
||||
{
|
||||
const char * begin = nullptr;
|
||||
return column.serializeValueIntoArena(row_num, arena, begin);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline StringRef getSerializationImpl<true>(const IColumn & column, size_t row_num, Arena &)
|
||||
{
|
||||
return column.getDataAt(row_num);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void deserializeAndInsertImpl<false>(StringRef str, IColumn & data_to)
|
||||
{
|
||||
|
@ -170,11 +170,14 @@ public:
|
||||
|
||||
/** Rollback just performed allocation.
|
||||
* Must pass a size not greater than what was just allocated.
|
||||
* Return the resulting head pointer, so that the caller can assert that
|
||||
* the allocation it intended to roll back was indeed the last one.
|
||||
*/
|
||||
void rollback(size_t size)
|
||||
void * rollback(size_t size)
|
||||
{
|
||||
head->pos -= size;
|
||||
ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right);
|
||||
return head->pos;
|
||||
}
|
||||
|
||||
/** Begin or expand allocation of contiguous piece of memory without alignment.
|
||||
|
@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include <Common/HashTable/HashTable.h>
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
#include <Common/ColumnsHashingImpl.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/LRUCache.h>
|
||||
@ -57,7 +59,7 @@ struct HashMethodOneNumber
|
||||
using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t
|
||||
|
||||
/// Is used for default implementation in HashMethodBase.
|
||||
FieldType getKey(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); }
|
||||
FieldType getKeyHolder(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); }
|
||||
|
||||
/// Get StringRef from value which can be inserted into column.
|
||||
static StringRef getValueRef(const Value & value)
|
||||
@ -86,24 +88,24 @@ struct HashMethodString
|
||||
chars = column_string.getChars().data();
|
||||
}
|
||||
|
||||
auto getKey(ssize_t row, Arena &) const
|
||||
auto getKeyHolder(ssize_t row, [[maybe_unused]] Arena & pool) const
|
||||
{
|
||||
return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
|
||||
StringRef key(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
|
||||
|
||||
if constexpr (place_string_to_arena)
|
||||
{
|
||||
return ArenaKeyHolder{key, pool};
|
||||
}
|
||||
else
|
||||
{
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
||||
static StringRef getValueRef(const Value & value) { return value.first; }
|
||||
|
||||
protected:
|
||||
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
|
||||
static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
|
||||
{
|
||||
if constexpr (place_string_to_arena)
|
||||
{
|
||||
if (key.size)
|
||||
key.data = pool.insert(key.data, key.size);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -126,17 +128,24 @@ struct HashMethodFixedString
|
||||
chars = &column_string.getChars();
|
||||
}
|
||||
|
||||
StringRef getKey(size_t row, Arena &) const { return StringRef(&(*chars)[row * n], n); }
|
||||
auto getKeyHolder(size_t row, [[maybe_unused]] Arena & pool) const
|
||||
{
|
||||
StringRef key(&(*chars)[row * n], n);
|
||||
|
||||
if constexpr (place_string_to_arena)
|
||||
{
|
||||
return ArenaKeyHolder{key, pool};
|
||||
}
|
||||
else
|
||||
{
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
||||
static StringRef getValueRef(const Value & value) { return value.first; }
|
||||
|
||||
protected:
|
||||
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||
static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
|
||||
{
|
||||
if constexpr (place_string_to_arena)
|
||||
key.data = pool.insert(key.data, key.size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -316,10 +325,10 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the key from the key columns for insertion into the hash table.
|
||||
ALWAYS_INLINE auto getKey(size_t row, Arena & pool) const
|
||||
/// Get the key holder from the key columns for insertion into the hash table.
|
||||
ALWAYS_INLINE auto getKeyHolder(size_t row, Arena & pool) const
|
||||
{
|
||||
return Base::getKey(getIndexAt(row), pool);
|
||||
return Base::getKeyHolder(getIndexAt(row), pool);
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
@ -347,30 +356,23 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
|
||||
return EmplaceResult(false);
|
||||
}
|
||||
|
||||
auto key = getKey(row_, pool);
|
||||
auto key_holder = getKeyHolder(row_, pool);
|
||||
|
||||
bool inserted = false;
|
||||
typename Data::iterator it;
|
||||
if (saved_hash)
|
||||
data.emplace(key, it, inserted, saved_hash[row]);
|
||||
data.emplace(key_holder, it, inserted, saved_hash[row]);
|
||||
else
|
||||
data.emplace(key, it, inserted);
|
||||
data.emplace(key_holder, it, inserted);
|
||||
|
||||
visit_cache[row] = VisitValue::Found;
|
||||
|
||||
if (inserted)
|
||||
{
|
||||
if constexpr (has_mapped)
|
||||
{
|
||||
new(&it->getSecond()) Mapped();
|
||||
Base::onNewKey(it->getFirstMutable(), pool);
|
||||
}
|
||||
else
|
||||
Base::onNewKey(*it, pool);
|
||||
}
|
||||
|
||||
if constexpr (has_mapped)
|
||||
{
|
||||
if (inserted)
|
||||
{
|
||||
new (&it->getSecond()) Mapped();
|
||||
}
|
||||
mapped_cache[row] = it->getSecond();
|
||||
return EmplaceResult(it->getSecond(), mapped_cache[row], inserted);
|
||||
}
|
||||
@ -407,13 +409,13 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
|
||||
return FindResult(visit_cache[row] == VisitValue::Found);
|
||||
}
|
||||
|
||||
auto key = getKey(row_, pool);
|
||||
auto key_holder = getKeyHolder(row_, pool);
|
||||
|
||||
typename Data::iterator it;
|
||||
if (saved_hash)
|
||||
it = data.find(key, saved_hash[row]);
|
||||
it = data.find(*key_holder, saved_hash[row]);
|
||||
else
|
||||
it = data.find(key);
|
||||
it = data.find(*key_holder);
|
||||
|
||||
bool found = it != data.end();
|
||||
visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
|
||||
@ -493,7 +495,7 @@ struct HashMethodKeysFixed
|
||||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE Key getKey(size_t row, Arena &) const
|
||||
ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
|
||||
{
|
||||
if constexpr (has_nullable_keys)
|
||||
{
|
||||
@ -532,12 +534,12 @@ struct HashMethodSerialized
|
||||
protected:
|
||||
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
|
||||
|
||||
ALWAYS_INLINE StringRef getKey(size_t row, Arena & pool) const
|
||||
ALWAYS_INLINE SerializedKeyHolder getKeyHolder(size_t row, Arena & pool) const
|
||||
{
|
||||
return serializeKeysToPoolContiguous(row, keys_size, key_columns, pool);
|
||||
return SerializedKeyHolder{
|
||||
serializeKeysToPoolContiguous(row, keys_size, key_columns, pool),
|
||||
pool};
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE void onExistingKey(StringRef & key, Arena & pool) { pool.rollback(key.size); }
|
||||
};
|
||||
|
||||
/// For the case when there is one string key.
|
||||
@ -554,7 +556,10 @@ struct HashMethodHashed
|
||||
HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes &, const HashMethodContextPtr &)
|
||||
: key_columns(std::move(key_columns_)) {}
|
||||
|
||||
ALWAYS_INLINE Key getKey(size_t row, Arena &) const { return hash128(row, key_columns.size(), key_columns); }
|
||||
ALWAYS_INLINE Key getKeyHolder(size_t row, Arena &) const
|
||||
{
|
||||
return hash128(row, key_columns.size(), key_columns);
|
||||
}
|
||||
|
||||
static ALWAYS_INLINE StringRef getValueRef(const Value & value)
|
||||
{
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
#include <Interpreters/AggregationCommon.h>
|
||||
|
||||
|
||||
@ -117,26 +118,22 @@ public:
|
||||
template <typename Data>
|
||||
ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
|
||||
{
|
||||
auto key = static_cast<Derived &>(*this).getKey(row, pool);
|
||||
return emplaceKeyImpl(key, data, pool);
|
||||
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
|
||||
return emplaceImpl(key_holder, data);
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
|
||||
{
|
||||
auto key = static_cast<Derived &>(*this).getKey(row, pool);
|
||||
auto res = findKeyImpl(key, data);
|
||||
static_cast<Derived &>(*this).onExistingKey(key, pool);
|
||||
return res;
|
||||
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
|
||||
return findKeyImpl(keyHolderGetKey(key_holder), data);
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
|
||||
{
|
||||
auto key = static_cast<Derived &>(*this).getKey(row, pool);
|
||||
auto res = data.hash(key);
|
||||
static_cast<Derived &>(*this).onExistingKey(key, pool);
|
||||
return res;
|
||||
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
|
||||
return data.hash(keyHolderGetKey(key_holder));
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -157,20 +154,13 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Key>
|
||||
static ALWAYS_INLINE void onNewKey(Key & /*key*/, Arena & /*pool*/) {}
|
||||
template <typename Key>
|
||||
static ALWAYS_INLINE void onExistingKey(Key & /*key*/, Arena & /*pool*/) {}
|
||||
|
||||
template <typename Data, typename Key>
|
||||
ALWAYS_INLINE EmplaceResult emplaceKeyImpl(Key key, Data & data, Arena & pool)
|
||||
template <typename Data, typename KeyHolder>
|
||||
ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
|
||||
{
|
||||
if constexpr (Cache::consecutive_keys_optimization)
|
||||
{
|
||||
if (cache.found && cache.check(key))
|
||||
if (cache.found && cache.check(keyHolderGetKey(key_holder)))
|
||||
{
|
||||
static_cast<Derived &>(*this).onExistingKey(key, pool);
|
||||
|
||||
if constexpr (has_mapped)
|
||||
return EmplaceResult(cache.value.second, cache.value.second, false);
|
||||
else
|
||||
@ -180,7 +170,7 @@ protected:
|
||||
|
||||
typename Data::iterator it;
|
||||
bool inserted = false;
|
||||
data.emplace(key, it, inserted);
|
||||
data.emplace(key_holder, it, inserted);
|
||||
|
||||
[[maybe_unused]] Mapped * cached = nullptr;
|
||||
if constexpr (has_mapped)
|
||||
@ -191,13 +181,8 @@ protected:
|
||||
if constexpr (has_mapped)
|
||||
{
|
||||
new(&it->getSecond()) Mapped();
|
||||
static_cast<Derived &>(*this).onNewKey(it->getFirstMutable(), pool);
|
||||
}
|
||||
else
|
||||
static_cast<Derived &>(*this).onNewKey(it->getValueMutable(), pool);
|
||||
}
|
||||
else
|
||||
static_cast<Derived &>(*this).onExistingKey(key, pool);
|
||||
|
||||
if constexpr (consecutive_keys_optimization)
|
||||
{
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include <IO/VarInt.h>
|
||||
|
||||
#include <Common/HashTable/HashTableAllocator.h>
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
|
||||
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
|
||||
#include <iostream>
|
||||
@ -630,6 +631,8 @@ protected:
|
||||
|
||||
|
||||
/// If the key is zero, insert it into a special place and return true.
|
||||
/// We don't have to persist a zero key, because it's not actually inserted.
|
||||
/// That's why we just take a Key by value, and not a key holder.
|
||||
bool ALWAYS_INLINE emplaceIfZero(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
{
|
||||
/// If it is claimed that the zero key can not be inserted into the table.
|
||||
@ -655,17 +658,23 @@ protected:
|
||||
return false;
|
||||
}
|
||||
|
||||
void ALWAYS_INLINE emplaceNonZeroImpl(size_t place_value, Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplaceNonZeroImpl(size_t place_value, KeyHolder && key_holder,
|
||||
iterator & it, bool & inserted, size_t hash_value)
|
||||
{
|
||||
it = iterator(this, &buf[place_value]);
|
||||
|
||||
if (!buf[place_value].isZero(*this))
|
||||
{
|
||||
keyHolderDiscardKey(key_holder);
|
||||
inserted = false;
|
||||
return;
|
||||
}
|
||||
|
||||
new(&buf[place_value]) Cell(x, *this);
|
||||
keyHolderPersistKey(key_holder);
|
||||
const auto & key = keyHolderGetKey(key_holder);
|
||||
|
||||
new(&buf[place_value]) Cell(key, *this);
|
||||
buf[place_value].setHash(hash_value);
|
||||
inserted = true;
|
||||
++m_size;
|
||||
@ -687,19 +696,21 @@ protected:
|
||||
throw;
|
||||
}
|
||||
|
||||
it = find(x, hash_value);
|
||||
it = find(keyHolderGetKey(key_holder), hash_value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Only for non-zero keys. Find the right place, insert the key there, if it does not already exist. Set iterator to the cell in output parameter.
|
||||
void ALWAYS_INLINE emplaceNonZero(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplaceNonZero(KeyHolder && key_holder, iterator & it,
|
||||
bool & inserted, size_t hash_value)
|
||||
{
|
||||
size_t place_value = findCell(x, hash_value, grower.place(hash_value));
|
||||
emplaceNonZeroImpl(place_value, x, it, inserted, hash_value);
|
||||
const auto & key = keyHolderGetKey(key_holder);
|
||||
size_t place_value = findCell(key, hash_value, grower.place(hash_value));
|
||||
emplaceNonZeroImpl(place_value, key_holder, it, inserted, hash_value);
|
||||
}
|
||||
|
||||
|
||||
|
||||
public:
|
||||
/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
|
||||
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
|
||||
@ -708,7 +719,9 @@ public:
|
||||
|
||||
size_t hash_value = hash(Cell::getKey(x));
|
||||
if (!emplaceIfZero(Cell::getKey(x), res.first, res.second, hash_value))
|
||||
{
|
||||
emplaceNonZero(Cell::getKey(x), res.first, res.second, hash_value);
|
||||
}
|
||||
|
||||
if (res.second)
|
||||
res.first.ptr->setMapped(x);
|
||||
@ -739,19 +752,20 @@ public:
|
||||
* if (inserted)
|
||||
* new(&it->second) Mapped(value);
|
||||
*/
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted)
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it, bool & inserted)
|
||||
{
|
||||
size_t hash_value = hash(x);
|
||||
if (!emplaceIfZero(x, it, inserted, hash_value))
|
||||
emplaceNonZero(x, it, inserted, hash_value);
|
||||
const auto & key = keyHolderGetKey(key_holder);
|
||||
emplace(key_holder, it, inserted, hash(key));
|
||||
}
|
||||
|
||||
|
||||
/// Same, but with a precalculated value of hash function.
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it,
|
||||
bool & inserted, size_t hash_value)
|
||||
{
|
||||
if (!emplaceIfZero(x, it, inserted, hash_value))
|
||||
emplaceNonZero(x, it, inserted, hash_value);
|
||||
const auto & key = keyHolderGetKey(key_holder);
|
||||
if (!emplaceIfZero(key, it, inserted, hash_value))
|
||||
emplaceNonZero(key_holder, it, inserted, hash_value);
|
||||
}
|
||||
|
||||
/// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
|
||||
|
130
dbms/src/Common/HashTable/HashTableKeyHolder.h
Normal file
130
dbms/src/Common/HashTable/HashTableKeyHolder.h
Normal file
@ -0,0 +1,130 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Arena.h>
|
||||
|
||||
/**
|
||||
* In some aggregation scenarios, when adding a key to the hash table, we
|
||||
* start with a temporary key object, and if it turns out to be a new key,
|
||||
* we must make it persistent (e.g. copy to an Arena) and use the resulting
|
||||
* persistent object as hash table key. This happens only for StringRef keys,
|
||||
* because other key types are stored by value, but StringRef is a pointer-like
|
||||
* type: the actual data are stored elsewhere. Even for StringRef, we don't
|
||||
* make a persistent copy of the key in each of the following cases:
|
||||
* 1) the aggregation method doesn't use temporary keys, so they're persistent
|
||||
* from the start;
|
||||
* 2) the key is already present in the hash table;
|
||||
* 3) that particular key is stored by value, e.g. a short StringRef key in
|
||||
* StringHashMap.
|
||||
*
|
||||
* In the past, the caller was responsible for making the key persistent after
|
||||
* it was inserted. emplace() returned whether the key is new or not, so the
|
||||
* caller only stored new keys (this is case (2) from the above list). However,
|
||||
* now we are adding a compound hash table for StringRef keys, so case (3)
|
||||
* appears. The decision about persistence now depends on some properties of
|
||||
* the key, and the logic of this decision is tied to the particular hash table
|
||||
* implementation. This means that the hash table user now doesn't have enough
|
||||
* data and logic to make this decision by itself.
|
||||
*
|
||||
* To support these new requirements, we now manage key persistence by passing
|
||||
* a special key holder to emplace(), which has the functions to make the key
|
||||
* persistent or to discard it. emplace() then calls these functions at the
|
||||
* appropriate moments.
|
||||
*
|
||||
* This approach has the following benefits:
|
||||
* - no extra runtime branches in the caller to make the key persistent.
|
||||
* - no additional data is stored in the hash table itself, which is important
|
||||
* when it's used in aggregate function states.
|
||||
* - no overhead when the key memory management isn't needed: we just pass the
|
||||
* bare key without any wrapper to emplace(), and the default callbacks do
|
||||
* nothing.
|
||||
*
|
||||
* This file defines the default key persistence functions, as well as two
|
||||
* different key holders and corresponding functions for storing StringRef
|
||||
* keys to Arena.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns the key. Can return the temporary key initially.
|
||||
* After the call to keyHolderPersistKey(), must return the persistent key.
|
||||
*/
|
||||
// Default overload, used when a bare key is passed instead of a key holder:
// the argument is returned as-is.
template <typename Key>
|
||||
inline Key & ALWAYS_INLINE keyHolderGetKey(Key && key) { return key; }
|
||||
|
||||
/**
|
||||
* Make the key persistent. keyHolderGetKey() must return the persistent key
|
||||
* after this call.
|
||||
*/
|
||||
// Default overload for bare keys: the body is empty, so no persistence work is done.
template <typename Key>
|
||||
inline void ALWAYS_INLINE keyHolderPersistKey(Key &&) {}
|
||||
|
||||
/**
|
||||
* Discard the key. Calling keyHolderGetKey() is ill-defined after this.
|
||||
*/
|
||||
// Default overload for bare keys: the body is empty, so there is nothing to release.
template <typename Key>
|
||||
inline void ALWAYS_INLINE keyHolderDiscardKey(Key &&) {}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* ArenaKeyHolder is a key holder for hash tables that serializes a StringRef
|
||||
* key to an Arena.
|
||||
*/
|
||||
struct ArenaKeyHolder
|
||||
{
|
||||
/// The key as supplied by the caller. keyHolderPersistKey() overwrites key.data
/// with the pointer returned by pool.insert().
StringRef key;
|
||||
/// Arena that keyHolderPersistKey() inserts the key bytes into.
Arena & pool;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// ArenaKeyHolder overload: exposes the holder's `key` member by reference.
// Because keyHolderPersistKey() updates key.data in place, the same reference
// yields the updated key afterwards.
inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::ArenaKeyHolder & holder)
|
||||
{
|
||||
return holder.key;
|
||||
}
|
||||
|
||||
// ArenaKeyHolder overload: hands the key bytes to pool.insert() and repoints
// key.data at the returned pointer; key.size is left unchanged.
// NOTE(review): Arena::insert is not visible here -- presumably it copies the
// bytes into arena-owned memory; confirm against Common/Arena.h.
inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder)
|
||||
{
|
||||
// Hash table shouldn't ask us to persist a zero key
|
||||
assert(holder.key.size > 0);
|
||||
holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
|
||||
}
|
||||
|
||||
// ArenaKeyHolder overload: intentionally empty. Among this holder's functions,
// only keyHolderPersistKey() touches the pool, so a key that is discarded
// instead of persisted has nothing to undo.
inline void ALWAYS_INLINE keyHolderDiscardKey(DB::ArenaKeyHolder &)
|
||||
{
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* SerializedKeyHolder is a key holder for a StringRef key that is already
|
||||
* serialized to an Arena. The key must be the last allocation in this Arena,
|
||||
* and is discarded by rolling back the allocation.
|
||||
*/
|
||||
struct SerializedKeyHolder
|
||||
{
|
||||
/// Reference to the serialized key bytes. keyHolderDiscardKey() resets it to
/// {nullptr, 0} after rolling the allocation back.
StringRef key;
|
||||
/// Arena that keyHolderDiscardKey() rolls back by key.size bytes.
Arena & pool;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// SerializedKeyHolder overload: exposes the holder's `key` member by reference.
// After keyHolderDiscardKey() the member is {nullptr, 0}.
inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::SerializedKeyHolder & holder)
|
||||
{
|
||||
return holder.key;
|
||||
}
|
||||
|
||||
// SerializedKeyHolder overload: intentionally empty. Per the struct's
// description the key is already serialized to the arena, so keeping it
// requires no further action.
inline void ALWAYS_INLINE keyHolderPersistKey(DB::SerializedKeyHolder &)
|
||||
{
|
||||
}
|
||||
|
||||
// SerializedKeyHolder overload: releases the serialized key by rolling the
// arena back by key.size bytes, then clears the holder's key.
inline void ALWAYS_INLINE keyHolderDiscardKey(DB::SerializedKeyHolder & holder)
|
||||
{
|
||||
// rollback() returns the arena's head position after the rollback.
[[maybe_unused]] void * new_head = holder.pool.rollback(holder.key.size);
|
||||
// Fires if the rolled-back region does not start at the key, i.e. the key
// was not the last allocation in the arena.
assert(new_head == holder.key.data);
|
||||
// The bytes the key referred to were just rolled back; drop the reference.
holder.key.data = nullptr;
|
||||
holder.key.size = 0;
|
||||
}
|
||||
|
@ -235,19 +235,22 @@ public:
|
||||
* if (inserted)
|
||||
* new(&it->second) Mapped(value);
|
||||
*/
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted)
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it, bool & inserted)
|
||||
{
|
||||
size_t hash_value = hash(x);
|
||||
emplace(x, it, inserted, hash_value);
|
||||
size_t hash_value = hash(keyHolderGetKey(key_holder));
|
||||
emplace(key_holder, it, inserted, hash_value);
|
||||
}
|
||||
|
||||
|
||||
/// Same, but with a precalculated value of the hash function.
|
||||
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t hash_value)
|
||||
template <typename KeyHolder>
|
||||
void ALWAYS_INLINE emplace(KeyHolder && key_holder, iterator & it,
|
||||
bool & inserted, size_t hash_value)
|
||||
{
|
||||
size_t buck = getBucketFromHash(hash_value);
|
||||
typename Impl::iterator impl_it;
|
||||
impls[buck].emplace(x, impl_it, inserted, hash_value);
|
||||
impls[buck].emplace(key_holder, impl_it, inserted, hash_value);
|
||||
it = iterator(this, buck, impl_it);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user