mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 10:02:01 +00:00
SpaceSaving: internal storage for StringRef{}
SpaceSaving now has specialised storage for some key types: only the keys that are actually retained in the structure are copied, rather than every inserted key. For most POD key types this storage interface is an empty no-op, so there should not be any extra cost.
This commit is contained in:
parent
106a979ac2
commit
e189c39056
@ -31,7 +31,6 @@ struct AggregateFunctionTopKData
|
|||||||
{
|
{
|
||||||
using Set = SpaceSaving
|
using Set = SpaceSaving
|
||||||
<
|
<
|
||||||
T,
|
|
||||||
T,
|
T,
|
||||||
HashCRC32<T>,
|
HashCRC32<T>,
|
||||||
HashTableGrower<4>,
|
HashTableGrower<4>,
|
||||||
@ -129,7 +128,6 @@ struct AggregateFunctionTopKGenericData
|
|||||||
{
|
{
|
||||||
using Set = SpaceSaving
|
using Set = SpaceSaving
|
||||||
<
|
<
|
||||||
std::string,
|
|
||||||
StringRef,
|
StringRef,
|
||||||
StringRefHash,
|
StringRefHash,
|
||||||
HashTableGrower<4>,
|
HashTableGrower<4>,
|
||||||
@ -199,12 +197,12 @@ public:
|
|||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
readVarUInt(count, buf);
|
readVarUInt(count, buf);
|
||||||
for (size_t i = 0; i < count; ++i) {
|
for (size_t i = 0; i < count; ++i) {
|
||||||
std::string key_string;
|
auto ref = readStringBinaryInto(*arena, buf);
|
||||||
readStringBinary(key_string, buf);
|
|
||||||
UInt64 count, error;
|
UInt64 count, error;
|
||||||
readVarUInt(count, buf);
|
readVarUInt(count, buf);
|
||||||
readVarUInt(error, buf);
|
readVarUInt(error, buf);
|
||||||
set.insert(key_string, count, error);
|
set.insert(ref, count, error);
|
||||||
|
arena->rollback(ref.size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,7 +214,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
StringRef str_serialized = column.getDataAt(row_num);
|
StringRef str_serialized = column.getDataAt(row_num);
|
||||||
set.insert(str_serialized.toString());
|
set.insert(str_serialized);
|
||||||
}
|
}
|
||||||
|
|
||||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
#include <boost/range/adaptor/reversed.hpp>
|
#include <boost/range/adaptor/reversed.hpp>
|
||||||
|
|
||||||
|
#include <Common/ArenaWithFreeLists.h>
|
||||||
#include <Common/UInt128.h>
|
#include <Common/UInt128.h>
|
||||||
#include <Common/HashTable/Hash.h>
|
#include <Common/HashTable/Hash.h>
|
||||||
#include <Common/HashTable/HashMap.h>
|
#include <Common/HashTable/HashMap.h>
|
||||||
@ -25,11 +26,46 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
 * Arena interface to allow specialized storage of keys.
 * POD keys do not require additional storage, so the primary template is an
 * empty pass-through: emplace() hands the key back by value and free() does nothing.
 */
template <typename TKey>
struct SpaceSavingArena
{
    SpaceSavingArena() = default;

    /// Returns the key to retain; for this primary template it is just a copy of `key`.
    /// Returned as a non-const value so the caller can move from the result.
    TKey emplace(const TKey & key) { return key; }

    /// Releases storage held for a retained key; nothing was allocated, so this is a no-op.
    /// The parameter is left unnamed to avoid an unused-parameter warning.
    void free(const TKey & /*key*/) {}
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Specialized storage for StringRef with a freelist arena.
|
||||||
|
* Keys of this type that are retained on insertion must be serialised into local storage,
|
||||||
|
* otherwise the reference would be invalid after the processed block is released.
|
||||||
|
*/
|
||||||
|
template <> struct SpaceSavingArena<StringRef>
|
||||||
|
{
|
||||||
|
const StringRef emplace(const StringRef & key)
|
||||||
|
{
|
||||||
|
auto ptr = arena.alloc(key.size);
|
||||||
|
std::copy(key.data, key.data + key.size, ptr);
|
||||||
|
return StringRef{ptr, key.size};
|
||||||
|
}
|
||||||
|
|
||||||
|
void free(const StringRef & key)
|
||||||
|
{
|
||||||
|
if (key.data)
|
||||||
|
arena.free(const_cast<char *>(key.data), key.size);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
ArenaWithFreeLists arena;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template
|
template
|
||||||
<
|
<
|
||||||
typename TKey,
|
typename TKey,
|
||||||
typename HashKey = TKey,
|
typename Hash = DefaultHash<TKey>,
|
||||||
typename Hash = DefaultHash<HashKey>,
|
|
||||||
typename Grower = HashTableGrower<>,
|
typename Grower = HashTableGrower<>,
|
||||||
typename Allocator = HashTableAllocator
|
typename Allocator = HashTableAllocator
|
||||||
>
|
>
|
||||||
@ -45,7 +81,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using Self = SpaceSaving<TKey, HashKey, Hash, Grower, Allocator>;
|
using Self = SpaceSaving<TKey, Hash, Grower, Allocator>;
|
||||||
|
|
||||||
struct Counter
|
struct Counter
|
||||||
{
|
{
|
||||||
@ -81,6 +117,7 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
SpaceSaving(size_t c = 10) : alpha_map(nextAlphaSize(c)), m_capacity(c) {}
|
SpaceSaving(size_t c = 10) : alpha_map(nextAlphaSize(c)), m_capacity(c) {}
|
||||||
|
|
||||||
~SpaceSaving() { destroyElements(); }
|
~SpaceSaving() { destroyElements(); }
|
||||||
|
|
||||||
inline size_t size() const
|
inline size_t size() const
|
||||||
@ -117,7 +154,7 @@ public:
|
|||||||
// Key doesn't exist, but can fit in the top K
|
// Key doesn't exist, but can fit in the top K
|
||||||
else if (unlikely(size() < capacity()))
|
else if (unlikely(size() < capacity()))
|
||||||
{
|
{
|
||||||
auto c = new Counter(key, increment, error, hash);
|
auto c = new Counter(arena.emplace(key), increment, error, hash);
|
||||||
push(c);
|
push(c);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -138,14 +175,15 @@ public:
|
|||||||
// Replace minimum with newly inserted element
|
// Replace minimum with newly inserted element
|
||||||
if (it != counter_map.end())
|
if (it != counter_map.end())
|
||||||
{
|
{
|
||||||
|
arena.free(min->key);
|
||||||
min->hash = hash;
|
min->hash = hash;
|
||||||
min->key = key;
|
min->key = arena.emplace(key);
|
||||||
min->count = alpha + increment;
|
min->count = alpha + increment;
|
||||||
min->error = alpha + error;
|
min->error = alpha + error;
|
||||||
percolate(min);
|
percolate(min);
|
||||||
|
|
||||||
it->second = min;
|
it->second = min;
|
||||||
it->first = key;
|
it->first = min->key;
|
||||||
counter_map.reinsert(it, hash);
|
counter_map.reinsert(it, hash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -279,9 +317,10 @@ private:
|
|||||||
alpha_map.clear();
|
alpha_map.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
HashMap<HashKey, Counter *, Hash, Grower, Allocator> counter_map;
|
HashMap<TKey, Counter *, Hash, Grower, Allocator> counter_map;
|
||||||
std::vector<Counter *> counter_list;
|
std::vector<Counter *> counter_list;
|
||||||
std::vector<UInt64> alpha_map;
|
std::vector<UInt64> alpha_map;
|
||||||
|
SpaceSavingArena<TKey> arena;
|
||||||
size_t m_capacity;
|
size_t m_capacity;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user