This commit is contained in:
Nikita Taranov 2022-06-29 22:15:12 +02:00
parent 986ead7358
commit 7d2e8b0883
2 changed files with 26 additions and 32 deletions

View File

@ -240,40 +240,42 @@ struct HashTableGrower
/// NOTE(review): presumably advertises that next() moves exactly one cell forward
/// (which is what next() in this grower does) — confirm against the readers of this flag.
static constexpr auto performs_linear_probing_with_single_step = true;
/// The size of the hash table in the cells.
size_t bufSize() const
{
    /// The cell count is 2^size_degree.
    const size_t cell_count = 1ULL << size_degree;
    return cell_count;
}
size_t bufSize() const
{
    /// Two raised to the power of size_degree.
    const size_t total_cells = 1ULL << size_degree;
    return total_cells;
}
/// The largest number of elements tolerated before overflow() reports true: 2^(size_degree - 1).
size_t maxFill() const
{
    const auto half_degree = size_degree - 1;
    return 1ULL << half_degree;
}
/// Bit mask with the low size_degree bits set (bufSize() - 1).
size_t mask() const
{
    const size_t cell_count = bufSize();
    return cell_count - 1;
}
/// Fill threshold used by overflow(): 2^(size_degree - 1).
size_t maxFill() const
{
    const auto shift = size_degree - 1;
    return 1ULL << shift;
}
/// bufSize() - 1, used to reduce a value to a cell number.
size_t mask() const
{
    const size_t total_cells = bufSize();
    return total_cells - 1;
}
/// From the hash value, get the cell number in the hash table.
size_t place(size_t x) const
{
    /// Keep only the low bits selected by the mask.
    const size_t cell_mask = mask();
    return x & cell_mask;
}
size_t place(size_t x) const
{
    /// The mask drops every bit above the table size.
    const size_t low_bits = mask();
    return x & low_bits;
}
/// The next cell in the collision resolution chain.
size_t next(size_t pos) const
{
    /// Step one cell forward; the mask wraps the position back to 0 past the last cell.
    return (pos + 1) & mask();
}
size_t next(size_t pos) const
{
    /// Advance by one cell and wrap around using the mask.
    const size_t following = pos + 1;
    return following & mask();
}
/// Whether the hash table is full enough that it must be grown (or have something unnecessary removed from it).
bool overflow(size_t elems) const
{
    /// True once the element count exceeds the fill threshold.
    const size_t threshold = maxFill();
    return threshold < elems;
}
bool overflow(size_t elems) const
{
    /// Strictly more elements than maxFill() counts as overflow.
    const size_t fill_limit = maxFill();
    return elems > fill_limit;
}
/// Increase the size of the hash table.
void increaseSize()
{
    /// From degree 23 upwards the buffer doubles; smaller buffers grow four times.
    if (size_degree >= 23)
        size_degree += 1;
    else
        size_degree += 2;
}
void increaseSize()
{
    /// Below degree 23 the degree grows by two (x4 cells), otherwise by one (x2 cells).
    if (size_degree < 23)
        size_degree += 2;
    else
        size_degree += 1;
}
/// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
/// Chooses size_degree from an element count:
///  - num_elems <= 1: initial_size_degree;
///  - otherwise: the larger of initial_size_degree and floor(log2(num_elems - 1)) + 2.
/// NOTE(review): the body lists the same conditional expression twice with different line
/// wrapping — it looks like the before/after of a reformatting shown together; only one
/// copy can be part of the real function. Confirm against the actual file.
void set(size_t num_elems)
{
size_degree = num_elems <= 1
? initial_size_degree
: ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2) ? initial_size_degree
: (static_cast<size_t>(log2(num_elems - 1)) + 2));
? initial_size_degree
: ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
? initial_size_degree
: (static_cast<size_t>(log2(num_elems - 1)) + 2));
}
/// Derives size_degree from a buffer size: floor(log2(buf_size_ - 1) + 1).
/// NOTE(review): for buf_size_ <= 1 the argument of log2 is 0 or wraps around —
/// presumably callers never pass such values; confirm.
void setBufSize(size_t buf_size_)
{
    const auto degree_as_float = log2(buf_size_ - 1) + 1;
    size_degree = static_cast<size_t>(degree_as_float);
}
/// Sets size_degree to floor(log2(buf_size_ - 1) + 1).
/// NOTE(review): buf_size_ of 0 or 1 feeds log2 a wrapped-around or zero argument —
/// presumably excluded by callers; confirm.
void setBufSize(size_t buf_size_)
{
    const auto fractional_degree = log2(buf_size_ - 1) + 1;
    size_degree = static_cast<size_t>(fractional_degree);
}
};
/** Determines the size of the hash table, and when and how much it should be resized.
* This structure is aligned to a cache line boundary and occupies the whole cache line.
* Precalculates some values to speed up lookups and insertion into the HashTable (and thus has a bigger memory footprint than HashTableGrower).
@ -284,8 +286,8 @@ class alignas(64) HashTableGrowerWithPrecalculation
/// The state of this structure is enough to get the buffer size of the hash table.
/// Binary logarithm of the buffer size (bufSize() returns 1ULL << size_degree).
UInt8 size_degree = initial_size_degree;
/// Buffer size minus one, stored so that place() and next() do not recompute it.
size_t cached_mask = (1ULL << initial_size_degree) - 1;
/// Half of the buffer size; overflow() compares the element count against it.
size_t cached_max_fill = 1ULL << (initial_size_degree - 1);
/// NOTE(review): the precalculated_* members are initialized with the same expressions as the
/// cached_* ones — this looks like the before/after of a rename shown together; confirm
/// which pair the real file keeps.
size_t precalculated_mask = (1ULL << initial_size_degree) - 1;
size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1);
public:
/// Read-only access to the current size degree.
UInt8 sizeDegree() const
{
    return size_degree;
}
@ -293,8 +295,8 @@ public:
/// Raises size_degree by delta and refreshes every value derived from it.
void increaseSizeDegree(UInt8 delta)
{
    size_degree += delta;

    /// Compute each derived value once, then store it in all members that mirror it.
    const size_t fresh_mask = (1ULL << size_degree) - 1;
    const size_t fresh_max_fill = 1ULL << (size_degree - 1);

    cached_mask = fresh_mask;
    cached_max_fill = fresh_max_fill;
    precalculated_mask = fresh_mask;
    precalculated_max_fill = fresh_max_fill;
}
/// Number of cells that corresponds to initial_size_degree, i.e. 2^initial_size_degree.
static constexpr auto initial_count = 1ULL << initial_size_degree;
@ -303,16 +305,16 @@ public:
/// NOTE(review): presumably advertises that next() moves exactly one cell forward
/// (which is what next() in this grower does) — confirm against the readers of this flag.
static constexpr auto performs_linear_probing_with_single_step = true;
/// The size of the hash table in the cells.
size_t bufSize() const
{
    /// The cell count is 2^size_degree.
    const size_t cell_count = 1ULL << size_degree;
    return cell_count;
}
size_t bufSize() const
{
    /// Two raised to the power of size_degree.
    const size_t total_cells = 1ULL << size_degree;
    return total_cells;
}
/// From the hash value, get the cell number in the hash table.
size_t place(size_t x) const
{
    /// Uses the stored mask instead of deriving it from size_degree.
    const size_t cell = x & cached_mask;
    return cell;
}
size_t place(size_t x) const
{
    /// Uses the stored mask instead of deriving it from size_degree.
    const size_t cell = x & precalculated_mask;
    return cell;
}
/// The next cell in the collision resolution chain.
size_t next(size_t pos) const
{
    /// Step one cell forward; the stored mask wraps the position around.
    ++pos;
    return pos & cached_mask;
}
size_t next(size_t pos) const
{
    /// Step one cell forward; the stored mask wraps the position around.
    ++pos;
    return pos & precalculated_mask;
}
/// Whether the hash table is full enough that it must be grown (or have something unnecessary removed from it).
bool overflow(size_t elems) const
{
    /// Strictly more elements than the stored fill threshold counts as overflow.
    return cached_max_fill < elems;
}
bool overflow(size_t elems) const
{
    /// Strictly more elements than the stored fill threshold counts as overflow.
    return precalculated_max_fill < elems;
}
/// Increase the size of the hash table.
void increaseSize() { increaseSizeDegree(size_degree >= 23 ? 1 : 2); }

View File

@ -1,8 +0,0 @@
<test>
<query>select * from numbers_mt(200) group by number format Null</query>
<query>select * from numbers_mt(1e4) group by number format Null</query>
<query>select * from numbers_mt(1e5) group by number format Null</query>
<query>select * from numbers_mt(1e6) group by number format Null</query>
<query>select * from numbers_mt(1e7) group by number format Null</query>
<query>select * from numbers_mt(1e8) group by number format Null</query>
</test>