Compare commits

...

2 Commits

Author SHA1 Message Date
Anton Popov
a2c43025e4
Merge 2fdde4ba55 into 2cef99c311 2024-09-18 00:38:43 +02:00
Anton Popov
2fdde4ba55 unroll loop in bloom filter 2024-08-30 00:41:34 +00:00
3 changed files with 54 additions and 3 deletions

View File

@ -71,10 +71,39 @@ void BloomFilter::add(const char * data, size_t len)
size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
for (size_t i = 0; i < hashes; ++i)
switch (hashes)
{
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
case 1:
{
addHashesImpl<1>(hash1, hash2);
break;
}
case 2:
{
addHashesImpl<2>(hash1, hash2);
break;
}
case 3:
{
addHashesImpl<3>(hash1, hash2);
break;
}
case 4:
{
addHashesImpl<4>(hash1, hash2);
break;
}
default:
{
addHashesImpl<4>(hash1, hash2);
for (size_t i = 4; i < hashes; ++i)
{
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
}
break;
}
}
}

View File

@ -8,6 +8,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/IDataType.h>
#include <base/constexpr_helpers.h>
namespace DB
@ -56,7 +57,19 @@ public:
UInt64 isEmpty() const;
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
private:
/// Sets, in `filter`, the bits for the first `num_hashes` probes of the (hash1, hash2) pair.
/// Probe i targets bit (hash1 + i * hash2 + i * i) % (8 * size).
/// The probe count is a template parameter, so the loop bound is a compile-time constant.
template <size_t num_hashes>
ALWAYS_INLINE void addHashesImpl(size_t hash1, size_t hash2)
{
    static_assert(1 <= num_hashes && num_hashes <= 4);

    /// Width of one UnderType word in bits.
    constexpr size_t bits_per_word = 8 * sizeof(UnderType);

    for (size_t probe = 0; probe != num_hashes; ++probe)
    {
        const size_t bit_index = (hash1 + probe * hash2 + probe * probe) % (8 * size);
        const size_t word_index = bit_index / bits_per_word;
        const size_t bit_in_word = bit_index % bits_per_word;
        filter[word_index] |= (1ULL << bit_in_word);
    }
}
size_t size;
size_t hashes;

View File

@ -0,0 +1,9 @@
<test>
<!-- Performance test: times INSERTs into a MergeTree table that carries an ngrambf_v1 skip index on a String column. -->
<!-- NOTE(review): the third index argument (3) appears to be the number of hash functions; confirm against the ngrambf_v1 documentation. -->
<create_query>DROP TABLE IF EXISTS test_ngram</create_query>
<create_query>CREATE TABLE test_ngram (s String, INDEX idx_s s TYPE ngrambf_v1(5, 10000, 3, 0) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple()</create_query>
<!-- Merges are stopped for the table before the measured query runs. -->
<create_query>SYSTEM STOP MERGES test_ngram</create_query>
<!-- Measured query: inserts 100000 random printable-ASCII strings of length 128. -->
<query>INSERT INTO test_ngram SELECT randomPrintableASCII(128) FROM numbers(100000)</query>
<drop_query>DROP TABLE IF EXISTS test_ngram</drop_query>
</test>