This commit is contained in:
Anton Popov 2024-09-18 23:23:52 +03:00 committed by GitHub
commit 1231e0848d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 54 additions and 3 deletions

View File

@ -71,11 +71,40 @@ void BloomFilter::add(const char * data, size_t len)
size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
for (size_t i = 0; i < hashes; ++i)
/// Dispatch on the runtime number of hash functions so that the counts 1..4
/// go through addHashesImpl instantiated with a compile-time constant.
switch (hashes)
{
case 1:
{
addHashesImpl<1>(hash1, hash2);
break;
}
case 2:
{
addHashesImpl<2>(hash1, hash2);
break;
}
case 3:
{
addHashesImpl<3>(hash1, hash2);
break;
}
case 4:
{
addHashesImpl<4>(hash1, hash2);
break;
}
default:
{
/// Any other count: set the first four positions through the templated helper,
/// then finish the remaining ones (i = 4 .. hashes - 1) with a runtime loop that
/// uses the same position formula as addHashesImpl.
/// NOTE(review): hashes == 0 also lands here and would set four bits;
/// presumably the constructor rejects a zero hash count - confirm.
addHashesImpl<4>(hash1, hash2);
for (size_t i = 4; i < hashes; ++i)
{
/// Bit position within the whole filter, taken modulo 8 * size.
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
/// Select the word holding that bit and set the bit inside it.
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
}
break;
}
}
}
void BloomFilter::clear()

View File

@ -8,6 +8,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/IDataType.h>
#include <base/constexpr_helpers.h>
namespace DB
@ -56,7 +57,19 @@ public:
UInt64 isEmpty() const;
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
private:
/// Sets the filter bits for probes 0 .. num_hashes - 1 derived from the two base hashes.
/// Probe `k` maps to bit (hash1 + k * hash2 + k * k) modulo (8 * size).
/// The probe count is a template parameter (restricted to 1..4), so the loop bound
/// is known at compile time.
template <size_t num_hashes>
ALWAYS_INLINE void addHashesImpl(size_t hash1, size_t hash2)
{
    static_assert(num_hashes >= 1 && num_hashes <= 4);

    /// Number of bits in one filter word and in the whole filter.
    constexpr size_t bits_per_word = 8 * sizeof(UnderType);
    const size_t total_bits = 8 * size;

    for (size_t probe = 0; probe != num_hashes; ++probe)
    {
        const size_t bit_index = (hash1 + probe * hash2 + probe * probe) % total_bits;
        filter[bit_index / bits_per_word] |= (1ULL << (bit_index % bits_per_word));
    }
}
size_t size;
size_t hashes;

View File

@ -0,0 +1,9 @@
<test>
<!-- Exercises inserts into a MergeTree table whose String column carries an
     ngrambf_v1(5, 10000, 3, 0) skip index. Merges are stopped on the table before the
     INSERT, which writes randomPrintableASCII(128) values for the rows of numbers(100000).
     NOTE(review): presumably the third index argument (3) is the number of hash
     functions; confirm against the ngrambf_v1 documentation. -->
<create_query>DROP TABLE IF EXISTS test_ngram</create_query>
<create_query>CREATE TABLE test_ngram (s String, INDEX idx_s s TYPE ngrambf_v1(5, 10000, 3, 0) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple()</create_query>
<create_query>SYSTEM STOP MERGES test_ngram</create_query>
<query>INSERT INTO test_ngram SELECT randomPrintableASCII(128) FROM numbers(100000)</query>
<drop_query>DROP TABLE IF EXISTS test_ngram</drop_query>
</test>