mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge 2fdde4ba55
into b94a7167a8
This commit is contained in:
commit
1231e0848d
@ -71,10 +71,39 @@ void BloomFilter::add(const char * data, size_t len)
|
||||
size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, seed);
|
||||
size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN_A * seed + SEED_GEN_B);
|
||||
|
||||
for (size_t i = 0; i < hashes; ++i)
|
||||
switch (hashes)
|
||||
{
|
||||
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
|
||||
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
|
||||
case 1:
|
||||
{
|
||||
addHashesImpl<1>(hash1, hash2);
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
addHashesImpl<2>(hash1, hash2);
|
||||
break;
|
||||
}
|
||||
case 3:
|
||||
{
|
||||
addHashesImpl<3>(hash1, hash2);
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
addHashesImpl<4>(hash1, hash2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
addHashesImpl<4>(hash1, hash2);
|
||||
|
||||
for (size_t i = 4; i < hashes; ++i)
|
||||
{
|
||||
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
|
||||
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <base/constexpr_helpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -56,7 +57,19 @@ public:
|
||||
UInt64 isEmpty() const;
|
||||
|
||||
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
|
||||
|
||||
private:
|
||||
template <size_t num_hashes>
|
||||
ALWAYS_INLINE void addHashesImpl(size_t hash1, size_t hash2)
|
||||
{
|
||||
static_assert(num_hashes >= 1 && num_hashes <= 4);
|
||||
|
||||
for (size_t i = 0; i < num_hashes; ++i)
|
||||
{
|
||||
size_t pos = (hash1 + i * hash2 + i * i) % (8 * size);
|
||||
filter[pos / (8 * sizeof(UnderType))] |= (1ULL << (pos % (8 * sizeof(UnderType))));
|
||||
}
|
||||
}
|
||||
|
||||
size_t size;
|
||||
size_t hashes;
|
||||
|
9
tests/performance/ngram_filter_insert.xml
Normal file
9
tests/performance/ngram_filter_insert.xml
Normal file
@ -0,0 +1,9 @@
|
||||
<test>
<!-- Performance test for inserting into a MergeTree table whose String column `s`
     carries an ngrambf_v1 index (idx_s, GRANULARITY 1). -->
|
||||
<!-- Setup: remove any leftover table from an earlier run. -->
<create_query>DROP TABLE IF EXISTS test_ngram</create_query>
|
||||
<!-- Setup: create the indexed table. NOTE(review): the meaning of the ngrambf_v1
     arguments (5, 10000, 3, 0) is not visible here; confirm against the index documentation. -->
<create_query>CREATE TABLE test_ngram (s String, INDEX idx_s s TYPE ngrambf_v1(5, 10000, 3, 0) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple()</create_query>
|
||||
<!-- Setup: stop merges for this table; presumably so merge work does not affect the measured insert. -->
<create_query>SYSTEM STOP MERGES test_ngram</create_query>
|
||||
|
||||
<!-- Measured query: insert 100000 rows produced by randomPrintableASCII(128). -->
<query>INSERT INTO test_ngram SELECT randomPrintableASCII(128) FROM numbers(100000)</query>
|
||||
|
||||
<!-- Teardown: drop the table. -->
<drop_query>DROP TABLE IF EXISTS test_ngram</drop_query>
|
||||
</test>
|
Loading…
Reference in New Issue
Block a user