ClickHouse/dbms/src/Interpreters/BloomFilter.h

70 lines
1.7 KiB
C++
Raw Normal View History

2019-02-20 09:02:19 +00:00
#pragma once
#include <Core/Types.h>
#include <utility>
#include <vector>
namespace DB
{
/// Default parameters for the linear congruential generator declared below.
/// The values are a known-good combination taken from Wikipedia.
/// Going by the usual LCG naming, A is the multiplier, C the increment and M the modulus.
constexpr UInt64 LCG_A{84589};
constexpr UInt64 LCG_C{45989};
constexpr UInt64 LCG_M{217728};
/// LinearCongruentialGenerator for generating random seeds for hash functions.
2019-02-20 20:17:44 +00:00
/// It is used here because it is very fast and lightweight.
2019-02-20 09:02:19 +00:00
/// https://en.wikipedia.org/wiki/Linear_congruential_generator
class LinearCongruentialGenerator
{
public:
LinearCongruentialGenerator(
size_t seed, UInt64 a_ = LCG_A, UInt64 c_ = LCG_C, UInt64 m_ = LCG_M);
UInt64 next();
private:
UInt64 current;
UInt64 a;
UInt64 c;
UInt64 m;
};
/// Bloom filter for strings.
class StringBloomFilter
{
public:
/// size -- size of filter in bytes.
/// hashes -- number of used hash functions.
/// seed -- random seed for hash functions generation.
StringBloomFilter(size_t size_, size_t hashes_, size_t seed_);
bool find(const char * data, size_t len);
void add(const char * data, size_t len);
2019-02-20 12:48:50 +00:00
void clear();
2019-02-20 09:02:19 +00:00
/// Checks if this contains everything from another bloom filter.
/// Bloom filters must have equal size and seed.
bool contains(const StringBloomFilter & bf);
2019-02-20 16:24:46 +00:00
void merge(const StringBloomFilter & bf);
2019-02-20 11:22:07 +00:00
const std::vector<UInt8> & getFilter() const { return filter; };
void setFilter(std::vector<UInt8> && new_filter) { filter = std::move(new_filter); };
2019-02-20 20:17:44 +00:00
/// For debug.
2019-02-20 17:17:31 +00:00
UInt64 getFingerPrint() const;
2019-02-20 19:27:23 +00:00
friend bool operator== (const StringBloomFilter & a, const StringBloomFilter & b);
2019-02-20 09:02:19 +00:00
private:
size_t size;
size_t hashes;
size_t seed;
std::vector<UInt8> filter;
};
2019-02-20 19:27:23 +00:00
/// Equality comparison of two bloom filters (defined out of line).
/// It is declared a friend of StringBloomFilter, so it may inspect the private state.
bool operator==(const StringBloomFilter & a, const StringBloomFilter & b);
2019-02-20 09:02:19 +00:00
}