2019-02-20 09:02:19 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Core/Types.h>

#include <utility>
#include <vector>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Good constants for LCG from wikipedia.
|
|
|
|
constexpr UInt64 LCG_A = 84589;
|
|
|
|
constexpr UInt64 LCG_C = 45989;
|
|
|
|
constexpr UInt64 LCG_M = 217728;
|
|
|
|
|
|
|
|
/// LinearCongruentialGenerator for generating random seeds for hash functions.
|
2019-02-20 20:17:44 +00:00
|
|
|
/// It is used here because it is very fast and lightweight.
|
2019-02-20 09:02:19 +00:00
|
|
|
/// https://en.wikipedia.org/wiki/Linear_congruential_generator
|
|
|
|
class LinearCongruentialGenerator
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
LinearCongruentialGenerator(
|
|
|
|
size_t seed, UInt64 a_ = LCG_A, UInt64 c_ = LCG_C, UInt64 m_ = LCG_M);
|
|
|
|
|
|
|
|
UInt64 next();
|
|
|
|
|
|
|
|
private:
|
|
|
|
UInt64 current;
|
|
|
|
UInt64 a;
|
|
|
|
UInt64 c;
|
|
|
|
UInt64 m;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Bloom filter for strings.
|
|
|
|
class StringBloomFilter
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/// size -- size of filter in bytes.
|
|
|
|
/// hashes -- number of used hash functions.
|
|
|
|
/// seed -- random seed for hash functions generation.
|
|
|
|
StringBloomFilter(size_t size_, size_t hashes_, size_t seed_);
|
|
|
|
|
|
|
|
bool find(const char * data, size_t len);
|
|
|
|
void add(const char * data, size_t len);
|
2019-02-20 12:48:50 +00:00
|
|
|
void clear();
|
2019-02-20 09:02:19 +00:00
|
|
|
|
|
|
|
/// Checks if this contains everything from another bloom filter.
|
|
|
|
/// Bloom filters must have equal size and seed.
|
|
|
|
bool contains(const StringBloomFilter & bf);
|
|
|
|
|
2019-02-20 16:24:46 +00:00
|
|
|
void merge(const StringBloomFilter & bf);
|
|
|
|
|
2019-02-20 11:22:07 +00:00
|
|
|
const std::vector<UInt8> & getFilter() const { return filter; };
|
|
|
|
void setFilter(std::vector<UInt8> && new_filter) { filter = std::move(new_filter); };
|
|
|
|
|
2019-02-20 20:17:44 +00:00
|
|
|
/// For debug.
|
2019-02-20 17:17:31 +00:00
|
|
|
UInt64 getFingerPrint() const;
|
2019-02-20 19:27:23 +00:00
|
|
|
|
|
|
|
friend bool operator== (const StringBloomFilter & a, const StringBloomFilter & b);
|
2019-02-20 09:02:19 +00:00
|
|
|
private:
|
|
|
|
size_t size;
|
|
|
|
size_t hashes;
|
|
|
|
size_t seed;
|
|
|
|
std::vector<UInt8> filter;
|
|
|
|
};
|
|
|
|
|
2019-02-20 19:27:23 +00:00
|
|
|
|
|
|
|
/// Equality comparison of two bloom filters. StringBloomFilter declares this
/// operator as a friend, so its definition (not in this header) may read private members.
/// NOTE(review): which members take part in the comparison is not visible here -- confirm in the implementation.
bool operator== (const StringBloomFilter & a, const StringBloomFilter & b);
|
|
|
|
|
2019-02-20 09:02:19 +00:00
|
|
|
}
|