2019-02-20 09:02:19 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <vector>
|
2020-09-15 09:55:57 +00:00
|
|
|
#include <common/types.h>
|
2019-10-15 04:22:51 +00:00
|
|
|
#include <Core/Field.h>
|
2019-05-10 03:42:28 +00:00
|
|
|
#include <Common/PODArray.h>
|
|
|
|
#include <Common/Allocator.h>
|
2019-10-15 04:22:51 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2019-05-10 03:42:28 +00:00
|
|
|
#include <Columns/ColumnVector.h>
|
2019-10-15 04:22:51 +00:00
|
|
|
#include <DataTypes/IDataType.h>
|
2019-02-20 09:02:19 +00:00
|
|
|
|
2020-07-10 08:13:21 +00:00
|
|
|
|
2019-02-21 21:29:24 +00:00
|
|
|
namespace DB
|
2019-02-20 09:02:19 +00:00
|
|
|
{
|
2020-05-28 12:37:05 +00:00
|
|
|
/// Bundle of the three settings a bloom filter is configured with,
/// so they can be passed around as a single value.
struct BloomFilterParameters
{
    /// Takes the filter size, the number of hash functions and the seed, in that order.
    /// Only declared here; the definition (and any validation it performs) is not visible in this header.
    BloomFilterParameters(size_t filter_size_, size_t filter_hashes_, size_t seed_);

    /// size of filter in bytes.
    size_t filter_size;
    /// number of used hash functions.
    size_t filter_hashes;
    /// random seed for hash functions generation.
    size_t seed;
};
|
2019-02-20 09:02:19 +00:00
|
|
|
|
2019-05-10 03:42:28 +00:00
|
|
|
class BloomFilter
|
2019-02-20 09:02:19 +00:00
|
|
|
{
|
2019-05-10 03:42:28 +00:00
|
|
|
|
2019-02-20 09:02:19 +00:00
|
|
|
public:
|
2019-02-23 15:56:48 +00:00
|
|
|
using UnderType = UInt64;
|
|
|
|
using Container = std::vector<UnderType>;
|
2019-02-22 19:59:40 +00:00
|
|
|
|
2020-05-28 12:37:05 +00:00
|
|
|
BloomFilter(const BloomFilterParameters & params);
|
2019-02-20 09:02:19 +00:00
|
|
|
/// size -- size of filter in bytes.
|
|
|
|
/// hashes -- number of used hash functions.
|
|
|
|
/// seed -- random seed for hash functions generation.
|
2019-05-10 03:42:28 +00:00
|
|
|
BloomFilter(size_t size_, size_t hashes_, size_t seed_);
|
2019-02-20 09:02:19 +00:00
|
|
|
|
|
|
|
bool find(const char * data, size_t len);
|
|
|
|
void add(const char * data, size_t len);
|
2019-02-20 12:48:50 +00:00
|
|
|
void clear();
|
2019-02-20 09:02:19 +00:00
|
|
|
|
2019-06-19 15:09:07 +00:00
|
|
|
void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
|
|
|
|
bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
|
2019-05-10 03:42:28 +00:00
|
|
|
|
2019-02-20 09:02:19 +00:00
|
|
|
/// Checks if this contains everything from another bloom filter.
|
|
|
|
/// Bloom filters must have equal size and seed.
|
2019-05-10 03:42:28 +00:00
|
|
|
bool contains(const BloomFilter & bf);
|
2019-02-20 09:02:19 +00:00
|
|
|
|
2019-02-26 19:37:07 +00:00
|
|
|
const Container & getFilter() const { return filter; }
|
|
|
|
Container & getFilter() { return filter; }
|
2019-02-20 11:22:07 +00:00
|
|
|
|
2019-02-20 20:17:44 +00:00
|
|
|
/// For debug.
|
2019-02-25 18:23:21 +00:00
|
|
|
UInt64 isEmpty() const;
|
2019-02-20 19:27:23 +00:00
|
|
|
|
2019-05-10 03:42:28 +00:00
|
|
|
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
|
2019-02-20 09:02:19 +00:00
|
|
|
private:
|
2019-02-21 21:29:24 +00:00
|
|
|
|
2019-02-20 09:02:19 +00:00
|
|
|
size_t size;
|
|
|
|
size_t hashes;
|
|
|
|
size_t seed;
|
2019-02-23 15:56:48 +00:00
|
|
|
size_t words;
|
2019-02-22 19:59:40 +00:00
|
|
|
Container filter;
|
2019-11-01 15:31:02 +00:00
|
|
|
|
|
|
|
public:
|
|
|
|
static ColumnPtr getPrimitiveColumn(const ColumnPtr & column);
|
|
|
|
static DataTypePtr getPrimitiveType(const DataTypePtr & data_type);
|
2019-02-20 09:02:19 +00:00
|
|
|
};
|
|
|
|
|
2019-05-10 03:42:28 +00:00
|
|
|
using BloomFilterPtr = std::shared_ptr<BloomFilter>;
|
|
|
|
|
|
|
|
bool operator== (const BloomFilter & a, const BloomFilter & b);
|
2019-02-20 19:27:23 +00:00
|
|
|
|
2019-11-01 15:31:02 +00:00
|
|
|
|
2019-02-20 09:02:19 +00:00
|
|
|
}
|