// ClickHouse/dbms/Interpreters/BloomFilter.h

#pragma once

#include <memory>
#include <vector>

#include <Core/Types.h>
#include <Core/Field.h>
#include <Common/PODArray.h>
#include <Common/Allocator.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/IDataType.h>

namespace DB
2019-02-20 09:02:19 +00:00
{
2019-05-10 03:42:28 +00:00
class BloomFilter
2019-02-20 09:02:19 +00:00
{
2019-05-10 03:42:28 +00:00
2019-02-20 09:02:19 +00:00
public:
2019-02-23 15:56:48 +00:00
using UnderType = UInt64;
using Container = std::vector<UnderType>;
2019-02-22 19:59:40 +00:00
2019-02-20 09:02:19 +00:00
/// size -- size of filter in bytes.
/// hashes -- number of used hash functions.
/// seed -- random seed for hash functions generation.
2019-05-10 03:42:28 +00:00
BloomFilter(size_t size_, size_t hashes_, size_t seed_);
2019-02-20 09:02:19 +00:00
bool find(const char * data, size_t len);
void add(const char * data, size_t len);
2019-02-20 12:48:50 +00:00
void clear();
2019-02-20 09:02:19 +00:00
2019-06-19 15:09:07 +00:00
void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
2019-05-10 03:42:28 +00:00
2019-02-20 09:02:19 +00:00
/// Checks if this contains everything from another bloom filter.
/// Bloom filters must have equal size and seed.
2019-05-10 03:42:28 +00:00
bool contains(const BloomFilter & bf);
2019-02-20 09:02:19 +00:00
2019-02-26 19:37:07 +00:00
const Container & getFilter() const { return filter; }
Container & getFilter() { return filter; }
2019-02-20 11:22:07 +00:00
2019-02-20 20:17:44 +00:00
/// For debug.
2019-02-25 18:23:21 +00:00
UInt64 isEmpty() const;
2019-02-20 19:27:23 +00:00
2019-05-10 03:42:28 +00:00
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
2019-02-20 09:02:19 +00:00
private:
2019-02-21 21:29:24 +00:00
2019-02-20 09:02:19 +00:00
size_t size;
size_t hashes;
size_t seed;
2019-02-23 15:56:48 +00:00
size_t words;
2019-02-22 19:59:40 +00:00
Container filter;
2019-11-01 15:31:02 +00:00
public:
static ColumnPtr getPrimitiveColumn(const ColumnPtr & column);
static DataTypePtr getPrimitiveType(const DataTypePtr & data_type);
2019-02-20 09:02:19 +00:00
};
2019-05-10 03:42:28 +00:00
using BloomFilterPtr = std::shared_ptr<BloomFilter>;
bool operator== (const BloomFilter & a, const BloomFilter & b);
2019-02-20 19:27:23 +00:00
2019-11-01 15:31:02 +00:00
2019-02-20 09:02:19 +00:00
}