// ClickHouse/src/Interpreters/BloomFilter.h
#pragma once

#include <memory>
#include <vector>

#include <common/types.h>
#include <Core/Field.h>
#include <Common/PODArray.h>
#include <Common/Allocator.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/IDataType.h>

namespace DB
{
/// Bundle of the three values needed to construct a BloomFilter
/// (see the BloomFilter constructor that takes a BloomFilterParameters).
struct BloomFilterParameters
{
    /// Only declared here; the definition lives outside this header.
    /// NOTE(review): presumably stores each argument in the same-named field
    /// (and may validate them) -- confirm against the definition.
    BloomFilterParameters(size_t filter_size_, size_t filter_hashes_, size_t seed_);

    /// size of filter in bytes.
    size_t filter_size;
    /// number of used hash functions.
    size_t filter_hashes;
    /// random seed for hash functions generation.
    size_t seed;
};

class BloomFilter
2019-02-20 09:02:19 +00:00
{
2019-05-10 03:42:28 +00:00
2019-02-20 09:02:19 +00:00
public:
2019-02-23 15:56:48 +00:00
using UnderType = UInt64;
using Container = std::vector<UnderType>;
2019-02-22 19:59:40 +00:00
2020-05-28 12:37:05 +00:00
BloomFilter(const BloomFilterParameters & params);
2019-02-20 09:02:19 +00:00
/// size -- size of filter in bytes.
/// hashes -- number of used hash functions.
/// seed -- random seed for hash functions generation.
2019-05-10 03:42:28 +00:00
BloomFilter(size_t size_, size_t hashes_, size_t seed_);
2019-02-20 09:02:19 +00:00
bool find(const char * data, size_t len);
void add(const char * data, size_t len);
2019-02-20 12:48:50 +00:00
void clear();
2019-02-20 09:02:19 +00:00
2019-06-19 15:09:07 +00:00
void addHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
bool findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed);
2019-05-10 03:42:28 +00:00
2019-02-20 09:02:19 +00:00
/// Checks if this contains everything from another bloom filter.
/// Bloom filters must have equal size and seed.
2019-05-10 03:42:28 +00:00
bool contains(const BloomFilter & bf);
2019-02-20 09:02:19 +00:00
2019-02-26 19:37:07 +00:00
const Container & getFilter() const { return filter; }
Container & getFilter() { return filter; }
2019-02-20 11:22:07 +00:00
2019-02-20 20:17:44 +00:00
/// For debug.
2019-02-25 18:23:21 +00:00
UInt64 isEmpty() const;
2019-02-20 19:27:23 +00:00
2019-05-10 03:42:28 +00:00
friend bool operator== (const BloomFilter & a, const BloomFilter & b);
2019-02-20 09:02:19 +00:00
private:
2019-02-21 21:29:24 +00:00
2019-02-20 09:02:19 +00:00
size_t size;
size_t hashes;
size_t seed;
2019-02-23 15:56:48 +00:00
size_t words;
2019-02-22 19:59:40 +00:00
Container filter;
2019-11-01 15:31:02 +00:00
public:
static ColumnPtr getPrimitiveColumn(const ColumnPtr & column);
static DataTypePtr getPrimitiveType(const DataTypePtr & data_type);
2019-02-20 09:02:19 +00:00
};
2019-05-10 03:42:28 +00:00
/// Shared-ownership handle to a BloomFilter.
using BloomFilterPtr = std::shared_ptr<BloomFilter>;

/// Equality comparison of two bloom filters; declared as a friend of
/// BloomFilter and defined outside this header.
bool operator== (const BloomFilter & a, const BloomFilter & b);

}