mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-12 02:23:14 +00:00
88 lines
2.2 KiB
C++
88 lines
2.2 KiB
C++
#pragma once
|
|
|
|
#include <IO/WriteBuffer.h>
|
|
#include <IO/BufferWithOwnMemory.h>
|
|
#include <IO/ReadHelpers.h>
|
|
#include <city.h>
|
|
|
|
#define DBMS_DEFAULT_HASHING_BLOCK_SIZE 2048ULL
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
template <typename Buffer>
|
|
class IHashingBuffer : public BufferWithOwnMemory<Buffer>
|
|
{
|
|
public:
|
|
using uint128 = CityHash_v1_0_2::uint128;
|
|
|
|
IHashingBuffer<Buffer>(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
|
|
: BufferWithOwnMemory<Buffer>(block_size_), block_pos(0), block_size(block_size_), state(0, 0)
|
|
{
|
|
}
|
|
|
|
uint128 getHash()
|
|
{
|
|
if (block_pos)
|
|
return CityHash_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[0], block_pos, state);
|
|
else
|
|
return state;
|
|
}
|
|
|
|
void append(DB::BufferBase::Position data)
|
|
{
|
|
state = CityHash_v1_0_2::CityHash128WithSeed(data, block_size, state);
|
|
}
|
|
|
|
/// computation of the hash depends on the partitioning of blocks
|
|
/// so you need to compute a hash of n complete pieces and one incomplete
|
|
void calculateHash(DB::BufferBase::Position data, size_t len);
|
|
|
|
protected:
|
|
size_t block_pos;
|
|
size_t block_size;
|
|
uint128 state;
|
|
};
|
|
|
|
/** Computes the hash from the data to write and passes it to the specified WriteBuffer.
|
|
* The buffer of the nested WriteBuffer is used as the main buffer.
|
|
*/
|
|
class HashingWriteBuffer : public IHashingBuffer<WriteBuffer>
|
|
{
|
|
private:
|
|
WriteBuffer & out;
|
|
|
|
void nextImpl() override
|
|
{
|
|
size_t len = offset();
|
|
|
|
Position data = working_buffer.begin();
|
|
calculateHash(data, len);
|
|
|
|
out.position() = pos;
|
|
out.next();
|
|
working_buffer = out.buffer();
|
|
}
|
|
|
|
public:
|
|
HashingWriteBuffer(
|
|
WriteBuffer & out_,
|
|
size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
|
|
: IHashingBuffer<DB::WriteBuffer>(block_size_), out(out_)
|
|
{
|
|
out.next(); /// If something has already been written to `out` before us, we will not let the remains of this data affect the hash.
|
|
working_buffer = out.buffer();
|
|
pos = working_buffer.begin();
|
|
state = uint128(0, 0);
|
|
}
|
|
|
|
uint128 getHash()
|
|
{
|
|
next();
|
|
return IHashingBuffer<WriteBuffer>::getHash();
|
|
}
|
|
};
|
|
|
|
}
|