2014-03-26 19:44:51 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <city.h>
|
|
|
|
|
#include <DB/IO/WriteBuffer.h>
|
|
|
|
|
#include <DB/IO/BufferWithOwnMemory.h>
|
2014-06-05 16:44:12 +00:00
|
|
|
|
#include <DB/IO/ReadHelpers.h>
|
2014-03-26 19:44:51 +00:00
|
|
|
|
|
|
|
|
|
/// Default block size, in bytes, for the hashing buffers declared in this header.
/// Data is hashed in pieces of exactly this many bytes, so the resulting hash depends on the value.
#define DBMS_DEFAULT_HASHING_BLOCK_SIZE 2048ULL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2014-06-06 14:50:29 +00:00
|
|
|
|
template <class Buffer>
|
|
|
|
|
class IHashingBuffer : public BufferWithOwnMemory<Buffer>
|
2014-03-26 19:44:51 +00:00
|
|
|
|
{
|
2014-06-06 14:50:29 +00:00
|
|
|
|
public:
|
2015-08-14 20:18:08 +00:00
|
|
|
|
IHashingBuffer<Buffer>(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
|
|
|
|
|
: BufferWithOwnMemory<Buffer>(block_size_), block_pos(0), block_size(block_size_), state(0, 0)
|
2014-06-06 14:50:29 +00:00
|
|
|
|
{
|
|
|
|
|
}
|
2014-03-26 19:44:51 +00:00
|
|
|
|
|
2014-06-06 14:50:29 +00:00
|
|
|
|
uint128 getHash()
|
|
|
|
|
{
|
|
|
|
|
if (block_pos)
|
|
|
|
|
return CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[0], block_pos, state);
|
|
|
|
|
else
|
|
|
|
|
return state;
|
|
|
|
|
}
|
2014-03-26 19:44:51 +00:00
|
|
|
|
|
2014-06-06 14:50:29 +00:00
|
|
|
|
void append(DB::BufferBase::Position data)
|
2014-03-26 19:44:51 +00:00
|
|
|
|
{
|
|
|
|
|
state = CityHash128WithSeed(data, block_size, state);
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-06 14:50:29 +00:00
|
|
|
|
/// вычисление хэша зависит от разбиения по блокам
|
|
|
|
|
/// поэтому нужно вычислить хэш от n полных кусочков и одного неполного
|
|
|
|
|
void calculateHash(DB::BufferBase::Position data, size_t len)
|
2014-03-26 19:44:51 +00:00
|
|
|
|
{
|
|
|
|
|
if (len)
|
|
|
|
|
{
|
2014-06-06 14:50:29 +00:00
|
|
|
|
/// если данных меньше, чем block_size то сложим их в свой буффер и посчитаем от них hash позже
|
2014-03-26 19:44:51 +00:00
|
|
|
|
if (block_pos + len < block_size)
|
|
|
|
|
{
|
2014-06-06 14:50:29 +00:00
|
|
|
|
memcpy(&BufferWithOwnMemory<Buffer>::memory[block_pos], data, len);
|
2014-03-26 19:44:51 +00:00
|
|
|
|
block_pos += len;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2014-06-06 14:50:29 +00:00
|
|
|
|
/// если в буффер уже что-то записано, то допишем его
|
2014-03-26 19:44:51 +00:00
|
|
|
|
if (block_pos)
|
|
|
|
|
{
|
|
|
|
|
size_t n = block_size - block_pos;
|
2014-06-06 14:50:29 +00:00
|
|
|
|
memcpy(&BufferWithOwnMemory<Buffer>::memory[block_pos], data, n);
|
|
|
|
|
append(&BufferWithOwnMemory<Buffer>::memory[0]);
|
2014-03-26 19:44:51 +00:00
|
|
|
|
len -= n;
|
|
|
|
|
data += n;
|
|
|
|
|
block_pos = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (len >= block_size)
|
|
|
|
|
{
|
|
|
|
|
append(data);
|
|
|
|
|
len -= block_size;
|
|
|
|
|
data += block_size;
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-06 14:50:29 +00:00
|
|
|
|
/// запишем остаток в свой буфер
|
2014-03-26 19:44:51 +00:00
|
|
|
|
if (len)
|
|
|
|
|
{
|
2014-06-06 14:50:29 +00:00
|
|
|
|
memcpy(&BufferWithOwnMemory<Buffer>::memory[0], data, len);
|
2014-03-26 19:44:51 +00:00
|
|
|
|
block_pos = len;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-06 14:50:29 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
size_t block_pos;
|
|
|
|
|
size_t block_size;
|
|
|
|
|
uint128 state;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Вычисляет хеш от записываемых данных и передает их в указанный WriteBuffer.
|
|
|
|
|
* В качестве основного буфера используется буфер вложенного WriteBuffer.
|
|
|
|
|
*/
|
|
|
|
|
class HashingWriteBuffer : public IHashingBuffer<WriteBuffer>
|
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
WriteBuffer & out;
|
|
|
|
|
|
|
|
|
|
void nextImpl() override
|
|
|
|
|
{
|
|
|
|
|
size_t len = offset();
|
|
|
|
|
|
|
|
|
|
Position data = working_buffer.begin();
|
|
|
|
|
calculateHash(data, len);
|
2014-03-26 19:44:51 +00:00
|
|
|
|
|
|
|
|
|
out.position() = pos;
|
|
|
|
|
out.next();
|
|
|
|
|
working_buffer = out.buffer();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
HashingWriteBuffer(
|
|
|
|
|
WriteBuffer & out_,
|
|
|
|
|
size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
|
2014-06-06 14:50:29 +00:00
|
|
|
|
: IHashingBuffer<DB::WriteBuffer>(block_size_), out(out_)
|
2014-03-26 19:44:51 +00:00
|
|
|
|
{
|
|
|
|
|
out.next(); /// Если до нас в out что-то уже писали, не дадим остаткам этих данных повлиять на хеш.
|
|
|
|
|
working_buffer = out.buffer();
|
|
|
|
|
pos = working_buffer.begin();
|
|
|
|
|
state = uint128(0, 0);
|
|
|
|
|
}
|
2014-07-21 13:46:13 +00:00
|
|
|
|
|
|
|
|
|
uint128 getHash()
|
|
|
|
|
{
|
|
|
|
|
next();
|
|
|
|
|
return IHashingBuffer<WriteBuffer>::getHash();
|
|
|
|
|
}
|
2014-03-26 19:44:51 +00:00
|
|
|
|
};
|
|
|
|
|
}
|
2014-06-05 16:44:12 +00:00
|
|
|
|
|
2014-06-06 09:17:13 +00:00
|
|
|
|
|
|
|
|
|
/// Converts a 128-bit hash value to a string.
/// Declaration only: the exact output format is determined by the definition, which is not visible here.
std::string uint128ToString(uint128 data);
|
|
|
|
|
|
2014-06-05 16:44:12 +00:00
|
|
|
|
/// Stream output for a 128-bit hash value.
/// Declaration only: the textual format is determined by the definition, which is not visible here.
std::ostream & operator<<(std::ostream & os, const uint128 & data);
|
|
|
|
|
/// Stream input for a 128-bit hash value.
/// Declaration only: presumably the inverse of operator<< above — confirm against the definition.
std::istream & operator>>(std::istream & is, uint128 & data);
|