mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Better [#DEVTOOLS-3381].
This commit is contained in:
parent
a736ef618a
commit
604de2bfeb
@ -47,14 +47,22 @@
|
||||
#include <stdint.h>
|
||||
#include <utility>
|
||||
|
||||
/** This is a version of CityHash that predates v1.0.3 algorithm change.
|
||||
* Why do we need exactly this version?
|
||||
* Although hash values of CityHash are not recommended for storing persistently anywhere,
|
||||
* it has already been used this way in ClickHouse:
|
||||
* - for calculation of checksums of compressed chunks and for data parts;
|
||||
* - this version of CityHash is exposed in cityHash64 function in ClickHouse SQL language;
|
||||
* - and already used by many users for data ordering, sampling and sharding.
|
||||
*/
|
||||
namespace CityHash64_v1_0_2
|
||||
{
|
||||
|
||||
typedef uint8_t uint8;
|
||||
typedef uint32_t uint32;
|
||||
typedef uint64_t uint64;
|
||||
typedef std::pair<uint64, uint64> uint128;
|
||||
|
||||
/// This is a version of CityHash that predates v1.0.3 algorithm change.
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Returns the low half of a 128-bit value, i.e. the `first` member of the pair.
inline uint64 Uint128Low64(const uint128& x) {
  const uint64 low = x.first;
  return low;
}
|
||||
/// Returns the high half of a 128-bit value, i.e. the `second` member of the pair.
inline uint64 Uint128High64(const uint128& x) {
  const uint64 high = x.second;
  return high;
}
|
||||
|
@ -30,7 +30,7 @@
|
||||
|
||||
#include <city.h>
|
||||
|
||||
namespace DB
|
||||
namespace CityHash64_v1_0_2
|
||||
{
|
||||
|
||||
// Hash function for a byte array.
|
||||
|
@ -35,17 +35,6 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Loads 8 bytes starting at p into a uint64.
// The bytes are copied with memcpy, so p is not dereferenced as a uint64 pointer.
static uint64 UNALIGNED_LOAD64(const char *p) {
  uint64 loaded;
  memcpy(&loaded, p, sizeof(uint64));
  return loaded;
}
|
||||
|
||||
// Loads 4 bytes starting at p into a uint32.
// The bytes are copied with memcpy, so p is not dereferenced as a uint32 pointer.
static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 loaded;
  memcpy(&loaded, p, sizeof(uint32));
  return loaded;
}
|
||||
|
||||
#if !defined(WORDS_BIGENDIAN)
|
||||
|
||||
@ -82,9 +71,21 @@ static uint32 UNALIGNED_LOAD32(const char *p) {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
namespace CityHash64_v1_0_2
|
||||
{
|
||||
|
||||
// Loads 8 bytes starting at p into a uint64.
// The bytes are copied with memcpy, so p is not dereferenced as a uint64 pointer.
static uint64 UNALIGNED_LOAD64(const char *p) {
  uint64 loaded;
  memcpy(&loaded, p, sizeof(uint64));
  return loaded;
}
|
||||
|
||||
// Loads 4 bytes starting at p into a uint32.
// The bytes are copied with memcpy, so p is not dereferenced as a uint32 pointer.
static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 loaded;
  memcpy(&loaded, p, sizeof(uint32));
  return loaded;
}
|
||||
|
||||
// Reads 8 bytes at p via UNALIGNED_LOAD64 and passes the value through
// uint64_in_expected_order before returning it.
static uint64 Fetch64(const char *p) {
  const uint64 raw = UNALIGNED_LOAD64(p);
  return uint64_in_expected_order(raw);
}
|
||||
@ -362,7 +363,7 @@ uint128 CityHash128(const char *s, size_t len) {
|
||||
#include <citycrc.h>
|
||||
#include <nmmintrin.h>
|
||||
|
||||
namespace DB
|
||||
namespace CityHash64_v1_0_2
|
||||
{
|
||||
|
||||
// Requires len >= 240.
|
||||
|
@ -266,7 +266,7 @@ struct OneAdder<T, Data, typename std::enable_if<
|
||||
typename std::enable_if<std::is_same<T2, String>::value>::type * = nullptr)
|
||||
{
|
||||
StringRef value = column.getDataAt(row_num);
|
||||
data.set.insert(CityHash64(value.data, value.size));
|
||||
data.set.insert(CityHash64_v1_0_2::CityHash64(value.data, value.size));
|
||||
}
|
||||
};
|
||||
|
||||
@ -290,7 +290,7 @@ struct OneAdder<T, Data, typename std::enable_if<
|
||||
typename std::enable_if<std::is_same<T2, String>::value>::type * = nullptr)
|
||||
{
|
||||
StringRef value = column.getDataAt(row_num);
|
||||
data.set.insert(CityHash64(value.data, value.size));
|
||||
data.set.insert(CityHash64_v1_0_2::CityHash64(value.data, value.size));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -107,7 +107,7 @@ struct AggregateFunctionUniqUpToData<String> : AggregateFunctionUniqUpToData<UIn
|
||||
{
|
||||
/// Keep in mind that calculations are approximate.
|
||||
StringRef value = column.getDataAt(row_num);
|
||||
insert(CityHash64(value.data, value.size), threshold);
|
||||
insert(CityHash64_v1_0_2::CityHash64(value.data, value.size), threshold);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -39,14 +39,14 @@ struct UniqVariadicHash<false, false>
|
||||
|
||||
{
|
||||
StringRef value = (*column)->getDataAt(row_num);
|
||||
hash = CityHash64(value.data, value.size);
|
||||
hash = CityHash64_v1_0_2::CityHash64(value.data, value.size);
|
||||
++column;
|
||||
}
|
||||
|
||||
while (column < columns_end)
|
||||
{
|
||||
StringRef value = (*column)->getDataAt(row_num);
|
||||
hash = Hash128to64(uint128(CityHash64(value.data, value.size), hash));
|
||||
hash = CityHash64_v1_0_2::Hash128to64(CityHash64_v1_0_2::uint128(CityHash64_v1_0_2::CityHash64(value.data, value.size), hash));
|
||||
++column;
|
||||
}
|
||||
|
||||
@ -68,14 +68,14 @@ struct UniqVariadicHash<false, true>
|
||||
|
||||
{
|
||||
StringRef value = column->get()->getDataAt(row_num);
|
||||
hash = CityHash64(value.data, value.size);
|
||||
hash = CityHash64_v1_0_2::CityHash64(value.data, value.size);
|
||||
++column;
|
||||
}
|
||||
|
||||
while (column < columns_end)
|
||||
{
|
||||
StringRef value = column->get()->getDataAt(row_num);
|
||||
hash = Hash128to64(uint128(CityHash64(value.data, value.size), hash));
|
||||
hash = CityHash64_v1_0_2::Hash128to64(CityHash64_v1_0_2::uint128(CityHash64_v1_0_2::CityHash64(value.data, value.size), hash));
|
||||
++column;
|
||||
}
|
||||
|
||||
|
@ -42,7 +42,7 @@ struct UInt128Hash
|
||||
{
|
||||
size_t operator()(UInt128 x) const
|
||||
{
|
||||
return Hash128to64({x.first, x.second});
|
||||
return CityHash64_v1_0_2::Hash128to64({x.first, x.second});
|
||||
}
|
||||
};
|
||||
|
||||
@ -122,7 +122,7 @@ struct UInt256Hash
|
||||
size_t operator()(UInt256 x) const
|
||||
{
|
||||
/// NOTE suboptimal
|
||||
return Hash128to64({Hash128to64({x.a, x.b}), Hash128to64({x.c, x.d})});
|
||||
return CityHash64_v1_0_2::Hash128to64({CityHash64_v1_0_2::Hash128to64({x.a, x.b}), CityHash64_v1_0_2::Hash128to64({x.c, x.d})});
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -48,12 +48,12 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
*reinterpret_cast<UInt64*>(&hashes[i * 16]) = DB::CityHash64(strings[i].data(), strings[i].size());
|
||||
*reinterpret_cast<UInt64*>(&hashes[i * 16]) = CityHash64_v1_0_2::CityHash64(strings[i].data(), strings[i].size());
|
||||
}
|
||||
|
||||
watch.stop();
|
||||
|
||||
UInt64 check = DB::CityHash64(&hashes[0], hashes.size());
|
||||
UInt64 check = CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size());
|
||||
|
||||
std::cerr << std::fixed << std::setprecision(2)
|
||||
<< "CityHash64 (check = " << check << ")"
|
||||
@ -78,7 +78,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
watch.stop();
|
||||
|
||||
UInt64 check = DB::CityHash64(&hashes[0], hashes.size());
|
||||
UInt64 check = CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size());
|
||||
|
||||
std::cerr << std::fixed << std::setprecision(2)
|
||||
<< "SipHash (check = " << check << ")"
|
||||
@ -99,7 +99,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
watch.stop();
|
||||
|
||||
UInt64 check = DB::CityHash64(&hashes[0], hashes.size());
|
||||
UInt64 check = CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size());
|
||||
|
||||
std::cerr << std::fixed << std::setprecision(2)
|
||||
<< "SipHash, stream (check = " << check << ")"
|
||||
@ -121,7 +121,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
watch.stop();
|
||||
|
||||
UInt64 check = DB::CityHash64(&hashes[0], hashes.size());
|
||||
UInt64 check = CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size());
|
||||
|
||||
std::cerr << std::fixed << std::setprecision(2)
|
||||
<< "MD5 (check = " << check << ")"
|
||||
|
@ -165,7 +165,7 @@ struct StringRefHash64
|
||||
{
|
||||
size_t operator() (StringRef x) const
|
||||
{
|
||||
return DB::CityHash64(x.data, x.size);
|
||||
return CityHash64_v1_0_2::CityHash64(x.data, x.size);
|
||||
}
|
||||
};
|
||||
|
||||
@ -177,7 +177,7 @@ struct StringRefHash64
|
||||
|
||||
inline UInt64 hashLen16(UInt64 u, UInt64 v)
|
||||
{
|
||||
return DB::Hash128to64(uint128(u, v));
|
||||
return CityHash64_v1_0_2::Hash128to64(CityHash64_v1_0_2::uint128(u, v));
|
||||
}
|
||||
|
||||
inline UInt64 shiftMix(UInt64 val)
|
||||
|
@ -638,9 +638,9 @@ struct URLHashImpl
|
||||
{
|
||||
/// do not take last slash, '?' or '#' character into account
|
||||
if (size > 0 && (data[size - 1] == '/' || data[size - 1] == '?' || data[size - 1] == '#'))
|
||||
return CityHash64(data, size - 1);
|
||||
return CityHash64_v1_0_2::CityHash64(data, size - 1);
|
||||
|
||||
return CityHash64(data, size);
|
||||
return CityHash64_v1_0_2::CityHash64(data, size);
|
||||
}
|
||||
};
|
||||
|
||||
@ -844,10 +844,10 @@ struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||
struct ImplCityHash64
|
||||
{
|
||||
static constexpr auto name = "cityHash64";
|
||||
using uint128_t = uint128;
|
||||
using uint128_t = CityHash64_v1_0_2::uint128;
|
||||
|
||||
static auto Hash128to64(const uint128_t & x) { return DB::Hash128to64(x); }
|
||||
static auto Hash64(const char * const s, const std::size_t len) { return CityHash64(s, len); }
|
||||
static auto Hash128to64(const uint128_t & x) { return CityHash64_v1_0_2::Hash128to64(x); }
|
||||
static auto Hash64(const char * const s, const std::size_t len) { return CityHash64_v1_0_2::CityHash64(s, len); }
|
||||
};
|
||||
|
||||
struct ImplFarmHash64
|
||||
@ -862,9 +862,9 @@ struct ImplFarmHash64
|
||||
struct ImplMetroHash64
|
||||
{
|
||||
static constexpr auto name = "metroHash64";
|
||||
using uint128_t = uint128;
|
||||
using uint128_t = CityHash64_v1_0_2::uint128;
|
||||
|
||||
static auto Hash128to64(const uint128_t & x) { return DB::Hash128to64(x); }
|
||||
static auto Hash128to64(const uint128_t & x) { return CityHash64_v1_0_2::Hash128to64(x); }
|
||||
static auto Hash64(const char * const s, const std::size_t len)
|
||||
{
|
||||
union {
|
||||
|
@ -42,7 +42,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
|
||||
if (compressed_in->eof())
|
||||
return 0;
|
||||
|
||||
uint128 checksum;
|
||||
CityHash64_v1_0_2::uint128 checksum;
|
||||
compressed_in->readStrict(reinterpret_cast<char *>(&checksum), sizeof(checksum));
|
||||
|
||||
own_compressed_buffer.resize(COMPRESSED_BLOCK_HEADER_SIZE);
|
||||
@ -80,7 +80,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
|
||||
compressed_in->readStrict(&compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], size_compressed - COMPRESSED_BLOCK_HEADER_SIZE);
|
||||
}
|
||||
|
||||
if (!disable_checksum && checksum != CityHash128(&compressed_buffer[0], size_compressed))
|
||||
if (!disable_checksum && checksum != CityHash64_v1_0_2::CityHash128(&compressed_buffer[0], size_compressed))
|
||||
throw Exception("Checksum doesn't match: corrupted data.", ErrorCodes::CHECKSUM_DOESNT_MATCH);
|
||||
|
||||
return size_compressed + sizeof(checksum);
|
||||
|
@ -102,7 +102,7 @@ void CompressedWriteBuffer::nextImpl()
|
||||
throw Exception("Unknown compression method", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
}
|
||||
|
||||
uint128 checksum = CityHash128(compressed_buffer_ptr, compressed_size);
|
||||
CityHash64_v1_0_2::uint128 checksum = CityHash64_v1_0_2::CityHash128(compressed_buffer_ptr, compressed_size);
|
||||
out.write(reinterpret_cast<const char *>(&checksum), sizeof(checksum));
|
||||
|
||||
out.write(compressed_buffer_ptr, compressed_size);
|
||||
|
@ -52,36 +52,3 @@ template class IHashingBuffer<DB::ReadBuffer>;
|
||||
template class IHashingBuffer<DB::WriteBuffer>;
|
||||
|
||||
}
|
||||
|
||||
/// Width of each zero-padded decimal field in the text form of uint128 (a UInt64 itself needs at most 20 decimal digits).
|
||||
static const size_t UINT64_DECIMAL_SIZE = 39;
|
||||
std::string uint128ToString(uint128 data)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << std::setw(UINT64_DECIMAL_SIZE) << std::setfill('0') << data.first << std::setw(UINT64_DECIMAL_SIZE) << std::setfill('0') << data.second;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & os, const uint128 & data)
|
||||
{
|
||||
os << uint128ToString(data);
|
||||
return os;
|
||||
}
|
||||
|
||||
std::istream & operator>>(std::istream & is, uint128 & data)
|
||||
{
|
||||
std::vector<char> buffer(UINT64_DECIMAL_SIZE);
|
||||
is.read(buffer.data(), UINT64_DECIMAL_SIZE);
|
||||
data.first = DB::parse<UInt64>(buffer.data(), UINT64_DECIMAL_SIZE);
|
||||
|
||||
if (!is)
|
||||
throw DB::Exception(std::string("Fail to parse uint128 from ") + buffer.data());
|
||||
|
||||
is.read(buffer.data(), UINT64_DECIMAL_SIZE);
|
||||
data.first = DB::parse<UInt64>(buffer.data(), UINT64_DECIMAL_SIZE);
|
||||
|
||||
if (!is)
|
||||
throw DB::Exception(std::string("Fail to parse uint128 from ") + buffer.data());
|
||||
|
||||
return is;
|
||||
}
|
||||
|
@ -15,6 +15,8 @@ template <class Buffer>
|
||||
class IHashingBuffer : public BufferWithOwnMemory<Buffer>
|
||||
{
|
||||
public:
|
||||
using uint128 = CityHash64_v1_0_2::uint128;
|
||||
|
||||
IHashingBuffer<Buffer>(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE)
|
||||
: BufferWithOwnMemory<Buffer>(block_size_), block_pos(0), block_size(block_size_), state(0, 0)
|
||||
{
|
||||
@ -23,14 +25,14 @@ public:
|
||||
uint128 getHash()
|
||||
{
|
||||
if (block_pos)
|
||||
return CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[0], block_pos, state);
|
||||
return CityHash64_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[0], block_pos, state);
|
||||
else
|
||||
return state;
|
||||
}
|
||||
|
||||
void append(DB::BufferBase::Position data)
|
||||
{
|
||||
state = CityHash128WithSeed(data, block_size, state);
|
||||
state = CityHash64_v1_0_2::CityHash128WithSeed(data, block_size, state);
|
||||
}
|
||||
|
||||
/// computation of the hash depends on the partitioning of blocks
|
||||
@ -82,9 +84,3 @@ public:
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
std::string uint128ToString(uint128 data);
|
||||
|
||||
std::ostream & operator<<(std::ostream & os, const uint128 & data);
|
||||
std::istream & operator>>(std::istream & is, uint128 & data);
|
||||
|
@ -657,9 +657,8 @@ template <typename T>
|
||||
inline typename std::enable_if<std::is_arithmetic<T>::value, void>::type
|
||||
readBinary(T & x, ReadBuffer & buf) { readPODBinary(x, buf); }
|
||||
|
||||
inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); }
|
||||
inline void readBinary(uint128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
|
||||
inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); }
|
||||
inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); }
|
||||
inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); }
|
||||
inline void readBinary(LocalDateTime & x, ReadBuffer & buf) { readPODBinary(x, buf); }
|
||||
|
||||
|
||||
@ -672,9 +671,9 @@ template <typename T>
|
||||
inline typename std::enable_if<std::is_floating_point<T>::value, void>::type
|
||||
readText(T & x, ReadBuffer & buf) { readFloatText(x, buf); }
|
||||
|
||||
inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
|
||||
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
|
||||
inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
|
||||
inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
|
||||
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
|
||||
inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); }
|
||||
inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
|
||||
|
||||
|
||||
@ -684,7 +683,7 @@ template <typename T>
|
||||
inline typename std::enable_if<std::is_arithmetic<T>::value, void>::type
|
||||
readQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }
|
||||
|
||||
inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); }
|
||||
inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); }
|
||||
|
||||
inline void readQuoted(LocalDate & x, ReadBuffer & buf)
|
||||
{
|
||||
@ -706,7 +705,7 @@ template <typename T>
|
||||
inline typename std::enable_if<std::is_arithmetic<T>::value, void>::type
|
||||
readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); }
|
||||
|
||||
inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); }
|
||||
inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); }
|
||||
|
||||
inline void readDoubleQuoted(LocalDate & x, ReadBuffer & buf)
|
||||
{
|
||||
@ -746,7 +745,7 @@ inline typename std::enable_if<std::is_arithmetic<T>::value, void>::type
|
||||
readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
|
||||
inline void readCSV(String & x, ReadBuffer & buf, const char delimiter = ',') { readCSVString(x, buf, delimiter); }
|
||||
inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); }
|
||||
|
||||
|
||||
|
@ -581,11 +581,10 @@ template <typename T>
|
||||
inline typename std::enable_if<std::is_arithmetic<T>::value, void>::type
|
||||
writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); }
|
||||
|
||||
inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
|
||||
inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
|
||||
inline void writeBinary(const uint128 & x, WriteBuffer & buf) { writePODBinary(x, buf); }
|
||||
inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); }
|
||||
inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); }
|
||||
inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
|
||||
inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); }
|
||||
inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); }
|
||||
inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); }
|
||||
|
||||
|
||||
/// Methods for outputting the value in text form for a tab-separated format.
|
||||
|
@ -4,19 +4,19 @@
|
||||
#define FAIL(msg) { std::cout << msg; exit(1); }
|
||||
|
||||
|
||||
uint128 referenceHash(const char * data, size_t len)
|
||||
CityHash64_v1_0_2::uint128 referenceHash(const char * data, size_t len)
|
||||
{
|
||||
const size_t block_size = DBMS_DEFAULT_HASHING_BLOCK_SIZE;
|
||||
uint128 state(0, 0);
|
||||
CityHash64_v1_0_2::uint128 state(0, 0);
|
||||
size_t pos;
|
||||
|
||||
for (pos = 0; pos + block_size <= len; pos += block_size)
|
||||
{
|
||||
state = DB::CityHash128WithSeed(data + pos, block_size, state);
|
||||
state = CityHash64_v1_0_2::CityHash128WithSeed(data + pos, block_size, state);
|
||||
}
|
||||
|
||||
if (pos < len)
|
||||
state = DB::CityHash128WithSeed(data + pos, len - pos, state);
|
||||
state = CityHash64_v1_0_2::CityHash128WithSeed(data + pos, len - pos, state);
|
||||
|
||||
return state;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ void test(size_t data_size)
|
||||
for (size_t i = 0; i < data_size; ++i)
|
||||
data[i] = rand() & 255;
|
||||
|
||||
uint128 reference = referenceHash(data, data_size);
|
||||
CityHash64_v1_0_2::uint128 reference = referenceHash(data, data_size);
|
||||
|
||||
std::vector<size_t> block_sizes = {56, 128, 513, 2048, 3055, 4097, 4096};
|
||||
for (size_t read_buffer_block_size : block_sizes)
|
||||
@ -52,7 +52,6 @@ void test(size_t data_size)
|
||||
|
||||
if (buf.getHash() != reference)
|
||||
{
|
||||
//std::cout << uint128ToString(buf.getHash()) << " " << uint128ToString(reference) << std::endl;
|
||||
FAIL("failed on data size " << data_size << " reading by blocks of size " << read_buffer_block_size);
|
||||
}
|
||||
if (buf.getHash() != out.getHash())
|
||||
|
@ -11,7 +11,7 @@ void test(size_t data_size)
|
||||
for (size_t i = 0; i < data_size; ++i)
|
||||
data[i] = rand() & 255;
|
||||
|
||||
uint128 reference = referenceHash(data, data_size);
|
||||
CityHash64_v1_0_2::uint128 reference = referenceHash(data, data_size);
|
||||
|
||||
DB::WriteBufferFromFile sink("/dev/null", 1 << 16);
|
||||
|
||||
|
@ -76,7 +76,7 @@ struct DefaultHash<CompactStringRef>
|
||||
{
|
||||
size_t operator() (CompactStringRef x) const
|
||||
{
|
||||
return DB::CityHash64(x.data(), x.size);
|
||||
return CityHash64_v1_0_2::CityHash64(x.data(), x.size);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -65,7 +65,7 @@ struct DefaultHash<STRUCT> \
|
||||
{ \
|
||||
size_t operator() (STRUCT x) const \
|
||||
{ \
|
||||
return DB::CityHash64(x.data, x.size); \
|
||||
return CityHash64_v1_0_2::CityHash64(x.data, x.size); \
|
||||
} \
|
||||
};
|
||||
|
||||
|
@ -68,7 +68,7 @@ struct DefaultHash<STRUCT> \
|
||||
{ \
|
||||
size_t operator() (STRUCT x) const \
|
||||
{ \
|
||||
return DB::CityHash64(x.data, x.size); \
|
||||
return CityHash64_v1_0_2::CityHash64(x.data, x.size); \
|
||||
} \
|
||||
};
|
||||
|
||||
|
@ -126,7 +126,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
|
||||
if (hashing_out.count() != size)
|
||||
throw Exception("Unexpected size of file " + path, ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART);
|
||||
|
||||
writeBinary(hashing_out.getHash(), out);
|
||||
writePODBinary(hashing_out.getHash(), out);
|
||||
|
||||
if (file_name != "checksums.txt" &&
|
||||
file_name != "columns.txt")
|
||||
@ -250,8 +250,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl(
|
||||
throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED);
|
||||
}
|
||||
|
||||
uint128 expected_hash;
|
||||
readBinary(expected_hash, in);
|
||||
MergeTreeDataPartChecksum::uint128 expected_hash;
|
||||
readPODBinary(expected_hash, in);
|
||||
|
||||
if (expected_hash != hashing_out.getHash())
|
||||
throw Exception("Checksum mismatch for file " + absolute_part_path + file_name + " transferred from " + replica_path);
|
||||
|
@ -959,7 +959,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
|
||||
|
||||
/// Update primary key if needed.
|
||||
size_t new_primary_key_file_size{};
|
||||
uint128 new_primary_key_hash{};
|
||||
MergeTreeDataPartChecksum::uint128 new_primary_key_hash{};
|
||||
|
||||
if (new_primary_key.get() != primary_expr_ast.get())
|
||||
{
|
||||
|
@ -175,13 +175,13 @@ bool MergeTreeDataPartChecksums::read_v3(ReadBuffer & in)
|
||||
|
||||
readBinary(name, in);
|
||||
readVarUInt(sum.file_size, in);
|
||||
readBinary(sum.file_hash, in);
|
||||
readPODBinary(sum.file_hash, in);
|
||||
readBinary(sum.is_compressed, in);
|
||||
|
||||
if (sum.is_compressed)
|
||||
{
|
||||
readVarUInt(sum.uncompressed_size, in);
|
||||
readBinary(sum.uncompressed_hash, in);
|
||||
readPODBinary(sum.uncompressed_hash, in);
|
||||
}
|
||||
|
||||
files.emplace(std::move(name), sum);
|
||||
@ -210,18 +210,18 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const
|
||||
|
||||
writeBinary(name, out);
|
||||
writeVarUInt(sum.file_size, out);
|
||||
writeBinary(sum.file_hash, out);
|
||||
writePODBinary(sum.file_hash, out);
|
||||
writeBinary(sum.is_compressed, out);
|
||||
|
||||
if (sum.is_compressed)
|
||||
{
|
||||
writeVarUInt(sum.uncompressed_size, out);
|
||||
writeBinary(sum.uncompressed_hash, out);
|
||||
writePODBinary(sum.uncompressed_hash, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MergeTreeDataPartChecksums::addFile(const String & file_name, size_t file_size, uint128 file_hash)
|
||||
void MergeTreeDataPartChecksums::addFile(const String & file_name, size_t file_size, MergeTreeDataPartChecksum::uint128 file_hash)
|
||||
{
|
||||
files[file_name] = Checksum(file_size, file_hash);
|
||||
}
|
||||
|
@ -16,6 +16,8 @@ namespace DB
|
||||
/// Checksum of one file.
|
||||
struct MergeTreeDataPartChecksum
|
||||
{
|
||||
using uint128 = CityHash64_v1_0_2::uint128;
|
||||
|
||||
size_t file_size {};
|
||||
uint128 file_hash {};
|
||||
|
||||
@ -44,7 +46,7 @@ struct MergeTreeDataPartChecksums
|
||||
using FileChecksums = std::map<String, Checksum>;
|
||||
FileChecksums files;
|
||||
|
||||
void addFile(const String & file_name, size_t file_size, uint128 file_hash);
|
||||
void addFile(const String & file_name, size_t file_size, Checksum::uint128 file_hash);
|
||||
|
||||
void add(MergeTreeDataPartChecksums && rhs_checksums);
|
||||
|
||||
|
@ -91,8 +91,8 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body
|
||||
throw Exception{"Fetching of part was cancelled", ErrorCodes::ABORTED};
|
||||
}
|
||||
|
||||
uint128 expected_hash;
|
||||
readBinary(expected_hash, body);
|
||||
MergeTreeDataPartChecksum::uint128 expected_hash;
|
||||
readPODBinary(expected_hash, body);
|
||||
|
||||
if (expected_hash != hashing_out.getHash())
|
||||
throw Exception{"Checksum mismatch for file " + absolute_part_path + file_name + " transferred from " + replica_path};
|
||||
@ -182,7 +182,7 @@ bool Client::send(const std::string & part_name, size_t shard_no,
|
||||
if (hashing_out.count() != size)
|
||||
throw Exception{"Unexpected size of file " + path, ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART};
|
||||
|
||||
writeBinary(hashing_out.getHash(), out);
|
||||
writePODBinary(hashing_out.getHash(), out);
|
||||
|
||||
if (file_name != "checksums.txt" &&
|
||||
file_name != "columns.txt")
|
||||
|
Loading…
Reference in New Issue
Block a user