From 604de2bfeb02c457f491de9fab8cb5f2cc564b13 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Jun 2017 04:24:05 +0300 Subject: [PATCH] Better [#DEVTOOLS-3381]. --- contrib/libcityhash/include/city.h | 14 ++++++-- contrib/libcityhash/include/citycrc.h | 2 +- contrib/libcityhash/src/city.cc | 27 +++++++-------- .../AggregateFunctionUniq.h | 4 +-- .../AggregateFunctionUniqUpTo.h | 2 +- .../src/AggregateFunctions/UniqVariadicHash.h | 8 ++--- dbms/src/Common/UInt128.h | 4 +-- dbms/src/Common/tests/hashes_test.cpp | 10 +++--- dbms/src/Core/StringRef.h | 4 +-- dbms/src/Functions/FunctionsHashing.h | 14 ++++---- dbms/src/IO/CompressedReadBufferBase.cpp | 4 +-- dbms/src/IO/CompressedWriteBuffer.cpp | 2 +- dbms/src/IO/HashingWriteBuffer.cpp | 33 ------------------- dbms/src/IO/HashingWriteBuffer.h | 12 +++---- dbms/src/IO/ReadHelpers.h | 17 +++++----- dbms/src/IO/WriteHelpers.h | 9 +++-- dbms/src/IO/tests/hashing_buffer.h | 8 ++--- dbms/src/IO/tests/hashing_read_buffer.cpp | 3 +- dbms/src/IO/tests/hashing_write_buffer.cpp | 2 +- .../Interpreters/tests/hash_map_string.cpp | 2 +- .../Interpreters/tests/hash_map_string_2.cpp | 2 +- .../Interpreters/tests/hash_map_string_3.cpp | 2 +- .../Storages/MergeTree/DataPartsExchange.cpp | 6 ++-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../Storages/MergeTree/MergeTreeDataPart.cpp | 10 +++--- .../Storages/MergeTree/MergeTreeDataPart.h | 4 ++- .../MergeTree/ShardedPartitionUploader.cpp | 6 ++-- 27 files changed, 92 insertions(+), 121 deletions(-) diff --git a/contrib/libcityhash/include/city.h index 92e03cf0c22..3e08a91f2a6 100644 --- a/contrib/libcityhash/include/city.h +++ b/contrib/libcityhash/include/city.h @@ -47,14 +47,22 @@ #include #include +/** This is a version of CityHash that predates v1.0.3 algorithm change. + * Why do we need exactly this version?
+ * Although hash values of CityHash are not recommended for storing persistently anywhere, + * it has already been used this way in ClickHouse: + * - for calculation of checksums of compressed chunks and for data parts; + * - this version of CityHash is exposed in cityHash64 function in ClickHouse SQL language; + * - and already used by many users for data ordering, sampling and sharding. + */ +namespace CityHash64_v1_0_2 +{ + typedef uint8_t uint8; typedef uint32_t uint32; typedef uint64_t uint64; typedef std::pair uint128; -/// This is a version of CityHash that predates v1.0.3 algorithm change. -namespace DB -{ inline uint64 Uint128Low64(const uint128& x) { return x.first; } inline uint64 Uint128High64(const uint128& x) { return x.second; } diff --git a/contrib/libcityhash/include/citycrc.h b/contrib/libcityhash/include/citycrc.h index 0d462015a63..d4155a0410d 100644 --- a/contrib/libcityhash/include/citycrc.h +++ b/contrib/libcityhash/include/citycrc.h @@ -30,7 +30,7 @@ #include -namespace DB +namespace CityHash64_v1_0_2 { // Hash function for a byte array. 
diff --git a/contrib/libcityhash/src/city.cc b/contrib/libcityhash/src/city.cc index 9ab8fe04592..bc7025dbf40 100644 --- a/contrib/libcityhash/src/city.cc +++ b/contrib/libcityhash/src/city.cc @@ -35,17 +35,6 @@ using namespace std; -static uint64 UNALIGNED_LOAD64(const char *p) { - uint64 result; - memcpy(&result, p, sizeof(result)); - return result; -} - -static uint32 UNALIGNED_LOAD32(const char *p) { - uint32 result; - memcpy(&result, p, sizeof(result)); - return result; -} #if !defined(WORDS_BIGENDIAN) @@ -82,9 +71,21 @@ static uint32 UNALIGNED_LOAD32(const char *p) { #endif #endif -namespace DB +namespace CityHash64_v1_0_2 { +static uint64 UNALIGNED_LOAD64(const char *p) { + uint64 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +static uint32 UNALIGNED_LOAD32(const char *p) { + uint32 result; + memcpy(&result, p, sizeof(result)); + return result; +} + static uint64 Fetch64(const char *p) { return uint64_in_expected_order(UNALIGNED_LOAD64(p)); } @@ -362,7 +363,7 @@ uint128 CityHash128(const char *s, size_t len) { #include #include -namespace DB +namespace CityHash64_v1_0_2 { // Requires len >= 240. 
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h index 51ee93024d1..c52981b554b 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniq.h @@ -266,7 +266,7 @@ struct OneAdder::value>::type * = nullptr) { StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); + data.set.insert(CityHash64_v1_0_2::CityHash64(value.data, value.size)); } }; @@ -290,7 +290,7 @@ struct OneAdder::value>::type * = nullptr) { StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); + data.set.insert(CityHash64_v1_0_2::CityHash64(value.data, value.size)); } }; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index b7946f5e0c7..83c58a111f7 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -107,7 +107,7 @@ struct AggregateFunctionUniqUpToData : AggregateFunctionUniqUpToData { StringRef value = (*column)->getDataAt(row_num); - hash = CityHash64(value.data, value.size); + hash = CityHash64_v1_0_2::CityHash64(value.data, value.size); ++column; } while (column < columns_end) { StringRef value = (*column)->getDataAt(row_num); - hash = Hash128to64(uint128(CityHash64(value.data, value.size), hash)); + hash = CityHash64_v1_0_2::Hash128to64(CityHash64_v1_0_2::uint128(CityHash64_v1_0_2::CityHash64(value.data, value.size), hash)); ++column; } @@ -68,14 +68,14 @@ struct UniqVariadicHash { StringRef value = column->get()->getDataAt(row_num); - hash = CityHash64(value.data, value.size); + hash = CityHash64_v1_0_2::CityHash64(value.data, value.size); ++column; } while (column < columns_end) { StringRef value = column->get()->getDataAt(row_num); - hash = Hash128to64(uint128(CityHash64(value.data, value.size), hash)); + hash = 
CityHash64_v1_0_2::Hash128to64(CityHash64_v1_0_2::uint128(CityHash64_v1_0_2::CityHash64(value.data, value.size), hash)); ++column; } diff --git a/dbms/src/Common/UInt128.h b/dbms/src/Common/UInt128.h index 2b46af6f4a3..9ea0f6f45ff 100644 --- a/dbms/src/Common/UInt128.h +++ b/dbms/src/Common/UInt128.h @@ -42,7 +42,7 @@ struct UInt128Hash { size_t operator()(UInt128 x) const { - return Hash128to64({x.first, x.second}); + return CityHash64_v1_0_2::Hash128to64({x.first, x.second}); } }; @@ -122,7 +122,7 @@ struct UInt256Hash size_t operator()(UInt256 x) const { /// NOTE suboptimal - return Hash128to64({Hash128to64({x.a, x.b}), Hash128to64({x.c, x.d})}); + return CityHash64_v1_0_2::Hash128to64({CityHash64_v1_0_2::Hash128to64({x.a, x.b}), CityHash64_v1_0_2::Hash128to64({x.c, x.d})}); } }; diff --git a/dbms/src/Common/tests/hashes_test.cpp b/dbms/src/Common/tests/hashes_test.cpp index c7ce284b253..0511d01cc35 100644 --- a/dbms/src/Common/tests/hashes_test.cpp +++ b/dbms/src/Common/tests/hashes_test.cpp @@ -48,12 +48,12 @@ int main(int argc, char ** argv) for (size_t i = 0; i < rows; ++i) { - *reinterpret_cast(&hashes[i * 16]) = DB::CityHash64(strings[i].data(), strings[i].size()); + *reinterpret_cast(&hashes[i * 16]) = CityHash64_v1_0_2::CityHash64(strings[i].data(), strings[i].size()); } watch.stop(); - UInt64 check = DB::CityHash64(&hashes[0], hashes.size()); + UInt64 check = CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "CityHash64 (check = " << check << ")" @@ -78,7 +78,7 @@ int main(int argc, char ** argv) watch.stop(); - UInt64 check = DB::CityHash64(&hashes[0], hashes.size()); + UInt64 check = CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "SipHash (check = " << check << ")" @@ -99,7 +99,7 @@ int main(int argc, char ** argv) watch.stop(); - UInt64 check = DB::CityHash64(&hashes[0], hashes.size()); + UInt64 check = 
CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "SipHash, stream (check = " << check << ")" @@ -121,7 +121,7 @@ int main(int argc, char ** argv) watch.stop(); - UInt64 check = DB::CityHash64(&hashes[0], hashes.size()); + UInt64 check = CityHash64_v1_0_2::CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "MD5 (check = " << check << ")" diff --git a/dbms/src/Core/StringRef.h b/dbms/src/Core/StringRef.h index 8f975120665..31ea25d8e62 100644 --- a/dbms/src/Core/StringRef.h +++ b/dbms/src/Core/StringRef.h @@ -165,7 +165,7 @@ struct StringRefHash64 { size_t operator() (StringRef x) const { - return DB::CityHash64(x.data, x.size); + return CityHash64_v1_0_2::CityHash64(x.data, x.size); } }; @@ -177,7 +177,7 @@ struct StringRefHash64 inline UInt64 hashLen16(UInt64 u, UInt64 v) { - return DB::Hash128to64(uint128(u, v)); + return CityHash64_v1_0_2::Hash128to64(CityHash64_v1_0_2::uint128(u, v)); } inline UInt64 shiftMix(UInt64 val) diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index bd207ef7a7b..98948764b88 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -638,9 +638,9 @@ struct URLHashImpl { /// do not take last slash, '?' or '#' character into account if (size > 0 && (data[size - 1] == '/' || data[size - 1] == '?' 
|| data[size - 1] == '#')) - return CityHash64(data, size - 1); + return CityHash64_v1_0_2::CityHash64(data, size - 1); - return CityHash64(data, size); + return CityHash64_v1_0_2::CityHash64(data, size); } }; @@ -844,10 +844,10 @@ struct NameIntHash64 { static constexpr auto name = "intHash64"; }; struct ImplCityHash64 { static constexpr auto name = "cityHash64"; - using uint128_t = uint128; + using uint128_t = CityHash64_v1_0_2::uint128; - static auto Hash128to64(const uint128_t & x) { return DB::Hash128to64(x); } - static auto Hash64(const char * const s, const std::size_t len) { return CityHash64(s, len); } + static auto Hash128to64(const uint128_t & x) { return CityHash64_v1_0_2::Hash128to64(x); } + static auto Hash64(const char * const s, const std::size_t len) { return CityHash64_v1_0_2::CityHash64(s, len); } }; struct ImplFarmHash64 @@ -862,9 +862,9 @@ struct ImplFarmHash64 struct ImplMetroHash64 { static constexpr auto name = "metroHash64"; - using uint128_t = uint128; + using uint128_t = CityHash64_v1_0_2::uint128; - static auto Hash128to64(const uint128_t & x) { return DB::Hash128to64(x); } + static auto Hash128to64(const uint128_t & x) { return CityHash64_v1_0_2::Hash128to64(x); } static auto Hash64(const char * const s, const std::size_t len) { union { diff --git a/dbms/src/IO/CompressedReadBufferBase.cpp b/dbms/src/IO/CompressedReadBufferBase.cpp index d00de4f8bc4..338931a7ec9 100644 --- a/dbms/src/IO/CompressedReadBufferBase.cpp +++ b/dbms/src/IO/CompressedReadBufferBase.cpp @@ -42,7 +42,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, if (compressed_in->eof()) return 0; - uint128 checksum; + CityHash64_v1_0_2::uint128 checksum; compressed_in->readStrict(reinterpret_cast(&checksum), sizeof(checksum)); own_compressed_buffer.resize(COMPRESSED_BLOCK_HEADER_SIZE); @@ -80,7 +80,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, 
compressed_in->readStrict(&compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); } - if (!disable_checksum && checksum != CityHash128(&compressed_buffer[0], size_compressed)) + if (!disable_checksum && checksum != CityHash64_v1_0_2::CityHash128(&compressed_buffer[0], size_compressed)) throw Exception("Checksum doesn't match: corrupted data.", ErrorCodes::CHECKSUM_DOESNT_MATCH); return size_compressed + sizeof(checksum); diff --git a/dbms/src/IO/CompressedWriteBuffer.cpp b/dbms/src/IO/CompressedWriteBuffer.cpp index e1f8d426354..c0bdc7faf3e 100644 --- a/dbms/src/IO/CompressedWriteBuffer.cpp +++ b/dbms/src/IO/CompressedWriteBuffer.cpp @@ -102,7 +102,7 @@ void CompressedWriteBuffer::nextImpl() throw Exception("Unknown compression method", ErrorCodes::UNKNOWN_COMPRESSION_METHOD); } - uint128 checksum = CityHash128(compressed_buffer_ptr, compressed_size); + CityHash64_v1_0_2::uint128 checksum = CityHash64_v1_0_2::CityHash128(compressed_buffer_ptr, compressed_size); out.write(reinterpret_cast(&checksum), sizeof(checksum)); out.write(compressed_buffer_ptr, compressed_size); diff --git a/dbms/src/IO/HashingWriteBuffer.cpp b/dbms/src/IO/HashingWriteBuffer.cpp index c317ef3787c..be5461578ba 100644 --- a/dbms/src/IO/HashingWriteBuffer.cpp +++ b/dbms/src/IO/HashingWriteBuffer.cpp @@ -52,36 +52,3 @@ template class IHashingBuffer; template class IHashingBuffer; } - -/// UInt64 is 39 characters in 10th number system -static const size_t UINT64_DECIMAL_SIZE = 39; -std::string uint128ToString(uint128 data) -{ - std::stringstream ss; - ss << std::setw(UINT64_DECIMAL_SIZE) << std::setfill('0') << data.first << std::setw(UINT64_DECIMAL_SIZE) << std::setfill('0') << data.second; - return ss.str(); -} - -std::ostream & operator<<(std::ostream & os, const uint128 & data) -{ - os << uint128ToString(data); - return os; -} - -std::istream & operator>>(std::istream & is, uint128 & data) -{ - std::vector buffer(UINT64_DECIMAL_SIZE); - 
is.read(buffer.data(), UINT64_DECIMAL_SIZE); - data.first = DB::parse(buffer.data(), UINT64_DECIMAL_SIZE); - - if (!is) - throw DB::Exception(std::string("Fail to parse uint128 from ") + buffer.data()); - - is.read(buffer.data(), UINT64_DECIMAL_SIZE); - data.first = DB::parse(buffer.data(), UINT64_DECIMAL_SIZE); - - if (!is) - throw DB::Exception(std::string("Fail to parse uint128 from ") + buffer.data()); - - return is; -} diff --git a/dbms/src/IO/HashingWriteBuffer.h b/dbms/src/IO/HashingWriteBuffer.h index d35efe64bf4..2dd717c2d1e 100644 --- a/dbms/src/IO/HashingWriteBuffer.h +++ b/dbms/src/IO/HashingWriteBuffer.h @@ -15,6 +15,8 @@ template class IHashingBuffer : public BufferWithOwnMemory { public: + using uint128 = CityHash64_v1_0_2::uint128; + IHashingBuffer(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) : BufferWithOwnMemory(block_size_), block_pos(0), block_size(block_size_), state(0, 0) { @@ -23,14 +25,14 @@ public: uint128 getHash() { if (block_pos) - return CityHash128WithSeed(&BufferWithOwnMemory::memory[0], block_pos, state); + return CityHash64_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory::memory[0], block_pos, state); else return state; } void append(DB::BufferBase::Position data) { - state = CityHash128WithSeed(data, block_size, state); + state = CityHash64_v1_0_2::CityHash128WithSeed(data, block_size, state); } /// computation of the hash depends on the partitioning of blocks @@ -82,9 +84,3 @@ public: } }; } - - -std::string uint128ToString(uint128 data); - -std::ostream & operator<<(std::ostream & os, const uint128 & data); -std::istream & operator>>(std::istream & is, uint128 & data); diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index 61e60079f8a..f0ffc159150 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -657,9 +657,8 @@ template inline typename std::enable_if::value, void>::type readBinary(T & x, ReadBuffer & buf) { readPODBinary(x, buf); } -inline void readBinary(String & x, 
ReadBuffer & buf) { readStringBinary(x, buf); } -inline void readBinary(uint128 & x, ReadBuffer & buf) { readPODBinary(x, buf); } -inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); } +inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); } +inline void readBinary(LocalDate & x, ReadBuffer & buf) { readPODBinary(x, buf); } inline void readBinary(LocalDateTime & x, ReadBuffer & buf) { readPODBinary(x, buf); } @@ -672,9 +671,9 @@ template inline typename std::enable_if::value, void>::type readText(T & x, ReadBuffer & buf) { readFloatText(x, buf); } -inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); } -inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } -inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } +inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); } +inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } +inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } @@ -684,7 +683,7 @@ template inline typename std::enable_if::value, void>::type readQuoted(T & x, ReadBuffer & buf) { readText(x, buf); } -inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); } +inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); } inline void readQuoted(LocalDate & x, ReadBuffer & buf) { @@ -706,7 +705,7 @@ template inline typename std::enable_if::value, void>::type readDoubleQuoted(T & x, ReadBuffer & buf) { readText(x, buf); } -inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); } +inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); } inline void readDoubleQuoted(LocalDate & x, ReadBuffer & buf) { @@ -746,7 +745,7 @@ inline typename 
std::enable_if::value, void>::type readCSV(T & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(String & x, ReadBuffer & buf, const char delimiter = ',') { readCSVString(x, buf, delimiter); } -inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); } diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index 73e9ad1da5c..705817e1329 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -581,11 +581,10 @@ template inline typename std::enable_if::value, void>::type writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); } -inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); } -inline void writeBinary(const uint128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); } +inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); } +inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); } /// Methods for outputting the value in text form for a tab-separated format. 
diff --git a/dbms/src/IO/tests/hashing_buffer.h b/dbms/src/IO/tests/hashing_buffer.h index 1bc8930d7ab..34986a6e449 100644 --- a/dbms/src/IO/tests/hashing_buffer.h +++ b/dbms/src/IO/tests/hashing_buffer.h @@ -4,19 +4,19 @@ #define FAIL(msg) { std::cout << msg; exit(1); } -uint128 referenceHash(const char * data, size_t len) +CityHash64_v1_0_2::uint128 referenceHash(const char * data, size_t len) { const size_t block_size = DBMS_DEFAULT_HASHING_BLOCK_SIZE; - uint128 state(0, 0); + CityHash64_v1_0_2::uint128 state(0, 0); size_t pos; for (pos = 0; pos + block_size <= len; pos += block_size) { - state = DB::CityHash128WithSeed(data + pos, block_size, state); + state = CityHash64_v1_0_2::CityHash128WithSeed(data + pos, block_size, state); } if (pos < len) - state = DB::CityHash128WithSeed(data + pos, len - pos, state); + state = CityHash64_v1_0_2::CityHash128WithSeed(data + pos, len - pos, state); return state; } diff --git a/dbms/src/IO/tests/hashing_read_buffer.cpp b/dbms/src/IO/tests/hashing_read_buffer.cpp index 5d6dbd0fa39..627a96041f0 100644 --- a/dbms/src/IO/tests/hashing_read_buffer.cpp +++ b/dbms/src/IO/tests/hashing_read_buffer.cpp @@ -12,7 +12,7 @@ void test(size_t data_size) for (size_t i = 0; i < data_size; ++i) data[i] = rand() & 255; - uint128 reference = referenceHash(data, data_size); + CityHash64_v1_0_2::uint128 reference = referenceHash(data, data_size); std::vector block_sizes = {56, 128, 513, 2048, 3055, 4097, 4096}; for (size_t read_buffer_block_size : block_sizes) @@ -52,7 +52,6 @@ void test(size_t data_size) if (buf.getHash() != reference) { - //std::cout << uint128ToString(buf.getHash()) << " " << uint128ToString(reference) << std::endl; FAIL("failed on data size " << data_size << " reading by blocks of size " << read_buffer_block_size); } if (buf.getHash() != out.getHash()) diff --git a/dbms/src/IO/tests/hashing_write_buffer.cpp b/dbms/src/IO/tests/hashing_write_buffer.cpp index 92c6ec70934..887975fef54 100644 --- 
a/dbms/src/IO/tests/hashing_write_buffer.cpp +++ b/dbms/src/IO/tests/hashing_write_buffer.cpp @@ -11,7 +11,7 @@ void test(size_t data_size) for (size_t i = 0; i < data_size; ++i) data[i] = rand() & 255; - uint128 reference = referenceHash(data, data_size); + CityHash64_v1_0_2::uint128 reference = referenceHash(data, data_size); DB::WriteBufferFromFile sink("/dev/null", 1 << 16); diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp index f2a5325de57..acbcd69566f 100644 --- a/dbms/src/Interpreters/tests/hash_map_string.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string.cpp @@ -76,7 +76,7 @@ struct DefaultHash { size_t operator() (CompactStringRef x) const { - return DB::CityHash64(x.data(), x.size); + return CityHash64_v1_0_2::CityHash64(x.data(), x.size); } }; diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp index 5db0b04d588..b3b7eecf8b3 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp @@ -65,7 +65,7 @@ struct DefaultHash \ { \ size_t operator() (STRUCT x) const \ { \ - return DB::CityHash64(x.data, x.size); \ + return CityHash64_v1_0_2::CityHash64(x.data, x.size); \ } \ }; diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index 3f2d3d13534..0580775c8a4 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -68,7 +68,7 @@ struct DefaultHash \ { \ size_t operator() (STRUCT x) const \ { \ - return DB::CityHash64(x.data, x.size); \ + return CityHash64_v1_0_2::CityHash64(x.data, x.size); \ } \ }; diff --git a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp index 21ebfdb36d1..148873e8e86 100644 --- a/dbms/src/Storages/MergeTree/DataPartsExchange.cpp +++ 
b/dbms/src/Storages/MergeTree/DataPartsExchange.cpp @@ -126,7 +126,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body if (hashing_out.count() != size) throw Exception("Unexpected size of file " + path, ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); - writeBinary(hashing_out.getHash(), out); + writePODBinary(hashing_out.getHash(), out); if (file_name != "checksums.txt" && file_name != "columns.txt") @@ -250,8 +250,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPartImpl( throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } - uint128 expected_hash; - readBinary(expected_hash, in); + MergeTreeDataPartChecksum::uint128 expected_hash; + readPODBinary(expected_hash, in); if (expected_hash != hashing_out.getHash()) throw Exception("Checksum mismatch for file " + absolute_part_path + file_name + " transferred from " + replica_path); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index af1b8256e7f..2a0f9ea9482 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -959,7 +959,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( /// Update primary key if needed. 
size_t new_primary_key_file_size{}; - uint128 new_primary_key_hash{}; + MergeTreeDataPartChecksum::uint128 new_primary_key_hash{}; if (new_primary_key.get() != primary_expr_ast.get()) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index d4369a5c74d..9da738f2389 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -175,13 +175,13 @@ bool MergeTreeDataPartChecksums::read_v3(ReadBuffer & in) readBinary(name, in); readVarUInt(sum.file_size, in); - readBinary(sum.file_hash, in); + readPODBinary(sum.file_hash, in); readBinary(sum.is_compressed, in); if (sum.is_compressed) { readVarUInt(sum.uncompressed_size, in); - readBinary(sum.uncompressed_hash, in); + readPODBinary(sum.uncompressed_hash, in); } files.emplace(std::move(name), sum); @@ -210,18 +210,18 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const writeBinary(name, out); writeVarUInt(sum.file_size, out); - writeBinary(sum.file_hash, out); + writePODBinary(sum.file_hash, out); writeBinary(sum.is_compressed, out); if (sum.is_compressed) { writeVarUInt(sum.uncompressed_size, out); - writeBinary(sum.uncompressed_hash, out); + writePODBinary(sum.uncompressed_hash, out); } } } -void MergeTreeDataPartChecksums::addFile(const String & file_name, size_t file_size, uint128 file_hash) +void MergeTreeDataPartChecksums::addFile(const String & file_name, size_t file_size, MergeTreeDataPartChecksum::uint128 file_hash) { files[file_name] = Checksum(file_size, file_hash); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index efe9c1de3c8..5a865f7878f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -16,6 +16,8 @@ namespace DB /// Checksum of one file. 
struct MergeTreeDataPartChecksum { + using uint128 = CityHash64_v1_0_2::uint128; + size_t file_size {}; uint128 file_hash {}; @@ -44,7 +46,7 @@ struct MergeTreeDataPartChecksums using FileChecksums = std::map; FileChecksums files; - void addFile(const String & file_name, size_t file_size, uint128 file_hash); + void addFile(const String & file_name, size_t file_size, Checksum::uint128 file_hash); void add(MergeTreeDataPartChecksums && rhs_checksums); diff --git a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp index a2b9061d1a1..1be8dcb3faa 100644 --- a/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp +++ b/dbms/src/Storages/MergeTree/ShardedPartitionUploader.cpp @@ -91,8 +91,8 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & body throw Exception{"Fetching of part was cancelled", ErrorCodes::ABORTED}; } - uint128 expected_hash; - readBinary(expected_hash, body); + MergeTreeDataPartChecksum::uint128 expected_hash; + readPODBinary(expected_hash, body); if (expected_hash != hashing_out.getHash()) throw Exception{"Checksum mismatch for file " + absolute_part_path + file_name + " transferred from " + replica_path}; @@ -182,7 +182,7 @@ bool Client::send(const std::string & part_name, size_t shard_no, if (hashing_out.count() != size) throw Exception{"Unexpected size of file " + path, ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART}; - writeBinary(hashing_out.getHash(), out); + writePODBinary(hashing_out.getHash(), out); if (file_name != "checksums.txt" && file_name != "columns.txt")