Merge pull request #51040 from vitlibar/reorder-part-checksum-halves

Show halves of checksums in correct order
This commit is contained in:
Vitaly Baranov 2023-06-29 12:04:51 +02:00 committed by GitHub
commit fb6243ec9d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 66 additions and 34 deletions

View File

@ -27,6 +27,8 @@ using FromDoubleIntermediateType = long double;
using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended;
#endif
namespace CityHash_v1_0_2 { struct uint128; }
namespace wide
{
@ -281,6 +283,17 @@ struct integer<Bits, Signed>::_impl
}
}
template <typename CityHashUInt128 = CityHash_v1_0_2::uint128>
constexpr static void wide_integer_from_cityhash_uint128(integer<Bits, Signed> & self, const CityHashUInt128 & value) noexcept
{
static_assert(sizeof(item_count) >= 2);
if constexpr (std::endian::native == std::endian::little)
wide_integer_from_tuple_like(self, std::make_pair(value.low64, value.high64));
else
wide_integer_from_tuple_like(self, std::make_pair(value.high64, value.low64));
}
/**
* N.B. t is constructed from double, so max(t) = max(double) ~ 2^310
* the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them.
@ -1036,6 +1049,8 @@ constexpr integer<Bits, Signed>::integer(T rhs) noexcept
_impl::wide_integer_from_wide_integer(*this, rhs);
else if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, rhs);
else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>)
_impl::wide_integer_from_cityhash_uint128(*this, rhs);
else
_impl::wide_integer_from_builtin(*this, rhs);
}
@ -1051,6 +1066,8 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
_impl::wide_integer_from_wide_integer(*this, *il.begin());
else if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, *il.begin());
else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>)
_impl::wide_integer_from_cityhash_uint128(*this, *il.begin());
else
_impl::wide_integer_from_builtin(*this, *il.begin());
}
@ -1088,6 +1105,8 @@ constexpr integer<Bits, Signed> & integer<Bits, Signed>::operator=(T rhs) noexce
{
if constexpr (IsTupleLike<T>::value)
_impl::wide_integer_from_tuple_like(*this, rhs);
else if constexpr (std::is_same_v<std::remove_cvref_t<T>, CityHash_v1_0_2::uint128>)
_impl::wide_integer_from_cityhash_uint128(*this, rhs);
else
_impl::wide_integer_from_builtin(*this, rhs);
return *this;

View File

@ -61,11 +61,24 @@ namespace CityHash_v1_0_2
typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef std::pair<uint64, uint64> uint128;
/// Represent an unsigned integer of 128 bits as it's used in CityHash.
/// Originally CityHash used `std::pair<uint64, uint64>` instead of this struct,
/// however the members `first` and `second` could be easily confused so they were renamed to `low64` and `high64`:
/// `first` -> `low64`, `second` -> `high64`.
struct uint128
{
uint64 low64 = 0;
uint64 high64 = 0;
inline uint64 Uint128Low64(const uint128& x) { return x.first; }
inline uint64 Uint128High64(const uint128& x) { return x.second; }
uint128() = default;
uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {}
friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); }
friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); }
};
inline uint64 Uint128Low64(const uint128 & x) { return x.low64; }
inline uint64 Uint128High64(const uint128 & x) { return x.high64; }
// Hash function for a byte array.
uint64 CityHash64(const char *buf, size_t len);

View File

@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
/// TODO mess up of endianness in error message.
message << "Checksum doesn't match: corrupted data."
" Reference: " + getHexUIntLowercase(expected_checksum.first) + getHexUIntLowercase(expected_checksum.second)
+ ". Actual: " + getHexUIntLowercase(calculated_checksum.first) + getHexUIntLowercase(calculated_checksum.second)
" Reference: " + getHexUIntLowercase(expected_checksum.high64) + getHexUIntLowercase(expected_checksum.low64)
+ ". Actual: " + getHexUIntLowercase(calculated_checksum.high64) + getHexUIntLowercase(calculated_checksum.low64)
+ ". Size of compressed block: " + toString(size);
const char * message_hardware_failure = "This is most likely due to hardware failure. "
@ -95,8 +95,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
}
/// Check if the difference caused by single bit flip in stored checksum.
size_t difference = std::popcount(expected_checksum.first ^ calculated_checksum.first)
+ std::popcount(expected_checksum.second ^ calculated_checksum.second);
size_t difference = std::popcount(expected_checksum.low64 ^ calculated_checksum.low64)
+ std::popcount(expected_checksum.high64 ^ calculated_checksum.high64);
if (difference == 1)
{
@ -194,8 +194,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
{
Checksum checksum;
ReadBufferFromMemory checksum_in(own_compressed_buffer.data(), sizeof(checksum));
readBinaryLittleEndian(checksum.first, checksum_in);
readBinaryLittleEndian(checksum.second, checksum_in);
readBinaryLittleEndian(checksum.low64, checksum_in);
readBinaryLittleEndian(checksum.high64, checksum_in);
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}
@ -238,8 +238,8 @@ size_t CompressedReadBufferBase::readCompressedDataBlockForAsynchronous(size_t &
{
Checksum checksum;
ReadBufferFromMemory checksum_in(own_compressed_buffer.data(), sizeof(checksum));
readBinaryLittleEndian(checksum.first, checksum_in);
readBinaryLittleEndian(checksum.second, checksum_in);
readBinaryLittleEndian(checksum.low64, checksum_in);
readBinaryLittleEndian(checksum.high64, checksum_in);
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}

View File

@ -38,8 +38,8 @@ void CompressedWriteBuffer::nextImpl()
CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(out_compressed_ptr, compressed_size);
writeBinaryLittleEndian(checksum.first, out);
writeBinaryLittleEndian(checksum.second, out);
writeBinaryLittleEndian(checksum.low64, out);
writeBinaryLittleEndian(checksum.high64, out);
out.position() += compressed_size;
}
@ -50,8 +50,8 @@ void CompressedWriteBuffer::nextImpl()
CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size);
writeBinaryLittleEndian(checksum.first, out);
writeBinaryLittleEndian(checksum.second, out);
writeBinaryLittleEndian(checksum.low64, out);
writeBinaryLittleEndian(checksum.high64, out);
out.write(compressed_buffer.data(), compressed_size);
}

View File

@ -40,8 +40,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi
{
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH,
"Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.",
getHexUIntLowercase(expected_checksum.first), getHexUIntLowercase(expected_checksum.second),
getHexUIntLowercase(calculated_checksum.first), getHexUIntLowercase(calculated_checksum.second));
getHexUIntLowercase(expected_checksum.high64), getHexUIntLowercase(expected_checksum.low64),
getHexUIntLowercase(calculated_checksum.high64), getHexUIntLowercase(calculated_checksum.low64));
}
/// Read the parts of the header.

View File

@ -400,7 +400,7 @@ void DataPartStorageOnDiskBase::backup(
if (it != checksums.files.end())
{
file_size = it->second.file_size;
file_hash = {it->second.file_hash.first, it->second.file_hash.second};
file_hash = it->second.file_hash;
}
BackupEntryPtr backup_entry = std::make_unique<BackupEntryFromImmutableFile>(disk, filepath_on_disk, copy_encrypted, file_size, file_hash);

View File

@ -154,9 +154,9 @@ bool MergeTreeDataPartChecksums::readV2(ReadBuffer & in)
assertString("\n\tsize: ", in);
readText(sum.file_size, in);
assertString("\n\thash: ", in);
readText(sum.file_hash.first, in);
readText(sum.file_hash.low64, in);
assertString(" ", in);
readText(sum.file_hash.second, in);
readText(sum.file_hash.high64, in);
assertString("\n\tcompressed: ", in);
readText(sum.is_compressed, in);
if (sum.is_compressed)
@ -164,9 +164,9 @@ bool MergeTreeDataPartChecksums::readV2(ReadBuffer & in)
assertString("\n\tuncompressed size: ", in);
readText(sum.uncompressed_size, in);
assertString("\n\tuncompressed hash: ", in);
readText(sum.uncompressed_hash.first, in);
readText(sum.uncompressed_hash.low64, in);
assertString(" ", in);
readText(sum.uncompressed_hash.second, in);
readText(sum.uncompressed_hash.high64, in);
}
assertChar('\n', in);

View File

@ -250,8 +250,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
ErrorCodes::CORRUPTED_DATA,
"Checksums doesn't match in part {} for {}. Expected: {}. Found {}.",
part->name, file_path,
getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second),
getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second));
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));
disk_checksums.push_back(disk_checksum);
continue;
@ -287,8 +287,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
ErrorCodes::CORRUPTED_DATA,
"Checksums doesn't match in projection part {} {}. Expected: {}. Found {}.",
part->name, proj_name,
getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second),
getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second));
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));
disk_checksums.push_back(disk_checksum);
}
return results;

View File

@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage(
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_all_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_uncompressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.uncompressed_hash_of_compressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
}

View File

@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage(
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_all_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.hash_of_uncompressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
if (columns_mask[src_index++])
{
auto checksum = helper.uncompressed_hash_of_compressed_files;
columns[res_index++]->insert(getHexUIntLowercase(checksum.first) + getHexUIntLowercase(checksum.second));
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
}
}

View File

@ -1 +1 @@
20000101_1_1_0 test_00961 b5fce9c4ef1ca42ce4ed027389c208d2 fc3b062b646cd23d4c23d7f5920f89ae da96ff1e527a8a1f908ddf2b1d0af239
20000101_1_1_0 test_00961 e4ed027389c208d2b5fce9c4ef1ca42c 4c23d7f5920f89aefc3b062b646cd23d 908ddf2b1d0af239da96ff1e527a8a1f

View File

@ -45,7 +45,7 @@ int main(int, char **)
{
auto flipped = flipBit(str, pos);
auto checksum = CityHash_v1_0_2::CityHash128(flipped.data(), flipped.size());
std::cout << getHexUIntLowercase(checksum.first) << getHexUIntLowercase(checksum.second) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
std::cout << getHexUIntLowercase(checksum.high64) << getHexUIntLowercase(checksum.low64) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
}
return 0;