mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Support getHexUIntLowercase() with CityHash_v1_0_2::uint128 parameter.
This commit is contained in:
parent
fb6243ec9d
commit
0cccba62cf
254
base/base/hex.h
254
base/base/hex.h
@ -4,22 +4,21 @@
|
||||
#include <cstring>
|
||||
#include "types.h"
|
||||
|
||||
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
|
||||
namespace CityHash_v1_0_2 { struct uint128; }
|
||||
|
||||
namespace wide
|
||||
{
|
||||
template <size_t Bits, typename Signed>
|
||||
class integer;
|
||||
}
|
||||
|
||||
namespace impl
|
||||
{
|
||||
/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
|
||||
constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF";
|
||||
constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef";
|
||||
|
||||
constexpr char hexDigitUppercase(unsigned char c)
|
||||
{
|
||||
return hex_digit_to_char_uppercase_table[c];
|
||||
}
|
||||
constexpr char hexDigitLowercase(unsigned char c)
|
||||
{
|
||||
return hex_digit_to_char_lowercase_table[c];
|
||||
}
|
||||
|
||||
/// Maps 0..255 to 00..FF or 00..ff correspondingly
|
||||
|
||||
/// Maps 0..255 to 00..FF or 00..ff correspondingly.
|
||||
constexpr inline std::string_view hex_byte_to_char_uppercase_table = //
|
||||
"000102030405060708090A0B0C0D0E0F"
|
||||
"101112131415161718191A1B1C1D1E1F"
|
||||
@ -56,16 +55,7 @@ constexpr inline std::string_view hex_byte_to_char_lowercase_table = //
|
||||
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
|
||||
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
|
||||
|
||||
inline void writeHexByteUppercase(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
|
||||
}
|
||||
|
||||
inline void writeHexByteLowercase(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
|
||||
}
|
||||
|
||||
/// Maps 0..255 to 00000000..11111111 correspondingly.
|
||||
constexpr inline std::string_view bin_byte_to_char_table = //
|
||||
"0000000000000001000000100000001100000100000001010000011000000111"
|
||||
"0000100000001001000010100000101100001100000011010000111000001111"
|
||||
@ -100,62 +90,7 @@ constexpr inline std::string_view bin_byte_to_char_table = //
|
||||
"1111000011110001111100101111001111110100111101011111011011110111"
|
||||
"1111100011111001111110101111101111111100111111011111111011111111";
|
||||
|
||||
inline void writeBinByte(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
|
||||
}
|
||||
|
||||
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table)
|
||||
{
|
||||
union
|
||||
{
|
||||
TUInt value;
|
||||
UInt8 uint8[sizeof(TUInt)];
|
||||
};
|
||||
|
||||
value = uint_;
|
||||
|
||||
for (size_t i = 0; i < sizeof(TUInt); ++i)
|
||||
{
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
|
||||
else
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntUppercase(TUInt uint_, char * out)
|
||||
{
|
||||
writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table);
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
inline void writeHexUIntLowercase(TUInt uint_, char * out)
|
||||
{
|
||||
writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table);
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
std::string getHexUIntUppercase(TUInt uint_)
|
||||
{
|
||||
std::string res(sizeof(TUInt) * 2, '\0');
|
||||
writeHexUIntUppercase(uint_, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
std::string getHexUIntLowercase(TUInt uint_)
|
||||
{
|
||||
std::string res(sizeof(TUInt) * 2, '\0');
|
||||
writeHexUIntLowercase(uint_, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value.
|
||||
|
||||
constexpr inline std::string_view hex_char_to_digit_table
|
||||
= {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
@ -175,41 +110,180 @@ constexpr inline std::string_view hex_char_to_digit_table
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
|
||||
256};
|
||||
|
||||
constexpr UInt8 unhex(char c)
|
||||
/// Converts a hex digit '0'..'f' or '0'..'F' to its value 0..15.
|
||||
constexpr UInt8 unhexDigit(char c)
|
||||
{
|
||||
return hex_char_to_digit_table[static_cast<UInt8>(c)];
|
||||
}
|
||||
|
||||
constexpr UInt8 unhex2(const char * data)
|
||||
/// Converts an unsigned integer in the native endian to hexadecimal representation and back. Used as a base class for HexConversion<T>.
|
||||
template <typename TUInt, typename = void>
|
||||
struct HexConversionUInt
|
||||
{
|
||||
return static_cast<UInt8>(unhex(data[0])) * 0x10 + static_cast<UInt8>(unhex(data[1]));
|
||||
static const constexpr size_t num_hex_digits = sizeof(TUInt) * 2;
|
||||
|
||||
static void hex(TUInt uint_, char * out, std::string_view table)
|
||||
{
|
||||
union
|
||||
{
|
||||
TUInt value;
|
||||
UInt8 uint8[sizeof(TUInt)];
|
||||
};
|
||||
|
||||
value = uint_;
|
||||
|
||||
for (size_t i = 0; i < sizeof(TUInt); ++i)
|
||||
{
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
|
||||
else
|
||||
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr UInt16 unhex4(const char * data)
|
||||
static TUInt unhex(const char * data)
|
||||
{
|
||||
return static_cast<UInt16>(unhex(data[0])) * 0x1000 + static_cast<UInt16>(unhex(data[1])) * 0x100
|
||||
+ static_cast<UInt16>(unhex(data[2])) * 0x10 + static_cast<UInt16>(unhex(data[3]));
|
||||
TUInt res;
|
||||
if constexpr (sizeof(TUInt) == 1)
|
||||
{
|
||||
res = static_cast<UInt8>(unhexDigit(data[0])) * 0x10 + static_cast<UInt8>(unhexDigit(data[1]));
|
||||
}
|
||||
|
||||
template <typename TUInt>
|
||||
constexpr TUInt unhexUInt(const char * data)
|
||||
else if constexpr (sizeof(TUInt) == 2)
|
||||
{
|
||||
TUInt res = 0;
|
||||
if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
|
||||
res = static_cast<UInt16>(unhexDigit(data[0])) * 0x1000 + static_cast<UInt16>(unhexDigit(data[1])) * 0x100
|
||||
+ static_cast<UInt16>(unhexDigit(data[2])) * 0x10 + static_cast<UInt16>(unhexDigit(data[3]));
|
||||
}
|
||||
else if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0))
|
||||
{
|
||||
res = 0;
|
||||
for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data)
|
||||
{
|
||||
res <<= 4;
|
||||
res += unhex(*data);
|
||||
res += unhexDigit(*data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res = 0;
|
||||
for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16)
|
||||
{
|
||||
res <<= 64;
|
||||
res += unhexUInt<UInt64>(data);
|
||||
res += HexConversionUInt<UInt64>::unhex(data);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
/// Helper template class to convert a value of any supported type to hexadecimal representation and back.
|
||||
template <typename T, typename = void>
|
||||
struct HexConversion;
|
||||
|
||||
template <typename TUInt>
|
||||
struct HexConversion<TUInt, std::enable_if_t<std::is_integral_v<TUInt>>> : public HexConversionUInt<TUInt> {};
|
||||
|
||||
template <size_t Bits, typename Signed>
|
||||
struct HexConversion<wide::integer<Bits, Signed>> : public HexConversionUInt<wide::integer<Bits, Signed>> {};
|
||||
|
||||
template <typename CityHashUInt128>
|
||||
struct HexConversion<CityHashUInt128, std::enable_if_t<std::is_same_v<CityHashUInt128, typename CityHash_v1_0_2::uint128>>>
|
||||
{
|
||||
static const constexpr size_t num_hex_digits = 32;
|
||||
|
||||
static void hex(const CityHashUInt128 & uint_, char * out, std::string_view table)
|
||||
{
|
||||
HexConversion<UInt64>::hex(uint_.high64, out, table);
|
||||
HexConversion<UInt64>::hex(uint_.low64, out + 16, table);
|
||||
}
|
||||
|
||||
static CityHashUInt128 unhex(const char * data)
|
||||
{
|
||||
CityHashUInt128 res;
|
||||
res.high64 = HexConversion<UInt64>::unhex(data);
|
||||
res.low64 = HexConversion<UInt64>::unhex(data + 16);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Produces a hexadecimal representation of an integer value with leading zeros (for checksums).
|
||||
/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128.
|
||||
template <typename T>
|
||||
void writeHexUIntUppercase(const T & value, char * out)
|
||||
{
|
||||
impl::HexConversion<T>::hex(value, out, impl::hex_byte_to_char_uppercase_table);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void writeHexUIntLowercase(const T & value, char * out)
|
||||
{
|
||||
impl::HexConversion<T>::hex(value, out, impl::hex_byte_to_char_lowercase_table);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::string getHexUIntUppercase(const T & value)
|
||||
{
|
||||
std::string res(impl::HexConversion<T>::num_hex_digits, '\0');
|
||||
writeHexUIntUppercase(value, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::string getHexUIntLowercase(const T & value)
|
||||
{
|
||||
std::string res(impl::HexConversion<T>::num_hex_digits, '\0');
|
||||
writeHexUIntLowercase(value, res.data());
|
||||
return res;
|
||||
}
|
||||
|
||||
constexpr char hexDigitUppercase(unsigned char c)
|
||||
{
|
||||
return impl::hex_digit_to_char_uppercase_table[c];
|
||||
}
|
||||
|
||||
constexpr char hexDigitLowercase(unsigned char c)
|
||||
{
|
||||
return impl::hex_digit_to_char_lowercase_table[c];
|
||||
}
|
||||
|
||||
inline void writeHexByteUppercase(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &impl::hex_byte_to_char_uppercase_table[static_cast<size_t>(byte) * 2], 2);
|
||||
}
|
||||
|
||||
inline void writeHexByteLowercase(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &impl::hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
|
||||
}
|
||||
|
||||
/// Converts a hex representation with leading zeros back to an integer value.
|
||||
/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128.
|
||||
template <typename T>
|
||||
constexpr T unhexUInt(const char * data)
|
||||
{
|
||||
return impl::HexConversion<T>::unhex(data);
|
||||
}
|
||||
|
||||
/// Converts a hexadecimal digit '0'..'f' or '0'..'F' to UInt8.
|
||||
constexpr UInt8 unhex(char c)
|
||||
{
|
||||
return impl::unhexDigit(c);
|
||||
}
|
||||
|
||||
/// Converts two hexadecimal digits to UInt8.
|
||||
constexpr UInt8 unhex2(const char * data)
|
||||
{
|
||||
return unhexUInt<UInt8>(data);
|
||||
}
|
||||
|
||||
/// Converts four hexadecimal digits to UInt16.
|
||||
constexpr UInt16 unhex4(const char * data)
|
||||
{
|
||||
return unhexUInt<UInt16>(data);
|
||||
}
|
||||
|
||||
/// Produces a binary representation of a single byte.
|
||||
inline void writeBinByte(UInt8 byte, void * out)
|
||||
{
|
||||
memcpy(out, &impl::bin_byte_to_char_table[static_cast<size_t>(byte) * 8], 8);
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ SipHash getHashOfLoadedBinary()
|
||||
std::string getHashOfLoadedBinaryHex()
|
||||
{
|
||||
SipHash hash = getHashOfLoadedBinary();
|
||||
std::array<UInt64, 2> checksum;
|
||||
UInt128 checksum;
|
||||
hash.get128(checksum);
|
||||
return getHexUIntUppercase(checksum);
|
||||
}
|
||||
|
@ -49,8 +49,8 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
|
||||
|
||||
/// TODO mess up of endianness in error message.
|
||||
message << "Checksum doesn't match: corrupted data."
|
||||
" Reference: " + getHexUIntLowercase(expected_checksum.high64) + getHexUIntLowercase(expected_checksum.low64)
|
||||
+ ". Actual: " + getHexUIntLowercase(calculated_checksum.high64) + getHexUIntLowercase(calculated_checksum.low64)
|
||||
" Reference: " + getHexUIntLowercase(expected_checksum)
|
||||
+ ". Actual: " + getHexUIntLowercase(calculated_checksum)
|
||||
+ ". Size of compressed block: " + toString(size);
|
||||
|
||||
const char * message_hardware_failure = "This is most likely due to hardware failure. "
|
||||
|
@ -39,9 +39,8 @@ DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFi
|
||||
if (expected_checksum != calculated_checksum)
|
||||
{
|
||||
throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH,
|
||||
"Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.",
|
||||
getHexUIntLowercase(expected_checksum.high64), getHexUIntLowercase(expected_checksum.low64),
|
||||
getHexUIntLowercase(calculated_checksum.high64), getHexUIntLowercase(calculated_checksum.low64));
|
||||
"Checksum of extra info doesn't match: corrupted data. Reference: {}. Actual: {}.",
|
||||
getHexUIntLowercase(expected_checksum), getHexUIntLowercase(calculated_checksum));
|
||||
}
|
||||
|
||||
/// Read the parts of the header.
|
||||
|
@ -307,19 +307,7 @@ static void updateHash(SipHash & hash, const std::string & data)
|
||||
/// Hash is the same as MinimalisticDataPartChecksums::hash_of_all_files
|
||||
String MergeTreeDataPartChecksums::getTotalChecksumHex() const
|
||||
{
|
||||
SipHash hash_of_all_files;
|
||||
|
||||
for (const auto & [name, checksum] : files)
|
||||
{
|
||||
updateHash(hash_of_all_files, name);
|
||||
hash_of_all_files.update(checksum.file_hash);
|
||||
}
|
||||
|
||||
UInt64 lo;
|
||||
UInt64 hi;
|
||||
hash_of_all_files.get128(lo, hi);
|
||||
|
||||
return getHexUIntUppercase(hi) + getHexUIntUppercase(lo);
|
||||
return getHexUIntUppercase(getTotalChecksumUInt128());
|
||||
}
|
||||
|
||||
MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTotalChecksumUInt128() const
|
||||
|
@ -250,8 +250,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
|
||||
ErrorCodes::CORRUPTED_DATA,
|
||||
"Checksums doesn't match in part {} for {}. Expected: {}. Found {}.",
|
||||
part->name, file_path,
|
||||
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
|
||||
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));
|
||||
getHexUIntUppercase(disk_checksum),
|
||||
getHexUIntUppercase(cache_checksums[i]));
|
||||
|
||||
disk_checksums.push_back(disk_checksum);
|
||||
continue;
|
||||
@ -287,8 +287,8 @@ std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWit
|
||||
ErrorCodes::CORRUPTED_DATA,
|
||||
"Checksums doesn't match in projection part {} {}. Expected: {}. Found {}.",
|
||||
part->name, proj_name,
|
||||
getHexUIntUppercase(disk_checksum.high64) + getHexUIntUppercase(disk_checksum.low64),
|
||||
getHexUIntUppercase(cache_checksums[i].high64) + getHexUIntUppercase(cache_checksums[i].low64));
|
||||
getHexUIntUppercase(disk_checksum),
|
||||
getHexUIntUppercase(cache_checksums[i]));
|
||||
disk_checksums.push_back(disk_checksum);
|
||||
}
|
||||
return results;
|
||||
|
@ -2891,8 +2891,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
|
||||
desired_checksums = MinimalisticDataPartChecksums::deserializeFrom(desired_checksums_str);
|
||||
}
|
||||
|
||||
const auto [lo, hi] = desired_checksums.hash_of_all_files;
|
||||
log_entry.part_checksum = getHexUIntUppercase(hi) + getHexUIntUppercase(lo);
|
||||
log_entry.part_checksum = getHexUIntUppercase(desired_checksums.hash_of_all_files);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -252,17 +252,17 @@ void StorageSystemParts::processNextStorage(
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
auto checksum = helper.hash_of_all_files;
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum));
|
||||
}
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
auto checksum = helper.hash_of_uncompressed_files;
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum));
|
||||
}
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
auto checksum = helper.uncompressed_hash_of_compressed_files;
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -221,17 +221,17 @@ void StorageSystemProjectionParts::processNextStorage(
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
auto checksum = helper.hash_of_all_files;
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum));
|
||||
}
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
auto checksum = helper.hash_of_uncompressed_files;
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum));
|
||||
}
|
||||
if (columns_mask[src_index++])
|
||||
{
|
||||
auto checksum = helper.uncompressed_hash_of_compressed_files;
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum.high64) + getHexUIntLowercase(checksum.low64));
|
||||
columns[res_index++]->insert(getHexUIntLowercase(checksum));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,7 @@ int main(int, char **)
|
||||
{
|
||||
auto flipped = flipBit(str, pos);
|
||||
auto checksum = CityHash_v1_0_2::CityHash128(flipped.data(), flipped.size());
|
||||
std::cout << getHexUIntLowercase(checksum.high64) << getHexUIntLowercase(checksum.low64) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
|
||||
std::cout << getHexUIntLowercase(checksum) << "\t" << pos / 8 << ", " << pos % 8 << "\n";
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user