mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 18:12:02 +00:00
Merge pull request #52329 from ClibMouse/feature/mergetree-checksum-on-big-endian
Implement endianness-indepedent support for MergeTree checksums
This commit is contained in:
commit
300399d1f5
@ -3,23 +3,25 @@
|
||||
#include <base/Decimal_fwd.h>
|
||||
#include <base/extended_types.h>
|
||||
|
||||
#include <city.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <std::endian endian, typename T>
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native, typename T>
|
||||
requires std::is_integral_v<T>
|
||||
inline void transformEndianness(T & value)
|
||||
{
|
||||
if constexpr (endian != std::endian::native)
|
||||
if constexpr (ToEndian != FromEndian)
|
||||
value = std::byteswap(value);
|
||||
}
|
||||
|
||||
template <std::endian endian, typename T>
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native, typename T>
|
||||
requires is_big_int_v<T>
|
||||
inline void transformEndianness(T & x)
|
||||
{
|
||||
if constexpr (std::endian::native != endian)
|
||||
if constexpr (ToEndian != FromEndian)
|
||||
{
|
||||
auto & items = x.items;
|
||||
std::transform(std::begin(items), std::end(items), std::begin(items), [](auto & item) { return std::byteswap(item); });
|
||||
@ -27,42 +29,49 @@ inline void transformEndianness(T & x)
|
||||
}
|
||||
}
|
||||
|
||||
template <std::endian endian, typename T>
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native, typename T>
|
||||
requires is_decimal<T>
|
||||
inline void transformEndianness(T & x)
|
||||
{
|
||||
transformEndianness<endian>(x.value);
|
||||
transformEndianness<ToEndian, FromEndian>(x.value);
|
||||
}
|
||||
|
||||
template <std::endian endian, typename T>
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native, typename T>
|
||||
requires std::is_floating_point_v<T>
|
||||
inline void transformEndianness(T & value)
|
||||
{
|
||||
if constexpr (std::endian::native != endian)
|
||||
if constexpr (ToEndian != FromEndian)
|
||||
{
|
||||
auto * start = reinterpret_cast<std::byte *>(&value);
|
||||
std::reverse(start, start + sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
template <std::endian endian, typename T>
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native, typename T>
|
||||
requires std::is_scoped_enum_v<T>
|
||||
inline void transformEndianness(T & x)
|
||||
{
|
||||
using UnderlyingType = std::underlying_type_t<T>;
|
||||
transformEndianness<endian>(reinterpret_cast<UnderlyingType &>(x));
|
||||
transformEndianness<ToEndian, FromEndian>(reinterpret_cast<UnderlyingType &>(x));
|
||||
}
|
||||
|
||||
template <std::endian endian, typename A, typename B>
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native, typename A, typename B>
|
||||
inline void transformEndianness(std::pair<A, B> & pair)
|
||||
{
|
||||
transformEndianness<endian>(pair.first);
|
||||
transformEndianness<endian>(pair.second);
|
||||
transformEndianness<ToEndian, FromEndian>(pair.first);
|
||||
transformEndianness<ToEndian, FromEndian>(pair.second);
|
||||
}
|
||||
|
||||
template <std::endian endian, typename T, typename Tag>
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native, typename T, typename Tag>
|
||||
inline void transformEndianness(StrongTypedef<T, Tag> & x)
|
||||
{
|
||||
transformEndianness<endian>(x.toUnderType());
|
||||
transformEndianness<ToEndian, FromEndian>(x.toUnderType());
|
||||
}
|
||||
|
||||
template <std::endian ToEndian, std::endian FromEndian = std::endian::native>
|
||||
inline void transformEndianness(CityHash_v1_0_2::uint128 & x)
|
||||
{
|
||||
transformEndianness<ToEndian, FromEndian>(x.low64);
|
||||
transformEndianness<ToEndian, FromEndian>(x.high64);
|
||||
}
|
||||
}
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include <Formats/ProtobufReader.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
#include <ranges>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -135,13 +137,25 @@ template <typename T>
|
||||
void SerializationNumber<T>::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
|
||||
{
|
||||
const typename ColumnVector<T>::Container & x = typeid_cast<const ColumnVector<T> &>(column).getData();
|
||||
|
||||
size_t size = x.size();
|
||||
|
||||
if (limit == 0 || offset + limit > size)
|
||||
if (const size_t size = x.size(); limit == 0 || offset + limit > size)
|
||||
limit = size - offset;
|
||||
|
||||
if (limit)
|
||||
if (limit == 0)
|
||||
return;
|
||||
|
||||
if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2)
|
||||
{
|
||||
static constexpr auto to_little_endian = [](auto i)
|
||||
{
|
||||
transformEndianness<std::endian::little>(i);
|
||||
return i;
|
||||
};
|
||||
|
||||
std::ranges::for_each(
|
||||
x | std::views::drop(offset) | std::views::take(limit) | std::views::transform(to_little_endian),
|
||||
[&ostr](const auto & i) { ostr.write(reinterpret_cast<const char *>(&i), sizeof(typename ColumnVector<T>::ValueType)); });
|
||||
}
|
||||
else
|
||||
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(typename ColumnVector<T>::ValueType) * limit);
|
||||
}
|
||||
|
||||
@ -149,10 +163,13 @@ template <typename T>
|
||||
void SerializationNumber<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
|
||||
{
|
||||
typename ColumnVector<T>::Container & x = typeid_cast<ColumnVector<T> &>(column).getData();
|
||||
size_t initial_size = x.size();
|
||||
const size_t initial_size = x.size();
|
||||
x.resize(initial_size + limit);
|
||||
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(typename ColumnVector<T>::ValueType) * limit);
|
||||
const size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(typename ColumnVector<T>::ValueType) * limit);
|
||||
x.resize(initial_size + size / sizeof(typename ColumnVector<T>::ValueType));
|
||||
|
||||
if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2)
|
||||
std::ranges::for_each(x | std::views::drop(initial_size), [](auto & i) { transformEndianness<std::endian::big, std::endian::little>(i); });
|
||||
}
|
||||
|
||||
template class SerializationNumber<UInt8>;
|
||||
|
@ -1374,8 +1374,8 @@ public:
|
||||
|
||||
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
|
||||
{
|
||||
if (std::endian::native == std::endian::big)
|
||||
std::ranges::for_each(col_to->getData(), transformEndianness<std::endian::little, ToType>);
|
||||
if constexpr (std::endian::native == std::endian::big)
|
||||
std::ranges::for_each(col_to->getData(), transformEndianness<std::endian::little, std::endian::native, ToType>);
|
||||
|
||||
auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128));
|
||||
const auto & data = col_to->getData();
|
||||
|
@ -187,15 +187,15 @@ bool MergeTreeDataPartChecksums::readV3(ReadBuffer & in)
|
||||
String name;
|
||||
Checksum sum;
|
||||
|
||||
readBinary(name, in);
|
||||
readStringBinary(name, in);
|
||||
readVarUInt(sum.file_size, in);
|
||||
readPODBinary(sum.file_hash, in);
|
||||
readBinary(sum.is_compressed, in);
|
||||
readBinaryLittleEndian(sum.file_hash, in);
|
||||
readBinaryLittleEndian(sum.is_compressed, in);
|
||||
|
||||
if (sum.is_compressed)
|
||||
{
|
||||
readVarUInt(sum.uncompressed_size, in);
|
||||
readPODBinary(sum.uncompressed_hash, in);
|
||||
readBinaryLittleEndian(sum.uncompressed_hash, in);
|
||||
}
|
||||
|
||||
files.emplace(std::move(name), sum);
|
||||
@ -223,15 +223,15 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const
|
||||
const String & name = it.first;
|
||||
const Checksum & sum = it.second;
|
||||
|
||||
writeBinary(name, out);
|
||||
writeStringBinary(name, out);
|
||||
writeVarUInt(sum.file_size, out);
|
||||
writePODBinary(sum.file_hash, out);
|
||||
writeBinary(sum.is_compressed, out);
|
||||
writeBinaryLittleEndian(sum.file_hash, out);
|
||||
writeBinaryLittleEndian(sum.is_compressed, out);
|
||||
|
||||
if (sum.is_compressed)
|
||||
{
|
||||
writeVarUInt(sum.uncompressed_size, out);
|
||||
writePODBinary(sum.uncompressed_hash, out);
|
||||
writeBinaryLittleEndian(sum.uncompressed_hash, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -339,9 +339,9 @@ void MinimalisticDataPartChecksums::serializeWithoutHeader(WriteBuffer & to) con
|
||||
writeVarUInt(num_compressed_files, to);
|
||||
writeVarUInt(num_uncompressed_files, to);
|
||||
|
||||
writePODBinary(hash_of_all_files, to);
|
||||
writePODBinary(hash_of_uncompressed_files, to);
|
||||
writePODBinary(uncompressed_hash_of_compressed_files, to);
|
||||
writeBinaryLittleEndian(hash_of_all_files, to);
|
||||
writeBinaryLittleEndian(hash_of_uncompressed_files, to);
|
||||
writeBinaryLittleEndian(uncompressed_hash_of_compressed_files, to);
|
||||
}
|
||||
|
||||
String MinimalisticDataPartChecksums::getSerializedString() const
|
||||
@ -382,9 +382,9 @@ void MinimalisticDataPartChecksums::deserializeWithoutHeader(ReadBuffer & in)
|
||||
readVarUInt(num_compressed_files, in);
|
||||
readVarUInt(num_uncompressed_files, in);
|
||||
|
||||
readPODBinary(hash_of_all_files, in);
|
||||
readPODBinary(hash_of_uncompressed_files, in);
|
||||
readPODBinary(uncompressed_hash_of_compressed_files, in);
|
||||
readBinaryLittleEndian(hash_of_all_files, in);
|
||||
readBinaryLittleEndian(hash_of_uncompressed_files, in);
|
||||
readBinaryLittleEndian(uncompressed_hash_of_compressed_files, in);
|
||||
}
|
||||
|
||||
void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums_)
|
||||
|
@ -365,8 +365,9 @@ void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums &
|
||||
{
|
||||
uncompressed_size += stream->hashing_buf.count();
|
||||
auto stream_hash = stream->hashing_buf.getHash();
|
||||
transformEndianness<std::endian::little>(stream_hash);
|
||||
uncompressed_hash = CityHash_v1_0_2::CityHash128WithSeed(
|
||||
reinterpret_cast<char *>(&stream_hash), sizeof(stream_hash), uncompressed_hash);
|
||||
reinterpret_cast<const char *>(&stream_hash), sizeof(stream_hash), uncompressed_hash);
|
||||
}
|
||||
|
||||
checksums.files[data_file_name].is_compressed = true;
|
||||
|
Loading…
Reference in New Issue
Block a user