From 3938309374cef05afb618c36d932bd380abb1651 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Mon, 5 Jun 2023 08:18:03 -0700 Subject: [PATCH] Implement review comments --- .../Serializations/SerializationUUID.cpp | 2 +- src/IO/ReadHelpers.cpp | 27 ++++++++++--------- src/IO/ReadHelpers.h | 1 - src/IO/WriteHelpers.cpp | 25 ++++++++--------- src/IO/WriteHelpers.h | 7 +++-- .../Formats/Impl/AvroRowInputFormat.cpp | 2 +- .../Formats/Impl/AvroRowOutputFormat.cpp | 4 +-- 7 files changed, 35 insertions(+), 33 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 13313111b2b..76be273d7dc 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -51,7 +51,7 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist { assertChar('\'', istr); char * next_pos = find_first_symbols<'\\', '\''>(istr.position(), istr.buffer().end()); - const auto len = next_pos - istr.position(); + const size_t len = next_pos - istr.position(); if ((len == 32 || len == 36) && istr.position()[len] == '\'') { uuid = parseUUID(std::span(reinterpret_cast(istr.position()), len)); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index a85a057f2b3..99b3e4b514b 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_QUOTED_STRING; extern const int CANNOT_PARSE_DATETIME; extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_UUID; extern const int INCORRECT_DATA; extern const int ATTEMPT_TO_READ_AFTER_EOF; extern const int LOGICAL_ERROR; @@ -51,33 +52,35 @@ UUID parseUUID(std::span src) UUID uuid; const auto * src_ptr = src.data(); auto * dst = reinterpret_cast(&uuid); - if (const auto size = src.size(); size == 36) + const auto size = src.size(); + if (size == 36) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - parseHex<4>(src_ptr, dst); - parseHex<2>(src_ptr + 9, dst + 4); - parseHex<2>(src_ptr + 14, dst + 6); - parseHex<2>(src_ptr + 19, dst + 8); - parseHex<6>(src_ptr + 24, dst + 10); -#else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const std::reverse_iterator dst_it(dst + sizeof(UUID)); - /// FIXME This code looks like trash. parseHex<4>(src_ptr, dst + 8); parseHex<2>(src_ptr + 9, dst + 12); parseHex<2>(src_ptr + 14, dst + 14); parseHex<2>(src_ptr + 19, dst); parseHex<6>(src_ptr + 24, dst + 2); +#else + parseHex<4>(src_ptr, dst); + parseHex<2>(src_ptr + 9, dst + 4); + parseHex<2>(src_ptr + 14, dst + 6); + parseHex<2>(src_ptr + 19, dst + 8); + parseHex<6>(src_ptr + 24, dst + 10); #endif } else if (size == 32) { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - parseHex<16>(src_ptr, dst); -#else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ parseHex<8>(src_ptr, dst + 8); parseHex<8>(src_ptr + 16, dst); +#else + parseHex<16>(src_ptr, dst); #endif } + else + throw Exception(ErrorCodes::CANNOT_PARSE_UUID, "Unexpected length when trying to parse UUID ({})", size); return uuid; } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 7e293944d19..804dab16db9 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -765,7 +765,6 @@ inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf) return readDateTextImpl(date, buf); } -/// If string is not like UUID - implementation specific behaviour. UUID parseUUID(std::span src); template diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index 6023d4c9d5b..4f1a95181d4 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -20,25 +20,12 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) } } -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ std::array formatUUID(const UUID & uuid) { std::array dst; const auto * src_ptr = reinterpret_cast(&uuid); auto * dst_ptr = dst.data(); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - formatHex(src_ptr, dst_ptr, 4); - dst[8] = '-'; - formatHex(src_ptr + 4, dst_ptr + 9, 2); - dst[13] = '-'; - formatHex(src_ptr + 6, dst_ptr + 14, 2); - dst[18] = '-'; - formatHex(src_ptr + 8, dst_ptr + 19, 2); - dst[23] = '-'; - formatHex(src_ptr + 10, dst_ptr + 24, 6); -#else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const std::reverse_iterator src_it(src_ptr + 16); formatHex(src_it + 8, dst_ptr, 4); dst[8] = '-'; @@ -49,6 +36,16 @@ std::array formatUUID(const UUID & uuid) formatHex(src_it, dst_ptr + 19, 2); dst[23] = '-'; formatHex(src_it + 2, dst_ptr + 24, 6); +#else + formatHex(src_ptr, dst_ptr, 4); + dst[8] = '-'; + formatHex(src_ptr + 4, dst_ptr + 9, 2); + dst[13] = '-'; + formatHex(src_ptr + 6, dst_ptr + 14, 2); + dst[18] = '-'; + formatHex(src_ptr + 8, dst_ptr + 19, 2); + dst[23] = '-'; + formatHex(src_ptr + 10, dst_ptr + 24, 6); #endif return dst; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 923684c4249..056c2ca1b50 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -625,12 +625,15 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf) writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf); } +/// @brief Serialize `uuid` into an array of characters in big-endian byte order. +/// @param uuid UUID to serialize. +/// @return Array of characters in big-endian byte order. std::array formatUUID(const UUID & uuid); inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) { - const auto text = formatUUID(uuid); - buf.write(text.data(), text.size()); + const auto serialized_uuid = formatUUID(uuid); + buf.write(serialized_uuid.data(), serialized_uuid.size()); } void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 974b198a483..a4d4e374f4f 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -256,7 +256,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro if (tmp.length() != 36) throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", tmp); - const auto uuid = parseUUID({reinterpret_cast(tmp.data()), tmp.length()}); + const UUID uuid = parseUUID({reinterpret_cast(tmp.data()), tmp.length()}); assert_cast(column).insertValue(uuid); return true; }; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 2b163164d56..f0985e7cffc 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -329,8 +329,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const auto & uuid = assert_cast(column).getElement(row_num); - const auto text = formatUUID(uuid); - encoder.encodeBytes(reinterpret_cast(text.data()), text.size()); + const auto serialized_uuid = formatUUID(uuid); + encoder.encodeBytes(reinterpret_cast(serialized_uuid.data()), serialized_uuid.size()); }}; } case TypeIndex::Array: