Implement review comments

2024-11-23 16:12:01 +00:00 · 2023-06-05 08:18:03 -07:00 · 2023-06-05 08:18:03 -07:00 · 3938309374
commit 3938309374
parent 50654435dc
7 changed files with 35 additions and 33 deletions
--- a/src/DataTypes/Serializations/SerializationUUID.cpp
+++ b/src/DataTypes/Serializations/SerializationUUID.cpp
@ -51,7 +51,7 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
    {
        assertChar('\'', istr);
        char * next_pos = find_first_symbols<'\\', '\''>(istr.position(), istr.buffer().end());
-        const auto len = next_pos - istr.position();
+        const size_t len = next_pos - istr.position();
        if ((len == 32 || len == 36) && istr.position()[len] == '\'')
        {
            uuid = parseUUID(std::span(reinterpret_cast<const UInt8 *>(istr.position()), len));
--- a/src/IO/ReadHelpers.cpp
+++ b/src/IO/ReadHelpers.cpp
@ -31,6 +31,7 @@ namespace ErrorCodes
    extern const int CANNOT_PARSE_QUOTED_STRING;
    extern const int CANNOT_PARSE_DATETIME;
    extern const int CANNOT_PARSE_DATE;
+    extern const int CANNOT_PARSE_UUID;
    extern const int INCORRECT_DATA;
    extern const int ATTEMPT_TO_READ_AFTER_EOF;
    extern const int LOGICAL_ERROR;
@ -51,33 +52,35 @@ UUID parseUUID(std::span<const UInt8> src)
    UUID uuid;
    const auto * src_ptr = src.data();
    auto * dst = reinterpret_cast<UInt8 *>(&uuid);
-    if (const auto size = src.size(); size == 36)
+    const auto size = src.size();
+    if (size == 36)
    {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-        parseHex<4>(src_ptr, dst);
-        parseHex<2>(src_ptr + 9, dst + 4);
-        parseHex<2>(src_ptr + 14, dst + 6);
-        parseHex<2>(src_ptr + 19, dst + 8);
-        parseHex<6>(src_ptr + 24, dst + 10);
-#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        const std::reverse_iterator dst_it(dst + sizeof(UUID));
-        /// FIXME This code looks like trash.
        parseHex<4>(src_ptr, dst + 8);
        parseHex<2>(src_ptr + 9, dst + 12);
        parseHex<2>(src_ptr + 14, dst + 14);
        parseHex<2>(src_ptr + 19, dst);
        parseHex<6>(src_ptr + 24, dst + 2);
+#else
+        parseHex<4>(src_ptr, dst);
+        parseHex<2>(src_ptr + 9, dst + 4);
+        parseHex<2>(src_ptr + 14, dst + 6);
+        parseHex<2>(src_ptr + 19, dst + 8);
+        parseHex<6>(src_ptr + 24, dst + 10);
 #endif
    }
    else if (size == 32)
    {
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-        parseHex<16>(src_ptr, dst);
-#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        parseHex<8>(src_ptr, dst + 8);
        parseHex<8>(src_ptr + 16, dst);
+#else
+        parseHex<16>(src_ptr, dst);
 #endif
    }
+    else
+        throw Exception(ErrorCodes::CANNOT_PARSE_UUID, "Unexpected length when trying to parse UUID ({})", size);

    return uuid;
 }
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@ -765,7 +765,6 @@ inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf)
    return readDateTextImpl<bool>(date, buf);
 }

-/// If string is not like UUID - implementation specific behaviour.
 UUID parseUUID(std::span<const UInt8> src);

 template <typename ReturnType = void>
--- a/src/IO/WriteHelpers.cpp
+++ b/src/IO/WriteHelpers.cpp
@ -20,25 +20,12 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes)
    }
 }

-/** Function used when byte ordering is important when parsing uuid
- *  ex: When we create an UUID type
- */
 std::array<char, 36> formatUUID(const UUID & uuid)
 {
    std::array<char, 36> dst;
    const auto * src_ptr = reinterpret_cast<const UInt8 *>(&uuid);
    auto * dst_ptr = dst.data();
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    formatHex(src_ptr, dst_ptr, 4);
-    dst[8] = '-';
-    formatHex(src_ptr + 4, dst_ptr + 9, 2);
-    dst[13] = '-';
-    formatHex(src_ptr + 6, dst_ptr + 14, 2);
-    dst[18] = '-';
-    formatHex(src_ptr + 8, dst_ptr + 19, 2);
-    dst[23] = '-';
-    formatHex(src_ptr + 10, dst_ptr + 24, 6);
-#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    const std::reverse_iterator src_it(src_ptr + 16);
    formatHex(src_it + 8, dst_ptr, 4);
    dst[8] = '-';
@ -49,6 +36,16 @@ std::array<char, 36> formatUUID(const UUID & uuid)
    formatHex(src_it, dst_ptr + 19, 2);
    dst[23] = '-';
    formatHex(src_it + 2, dst_ptr + 24, 6);
+#else
+    formatHex(src_ptr, dst_ptr, 4);
+    dst[8] = '-';
+    formatHex(src_ptr + 4, dst_ptr + 9, 2);
+    dst[13] = '-';
+    formatHex(src_ptr + 6, dst_ptr + 14, 2);
+    dst[18] = '-';
+    formatHex(src_ptr + 8, dst_ptr + 19, 2);
+    dst[23] = '-';
+    formatHex(src_ptr + 10, dst_ptr + 24, 6);
 #endif

    return dst;
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@ -625,12 +625,15 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf)
    writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf);
 }

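+/// @brief Serialize `uuid` into its 36-character textual form: hex digits in big-endian byte order, grouped 8-4-4-4-12 and separated by '-'.
+/// @param uuid UUID to serialize.
+/// @return Array of exactly 36 characters holding that text (32 hex digits and 4 dashes); it is not null-terminated.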
 std::array<char, 36> formatUUID(const UUID & uuid);

 inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf)
 {
-    const auto text = formatUUID(uuid);
-    buf.write(text.data(), text.size());
+    const auto serialized_uuid = formatUUID(uuid);
+    buf.write(serialized_uuid.data(), serialized_uuid.size());
 }

 void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf);
--- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
@ -256,7 +256,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
                    if (tmp.length() != 36)
                        throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", tmp);

-                    const auto uuid = parseUUID({reinterpret_cast<const UInt8 *>(tmp.data()), tmp.length()});
+                    const UUID uuid = parseUUID({reinterpret_cast<const UInt8 *>(tmp.data()), tmp.length()});
                    assert_cast<DataTypeUUID::ColumnType &>(column).insertValue(uuid);
                    return true;
                };
--- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp
@ -329,8 +329,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF
            return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
            {
                const auto & uuid = assert_cast<const DataTypeUUID::ColumnType &>(column).getElement(row_num);
-                const auto text = formatUUID(uuid);
-                encoder.encodeBytes(reinterpret_cast<const uint8_t *>(text.data()), text.size());
+                const auto serialized_uuid = formatUUID(uuid);
+                encoder.encodeBytes(reinterpret_cast<const uint8_t *>(serialized_uuid.data()), serialized_uuid.size());
            }};
        }
        case TypeIndex::Array: