Implement review comments

This commit is contained in:
ltrk2 2023-06-05 08:18:03 -07:00
parent 50654435dc
commit 3938309374
7 changed files with 35 additions and 33 deletions

View File

@ -51,7 +51,7 @@ void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & ist
{
assertChar('\'', istr);
char * next_pos = find_first_symbols<'\\', '\''>(istr.position(), istr.buffer().end());
const auto len = next_pos - istr.position();
const size_t len = next_pos - istr.position();
if ((len == 32 || len == 36) && istr.position()[len] == '\'')
{
uuid = parseUUID(std::span(reinterpret_cast<const UInt8 *>(istr.position()), len));

View File

@ -31,6 +31,7 @@ namespace ErrorCodes
extern const int CANNOT_PARSE_QUOTED_STRING;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_UUID;
extern const int INCORRECT_DATA;
extern const int ATTEMPT_TO_READ_AFTER_EOF;
extern const int LOGICAL_ERROR;
@ -51,33 +52,35 @@ UUID parseUUID(std::span<const UInt8> src)
UUID uuid;
const auto * src_ptr = src.data();
auto * dst = reinterpret_cast<UInt8 *>(&uuid);
if (const auto size = src.size(); size == 36)
const auto size = src.size();
if (size == 36)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
parseHex<4>(src_ptr, dst);
parseHex<2>(src_ptr + 9, dst + 4);
parseHex<2>(src_ptr + 14, dst + 6);
parseHex<2>(src_ptr + 19, dst + 8);
parseHex<6>(src_ptr + 24, dst + 10);
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
const std::reverse_iterator dst_it(dst + sizeof(UUID));
/// FIXME This code looks like trash.
parseHex<4>(src_ptr, dst + 8);
parseHex<2>(src_ptr + 9, dst + 12);
parseHex<2>(src_ptr + 14, dst + 14);
parseHex<2>(src_ptr + 19, dst);
parseHex<6>(src_ptr + 24, dst + 2);
#else
parseHex<4>(src_ptr, dst);
parseHex<2>(src_ptr + 9, dst + 4);
parseHex<2>(src_ptr + 14, dst + 6);
parseHex<2>(src_ptr + 19, dst + 8);
parseHex<6>(src_ptr + 24, dst + 10);
#endif
}
else if (size == 32)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
parseHex<16>(src_ptr, dst);
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
parseHex<8>(src_ptr, dst + 8);
parseHex<8>(src_ptr + 16, dst);
#else
parseHex<16>(src_ptr, dst);
#endif
}
else
throw Exception(ErrorCodes::CANNOT_PARSE_UUID, "Unexpected length when trying to parse UUID ({})", size);
return uuid;
}

View File

@ -765,7 +765,6 @@ inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf)
return readDateTextImpl<bool>(date, buf);
}
/// If string is not like UUID - implementation specific behaviour.
UUID parseUUID(std::span<const UInt8> src);
template <typename ReturnType = void>

View File

@ -20,25 +20,12 @@ void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes)
}
}
/** Function used when byte ordering is important when parsing uuid
* ex: When we create an UUID type
*/
std::array<char, 36> formatUUID(const UUID & uuid)
{
std::array<char, 36> dst;
const auto * src_ptr = reinterpret_cast<const UInt8 *>(&uuid);
auto * dst_ptr = dst.data();
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
formatHex(src_ptr, dst_ptr, 4);
dst[8] = '-';
formatHex(src_ptr + 4, dst_ptr + 9, 2);
dst[13] = '-';
formatHex(src_ptr + 6, dst_ptr + 14, 2);
dst[18] = '-';
formatHex(src_ptr + 8, dst_ptr + 19, 2);
dst[23] = '-';
formatHex(src_ptr + 10, dst_ptr + 24, 6);
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
const std::reverse_iterator src_it(src_ptr + 16);
formatHex(src_it + 8, dst_ptr, 4);
dst[8] = '-';
@ -49,6 +36,16 @@ std::array<char, 36> formatUUID(const UUID & uuid)
formatHex(src_it, dst_ptr + 19, 2);
dst[23] = '-';
formatHex(src_it + 2, dst_ptr + 24, 6);
#else
formatHex(src_ptr, dst_ptr, 4);
dst[8] = '-';
formatHex(src_ptr + 4, dst_ptr + 9, 2);
dst[13] = '-';
formatHex(src_ptr + 6, dst_ptr + 14, 2);
dst[18] = '-';
formatHex(src_ptr + 8, dst_ptr + 19, 2);
dst[23] = '-';
formatHex(src_ptr + 10, dst_ptr + 24, 6);
#endif
return dst;

View File

@ -625,12 +625,15 @@ inline void writeXMLStringForTextElement(std::string_view s, WriteBuffer & buf)
writeXMLStringForTextElement(s.data(), s.data() + s.size(), buf);
}
/// @brief Serialize `uuid` into an array of characters in big-endian byte order.
/// @param uuid UUID to serialize.
/// @return Array of characters in big-endian byte order.
std::array<char, 36> formatUUID(const UUID & uuid);
inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf)
{
const auto text = formatUUID(uuid);
buf.write(text.data(), text.size());
const auto serialized_uuid = formatUUID(uuid);
buf.write(serialized_uuid.data(), serialized_uuid.size());
}
void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf);

View File

@ -256,7 +256,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
if (tmp.length() != 36)
throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", tmp);
const auto uuid = parseUUID({reinterpret_cast<const UInt8 *>(tmp.data()), tmp.length()});
const UUID uuid = parseUUID({reinterpret_cast<const UInt8 *>(tmp.data()), tmp.length()});
assert_cast<DataTypeUUID::ColumnType &>(column).insertValue(uuid);
return true;
};

View File

@ -329,8 +329,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF
return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder)
{
const auto & uuid = assert_cast<const DataTypeUUID::ColumnType &>(column).getElement(row_num);
const auto text = formatUUID(uuid);
encoder.encodeBytes(reinterpret_cast<const uint8_t *>(text.data()), text.size());
const auto serialized_uuid = formatUUID(uuid);
encoder.encodeBytes(reinterpret_cast<const uint8_t *>(serialized_uuid.data()), serialized_uuid.size());
}};
}
case TypeIndex::Array: