From b3ef9a90e9597ee21042acc9d554ad20c77c403d Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 22 Jun 2020 17:26:37 +0800 Subject: [PATCH 1/5] Support parse uuid without separator --- src/IO/ReadHelpers.cpp | 45 ++++++++++++++----- src/IO/ReadHelpers.h | 13 ++++-- .../01338_uuid_without_separator.reference | 2 + .../01338_uuid_without_separator.sql | 2 + 4 files changed, 48 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/01338_uuid_without_separator.reference create mode 100644 tests/queries/0_stateless/01338_uuid_without_separator.sql diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9d1d27611b5..da60c764e77 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -40,30 +40,53 @@ void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes) } } +template void parseUUID(const UInt8 * src36, UInt8 * dst16) { /// If string is not like UUID - implementation specific behaviour. - - parseHex(&src36[0], &dst16[0], 4); - parseHex(&src36[9], &dst16[4], 2); - parseHex(&src36[14], &dst16[6], 2); - parseHex(&src36[19], &dst16[8], 2); - parseHex(&src36[24], &dst16[10], 6); + if constexpr (with_separator) + { + parseHex(&src36[0], &dst16[0], 4); + parseHex(&src36[9], &dst16[4], 2); + parseHex(&src36[14], &dst16[6], 2); + parseHex(&src36[19], &dst16[8], 2); + parseHex(&src36[24], &dst16[10], 6); + } + else + { + parseHex(&src36[0], &dst16[0], 4); + parseHex(&src36[8], &dst16[4], 2); + parseHex(&src36[12], &dst16[6], 2); + parseHex(&src36[16], &dst16[8], 2); + parseHex(&src36[20], &dst16[10], 6); + } } /** Function used when byte ordering is important when parsing uuid * ex: When we create an UUID type */ +template void parseUUID(const UInt8 * src36, std::reverse_iterator dst16) { /// If string is not like UUID - implementation specific behaviour. /// FIXME This code looks like trash. - parseHex(&src36[0], dst16 + 8, 4); - parseHex(&src36[9], dst16 + 12, 2); - parseHex(&src36[14], dst16 + 14, 2); - parseHex(&src36[19], dst16, 2); - parseHex(&src36[24], dst16 + 2, 6); + if constexpr (with_separator) + { + parseHex(&src36[0], dst16 + 8, 4); + parseHex(&src36[9], dst16 + 12, 2); + parseHex(&src36[14], dst16 + 14, 2); + parseHex(&src36[19], dst16, 2); + parseHex(&src36[24], dst16 + 2, 6); + } + else + { + parseHex(&src36[0], dst16 + 8, 4); + parseHex(&src36[8], dst16 + 12, 2); + parseHex(&src36[12], dst16 + 14, 2); + parseHex(&src36[16], dst16, 2); + parseHex(&src36[20], dst16 + 2, 6); + } } UInt128 stringToUUID(const String & str) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 266b5ae1917..666a6071550 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -487,7 +487,9 @@ struct NullSink void push_back(char) {} }; +template void parseUUID(const UInt8 * src36, UInt8 * dst16); +template void parseUUID(const UInt8 * src36, std::reverse_iterator dst16); template @@ -577,13 +579,18 @@ inline void readUUIDText(UUID & uuid, ReadBuffer & buf) char s[36]; size_t size = buf.read(s, 36); - if (size != 36) + if (size >= 32) + { + if (s[8] == '-') + parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + else + parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + } + else { s[size] = 0; throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); } - - parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); } diff --git a/tests/queries/0_stateless/01338_uuid_without_separator.reference b/tests/queries/0_stateless/01338_uuid_without_separator.reference new file mode 100644 index 00000000000..f7575a7f9f8 --- /dev/null +++ b/tests/queries/0_stateless/01338_uuid_without_separator.reference @@ -0,0 +1,2 @@ +417ddc5d-e556-4d27-95dd-a34d84e46a50 +417ddc5d-e556-4d27-95dd-a34d84e46a50 diff --git a/tests/queries/0_stateless/01338_uuid_without_separator.sql b/tests/queries/0_stateless/01338_uuid_without_separator.sql new file mode 100644 index 00000000000..4dd26a7629e --- /dev/null +++ b/tests/queries/0_stateless/01338_uuid_without_separator.sql @@ -0,0 +1,2 @@ +SELECT toUUID('417ddc5de5564d2795dda34d84e46a50'); +SELECT toUUID('417ddc5d-e556-4d27-95dd-a34d84e46a50'); From ee2ca536fff31729fdc44a6216308e38330187de Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 22 Jun 2020 20:01:54 +0800 Subject: [PATCH 2/5] try fix build failure --- src/IO/ReadHelpers.cpp | 62 ------------------------------------------ src/IO/ReadHelpers.h | 61 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 64 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index da60c764e77..6a018329bc6 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -28,67 +27,6 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } -template -void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes) -{ - size_t src_pos = 0; - size_t dst_pos = 0; - for (; dst_pos < num_bytes; ++dst_pos) - { - dst[dst_pos] = UInt8(unhex(src[src_pos])) * 16 + UInt8(unhex(src[src_pos + 1])); - src_pos += 2; - } -} - -template -void parseUUID(const UInt8 * src36, UInt8 * dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - if constexpr (with_separator) - { - parseHex(&src36[0], &dst16[0], 4); - parseHex(&src36[9], &dst16[4], 2); - parseHex(&src36[14], &dst16[6], 2); - parseHex(&src36[19], &dst16[8], 2); - parseHex(&src36[24], &dst16[10], 6); - } - else - { - parseHex(&src36[0], &dst16[0], 4); - parseHex(&src36[8], &dst16[4], 2); - parseHex(&src36[12], &dst16[6], 2); - parseHex(&src36[16], &dst16[8], 2); - parseHex(&src36[20], &dst16[10], 6); - } -} - -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ -template -void parseUUID(const UInt8 * src36, std::reverse_iterator dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - - /// FIXME This code looks like trash. - if constexpr (with_separator) - { - parseHex(&src36[0], dst16 + 8, 4); - parseHex(&src36[9], dst16 + 12, 2); - parseHex(&src36[14], dst16 + 14, 2); - parseHex(&src36[19], dst16, 2); - parseHex(&src36[24], dst16 + 2, 6); - } - else - { - parseHex(&src36[0], dst16 + 8, 4); - parseHex(&src36[8], dst16 + 12, 2); - parseHex(&src36[12], dst16 + 14, 2); - parseHex(&src36[16], dst16, 2); - parseHex(&src36[20], dst16 + 2, 6); - } -} - UInt128 stringToUUID(const String & str) { return parseFromString(str); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 666a6071550..a64fbacd467 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -35,6 +35,7 @@ #include #include +#include /// 1 GiB @@ -487,10 +488,66 @@ struct NullSink void push_back(char) {} }; +template +void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes) +{ + size_t src_pos = 0; + size_t dst_pos = 0; + for (; dst_pos < num_bytes; ++dst_pos) + { + dst[dst_pos] = UInt8(unhex(src[src_pos])) * 16 + UInt8(unhex(src[src_pos + 1])); + src_pos += 2; + } +} + template -void parseUUID(const UInt8 * src36, UInt8 * dst16); +void parseUUID(const UInt8 * src36, UInt8 * dst16) +{ + /// If string is not like UUID - implementation specific behaviour. + if constexpr (with_separator) + { + parseHex(&src36[0], &dst16[0], 4); + parseHex(&src36[9], &dst16[4], 2); + parseHex(&src36[14], &dst16[6], 2); + parseHex(&src36[19], &dst16[8], 2); + parseHex(&src36[24], &dst16[10], 6); + } + else + { + parseHex(&src36[0], &dst16[0], 4); + parseHex(&src36[8], &dst16[4], 2); + parseHex(&src36[12], &dst16[6], 2); + parseHex(&src36[16], &dst16[8], 2); + parseHex(&src36[20], &dst16[10], 6); + } +} + +/** Function used when byte ordering is important when parsing uuid + * ex: When we create an UUID type + */ template -void parseUUID(const UInt8 * src36, std::reverse_iterator dst16); +void parseUUID(const UInt8 * src36, std::reverse_iterator dst16) +{ + /// If string is not like UUID - implementation specific behaviour. + + /// FIXME This code looks like trash. + if constexpr (with_separator) + { + parseHex(&src36[0], dst16 + 8, 4); + parseHex(&src36[9], dst16 + 12, 2); + parseHex(&src36[14], dst16 + 14, 2); + parseHex(&src36[19], dst16, 2); + parseHex(&src36[24], dst16 + 2, 6); + } + else + { + parseHex(&src36[0], dst16 + 8, 4); + parseHex(&src36[8], dst16 + 12, 2); + parseHex(&src36[12], dst16 + 14, 2); + parseHex(&src36[16], dst16, 2); + parseHex(&src36[20], dst16 + 2, 6); + } +} template void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes); From b820cfb3b67c673c9af002c9caa0315ff62bf6ca Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Mon, 22 Jun 2020 21:30:55 +0800 Subject: [PATCH 3/5] fix read extra bytes when with separator --- src/IO/ReadHelpers.h | 14 ++++++++++++-- .../01338_uuid_without_separator.reference | 2 ++ .../0_stateless/01338_uuid_without_separator.sql | 9 +++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index a64fbacd467..08d5d6f9de1 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -634,12 +634,22 @@ inline bool tryReadDateText(DayNum & date, ReadBuffer & buf) inline void readUUIDText(UUID & uuid, ReadBuffer & buf) { char s[36]; - size_t size = buf.read(s, 36); + size_t size = buf.read(s, 32); - if (size >= 32) + if (size == 32) { if (s[8] == '-') + { + size += buf.read(&s[32], 4); + + if (size != 36) + { + s[size] = 0; + throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + } + parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + } else parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); } diff --git a/tests/queries/0_stateless/01338_uuid_without_separator.reference b/tests/queries/0_stateless/01338_uuid_without_separator.reference index f7575a7f9f8..c009ee7a13f 100644 --- a/tests/queries/0_stateless/01338_uuid_without_separator.reference +++ b/tests/queries/0_stateless/01338_uuid_without_separator.reference @@ -1,2 +1,4 @@ 417ddc5d-e556-4d27-95dd-a34d84e46a50 417ddc5d-e556-4d27-95dd-a34d84e46a50 +1 417ddc5d-e556-4d27-95dd-a34d84e46a50 Example 1 +2 417ddc5d-e556-4d27-95dd-a34d84e46a51 Example 2 diff --git a/tests/queries/0_stateless/01338_uuid_without_separator.sql b/tests/queries/0_stateless/01338_uuid_without_separator.sql index 4dd26a7629e..efbf4bc2812 100644 --- a/tests/queries/0_stateless/01338_uuid_without_separator.sql +++ b/tests/queries/0_stateless/01338_uuid_without_separator.sql @@ -1,2 +1,11 @@ SELECT toUUID('417ddc5de5564d2795dda34d84e46a50'); SELECT toUUID('417ddc5d-e556-4d27-95dd-a34d84e46a50'); + +DROP TABLE IF EXISTS t_uuid; +CREATE TABLE t_uuid (x UInt8, y UUID, z String) ENGINE = TinyLog; + +INSERT INTO t_uuid VALUES (1, '417ddc5de5564d2795dda34d84e46a50', 'Example 1'); +INSERT INTO t_uuid VALUES (2, '417ddc5d-e556-4d27-95dd-a34d84e46a51', 'Example 2'); + +SELECT * FROM t_uuid ORDER BY x ASC; +DROP TABLE IF EXISTS t_uuid; From 29451cc8f2ba49767c48f05ed3fc0625ba889407 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 23 Jun 2020 00:23:26 +0800 Subject: [PATCH 4/5] try fix test failure --- .../0_stateless/01338_uuid_without_separator.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01338_uuid_without_separator.reference b/tests/queries/0_stateless/01338_uuid_without_separator.reference index c009ee7a13f..9ae1e1e6e2b 100644 --- a/tests/queries/0_stateless/01338_uuid_without_separator.reference +++ b/tests/queries/0_stateless/01338_uuid_without_separator.reference @@ -1,4 +1,4 @@ 417ddc5d-e556-4d27-95dd-a34d84e46a50 417ddc5d-e556-4d27-95dd-a34d84e46a50 -1 417ddc5d-e556-4d27-95dd-a34d84e46a50 Example 1 -2 417ddc5d-e556-4d27-95dd-a34d84e46a51 Example 2 +1 417ddc5d-e556-4d27-95dd-a34d84e46a50 Example 1 +2 417ddc5d-e556-4d27-95dd-a34d84e46a51 Example 2 From c3a38e7f8c4e8dda61c0abbfa311e9b6e5342852 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Tue, 23 Jun 2020 11:14:16 +0800 Subject: [PATCH 5/5] apply review suggestions --- src/IO/ReadHelpers.cpp | 54 +++++++++++++++++++++++++++++++++ src/IO/ReadHelpers.h | 69 ++++-------------------------------------- 2 files changed, 60 insertions(+), 63 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 6a018329bc6..d4574732c58 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -27,6 +28,59 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } +template +void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes) +{ + size_t src_pos = 0; + size_t dst_pos = 0; + for (; dst_pos < num_bytes; ++dst_pos, src_pos += 2) + dst[dst_pos] = unhex2(reinterpret_cast(&src[src_pos])); +} + +void parseUUID(const UInt8 * src36, UInt8 * dst16) +{ + /// If string is not like UUID - implementation specific behaviour. + + parseHex(&src36[0], &dst16[0], 4); + parseHex(&src36[9], &dst16[4], 2); + parseHex(&src36[14], &dst16[6], 2); + parseHex(&src36[19], &dst16[8], 2); + parseHex(&src36[24], &dst16[10], 6); +} + +void parseUUIDWithoutSeparator(const UInt8 * src36, UInt8 * dst16) +{ + /// If string is not like UUID - implementation specific behaviour. + + parseHex(&src36[0], &dst16[0], 16); +} + +/** Function used when byte ordering is important when parsing uuid + * ex: When we create an UUID type + */ +void parseUUID(const UInt8 * src36, std::reverse_iterator dst16) +{ + /// If string is not like UUID - implementation specific behaviour. + + /// FIXME This code looks like trash. + parseHex(&src36[0], dst16 + 8, 4); + parseHex(&src36[9], dst16 + 12, 2); + parseHex(&src36[14], dst16 + 14, 2); + parseHex(&src36[19], dst16, 2); + parseHex(&src36[24], dst16 + 2, 6); +} + +/** Function used when byte ordering is important when parsing uuid + * ex: When we create an UUID type + */ +void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator dst16) +{ + /// If string is not like UUID - implementation specific behaviour. + + parseHex(&src36[0], dst16 + 8, 8); + parseHex(&src36[16], dst16, 8); +} + UInt128 stringToUUID(const String & str) { return parseFromString(str); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 08d5d6f9de1..7e1e0e016ad 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -35,7 +35,6 @@ #include #include -#include /// 1 GiB @@ -488,66 +487,10 @@ struct NullSink void push_back(char) {} }; -template -void parseHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes) -{ - size_t src_pos = 0; - size_t dst_pos = 0; - for (; dst_pos < num_bytes; ++dst_pos) - { - dst[dst_pos] = UInt8(unhex(src[src_pos])) * 16 + UInt8(unhex(src[src_pos + 1])); - src_pos += 2; - } -} - -template -void parseUUID(const UInt8 * src36, UInt8 * dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - if constexpr (with_separator) - { - parseHex(&src36[0], &dst16[0], 4); - parseHex(&src36[9], &dst16[4], 2); - parseHex(&src36[14], &dst16[6], 2); - parseHex(&src36[19], &dst16[8], 2); - parseHex(&src36[24], &dst16[10], 6); - } - else - { - parseHex(&src36[0], &dst16[0], 4); - parseHex(&src36[8], &dst16[4], 2); - parseHex(&src36[12], &dst16[6], 2); - parseHex(&src36[16], &dst16[8], 2); - parseHex(&src36[20], &dst16[10], 6); - } -} - -/** Function used when byte ordering is important when parsing uuid - * ex: When we create an UUID type - */ -template -void parseUUID(const UInt8 * src36, std::reverse_iterator dst16) -{ - /// If string is not like UUID - implementation specific behaviour. - - /// FIXME This code looks like trash. - if constexpr (with_separator) - { - parseHex(&src36[0], dst16 + 8, 4); - parseHex(&src36[9], dst16 + 12, 2); - parseHex(&src36[14], dst16 + 14, 2); - parseHex(&src36[19], dst16, 2); - parseHex(&src36[24], dst16 + 2, 6); - } - else - { - parseHex(&src36[0], dst16 + 8, 4); - parseHex(&src36[8], dst16 + 12, 2); - parseHex(&src36[12], dst16 + 14, 2); - parseHex(&src36[16], dst16, 2); - parseHex(&src36[20], dst16 + 2, 6); - } -} +void parseUUID(const UInt8 * src36, UInt8 * dst16); +void parseUUIDWithoutSeparator(const UInt8 * src36, UInt8 * dst16); +void parseUUID(const UInt8 * src36, std::reverse_iterator dst16); +void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator dst16); template void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes); @@ -648,10 +591,10 @@ inline void readUUIDText(UUID & uuid, ReadBuffer & buf) throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); } - parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); } else - parseUUID(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); + parseUUIDWithoutSeparator(reinterpret_cast(s), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); } else {