From 7e3587034d17dd75838110bb3236816161bcd3f7 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Thu, 12 Oct 2023 19:49:06 +0100 Subject: [PATCH 1/6] Consume leading zeroes when parsing a number in ConstantExpressionTemplate --- .../Formats/Impl/ConstantExpressionTemplate.cpp | 4 ++++ .../02896_leading_zeroes_no_octal.reference | 15 +++++++++++++++ .../0_stateless/02896_leading_zeroes_no_octal.sql | 8 ++++++++ 3 files changed, 27 insertions(+) create mode 100644 tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference create mode 100644 tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index fe6fb42d0a0..c2b0ce3e486 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -597,6 +597,10 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( if (negative || *istr.position() == '+') ++istr.position(); + /// Consume leading zeroes - we don't want any funny octal business + while (*istr.position() == '0') + ++istr.position(); + static constexpr size_t MAX_LENGTH_OF_NUMBER = 319; char buf[MAX_LENGTH_OF_NUMBER + 1]; size_t bytes_to_copy = std::min(istr.available(), MAX_LENGTH_OF_NUMBER); diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference new file mode 100644 index 00000000000..5982c315878 --- /dev/null +++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference @@ -0,0 +1,15 @@ +-0.02 0 +0 0 +0.01 0 +0.02 1 +0.03 1 +0.04 -1 +1 1 +2 5 +3 8 +4 17 +5 21 +6 51 +7 123 +8 16 +9 43981 diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql new file mode 100644 index 00000000000..fbb931b4c88 --- /dev/null +++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS t_leading_zeroes; +CREATE TABLE t_leading_zeroes(ref Float64, val INTEGER) ENGINE=MergeTree ORDER BY ref; + +INSERT INTO t_leading_zeroes VALUES (-0.02, 00000), (0, 0), (0.01, 00), (0.02, 01), (0.03, +01), (0.04, -01), (1, 0001), (2, 0005), (3, 0008), (4, 0017), (5, 0021), (6, 0051), (7, 00000123), (8, 0b10000), (9, 0x0abcd); + +SELECT * FROM t_leading_zeroes; + +DROP TABLE IF EXISTS t_leading_zeroes; From 6ad027ed7a5d6377f36d60f103159a9f78c0e301 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 30 Oct 2023 05:32:36 +0300 Subject: [PATCH 2/6] Update ConstantExpressionTemplate.cpp --- src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index c2b0ce3e486..421d41f93c9 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -598,7 +598,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( ++istr.position(); /// Consume leading zeroes - we don't want any funny octal business - while (*istr.position() == '0') + while (!istr.eof() && *istr.position() == '0') ++istr.position(); static constexpr size_t MAX_LENGTH_OF_NUMBER = 319; From 0aeb9fba192ed9678b102cbbe2a7a9578d59a689 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Tue, 5 Dec 2023 21:55:43 +0000 Subject: [PATCH 3/6] Move consuming of leading zeros from ReadBuffer to char buffer --- .../Impl/ConstantExpressionTemplate.cpp | 19 ++++----- .../02896_leading_zeroes_no_octal.reference | 40 ++++++++++++------- .../02896_leading_zeroes_no_octal.sql | 20 ++++++++-- 3 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 1c009252183..fe11e3f6360 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -597,28 +597,29 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( if (negative || *istr.position() == '+') ++istr.position(); - /// Consume leading zeroes - we don't want any funny octal business - while (!istr.eof() && *istr.position() == '0') - ++istr.position(); - static constexpr size_t MAX_LENGTH_OF_NUMBER = 319; char buf[MAX_LENGTH_OF_NUMBER + 1]; size_t bytes_to_copy = std::min(istr.available(), MAX_LENGTH_OF_NUMBER); memcpy(buf, istr.position(), bytes_to_copy); buf[bytes_to_copy] = 0; - char * pos_double = buf; + /// Consume leading zeroes - we don't want any funny octal business + auto* non_zero_buf = buf; + while (*non_zero_buf == '0') + ++non_zero_buf; + + char * pos_double = non_zero_buf; errno = 0; - Float64 float_value = std::strtod(buf, &pos_double); - if (pos_double == buf || errno == ERANGE || float_value < 0) + Float64 float_value = std::strtod(non_zero_buf, &pos_double); + if (pos_double == non_zero_buf || errno == ERANGE || float_value < 0) return false; if (negative) float_value = -float_value; - char * pos_integer = buf; + char * pos_integer = non_zero_buf; errno = 0; - UInt64 uint_value = std::strtoull(buf, &pos_integer, 0); + UInt64 uint_value = std::strtoull(non_zero_buf, &pos_integer, 0); if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63))) { istr.position() += pos_integer - buf; diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference index 5982c315878..69446796e09 100644 --- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference +++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference @@ -1,15 +1,25 @@ --0.02 0 -0 0 -0.01 0 -0.02 1 -0.03 1 -0.04 -1 -1 1 -2 5 -3 8 -4 17 -5 21 -6 51 -7 123 -8 16 -9 43981 +Leading zeroes into INTEGER +1 1 00000 0 0 +1 2 0 0 0 +1 3 00 0 0 +1 4 01 1 1 +1 5 +01 1 1 +1 6 -01 -1 -1 +1 7 0001 1 1 +1 8 0005 5 5 +1 9 0008 8 8 +1 10 0017 17 17 +1 11 0021 21 21 +1 12 0051 51 51 +1 13 00000123 123 123 +1 14 0b10000 16 16 +1 15 0x0abcd 43981 43981 +1 16 0000.008 0 0 +1 17 1000.0008 1000 1000 +1 18 0008.0008 8 8 +Leading zeroes into Float32 +1 1 00000 0 0 +1 2 00009.00009 9.00009 9.00009 +1 3 00009e9 9000000000 9000000000 +1 4 00009e09 9000000000 9000000000 +1 5 00009e0009 9000000000 9000000000 diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql index fbb931b4c88..a204259f3ec 100644 --- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql +++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql @@ -1,8 +1,22 @@ DROP TABLE IF EXISTS t_leading_zeroes; -CREATE TABLE t_leading_zeroes(ref Float64, val INTEGER) ENGINE=MergeTree ORDER BY ref; +CREATE TABLE t_leading_zeroes(id INTEGER, input String, val INTEGER, expected INTEGER) ENGINE=MergeTree ORDER BY id; -INSERT INTO t_leading_zeroes VALUES (-0.02, 00000), (0, 0), (0.01, 00), (0.02, 01), (0.03, +01), (0.04, -01), (1, 0001), (2, 0005), (3, 0008), (4, 0017), (5, 0021), (6, 0051), (7, 00000123), (8, 0b10000), (9, 0x0abcd); +INSERT INTO t_leading_zeroes VALUES (1, '00000', 00000, 0), (2, '0', 0, 0), (3, '00', 00, 0), (4, '01', 01, 1), (5, '+01', +01, 1); +INSERT INTO t_leading_zeroes VALUES (6, '-01', -01, -1), (7, '0001', 0001, 1), (8, '0005', 0005, 5), (9, '0008', 0008, 8); +INSERT INTO t_leading_zeroes VALUES (10, '0017', 0017, 17), (11, '0021', 0021, 21), (12, '0051', 0051, 51), (13, '00000123', 00000123, 123); +INSERT INTO t_leading_zeroes VALUES (14, '0b10000', 0b10000, 16), (15, '0x0abcd', 0x0abcd, 43981), (16, '0000.008', 0000.008, 0) +INSERT INTO t_leading_zeroes VALUES (17, '1000.0008', 1000.0008, 1000), (18, '0008.0008', 0008.0008, 8); -SELECT * FROM t_leading_zeroes; +SELECT 'Leading zeroes into INTEGER'; +SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes t ORDER BY id; + +CREATE TABLE t_leading_zeroes_f(id INTEGER, input String, val Float32, expected Float32) ENGINE=MergeTree ORDER BY id; +INSERT INTO t_leading_zeroes_f VALUES (1, '00000', 00000, 0), (2, '00009.00009', 00009.00009, 9.00009), (3, '00009e9', 00009e9, 9e9), (4, '00009e09', 00009e09, 9e9), (5, '00009e0009', 00009e0009, 9e9); +-- Turns out this is not ok in master as well - will have a look and fix +-- (6, '00009e00009', 00009e00009, 9e9); + +SELECT 'Leading zeroes into Float32'; +SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes_f t ORDER BY id; DROP TABLE IF EXISTS t_leading_zeroes; +DROP TABLE IF EXISTS t_leading_zeroes_f; \ No newline at end of file From da10b21616121ef32a265769e28f04e9c94d8a40 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Fri, 8 Dec 2023 17:42:11 +0000 Subject: [PATCH 4/6] Some test clarification --- .../02896_leading_zeroes_no_octal.reference | 10 ++++++++++ .../0_stateless/02896_leading_zeroes_no_octal.sql | 10 ++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference index 69446796e09..5b932f50824 100644 --- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference +++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference @@ -23,3 +23,13 @@ Leading zeroes into Float32 1 3 00009e9 9000000000 9000000000 1 4 00009e09 9000000000 9000000000 1 5 00009e0009 9000000000 9000000000 +1 6 -00000 -0.1 -0.1 +1 7 -00009.00009 -9.00009 -9.00009 +1 8 -00009e9 -9000000000 -9000000000 +1 9 -00009e09 -9000000000 -9000000000 +1 10 -00009e0009 -9000000000 -9000000000 +1 11 +00000 0 0 +1 12 +00009.00009 9.00009 9.00009 +1 13 +00009e9 9000000000 9000000000 +1 14 +00009e09 9000000000 9000000000 +1 15 +00009e0009 9000000000 9000000000 diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql index a204259f3ec..69cc06a46f8 100644 --- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql +++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql @@ -1,4 +1,6 @@ DROP TABLE IF EXISTS t_leading_zeroes; +DROP TABLE IF EXISTS t_leading_zeroes_f; + CREATE TABLE t_leading_zeroes(id INTEGER, input String, val INTEGER, expected INTEGER) ENGINE=MergeTree ORDER BY id; INSERT INTO t_leading_zeroes VALUES (1, '00000', 00000, 0), (2, '0', 0, 0), (3, '00', 00, 0), (4, '01', 01, 1), (5, '+01', +01, 1); @@ -10,10 +12,14 @@ INSERT INTO t_leading_zeroes VALUES (17, '1000.0008', 1000.0008, 1000), (18, '00 SELECT 'Leading zeroes into INTEGER'; SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes t ORDER BY id; +-- Floats don't go via the weird octal path: CREATE TABLE t_leading_zeroes_f(id INTEGER, input String, val Float32, expected Float32) ENGINE=MergeTree ORDER BY id; INSERT INTO t_leading_zeroes_f VALUES (1, '00000', 00000, 0), (2, '00009.00009', 00009.00009, 9.00009), (3, '00009e9', 00009e9, 9e9), (4, '00009e09', 00009e09, 9e9), (5, '00009e0009', 00009e0009, 9e9); --- Turns out this is not ok in master as well - will have a look and fix --- (6, '00009e00009', 00009e00009, 9e9); +INSERT INTO t_leading_zeroes_f VALUES (6, '-00000', -00000.1, -0.1), (7, '-00009.00009', -00009.00009, -9.00009), (8, '-00009e9', -00009e9, -9e9), (9, '-00009e09', -00009e09, -9e9), (10, '-00009e0009', -00009e0009, -9e9); +INSERT INTO t_leading_zeroes_f VALUES (11, '+00000', +00000., 0), (12, '+00009.00009', +00009.00009, 9.00009), (13, '+00009e9', +00009e9, 9e9), (14, '+00009e09', +00009e09, 9e9), (15, '+00009e0009', +00009e0009, 9e9); +-- Coincidentally, the following result in 9 rather than 9e9 because of readFloatTextFastImpl +-- using readUIntTextUpToNSignificantDigits<4>(exponent, in) +-- INSERT INTO t_leading_zeroes_f VALUES (100, '00009e00009', 00009e00009, 9e9), (101, '-00009e00009', -00009e00009, -9e9), (102, '+00009e00009', +00009e00009, 9e9) SELECT 'Leading zeroes into Float32'; SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes_f t ORDER BY id; From f67edfb397d96fde056086f963a51fe1e0cae6a8 Mon Sep 17 00:00:00 2001 From: Joanna Hulboj Date: Mon, 8 Jan 2024 18:42:33 +0000 Subject: [PATCH 5/6] Consume leading zeroes - use find_first_not_symbols --- src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index fe11e3f6360..8c18df97c46 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -603,10 +603,8 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( memcpy(buf, istr.position(), bytes_to_copy); buf[bytes_to_copy] = 0; - /// Consume leading zeroes - we don't want any funny octal business - auto* non_zero_buf = buf; - while (*non_zero_buf == '0') - ++non_zero_buf; + /// Skip leading zeroes - we don't want any funny octal business + auto* non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy); char * pos_double = non_zero_buf; errno = 0; From 5210bbc5876aea3689a9c7c96a59c40ad1bc5c71 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jan 2024 05:13:52 +0300 Subject: [PATCH 6/6] Update ConstantExpressionTemplate.cpp --- src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 8c18df97c46..316a84fe94f 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -604,7 +604,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( buf[bytes_to_copy] = 0; /// Skip leading zeroes - we don't want any funny octal business - auto* non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy); + char * non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy); char * pos_double = non_zero_buf; errno = 0;