From 7e3587034d17dd75838110bb3236816161bcd3f7 Mon Sep 17 00:00:00 2001
From: Joanna Hulboj <joanna.hulboj@gmail.com>
Date: Thu, 12 Oct 2023 19:49:06 +0100
Subject: [PATCH 1/6] Consume leading zeroes when parsing a number in
 ConstantExpressionTemplate

---
 .../Formats/Impl/ConstantExpressionTemplate.cpp   |  4 ++++
 .../02896_leading_zeroes_no_octal.reference       | 15 +++++++++++++++
 .../0_stateless/02896_leading_zeroes_no_octal.sql |  8 ++++++++
 3 files changed, 27 insertions(+)
 create mode 100644 tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
 create mode 100644 tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql

diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
index fe6fb42d0a0..c2b0ce3e486 100644
--- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
+++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
@@ -597,6 +597,10 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
             if (negative || *istr.position() == '+')
                 ++istr.position();
 
+            /// Consume leading zeroes - we don't want any funny octal business
+            while (*istr.position() == '0')
+                ++istr.position();
+
             static constexpr size_t MAX_LENGTH_OF_NUMBER = 319;
             char buf[MAX_LENGTH_OF_NUMBER + 1];
             size_t bytes_to_copy = std::min(istr.available(), MAX_LENGTH_OF_NUMBER);
diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
new file mode 100644
index 00000000000..5982c315878
--- /dev/null
+++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
@@ -0,0 +1,15 @@
+-0.02	0
+0	0
+0.01	0
+0.02	1
+0.03	1
+0.04	-1
+1	1
+2	5
+3	8
+4	17
+5	21
+6	51
+7	123
+8	16
+9	43981
diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
new file mode 100644
index 00000000000..fbb931b4c88
--- /dev/null
+++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
@@ -0,0 +1,8 @@
+DROP TABLE IF EXISTS t_leading_zeroes;
+CREATE TABLE t_leading_zeroes(ref Float64, val INTEGER) ENGINE=MergeTree ORDER BY ref;
+
+INSERT INTO t_leading_zeroes VALUES (-0.02, 00000), (0, 0), (0.01, 00), (0.02, 01), (0.03, +01), (0.04, -01), (1, 0001), (2, 0005), (3, 0008), (4, 0017), (5, 0021), (6, 0051), (7, 00000123), (8, 0b10000), (9, 0x0abcd);
+
+SELECT * FROM t_leading_zeroes;
+
+DROP TABLE IF EXISTS t_leading_zeroes;

From 6ad027ed7a5d6377f36d60f103159a9f78c0e301 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 30 Oct 2023 05:32:36 +0300
Subject: [PATCH 2/6] Update ConstantExpressionTemplate.cpp

---
 src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
index c2b0ce3e486..421d41f93c9 100644
--- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
+++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
@@ -598,7 +598,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
                 ++istr.position();
 
             /// Consume leading zeroes - we don't want any funny octal business
-            while (*istr.position() == '0')
+            while (!istr.eof() && *istr.position() == '0')
                 ++istr.position();
 
             static constexpr size_t MAX_LENGTH_OF_NUMBER = 319;

From 0aeb9fba192ed9678b102cbbe2a7a9578d59a689 Mon Sep 17 00:00:00 2001
From: Joanna Hulboj <joanna.hulboj@gmail.com>
Date: Tue, 5 Dec 2023 21:55:43 +0000
Subject: [PATCH 3/6] Move consuming of leading zeros from ReadBuffer to char
 buffer

---
 .../Impl/ConstantExpressionTemplate.cpp       | 19 ++++-----
 .../02896_leading_zeroes_no_octal.reference   | 40 ++++++++++++-------
 .../02896_leading_zeroes_no_octal.sql         | 20 ++++++++--
 3 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
index 1c009252183..fe11e3f6360 100644
--- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
+++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
@@ -597,28 +597,29 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
             if (negative || *istr.position() == '+')
                 ++istr.position();
 
-            /// Consume leading zeroes - we don't want any funny octal business
-            while (!istr.eof() && *istr.position() == '0')
-                ++istr.position();
-
             static constexpr size_t MAX_LENGTH_OF_NUMBER = 319;
             char buf[MAX_LENGTH_OF_NUMBER + 1];
             size_t bytes_to_copy = std::min(istr.available(), MAX_LENGTH_OF_NUMBER);
             memcpy(buf, istr.position(), bytes_to_copy);
             buf[bytes_to_copy] = 0;
 
-            char * pos_double = buf;
+            /// Consume leading zeroes - we don't want any funny octal business
+            auto* non_zero_buf = buf;
+            while (*non_zero_buf == '0')
+                ++non_zero_buf;
+
+            char * pos_double = non_zero_buf;
             errno = 0;
-            Float64 float_value = std::strtod(buf, &pos_double);
-            if (pos_double == buf || errno == ERANGE || float_value < 0)
+            Float64 float_value = std::strtod(non_zero_buf, &pos_double);
+            if (pos_double == non_zero_buf || errno == ERANGE || float_value < 0)
                 return false;
 
             if (negative)
                 float_value = -float_value;
 
-            char * pos_integer = buf;
+            char * pos_integer = non_zero_buf;
             errno = 0;
-            UInt64 uint_value = std::strtoull(buf, &pos_integer, 0);
+            UInt64 uint_value = std::strtoull(non_zero_buf, &pos_integer, 0);
             if (pos_integer == pos_double && errno != ERANGE && (!negative || uint_value <= (1ULL << 63)))
             {
                 istr.position() += pos_integer - buf;
diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
index 5982c315878..69446796e09 100644
--- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
+++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
@@ -1,15 +1,25 @@
--0.02	0
-0	0
-0.01	0
-0.02	1
-0.03	1
-0.04	-1
-1	1
-2	5
-3	8
-4	17
-5	21
-6	51
-7	123
-8	16
-9	43981
+Leading zeroes into INTEGER
+1	1	00000	0	0
+1	2	0	0	0
+1	3	00	0	0
+1	4	01	1	1
+1	5	+01	1	1
+1	6	-01	-1	-1
+1	7	0001	1	1
+1	8	0005	5	5
+1	9	0008	8	8
+1	10	0017	17	17
+1	11	0021	21	21
+1	12	0051	51	51
+1	13	00000123	123	123
+1	14	0b10000	16	16
+1	15	0x0abcd	43981	43981
+1	16	0000.008	0	0
+1	17	1000.0008	1000	1000
+1	18	0008.0008	8	8
+Leading zeroes into Float32
+1	1	00000	0	0
+1	2	00009.00009	9.00009	9.00009
+1	3	00009e9	9000000000	9000000000
+1	4	00009e09	9000000000	9000000000
+1	5	00009e0009	9000000000	9000000000
diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
index fbb931b4c88..a204259f3ec 100644
--- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
+++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
@@ -1,8 +1,22 @@
 DROP TABLE IF EXISTS t_leading_zeroes;
-CREATE TABLE t_leading_zeroes(ref Float64, val INTEGER) ENGINE=MergeTree ORDER BY ref;
+CREATE TABLE t_leading_zeroes(id INTEGER, input String, val INTEGER, expected INTEGER) ENGINE=MergeTree ORDER BY id;
 
-INSERT INTO t_leading_zeroes VALUES (-0.02, 00000), (0, 0), (0.01, 00), (0.02, 01), (0.03, +01), (0.04, -01), (1, 0001), (2, 0005), (3, 0008), (4, 0017), (5, 0021), (6, 0051), (7, 00000123), (8, 0b10000), (9, 0x0abcd);
+INSERT INTO t_leading_zeroes VALUES (1, '00000', 00000, 0), (2, '0', 0, 0), (3, '00', 00, 0), (4, '01', 01, 1), (5, '+01', +01, 1);
+INSERT INTO t_leading_zeroes VALUES (6, '-01', -01, -1), (7, '0001', 0001, 1), (8, '0005', 0005, 5), (9, '0008', 0008, 8);
+INSERT INTO t_leading_zeroes VALUES (10, '0017', 0017, 17), (11, '0021', 0021, 21), (12, '0051', 0051, 51), (13, '00000123', 00000123, 123);
+INSERT INTO t_leading_zeroes VALUES (14, '0b10000', 0b10000, 16), (15, '0x0abcd', 0x0abcd, 43981), (16, '0000.008', 0000.008, 0)
+INSERT INTO t_leading_zeroes VALUES (17, '1000.0008', 1000.0008, 1000), (18, '0008.0008', 0008.0008, 8);
 
-SELECT * FROM t_leading_zeroes;
+SELECT 'Leading zeroes into INTEGER';
+SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes t ORDER BY id;
+
+CREATE TABLE t_leading_zeroes_f(id INTEGER, input String, val Float32, expected Float32) ENGINE=MergeTree ORDER BY id;
+INSERT INTO t_leading_zeroes_f VALUES (1, '00000', 00000, 0), (2, '00009.00009', 00009.00009, 9.00009), (3, '00009e9', 00009e9, 9e9), (4, '00009e09', 00009e09, 9e9), (5, '00009e0009', 00009e0009, 9e9);
+-- Turns out this is not ok in master as well - will have a look and fix
+--  (6, '00009e00009', 00009e00009, 9e9);
+
+SELECT 'Leading zeroes into Float32';
+SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes_f t ORDER BY id;
 
 DROP TABLE IF EXISTS t_leading_zeroes;
+DROP TABLE IF EXISTS t_leading_zeroes_f;
\ No newline at end of file

From da10b21616121ef32a265769e28f04e9c94d8a40 Mon Sep 17 00:00:00 2001
From: Joanna Hulboj <joanna.hulboj@gmail.com>
Date: Fri, 8 Dec 2023 17:42:11 +0000
Subject: [PATCH 4/6] Some test clarification

---
 .../02896_leading_zeroes_no_octal.reference            | 10 ++++++++++
 .../0_stateless/02896_leading_zeroes_no_octal.sql      | 10 ++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
index 69446796e09..5b932f50824 100644
--- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
+++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference
@@ -23,3 +23,13 @@ Leading zeroes into Float32
 1	3	00009e9	9000000000	9000000000
 1	4	00009e09	9000000000	9000000000
 1	5	00009e0009	9000000000	9000000000
+1	6	-00000	-0.1	-0.1
+1	7	-00009.00009	-9.00009	-9.00009
+1	8	-00009e9	-9000000000	-9000000000
+1	9	-00009e09	-9000000000	-9000000000
+1	10	-00009e0009	-9000000000	-9000000000
+1	11	+00000	0	0
+1	12	+00009.00009	9.00009	9.00009
+1	13	+00009e9	9000000000	9000000000
+1	14	+00009e09	9000000000	9000000000
+1	15	+00009e0009	9000000000	9000000000
diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
index a204259f3ec..69cc06a46f8 100644
--- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
+++ b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql
@@ -1,4 +1,6 @@
 DROP TABLE IF EXISTS t_leading_zeroes;
+DROP TABLE IF EXISTS t_leading_zeroes_f;
+
 CREATE TABLE t_leading_zeroes(id INTEGER, input String, val INTEGER, expected INTEGER) ENGINE=MergeTree ORDER BY id;
 
 INSERT INTO t_leading_zeroes VALUES (1, '00000', 00000, 0), (2, '0', 0, 0), (3, '00', 00, 0), (4, '01', 01, 1), (5, '+01', +01, 1);
@@ -10,10 +12,14 @@ INSERT INTO t_leading_zeroes VALUES (17, '1000.0008', 1000.0008, 1000), (18, '00
 SELECT 'Leading zeroes into INTEGER';
 SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes t ORDER BY id;
 
+-- Floats don't go via the weird octal path:
 CREATE TABLE t_leading_zeroes_f(id INTEGER, input String, val Float32, expected Float32) ENGINE=MergeTree ORDER BY id;
 INSERT INTO t_leading_zeroes_f VALUES (1, '00000', 00000, 0), (2, '00009.00009', 00009.00009, 9.00009), (3, '00009e9', 00009e9, 9e9), (4, '00009e09', 00009e09, 9e9), (5, '00009e0009', 00009e0009, 9e9);
--- Turns out this is not ok in master as well - will have a look and fix
---  (6, '00009e00009', 00009e00009, 9e9);
+INSERT INTO t_leading_zeroes_f VALUES (6, '-00000', -00000.1, -0.1), (7, '-00009.00009', -00009.00009, -9.00009), (8, '-00009e9', -00009e9, -9e9), (9, '-00009e09', -00009e09, -9e9), (10, '-00009e0009', -00009e0009, -9e9);
+INSERT INTO t_leading_zeroes_f VALUES (11, '+00000', +00000., 0), (12, '+00009.00009', +00009.00009, 9.00009), (13, '+00009e9', +00009e9, 9e9), (14, '+00009e09', +00009e09, 9e9), (15, '+00009e0009', +00009e0009, 9e9);
+-- Coincidentally, the following result in 9 rather than 9e9 because of readFloatTextFastImpl
+-- using readUIntTextUpToNSignificantDigits<4>(exponent, in)
+-- INSERT INTO t_leading_zeroes_f VALUES (100, '00009e00009', 00009e00009, 9e9), (101, '-00009e00009', -00009e00009, -9e9), (102, '+00009e00009', +00009e00009, 9e9)
 
 SELECT 'Leading zeroes into Float32';
 SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes_f t ORDER BY id;

From f67edfb397d96fde056086f963a51fe1e0cae6a8 Mon Sep 17 00:00:00 2001
From: Joanna Hulboj <joanna.hulboj@gmail.com>
Date: Mon, 8 Jan 2024 18:42:33 +0000
Subject: [PATCH 5/6] Consume leading zeroes - use find_first_not_symbols

---
 src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
index fe11e3f6360..8c18df97c46 100644
--- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
+++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
@@ -603,10 +603,8 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
             memcpy(buf, istr.position(), bytes_to_copy);
             buf[bytes_to_copy] = 0;
 
-            /// Consume leading zeroes - we don't want any funny octal business
-            auto* non_zero_buf = buf;
-            while (*non_zero_buf == '0')
-                ++non_zero_buf;
+            /// Skip leading zeroes - we don't want any funny octal business
+            auto* non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy);
 
             char * pos_double = non_zero_buf;
             errno = 0;

From 5210bbc5876aea3689a9c7c96a59c40ad1bc5c71 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 9 Jan 2024 05:13:52 +0300
Subject: [PATCH 6/6] Update ConstantExpressionTemplate.cpp

---
 src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
index 8c18df97c46..316a84fe94f 100644
--- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
+++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp
@@ -604,7 +604,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(
             buf[bytes_to_copy] = 0;
 
             /// Skip leading zeroes - we don't want any funny octal business
-            auto* non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy);
+            char * non_zero_buf = find_first_not_symbols<'0'>(buf, buf + bytes_to_copy);
 
             char * pos_double = non_zero_buf;
             errno = 0;