From 7db097a49a8bd8103196f93963bff3c6d0bd98c0 Mon Sep 17 00:00:00 2001
From: chertus <chertus@gmail.com>
Date: Mon, 17 Sep 2018 20:07:38 +0300
Subject: [PATCH] support e-notation for decimal input [issue-3135]

---
 dbms/src/IO/readFloatText.h                   | 90 +++++++++++++------
 .../00700_decimal_bounds.reference            |  2 +
 .../0_stateless/00700_decimal_bounds.sql      |  7 +-
 .../0_stateless/00700_decimal_casts.sql       | 52 +++++++++++
 .../00700_decimal_formats.reference           |  6 ++
 .../0_stateless/00700_decimal_formats.sql     |  8 +-
 6 files changed, 135 insertions(+), 30 deletions(-)
diff --git a/dbms/src/IO/readFloatText.h b/dbms/src/IO/readFloatText.h
index d91a250ac77..f8e0277521b 100644
--- a/dbms/src/IO/readFloatText.h
+++ b/dbms/src/IO/readFloatText.h
@@ -554,12 +554,15 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
 
 
 template <typename T>
-inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, unsigned int & scale, bool digits_only = false)
+inline void readDigits(ReadBuffer & buf, T & x, unsigned int & digits, int & exponent, bool digits_only = false)
 {
     x = 0;
+    exponent = 0;
+    unsigned int max_digits = digits;
+    digits = 0;
+    unsigned int places = 0;
     typename T::NativeType sign = 1;
-    bool leading_zeores = true;
-    bool trailing_zeores = false;
+    bool leading_zeroes = true;
     bool after_point = false;
 
     if (buf.eof())
@@ -578,16 +581,28 @@ inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, uns
         }
     }
 
-    while (!buf.eof())
+    bool stop = false;
+    while (!buf.eof() && !stop)
     {
         const char & byte = *buf.position();
         switch (byte)
         {
             case '.':
                 after_point = true;
-                if (scale == 0)
-                    trailing_zeores = true;
+                leading_zeroes = false;
                 break;
+            case '0':
+            {
+                if (leading_zeroes)
+                    break;
+
+                if (after_point)
+                {
+                    ++places; /// Count trailing zeroes. They would be used only if there's some other digit after them.
+                    break;
+                }
+                [[fallthrough]];
+            }
             case '1': [[fallthrough]];
             case '2': [[fallthrough]];
             case '3': [[fallthrough]];
@@ -597,40 +612,61 @@ inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, uns
             case '7': [[fallthrough]];
             case '8': [[fallthrough]];
             case '9':
-                leading_zeores = false;
-                if (trailing_zeores || precision == 0)
-                    throw Exception("Cannot read decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-                [[fallthrough]];
-            case '0':
             {
-                /// ignore leading and trailing zeroes
-                if (likely(!leading_zeores && !trailing_zeores))
-                {
-                    if (precision == 0 || precision < scale || ((precision == scale) && !after_point))
-                        throw Exception("Cannot read decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
-                    --precision;
-                    x = x * 10 + (byte - '0');
-                }
-                if (after_point && scale)
-                {
-                    --scale;
-                    if (!scale)
-                        trailing_zeores = true;
-                }
+                leading_zeroes = false;
+
+                ++places; // num zeroes before + current digit
+                if (digits + places > max_digits)
+                    throw Exception("Too many digits in decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+
+                digits += places;
+                if (after_point)
+                    exponent -= places;
+
+                // TODO: accurate shift10 for big integers
+                for (; places; --places)
+                    x *= 10;
+                x += (byte - '0');
                 break;
             }
+            case 'e': [[fallthrough]];
+            case 'E':
+            {
+                ++buf.position();
+                Int32 addition_exp = 0;
+                readIntText(addition_exp, buf);
+                exponent += addition_exp;
+                stop = true;
+                continue;
+            }
 
             default:
                 if (digits_only)
                     throw Exception("Unexpected symbol while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER);
-                x *= sign;
-                return;
+                stop = true;
+                continue;
         }
         ++buf.position();
     }
+
     x *= sign;
 }
 
+template <typename T>  /// Reads decimal text (plain or e-notation) from buf into x; scale is adjusted by the parsed exponent.
+inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, unsigned int & scale, bool digits_only = false)
+{
+    unsigned int digits = precision;  /// In: maximum number of digits readDigits may accept. Out: number of digits it counted.
+    int exponent;  /// Always assigned by readDigits: minus the fractional places it consumed, plus any value read after 'e'/'E'.
+    readDigits(buf, x, digits, exponent, digits_only);
+    /// digits + exponent is the number of digits the value has left of the decimal point; only precision - scale fit there.
+    if (static_cast<int>(digits) + exponent > static_cast<int>(precision - scale))
+        throw Exception("Decimal value is too big", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+    if (static_cast<int>(scale) + exponent < 0)  /// The text needs more fractional places than scale provides.
+        throw Exception("Decimal value is too small", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+    /// x holds the parsed digits as an integer; NOTE(review): presumably the caller multiplies x by 10^scale afterwards - confirm.
+    scale += exponent;
+}
+
 
 template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
 template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
diff --git a/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference b/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference
index 710df36ebdb..e589b6c1dfa 100644
--- a/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference
+++ b/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference
@@ -1,4 +1,5 @@
 -999999999	-999999999999999999	0	-0.999999999	0.000000000000000000	0.00000000000000000000000000000000000000	-9999.99999	0.000000000	0.000000000000000000	0
+-900000000	-900000000000000000	-90000000000000000000000000000000000000	-0.000000009	-0.000000000000000009	-0.00000000000000000000000000000000000009	0.00000	0.000000000	0.000000000000000000	0
 -1	-1	-1	-0.000000001	0.000000000000000000	0.00000000000000000000000000000000000000	-0.00001	-0.000000001	0.000000000000000000	-1
 0	0	-99999999999999999999999999999999999999	0.000000000	0.000000000000000000	0.00000000000000000000000000000000000000	0.00000	0.000000000	0.000000000000000000	0
 0	0	0	0.000000000	-0.999999999999999999	0.00000000000000000000000000000000000000	0.00000	-999999999.999999999	0.000000000000000000	0
@@ -18,4 +19,5 @@
 0	0	99999999999999999999999999999999999999	0.000000000	0.000000000000000000	0.00000000000000000000000000000000000000	0.00000	0.000000000	0.000000000000000000	0
 1	1	1	0.000000001	0.000000000000000000	0.00000000000000000000000000000000000000	0.00001	0.000000001	0.000000000000000000	1
 42	42	0	0.000000000	0.000000000000000000	0.00000000000000000000000000000000000000	0.99999	0.000000000	0.000000000000000000	0
+900000000	900000000000000000	90000000000000000000000000000000000000	0.000000009	0.000000000000000009	0.00000000000000000000000000000000000009	0.00000	0.000000000	0.000000000000000000	0
 999999999	999999999999999999	0	0.999999999	0.000000000000000000	0.00000000000000000000000000000000000000	9999.99999	0.000000000	0.000000000000000000	0
diff --git a/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql b/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql
index e4ea6eb9608..c2cceb27774 100644
--- a/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql
+++ b/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql
@@ -90,7 +90,12 @@ INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-0.0, -0.0, -0.0
 INSERT INTO test.decimal (a, b, g) VALUES ('42.00000', 42.0000000000000000000000000000000, '0.999990');
 INSERT INTO test.decimal (a) VALUES ('-9x'); -- { clientError 72 }
 INSERT INTO test.decimal (a) VALUES ('0x1'); -- { clientError 72 }
-INSERT INTO test.decimal (a) VALUES ('1e2'); -- { clientError 72 }
+
+INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('0.9e9', '0.9e18', '0.9e38', '9e-9', '9e-18', '9e-38');
+INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('-0.9e9', '-0.9e18', '-0.9e38', '-9e-9', '-9e-18', '-9e-38');
+
+INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('1e9', '1e18', '1e38', '1e-10', '1e-19', '1e-39');  -- { clientError 69 }
+INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('-1e9', '-1e18', '-1e38', '-1e-10', '-1e-19', '-1e-39');  -- { clientError 69 }
 
 SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j;
 DROP TABLE IF EXISTS test.decimal;
diff --git a/dbms/tests/queries/0_stateless/00700_decimal_casts.sql b/dbms/tests/queries/0_stateless/00700_decimal_casts.sql
index f2d0d63ffc2..111dc5fb1cb 100644
--- a/dbms/tests/queries/0_stateless/00700_decimal_casts.sql
+++ b/dbms/tests/queries/0_stateless/00700_decimal_casts.sql
@@ -5,6 +5,58 @@ SELECT toDecimal32('1.1', 1), toDecimal32('1.1', 2), toDecimal32('1.1', 8);
 SELECT toDecimal32('1.1', 0); -- { serverError 69 }
 SELECT toDecimal32(1.1, 0), toDecimal32(1.1, 1), toDecimal32(1.1, 2), toDecimal32(1.1, 8);
 
+SELECT '1000000000' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '-1000000000' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '1000000000000000000' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '-1000000000000000000' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '100000000000000000000000000000000000000' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '-100000000000000000000000000000000000000' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '1' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '-1' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '1' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '-1' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '1' AS x, toDecimal128(x, 38); -- { serverError 69 }
+SELECT '-1' AS x, toDecimal128(x, 38); -- { serverError 69 }
+
+SELECT '0.1' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '-0.1' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '0.1' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '-0.1' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '0.1' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '-0.1' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '0.0000000001' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '-0.0000000001' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '0.0000000000000000001' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '-0.0000000000000000001' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '0.000000000000000000000000000000000000001' AS x, toDecimal128(x, 38); -- { serverError 69 }
+SELECT '-0.000000000000000000000000000000000000001' AS x, toDecimal128(x, 38); -- { serverError 69 }
+
+SELECT '1e9' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '1E18' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '-1e18' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '1e38' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '-1E38' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '1e0' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '-1e-0' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '1e0' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '-1e-0' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '1e-0' AS x, toDecimal128(x, 38); -- { serverError 69 }
+SELECT '-1e0' AS x, toDecimal128(x, 38); -- { serverError 69 }
+
+SELECT '1e-1' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '-1e-1' AS x, toDecimal32(x, 0); -- { serverError 69 }
+SELECT '1e-1' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '-1e-1' AS x, toDecimal64(x, 0); -- { serverError 69 }
+SELECT '1e-1' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '-1e-1' AS x, toDecimal128(x, 0); -- { serverError 69 }
+SELECT '1e-10' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '-1e-10' AS x, toDecimal32(x, 9); -- { serverError 69 }
+SELECT '1e-19' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '-1e-19' AS x, toDecimal64(x, 18); -- { serverError 69 }
+SELECT '1e-39' AS x, toDecimal128(x, 38); -- { serverError 69 }
+SELECT '-1e-39' AS x, toDecimal128(x, 38); -- { serverError 69 }
+
 SELECT toFloat32(9999999)   as x, toDecimal32(x, 0), toDecimal32(-x, 0), toDecimal64(x, 0), toDecimal64(-x, 0);
 SELECT toFloat32(999999.9)  as x, toDecimal32(x, 1), toDecimal32(-x, 1), toDecimal64(x, 1), toDecimal64(-x, 1);
 SELECT toFloat32(99999.99)  as x, toDecimal32(x, 2), toDecimal32(-x, 2), toDecimal64(x, 2), toDecimal64(-x, 2);
diff --git a/dbms/tests/queries/0_stateless/00700_decimal_formats.reference b/dbms/tests/queries/0_stateless/00700_decimal_formats.reference
index fe36e7af689..0bea4ba27be 100644
--- a/dbms/tests/queries/0_stateless/00700_decimal_formats.reference
+++ b/dbms/tests/queries/0_stateless/00700_decimal_formats.reference
@@ -1,3 +1,4 @@
+{"a":0.055,"b":-0.000000005,"c":0.000000000000000005}
 {"a":0.100,"b":-0.100000000,"c":0.100000000000000000}
 {"a":0.200,"b":-0.200000000,"c":0.200000000000000000}
 {"a":0.300,"b":-0.300000000,"c":0.300000000000000000}
@@ -10,6 +11,8 @@
 {"a":3.300,"b":-3.300000000,"c":3.300000000000000000}
 {"a":42.000,"b":-42.000000000,"c":42.000000000000000000}
 {"a":42.420,"b":-42.420000000,"c":42.420000000000000000}
+{"a":440000.000,"b":-400000000.000000000,"c":40000000000000000000.000000000000000000}
+0.055,-0.000000005,0.000000000000000005
 0.100,-0.100000000,0.100000000000000000
 0.200,-0.200000000,0.200000000000000000
 0.300,-0.300000000,0.300000000000000000
@@ -22,6 +25,8 @@
 3.300,-3.300000000,3.300000000000000000
 42.000,-42.000000000,42.000000000000000000
 42.420,-42.420000000,42.420000000000000000
+440000.000,-400000000.000000000,40000000000000000000.000000000000000000
+0.055	-0.000000005	0.000000000000000005
 0.100	-0.100000000	0.100000000000000000
 0.200	-0.200000000	0.200000000000000000
 0.300	-0.300000000	0.300000000000000000
@@ -34,3 +39,4 @@
 3.300	-3.300000000	3.300000000000000000
 42.000	-42.000000000	42.000000000000000000
 42.420	-42.420000000	42.420000000000000000
+440000.000	-400000000.000000000	40000000000000000000.000000000000000000
diff --git a/dbms/tests/queries/0_stateless/00700_decimal_formats.sql b/dbms/tests/queries/0_stateless/00700_decimal_formats.sql
index e2979b84cfc..ba7161a8249 100644
--- a/dbms/tests/queries/0_stateless/00700_decimal_formats.sql
+++ b/dbms/tests/queries/0_stateless/00700_decimal_formats.sql
@@ -11,9 +11,9 @@ CREATE TABLE IF NOT EXISTS test.decimal
 
 INSERT INTO test.decimal (a, b, c) VALUES (42.0, -42.0, 42) (0.42, -0.42, .42) (42.42, -42.42, 42.42);
 INSERT INTO test.decimal (a, b, c) FORMAT JSONEachRow {"a":1.1, "b":-1.1, "c":1.1} {"a":1.0, "b":-1.0, "c":1} {"a":0.1, "b":-0.1, "c":.1};
-INSERT INTO test.decimal (a, b, c) FORMAT CSV 2.0, -2.0, 2
+INSERT INTO test.decimal (a, b, c) FORMAT CSV 2.0,-2.0,2
 ;
-INSERT INTO test.decimal (a, b, c) FORMAT CSV 0.2, -0.2, .2
+INSERT INTO test.decimal (a, b, c) FORMAT CSV 0.2 ,-0.2 ,.2
 ;
 INSERT INTO test.decimal (a, b, c) FORMAT CSV 2.2 , -2.2 , 2.2
 ;
@@ -23,6 +23,10 @@ INSERT INTO test.decimal (a, b, c) FORMAT TabSeparated 3.0	-3.0	3
 ;
 INSERT INTO test.decimal (a, b, c) FORMAT TabSeparated 0.3	-0.3	.3
 ;
+INSERT INTO test.decimal (a, b, c) FORMAT CSV 4.4E+5,-4E+8,.4E+20
+;
+INSERT INTO test.decimal (a, b, c) FORMAT CSV 5.5e-2, -5e-9 ,.5e-17
+;
 
 SELECT * FROM test.decimal ORDER BY a FORMAT JSONEachRow;
 SELECT * FROM test.decimal ORDER BY b DESC FORMAT CSV;