From 7db097a49a8bd8103196f93963bff3c6d0bd98c0 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 17 Sep 2018 20:07:38 +0300 Subject: [PATCH] support e-notation for decimal input [issue-3135] --- dbms/src/IO/readFloatText.h | 90 +++++++++++++------ .../00700_decimal_bounds.reference | 2 + .../0_stateless/00700_decimal_bounds.sql | 7 +- .../0_stateless/00700_decimal_casts.sql | 52 +++++++++++ .../00700_decimal_formats.reference | 6 ++ .../0_stateless/00700_decimal_formats.sql | 8 +- 6 files changed, 135 insertions(+), 30 deletions(-) diff --git a/dbms/src/IO/readFloatText.h b/dbms/src/IO/readFloatText.h index d91a250ac77..f8e0277521b 100644 --- a/dbms/src/IO/readFloatText.h +++ b/dbms/src/IO/readFloatText.h @@ -554,12 +554,15 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf) template <typename T> -inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, unsigned int & scale, bool digits_only = false) +inline void readDigits(ReadBuffer & buf, T & x, unsigned int & digits, int & exponent, bool digits_only = false) { x = 0; + exponent = 0; + unsigned int max_digits = digits; + digits = 0; + unsigned int places = 0; typename T::NativeType sign = 1; - bool leading_zeores = true; - bool trailing_zeores = false; + bool leading_zeroes = true; bool after_point = false; if (buf.eof()) @@ -578,16 +581,28 @@ inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, uns } } - while (!buf.eof()) + bool stop = false; + while (!buf.eof() && !stop) { const char & byte = *buf.position(); switch (byte) { case '.': after_point = true; - if (scale == 0) - trailing_zeores = true; + leading_zeroes = false; break; + case '0': + { + if (leading_zeroes) + break; + + if (after_point) + { + ++places; /// Count trailing zeroes. They would be used only if there's some other digit after them. 
+ break; + } + [[fallthrough]]; + } case '1': [[fallthrough]]; case '2': [[fallthrough]]; case '3': [[fallthrough]]; @@ -597,40 +612,61 @@ inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, uns case '7': [[fallthrough]]; case '8': [[fallthrough]]; case '9': - leading_zeores = false; - if (trailing_zeores || precision == 0) - throw Exception("Cannot read decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - [[fallthrough]]; - case '0': { - /// ignore leading and trailing zeroes - if (likely(!leading_zeores && !trailing_zeores)) - { - if (precision == 0 || precision < scale || ((precision == scale) && !after_point)) - throw Exception("Cannot read decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - --precision; - x = x * 10 + (byte - '0'); - } - if (after_point && scale) - { - --scale; - if (!scale) - trailing_zeores = true; - } + leading_zeroes = false; + + ++places; // num zeroes before + current digit + if (digits + places > max_digits) + throw Exception("Too many digits in decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + digits += places; + if (after_point) + exponent -= places; + + // TODO: accurate shift10 for big integers + for (; places; --places) + x *= 10; + x += (byte - '0'); break; } + case 'e': [[fallthrough]]; + case 'E': + { + ++buf.position(); + Int32 addition_exp = 0; + readIntText(addition_exp, buf); + exponent += addition_exp; + stop = true; + continue; + } default: if (digits_only) throw Exception("Unexpected symbol while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); - x *= sign; - return; + stop = true; + continue; } ++buf.position(); } + x *= sign; } +template <typename T> +inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, unsigned int & scale, bool digits_only = false) +{ + unsigned int digits = precision; + int exponent; + readDigits(buf, x, digits, exponent, digits_only); + + if (static_cast<int>(digits) + exponent > static_cast<int>(precision - scale)) + throw Exception("Decimal value is too big", 
ErrorCodes::ARGUMENT_OUT_OF_BOUND); + if (static_cast<int>(scale) + exponent < 0) + throw Exception("Decimal value is too small", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + scale += exponent; +} + template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); } template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); } diff --git a/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference b/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference index 710df36ebdb..e589b6c1dfa 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_bounds.reference @@ -1,4 +1,5 @@ -999999999 -999999999999999999 0 -0.999999999 0.000000000000000000 0.00000000000000000000000000000000000000 -9999.99999 0.000000000 0.000000000000000000 0 +-900000000 -900000000000000000 -90000000000000000000000000000000000000 -0.000000009 -0.000000000000000009 -0.00000000000000000000000000000000000009 0.00000 0.000000000 0.000000000000000000 0 -1 -1 -1 -0.000000001 0.000000000000000000 0.00000000000000000000000000000000000000 -0.00001 -0.000000001 0.000000000000000000 -1 0 0 -99999999999999999999999999999999999999 0.000000000 0.000000000000000000 0.00000000000000000000000000000000000000 0.00000 0.000000000 0.000000000000000000 0 0 0 0 0.000000000 -0.999999999999999999 0.00000000000000000000000000000000000000 0.00000 -999999999.999999999 0.000000000000000000 0 @@ -18,4 +19,5 @@ 0 0 99999999999999999999999999999999999999 0.000000000 0.000000000000000000 0.00000000000000000000000000000000000000 0.00000 0.000000000 0.000000000000000000 0 1 1 1 0.000000001 0.000000000000000000 0.00000000000000000000000000000000000000 0.00001 0.000000001 0.000000000000000000 1 42 42 0 0.000000000 0.000000000000000000 0.00000000000000000000000000000000000000 0.99999 0.000000000 0.000000000000000000 0 +900000000 900000000000000000 90000000000000000000000000000000000000 0.000000009 
0.000000000000000009 0.00000000000000000000000000000000000009 0.00000 0.000000000 0.000000000000000000 0 999999999 999999999999999999 0 0.999999999 0.000000000000000000 0.00000000000000000000000000000000000000 9999.99999 0.000000000 0.000000000000000000 0 diff --git a/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql b/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql index e4ea6eb9608..c2cceb27774 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_bounds.sql @@ -90,7 +90,12 @@ INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-0.0, -0.0, -0.0 INSERT INTO test.decimal (a, b, g) VALUES ('42.00000', 42.0000000000000000000000000000000, '0.999990'); INSERT INTO test.decimal (a) VALUES ('-9x'); -- { clientError 72 } INSERT INTO test.decimal (a) VALUES ('0x1'); -- { clientError 72 } -INSERT INTO test.decimal (a) VALUES ('1e2'); -- { clientError 72 } + +INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('0.9e9', '0.9e18', '0.9e38', '9e-9', '9e-18', '9e-38'); +INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('-0.9e9', '-0.9e18', '-0.9e38', '-9e-9', '-9e-18', '-9e-38'); + +INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('1e9', '1e18', '1e38', '1e-10', '1e-19', '1e-39'); -- { clientError 69 } +INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('-1e9', '-1e18', '-1e38', '-1e-10', '-1e-19', '-1e-39'); -- { clientError 69 } SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j; DROP TABLE IF EXISTS test.decimal; diff --git a/dbms/tests/queries/0_stateless/00700_decimal_casts.sql b/dbms/tests/queries/0_stateless/00700_decimal_casts.sql index f2d0d63ffc2..111dc5fb1cb 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_casts.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_casts.sql @@ -5,6 +5,58 @@ SELECT toDecimal32('1.1', 1), toDecimal32('1.1', 2), toDecimal32('1.1', 8); SELECT toDecimal32('1.1', 0); -- { serverError 69 } SELECT toDecimal32(1.1, 0), 
toDecimal32(1.1, 1), toDecimal32(1.1, 2), toDecimal32(1.1, 8); +SELECT '1000000000' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '-1000000000' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '1000000000000000000' AS x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '-1000000000000000000' AS x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '100000000000000000000000000000000000000' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '-100000000000000000000000000000000000000' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '1' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '-1' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '1' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '-1' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '1' AS x, toDecimal128(x, 38); -- { serverError 69 } +SELECT '-1' AS x, toDecimal128(x, 38); -- { serverError 69 } + +SELECT '0.1' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '-0.1' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '0.1' AS x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '-0.1' AS x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '0.1' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '-0.1' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '0.0000000001' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '-0.0000000001' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '0.0000000000000000001' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '-0.0000000000000000001' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '0.000000000000000000000000000000000000001' AS x, toDecimal128(x, 38); -- { serverError 69 } +SELECT '-0.000000000000000000000000000000000000001' AS x, toDecimal128(x, 38); -- { serverError 69 } + +SELECT '1e9' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '1E18' AS x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '-1e18' AS 
x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '1e38' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '-1E38' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '1e0' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '-1e-0' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '1e0' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '-1e-0' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '1e-0' AS x, toDecimal128(x, 38); -- { serverError 69 } +SELECT '-1e0' AS x, toDecimal128(x, 38); -- { serverError 69 } + +SELECT '1e-1' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '-1e-1' AS x, toDecimal32(x, 0); -- { serverError 69 } +SELECT '1e-1' AS x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '-1e-1' AS x, toDecimal64(x, 0); -- { serverError 69 } +SELECT '1e-1' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '-1e-1' AS x, toDecimal128(x, 0); -- { serverError 69 } +SELECT '1e-10' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '-1e-10' AS x, toDecimal32(x, 9); -- { serverError 69 } +SELECT '1e-19' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '-1e-19' AS x, toDecimal64(x, 18); -- { serverError 69 } +SELECT '1e-39' AS x, toDecimal128(x, 38); -- { serverError 69 } +SELECT '-1e-39' AS x, toDecimal128(x, 38); -- { serverError 69 } + SELECT toFloat32(9999999) as x, toDecimal32(x, 0), toDecimal32(-x, 0), toDecimal64(x, 0), toDecimal64(-x, 0); SELECT toFloat32(999999.9) as x, toDecimal32(x, 1), toDecimal32(-x, 1), toDecimal64(x, 1), toDecimal64(-x, 1); SELECT toFloat32(99999.99) as x, toDecimal32(x, 2), toDecimal32(-x, 2), toDecimal64(x, 2), toDecimal64(-x, 2); diff --git a/dbms/tests/queries/0_stateless/00700_decimal_formats.reference b/dbms/tests/queries/0_stateless/00700_decimal_formats.reference index fe36e7af689..0bea4ba27be 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_formats.reference +++ b/dbms/tests/queries/0_stateless/00700_decimal_formats.reference @@ -1,3 +1,4 @@ 
+{"a":0.055,"b":-0.000000005,"c":0.000000000000000005} {"a":0.100,"b":-0.100000000,"c":0.100000000000000000} {"a":0.200,"b":-0.200000000,"c":0.200000000000000000} {"a":0.300,"b":-0.300000000,"c":0.300000000000000000} @@ -10,6 +11,8 @@ {"a":3.300,"b":-3.300000000,"c":3.300000000000000000} {"a":42.000,"b":-42.000000000,"c":42.000000000000000000} {"a":42.420,"b":-42.420000000,"c":42.420000000000000000} +{"a":440000.000,"b":-400000000.000000000,"c":40000000000000000000.000000000000000000} +0.055,-0.000000005,0.000000000000000005 0.100,-0.100000000,0.100000000000000000 0.200,-0.200000000,0.200000000000000000 0.300,-0.300000000,0.300000000000000000 @@ -22,6 +25,8 @@ 3.300,-3.300000000,3.300000000000000000 42.000,-42.000000000,42.000000000000000000 42.420,-42.420000000,42.420000000000000000 +440000.000,-400000000.000000000,40000000000000000000.000000000000000000 +0.055 -0.000000005 0.000000000000000005 0.100 -0.100000000 0.100000000000000000 0.200 -0.200000000 0.200000000000000000 0.300 -0.300000000 0.300000000000000000 @@ -34,3 +39,4 @@ 3.300 -3.300000000 3.300000000000000000 42.000 -42.000000000 42.000000000000000000 42.420 -42.420000000 42.420000000000000000 +440000.000 -400000000.000000000 40000000000000000000.000000000000000000 diff --git a/dbms/tests/queries/0_stateless/00700_decimal_formats.sql b/dbms/tests/queries/0_stateless/00700_decimal_formats.sql index e2979b84cfc..ba7161a8249 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_formats.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_formats.sql @@ -11,9 +11,9 @@ CREATE TABLE IF NOT EXISTS test.decimal INSERT INTO test.decimal (a, b, c) VALUES (42.0, -42.0, 42) (0.42, -0.42, .42) (42.42, -42.42, 42.42); INSERT INTO test.decimal (a, b, c) FORMAT JSONEachRow {"a":1.1, "b":-1.1, "c":1.1} {"a":1.0, "b":-1.0, "c":1} {"a":0.1, "b":-0.1, "c":.1}; -INSERT INTO test.decimal (a, b, c) FORMAT CSV 2.0, -2.0, 2 +INSERT INTO test.decimal (a, b, c) FORMAT CSV 2.0,-2.0,2 ; -INSERT INTO test.decimal (a, b, c) 
FORMAT CSV 0.2, -0.2, .2 +INSERT INTO test.decimal (a, b, c) FORMAT CSV 0.2 ,-0.2 ,.2 ; INSERT INTO test.decimal (a, b, c) FORMAT CSV 2.2 , -2.2 , 2.2 ; @@ -23,6 +23,10 @@ INSERT INTO test.decimal (a, b, c) FORMAT TabSeparated 3.0 -3.0 3 ; INSERT INTO test.decimal (a, b, c) FORMAT TabSeparated 0.3 -0.3 .3 ; +INSERT INTO test.decimal (a, b, c) FORMAT CSV 4.4E+5,-4E+8,.4E+20 +; +INSERT INTO test.decimal (a, b, c) FORMAT CSV 5.5e-2, -5e-9 ,.5e-17 +; SELECT * FROM test.decimal ORDER BY a FORMAT JSONEachRow; SELECT * FROM test.decimal ORDER BY b DESC FORMAT CSV;