2020-06-20 17:10:16 +00:00
|
|
|
#pragma once
|
|
|
|
|
2020-08-07 11:18:41 +00:00
|
|
|
#include <limits>
|
2019-05-16 16:41:10 +00:00
|
|
|
#include <IO/ReadHelpers.h>
|
2020-06-20 14:43:01 +00:00
|
|
|
#include <Common/intExp.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/wide_integer_to_string.h>
|
2019-05-16 16:41:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes used by the decimal text readers in this header.
/// They are only declared here; the definitions live elsewhere in the project.
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND; /// Thrown when the value has too many digits / is too big.
    extern const int CANNOT_PARSE_NUMBER;   /// Thrown on a malformed exponent or an unexpected symbol.
}
|
|
|
|
|
2020-04-14 14:43:09 +00:00
|
|
|
/// Try to read Decimal into underlying type T from ReadBuffer. Throws if 'digits_only' is set and there's unexpected symbol in input.
|
2021-06-17 22:42:33 +00:00
|
|
|
/// Returns integer 'exponent' factor that x should be multiplied by to get correct Decimal value: result = x * 10^exponent.
|
2020-06-20 13:34:22 +00:00
|
|
|
/// Use 'digits' input as max allowed meaning decimal digits in result. Place actual number of meaning digits in 'digits' output.
|
2021-06-17 22:42:33 +00:00
|
|
|
/// Does not care about decimal scale, only about meaningful digits in decimal text representation.
|
2020-01-21 18:04:22 +00:00
|
|
|
template <bool _throw_on_error, typename T>
|
2020-06-20 14:00:38 +00:00
|
|
|
inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exponent, bool digits_only = false)
|
2019-05-16 16:41:10 +00:00
|
|
|
{
|
2020-08-19 11:52:17 +00:00
|
|
|
x = T(0);
|
2019-05-16 16:41:10 +00:00
|
|
|
exponent = 0;
|
2020-06-20 14:00:38 +00:00
|
|
|
uint32_t max_digits = digits;
|
2019-05-16 16:41:10 +00:00
|
|
|
digits = 0;
|
2020-06-20 14:00:38 +00:00
|
|
|
uint32_t places = 0;
|
2019-05-16 16:41:10 +00:00
|
|
|
typename T::NativeType sign = 1;
|
|
|
|
bool leading_zeroes = true;
|
|
|
|
bool after_point = false;
|
|
|
|
|
|
|
|
if (buf.eof())
|
2020-01-21 18:04:22 +00:00
|
|
|
{
|
|
|
|
if constexpr (_throw_on_error)
|
|
|
|
throwReadAfterEOF();
|
|
|
|
return false;
|
|
|
|
}
|
2019-05-16 16:41:10 +00:00
|
|
|
|
2020-04-21 20:41:52 +00:00
|
|
|
switch (*buf.position())
|
2019-05-16 16:41:10 +00:00
|
|
|
{
|
2020-04-21 20:41:52 +00:00
|
|
|
case '-':
|
|
|
|
sign = -1;
|
|
|
|
[[fallthrough]];
|
|
|
|
case '+':
|
|
|
|
++buf.position();
|
|
|
|
break;
|
2019-05-16 16:41:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool stop = false;
|
|
|
|
while (!buf.eof() && !stop)
|
|
|
|
{
|
|
|
|
const char & byte = *buf.position();
|
|
|
|
switch (byte)
|
|
|
|
{
|
|
|
|
case '.':
|
|
|
|
after_point = true;
|
|
|
|
leading_zeroes = false;
|
|
|
|
break;
|
|
|
|
case '0':
|
|
|
|
{
|
|
|
|
if (leading_zeroes)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (after_point)
|
|
|
|
{
|
|
|
|
++places; /// Count trailing zeroes. They would be used only if there's some other digit after them.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
[[fallthrough]];
|
|
|
|
}
|
|
|
|
case '1': [[fallthrough]];
|
|
|
|
case '2': [[fallthrough]];
|
|
|
|
case '3': [[fallthrough]];
|
|
|
|
case '4': [[fallthrough]];
|
|
|
|
case '5': [[fallthrough]];
|
|
|
|
case '6': [[fallthrough]];
|
|
|
|
case '7': [[fallthrough]];
|
|
|
|
case '8': [[fallthrough]];
|
|
|
|
case '9':
|
|
|
|
{
|
|
|
|
leading_zeroes = false;
|
|
|
|
|
|
|
|
++places; // num zeroes before + current digit
|
|
|
|
if (digits + places > max_digits)
|
2020-01-21 18:04:22 +00:00
|
|
|
{
|
2020-06-20 14:43:01 +00:00
|
|
|
if (after_point)
|
|
|
|
{
|
|
|
|
/// Simply cut excessive digits.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if constexpr (_throw_on_error)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Too many digits ({} > {}) in decimal value",
|
|
|
|
std::to_string(digits + places), std::to_string(max_digits));
|
2020-06-20 14:43:01 +00:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
2020-01-21 18:04:22 +00:00
|
|
|
}
|
2020-06-20 14:43:01 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
digits += places;
|
|
|
|
if (after_point)
|
|
|
|
exponent -= places;
|
2019-05-16 16:41:10 +00:00
|
|
|
|
2020-06-20 14:43:01 +00:00
|
|
|
// TODO: accurate shift10 for big integers
|
2022-10-07 10:46:45 +00:00
|
|
|
x *= intExp10OfSize<typename T::NativeType>(places);
|
2020-06-20 14:43:01 +00:00
|
|
|
places = 0;
|
2019-05-16 16:41:10 +00:00
|
|
|
|
2020-06-20 14:43:01 +00:00
|
|
|
x += (byte - '0');
|
|
|
|
break;
|
|
|
|
}
|
2019-05-16 16:41:10 +00:00
|
|
|
}
|
|
|
|
case 'e': [[fallthrough]];
|
|
|
|
case 'E':
|
|
|
|
{
|
|
|
|
++buf.position();
|
|
|
|
Int32 addition_exp = 0;
|
2020-01-21 18:04:22 +00:00
|
|
|
if (!tryReadIntText(addition_exp, buf))
|
|
|
|
{
|
|
|
|
if constexpr (_throw_on_error)
|
2023-01-24 22:21:29 +00:00
|
|
|
throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot parse exponent while reading decimal");
|
2020-01-21 18:04:22 +00:00
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
2019-05-16 16:41:10 +00:00
|
|
|
exponent += addition_exp;
|
|
|
|
stop = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
if (digits_only)
|
2020-01-21 18:04:22 +00:00
|
|
|
{
|
|
|
|
if constexpr (_throw_on_error)
|
2023-01-24 22:21:29 +00:00
|
|
|
throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected symbol while reading decimal");
|
2020-01-21 18:04:22 +00:00
|
|
|
return false;
|
|
|
|
}
|
2019-05-16 16:41:10 +00:00
|
|
|
stop = true;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
++buf.position();
|
|
|
|
}
|
|
|
|
|
|
|
|
x *= sign;
|
2020-01-21 18:04:22 +00:00
|
|
|
return true;
|
2019-05-16 16:41:10 +00:00
|
|
|
}
|
|
|
|
|
2022-10-26 19:44:17 +00:00
|
|
|
template <typename T, typename ReturnType=void>
|
|
|
|
inline ReturnType readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false)
|
2019-05-16 16:41:10 +00:00
|
|
|
{
|
2022-10-26 19:44:17 +00:00
|
|
|
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
|
|
|
|
2020-06-20 14:00:38 +00:00
|
|
|
uint32_t digits = precision;
|
|
|
|
int32_t exponent;
|
2022-10-26 19:44:17 +00:00
|
|
|
auto ok = readDigits<throw_exception>(buf, x, digits, exponent, digits_only);
|
2022-09-19 12:00:48 +00:00
|
|
|
|
2022-10-26 19:44:17 +00:00
|
|
|
if (!throw_exception && !ok)
|
|
|
|
return ReturnType(false);
|
2019-05-16 16:41:10 +00:00
|
|
|
|
2020-06-20 14:00:38 +00:00
|
|
|
if (static_cast<int32_t>(digits) + exponent > static_cast<int32_t>(precision - scale))
|
2020-08-19 11:52:17 +00:00
|
|
|
{
|
2022-10-26 19:44:17 +00:00
|
|
|
if constexpr (throw_exception)
|
2022-09-19 12:00:48 +00:00
|
|
|
{
|
2023-01-23 21:13:58 +00:00
|
|
|
static constexpr auto pattern = "Decimal value is too big: {} digits were read: {}e{}."
|
2022-09-19 12:00:48 +00:00
|
|
|
" Expected to read decimal with scale {} and precision {}";
|
2020-08-19 11:52:17 +00:00
|
|
|
|
2022-09-19 12:00:48 +00:00
|
|
|
if constexpr (is_big_int_v<typename T::NativeType>)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, pattern, digits, x.value, exponent, scale, precision);
|
2022-09-19 12:00:48 +00:00
|
|
|
else
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, pattern, digits, x, exponent, scale, precision);
|
2022-09-19 12:00:48 +00:00
|
|
|
}
|
2020-08-19 11:52:17 +00:00
|
|
|
else
|
2022-10-26 19:44:17 +00:00
|
|
|
return ReturnType(false);
|
2020-08-19 11:52:17 +00:00
|
|
|
}
|
2020-06-20 13:34:22 +00:00
|
|
|
|
2020-06-20 14:00:38 +00:00
|
|
|
if (static_cast<int32_t>(scale) + exponent < 0)
|
2020-06-20 14:43:01 +00:00
|
|
|
{
|
2020-08-02 20:19:26 +00:00
|
|
|
auto divisor_exp = -exponent - static_cast<int32_t>(scale);
|
|
|
|
|
|
|
|
if (divisor_exp >= std::numeric_limits<typename T::NativeType>::digits10)
|
2020-08-02 02:35:44 +00:00
|
|
|
{
|
|
|
|
/// Too big negative exponent
|
|
|
|
x.value = 0;
|
|
|
|
scale = 0;
|
2022-10-26 19:44:17 +00:00
|
|
|
return ReturnType(true);
|
2020-08-02 02:35:44 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/// Too many digits after point. Just cut off excessive digits.
|
2020-08-19 11:52:17 +00:00
|
|
|
auto divisor = intExp10OfSize<typename T::NativeType>(divisor_exp);
|
2020-09-04 13:33:02 +00:00
|
|
|
assert(divisor > 0); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
|
2020-08-02 02:35:44 +00:00
|
|
|
x.value /= divisor;
|
|
|
|
scale = 0;
|
2022-10-26 19:44:17 +00:00
|
|
|
return ReturnType(true);
|
2020-08-02 02:35:44 +00:00
|
|
|
}
|
2020-06-20 14:43:01 +00:00
|
|
|
}
|
2019-05-16 16:41:10 +00:00
|
|
|
|
|
|
|
scale += exponent;
|
2022-10-26 19:44:17 +00:00
|
|
|
return ReturnType(true);
|
2019-05-16 16:41:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
2020-06-20 14:00:38 +00:00
|
|
|
inline bool tryReadDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
|
2019-05-16 16:41:10 +00:00
|
|
|
{
|
2022-10-26 19:44:17 +00:00
|
|
|
return readDecimalText<T, bool>(buf, x, precision, scale, true);
|
2019-05-16 16:41:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
2020-06-20 14:00:38 +00:00
|
|
|
inline void readCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
|
2019-05-16 16:41:10 +00:00
|
|
|
{
|
|
|
|
if (buf.eof())
|
|
|
|
throwReadAfterEOF();
|
|
|
|
|
|
|
|
char maybe_quote = *buf.position();
|
|
|
|
|
|
|
|
if (maybe_quote == '\'' || maybe_quote == '\"')
|
|
|
|
++buf.position();
|
|
|
|
|
|
|
|
readDecimalText(buf, x, precision, scale, false);
|
|
|
|
|
|
|
|
if (maybe_quote == '\'' || maybe_quote == '\"')
|
|
|
|
assertChar(maybe_quote, buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|