ClickHouse/src/IO/readDecimalText.h

#pragma once

#include <limits>
#include <IO/ReadHelpers.h>
#include <Common/intExp.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int CANNOT_PARSE_NUMBER;
    extern const int ARGUMENT_OUT_OF_BOUND;
}

/// Try to read Decimal into underlying type T from ReadBuffer. Throws if 'digits_only' is set and there's unexpected symbol in input.
/// Accepts an optional leading '+' or '-', decimal digits, '.' and an optional 'e' / 'E' followed by an integer exponent.
/// Returns integer 'exponent' factor that x should be multiplied by to get correct Decimal value: result = x * 10^exponent.
/// Use 'digits' input as max allowed meaning decimal digits in result. Place actual number of meaning digits in 'digits' output.
/// Do not care about decimal scale, only about meaning digits in decimal text representation.
///
/// Excessive digits after the point are silently dropped; excessive digits before the point are an error.
/// If 'digits_only' is not set, parsing stops at the first unexpected symbol and that symbol is left unread in the buffer.
/// Errors (including an empty buffer) are reported by exception when '_throw_on_error' is true and by returning false otherwise.
/// The resulting 'exponent' is saturated to [-INT32_MAX, INT32_MAX]: it never wraps around and can always be negated by the caller.
template <bool _throw_on_error, typename T>
inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exponent, bool digits_only = false)
{
    x = T(0);
    exponent = 0;
    uint32_t max_digits = digits;
    digits = 0;
    uint32_t places = 0; /// Digits seen but not yet applied to x (pending zeroes after the point + current digit).
    typename T::NativeType sign = 1;
    bool leading_zeroes = true;
    bool after_point = false;

    if (buf.eof())
    {
        if constexpr (_throw_on_error)
            throwReadAfterEOF();
        return false;
    }

    /// Optional sign. Any other first symbol is handled by the main loop below.
    switch (*buf.position())
    {
        case '-':
            sign = -1;
            [[fallthrough]];
        case '+':
            ++buf.position();
            break;
    }

    bool stop = false;
    while (!buf.eof() && !stop)
    {
        const char & byte = *buf.position();
        switch (byte)
        {
            case '.':
                after_point = true;
                leading_zeroes = false;
                break;
            case '0':
            {
                /// Zeroes before the first meaning digit (and before the point) carry no information.
                if (leading_zeroes)
                    break;

                if (after_point)
                {
                    ++places; /// Count trailing zeroes. They would be used only if there's some other digit after them.
                    break;
                }
                [[fallthrough]];
            }
            case '1': [[fallthrough]];
            case '2': [[fallthrough]];
            case '3': [[fallthrough]];
            case '4': [[fallthrough]];
            case '5': [[fallthrough]];
            case '6': [[fallthrough]];
            case '7': [[fallthrough]];
            case '8': [[fallthrough]];
            case '9':
            {
                leading_zeroes = false;

                ++places; // num zeroes before + current digit
                if (digits + places > max_digits)
                {
                    if (after_point)
                    {
                        /// Simply cut excessive digits.
                        break;
                    }
                    else
                    {
                        if constexpr (_throw_on_error)
                            throw Exception("Too many digits (" + std::to_string(digits + places) + " > " + std::to_string(max_digits)
                                + ") in decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

                        return false;
                    }
                }
                else
                {
                    digits += places;
                    if (after_point)
                        exponent -= places;

                    // TODO: accurate shift10 for big integers
                    x *= intExp10OfSize<T>(places);
                    places = 0;

                    x += (byte - '0');
                    break;
                }
            }
            case 'e': [[fallthrough]];
            case 'E':
            {
                ++buf.position();
                Int32 addition_exp = 0;
                if (!tryReadIntText(addition_exp, buf))
                {
                    if constexpr (_throw_on_error)
                        throw Exception("Cannot parse exponent while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER);
                    else
                        return false;
                }

                /// The textual exponent comes from the input and may be close to the int32 limits,
                /// so adding it to the accumulated exponent in 32 bits could overflow (undefined behaviour).
                /// Accumulate in 64 bits and saturate symmetrically, so that the result can also be safely negated.
                constexpr int64_t exponent_limit = std::numeric_limits<int32_t>::max();
                const int64_t total_exponent = static_cast<int64_t>(exponent) + static_cast<int64_t>(addition_exp);
                if (total_exponent > exponent_limit)
                    exponent = static_cast<int32_t>(exponent_limit);
                else if (total_exponent < -exponent_limit)
                    exponent = static_cast<int32_t>(-exponent_limit);
                else
                    exponent = static_cast<int32_t>(total_exponent);

                stop = true;
                continue;
            }

            default:
                if (digits_only)
                {
                    if constexpr (_throw_on_error)
                        throw Exception("Unexpected symbol while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER);
                    return false;
                }
                /// Leave the unexpected symbol in the buffer for the caller.
                stop = true;
                continue;
        }
        ++buf.position();
    }

    x *= sign;
    return true;
}

/// Read Decimal text from 'buf' into 'x', throwing on any parse error (see readDigits<true>).
/// 'precision' and the input value of 'scale' describe the target Decimal type.
/// Throws ARGUMENT_OUT_OF_BOUND if the value has more digits before the point than 'precision - scale' allows.
/// On return 'scale' holds the power of ten that x still has to be multiplied by to be expressed with the requested scale.
/// If the text has more digits after the point than the requested scale, the excessive digits are cut off
/// (x is divided by the corresponding power of ten, or set to zero if that power does not fit the native type) and 'scale' is set to 0.
template <typename T>
inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false)
{
    uint32_t digits = precision;
    int32_t exponent;
    readDigits<true>(buf, x, digits, exponent, digits_only);

    /// The exponent comes from the input text and may be anywhere in the int32 range.
    /// All comparisons are done in 64 bits, so that neither the sums nor the negation below can overflow.
    const int64_t wide_exponent = exponent;
    const int64_t wide_scale = scale;
    const int64_t max_integer_digits = static_cast<int64_t>(precision) - wide_scale;

    if (static_cast<int64_t>(digits) + wide_exponent > max_integer_digits)
    {
        static constexpr const char * pattern =
            "Decimal value is too big: {} digits were read: {}e{}."
            " Expected to read decimal with scale {} and precision {}";

        if constexpr (is_big_int_v<typename T::NativeType>)
            throw Exception(fmt::format(pattern, digits, x.value.str(), exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
        else
            throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
    }

    if (wide_scale + wide_exponent < 0)
    {
        const int64_t divisor_exp = -wide_exponent - wide_scale;

        if (divisor_exp >= std::numeric_limits<typename T::NativeType>::digits10)
        {
            /// Too big negative exponent
            x.value = 0;
            scale = 0;
            return;
        }
        else
        {
            /// Too many digits after point. Just cut off excessive digits.
            /// The narrowing cast is safe: divisor_exp is positive and less than digits10 of the native type here.
            auto divisor = intExp10OfSize<typename T::NativeType>(static_cast<int32_t>(divisor_exp));
            assert(divisor > T(0)); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
            x.value /= divisor;
            scale = 0;
            return;
        }
    }

    /// Here 0 <= scale + exponent <= precision, so the unsigned addition gives the exact result.
    scale += exponent;
}

/// Non-throwing variant: read Decimal text consisting of digits only (see readDigits<false> with 'digits_only' set).
/// Returns false if the text cannot be parsed, if it has more digits before the point than 'precision - scale' allows,
/// or if it has more digits after the point than the requested scale (nothing is cut off here).
/// On success 'scale' holds the power of ten that x still has to be multiplied by to be expressed with the requested scale.
template <typename T>
inline bool tryReadDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
{
    uint32_t digits = precision;
    int32_t exponent;

    if (!readDigits<false>(buf, x, digits, exponent, true))
        return false;

    /// The exponent comes from the input text and may be anywhere in the int32 range:
    /// compare in 64 bits so that a huge exponent cannot overflow the sums and slip through the checks.
    const int64_t wide_exponent = exponent;
    const int64_t wide_scale = scale;

    if (static_cast<int64_t>(digits) + wide_exponent > static_cast<int64_t>(precision) - wide_scale)
        return false;

    if (wide_scale + wide_exponent < 0)
        return false;

    /// Here 0 <= scale + exponent <= precision, so the unsigned addition gives the exact result.
    scale += exponent;
    return true;
}

/// Read a Decimal from a CSV field. The value may optionally be wrapped in single or double quotes;
/// if an opening quote is present, the same quote character is required right after the number.
/// Throws on an empty buffer. Parsing of the number itself is delegated to readDecimalText.
template <typename T>
inline void readCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)
{
    if (buf.eof())
        throwReadAfterEOF();

    const char first_char = *buf.position();
    const bool is_quoted = (first_char == '\'' || first_char == '\"');

    /// Skip the opening quote, if any.
    if (is_quoted)
        ++buf.position();

    readDecimalText(buf, x, precision, scale, false);

    /// The closing quote must match the opening one.
    if (is_quoted)
        assertChar(first_char, buf);
}

}
Trying to fix clang-static-analyzer 2020-06-20 17:10:16 +00:00			`#pragma once`

Attempt to fix "Unbundled" build 2020-08-07 11:18:41 +00:00			`#include <limits>`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`#include <IO/ReadHelpers.h>`
Make reading of Decimal more compatible with other DBMS 2020-06-20 14:43:01 +00:00			`#include <Common/intExp.h>`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00

			`namespace DB`
			`{`

			`namespace ErrorCodes`
			`{`
			`extern const int CANNOT_PARSE_NUMBER;`
			`extern const int ARGUMENT_OUT_OF_BOUND;`
			`}`

tech debt 2020-04-14 14:43:09 +00:00			`/// Try to read Decimal into underlying type T from ReadBuffer. Throws if 'digits_only' is set and there's unexpected symbol in input.`
			`/// Returns integer 'exponent' factor that x should be muntiplyed by to get correct Decimal value: result = x * 10^exponent.`
Improve error message for Decimal CAST 2020-06-20 13:34:22 +00:00			`/// Use 'digits' input as max allowed meaning decimal digits in result. Place actual number of meaning digits in 'digits' output.`
tech debt 2020-04-14 14:43:09 +00:00			`/// Do not care about decimal scale, only about meaning digits in decimal text representation.`
variant without catch 2020-01-21 18:04:22 +00:00			`template <bool _throw_on_error, typename T>`
Fix bad types 2020-06-20 14:00:38 +00:00			`inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exponent, bool digits_only = false)`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`{`
Add support for extended precision integers and decimals (#13097) 2020-08-19 11:52:17 +00:00			`x = T(0);`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`exponent = 0;`
Fix bad types 2020-06-20 14:00:38 +00:00			`uint32_t max_digits = digits;`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`digits = 0;`
Fix bad types 2020-06-20 14:00:38 +00:00			`uint32_t places = 0;`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`typename T::NativeType sign = 1;`
			`bool leading_zeroes = true;`
			`bool after_point = false;`

			`if (buf.eof())`
variant without catch 2020-01-21 18:04:22 +00:00			`{`
			`if constexpr (_throw_on_error)`
			`throwReadAfterEOF();`
			`return false;`
			`}`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00
better diagnostic info in input formats 2020-04-21 20:41:52 +00:00			`switch (*buf.position())`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`{`
better diagnostic info in input formats 2020-04-21 20:41:52 +00:00			`case '-':`
			`sign = -1;`
			`[[fallthrough]];`
			`case '+':`
			`++buf.position();`
			`break;`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`}`

			`bool stop = false;`
			`while (!buf.eof() && !stop)`
			`{`
			`const char & byte = *buf.position();`
			`switch (byte)`
			`{`
			`case '.':`
			`after_point = true;`
			`leading_zeroes = false;`
			`break;`
			`case '0':`
			`{`
			`if (leading_zeroes)`
			`break;`

			`if (after_point)`
			`{`
			`++places; /// Count trailing zeroes. They would be used only if there's some other digit after them.`
			`break;`
			`}`
			`[[fallthrough]];`
			`}`
			`case '1': [[fallthrough]];`
			`case '2': [[fallthrough]];`
			`case '3': [[fallthrough]];`
			`case '4': [[fallthrough]];`
			`case '5': [[fallthrough]];`
			`case '6': [[fallthrough]];`
			`case '7': [[fallthrough]];`
			`case '8': [[fallthrough]];`
			`case '9':`
			`{`
			`leading_zeroes = false;`

			`++places; // num zeroes before + current digit`
			`if (digits + places > max_digits)`
variant without catch 2020-01-21 18:04:22 +00:00			`{`
Make reading of Decimal more compatible with other DBMS 2020-06-20 14:43:01 +00:00			`if (after_point)`
			`{`
			`/// Simply cut excessive digits.`
			`break;`
			`}`
			`else`
			`{`
			`if constexpr (_throw_on_error)`
			`throw Exception("Too many digits (" + std::to_string(digits + places) + " > " + std::to_string(max_digits)`
			`+ ") in decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND);`

			`return false;`
			`}`
variant without catch 2020-01-21 18:04:22 +00:00			`}`
Make reading of Decimal more compatible with other DBMS 2020-06-20 14:43:01 +00:00			`else`
			`{`
			`digits += places;`
			`if (after_point)`
			`exponent -= places;`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00
Make reading of Decimal more compatible with other DBMS 2020-06-20 14:43:01 +00:00			`// TODO: accurate shift10 for big integers`
			`x *= intExp10OfSize<T>(places);`
			`places = 0;`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00
Make reading of Decimal more compatible with other DBMS 2020-06-20 14:43:01 +00:00			`x += (byte - '0');`
			`break;`
			`}`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`}`
			`case 'e': [[fallthrough]];`
			`case 'E':`
			`{`
			`++buf.position();`
			`Int32 addition_exp = 0;`
variant without catch 2020-01-21 18:04:22 +00:00			`if (!tryReadIntText(addition_exp, buf))`
			`{`
			`if constexpr (_throw_on_error)`
			`throw Exception("Cannot parse exponent while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER);`
			`else`
			`return false;`
			`}`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`exponent += addition_exp;`
			`stop = true;`
			`continue;`
			`}`

			`default:`
			`if (digits_only)`
variant without catch 2020-01-21 18:04:22 +00:00			`{`
			`if constexpr (_throw_on_error)`
			`throw Exception("Unexpected symbol while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER);`
			`return false;`
			`}`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`stop = true;`
			`continue;`
			`}`
			`++buf.position();`
			`}`

			`x *= sign;`
variant without catch 2020-01-21 18:04:22 +00:00			`return true;`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`}`

			`template <typename T>`
Fix bad types 2020-06-20 14:00:38 +00:00			`inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale, bool digits_only = false)`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`{`
Fix bad types 2020-06-20 14:00:38 +00:00			`uint32_t digits = precision;`
			`int32_t exponent;`
variant without catch 2020-01-21 18:04:22 +00:00			`readDigits<true>(buf, x, digits, exponent, digits_only);`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00
Fix bad types 2020-06-20 14:00:38 +00:00			`if (static_cast<int32_t>(digits) + exponent > static_cast<int32_t>(precision - scale))`
Add support for extended precision integers and decimals (#13097) 2020-08-19 11:52:17 +00:00			`{`
			`static constexpr const char * pattern =`
Improve error message for Decimal CAST 2020-06-20 13:34:22 +00:00			`"Decimal value is too big: {} digits were read: {}e{}."`
Add support for extended precision integers and decimals (#13097) 2020-08-19 11:52:17 +00:00			`" Expected to read decimal with scale {} and precision {}";`

			`if constexpr (is_big_int_v<typename T::NativeType>)`
			`throw Exception(fmt::format(pattern, digits, x.value.str(), exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);`
			`else`
			`throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);`
			`}`
Improve error message for Decimal CAST 2020-06-20 13:34:22 +00:00
Fix bad types 2020-06-20 14:00:38 +00:00			`if (static_cast<int32_t>(scale) + exponent < 0)`
Make reading of Decimal more compatible with other DBMS 2020-06-20 14:43:01 +00:00			`{`
Fix error 2020-08-02 20:19:26 +00:00			`auto divisor_exp = -exponent - static_cast<int32_t>(scale);`

			`if (divisor_exp >= std::numeric_limits<typename T::NativeType>::digits10)`
Fix assert when decimal has too large negative exponent 2020-08-02 02:35:44 +00:00			`{`
			`/// Too big negative exponent`
			`x.value = 0;`
			`scale = 0;`
			`return;`
			`}`
			`else`
			`{`
			`/// Too many digits after point. Just cut off excessive digits.`
Add support for extended precision integers and decimals (#13097) 2020-08-19 11:52:17 +00:00			`auto divisor = intExp10OfSize<typename T::NativeType>(divisor_exp);`
			`assert(divisor > T(0)); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.`
Fix assert when decimal has too large negative exponent 2020-08-02 02:35:44 +00:00			`x.value /= divisor;`
			`scale = 0;`
			`return;`
			`}`
Make reading of Decimal more compatible with other DBMS 2020-06-20 14:43:01 +00:00			`}`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00
			`scale += exponent;`
			`}`

			`template <typename T>`
Fix bad types 2020-06-20 14:00:38 +00:00			`inline bool tryReadDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`{`
Fix bad types 2020-06-20 14:00:38 +00:00			`uint32_t digits = precision;`
			`int32_t exponent;`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00
variant without catch 2020-01-21 18:04:22 +00:00			`if (!readDigits<false>(buf, x, digits, exponent, true) \|\|`
Fix bad types 2020-06-20 14:00:38 +00:00			`static_cast<int32_t>(digits) + exponent > static_cast<int32_t>(precision - scale) \|\|`
			`static_cast<int32_t>(scale) + exponent < 0)`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`return false;`

			`scale += exponent;`
			`return true;`
			`}`

			`template <typename T>`
Fix bad types 2020-06-20 14:00:38 +00:00			`inline void readCSVDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_t & scale)`
extract readDecimalText.h from readFloatText.h 2019-05-16 16:41:10 +00:00			`{`
			`if (buf.eof())`
			`throwReadAfterEOF();`

			`char maybe_quote = *buf.position();`

			`if (maybe_quote == '\'' \|\| maybe_quote == '\"')`
			`++buf.position();`

			`readDecimalText(buf, x, precision, scale, false);`

			`if (maybe_quote == '\'' \|\| maybe_quote == '\"')`
			`assertChar(maybe_quote, buf);`
			`}`

			`}`