Use fast_float by default

2024-11-21 15:12:02 +00:00 · 2020-12-05 19:45:22 +03:00 · 2020-12-05 19:45:22 +03:00 · b6bfb1cf20
commit b6bfb1cf20
parent 42f2243fd4
6 changed files with 51 additions and 216 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -211,4 +211,4 @@
 	url = https://github.com/ClickHouse-Extras/dragonbox.git
 [submodule "contrib/fast_float"]
 	path = contrib/fast_float
-	url = https://github.com/lemire/fast_float
+	url = https://github.com/fastfloat/fast_float
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -403,10 +403,7 @@ if (USE_MSGPACK)
    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR})
 endif()

-if (USE_FAST_FLOAT)
-    target_link_libraries (clickhouse_common_io PRIVATE fast_float)
-    target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${FAST_FLOAT_INCLUDE_DIR})
-endif()
+target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${FAST_FLOAT_INCLUDE_DIR})

 if (USE_ORC)
    dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES})
--- a/src/Common/config.h.in
+++ b/src/Common/config.h.in
@@ -14,4 +14,3 @@
 #cmakedefine01 USE_GRPC
 #cmakedefine01 USE_STATS
 #cmakedefine01 CLICKHOUSE_SPLIT_BINARY
-#cmakedefine01 USE_FAST_FLOAT
--- a/src/IO/readFloatText.cpp
+++ b/src/IO/readFloatText.cpp
@@ -57,13 +57,6 @@ template void readFloatTextFast<Float64>(Float64 &, ReadBuffer &);
 template bool tryReadFloatTextFast<Float32>(Float32 &, ReadBuffer &);
 template bool tryReadFloatTextFast<Float64>(Float64 &, ReadBuffer &);

-#ifdef USE_FAST_FLOAT
-template void readFloatTextWithFastFloat<Float32>(Float32 &, ReadBuffer &);
-template void readFloatTextWithFastFloat<Float64>(Float64 &, ReadBuffer &);
-template bool tryReadFloatTextWithFastFloat<Float32>(Float32 &, ReadBuffer &);
-template bool tryReadFloatTextWithFastFloat<Float64>(Float64 &, ReadBuffer &);
-#endif
-
 template void readFloatTextSimple<Float32>(Float32 &, ReadBuffer &);
 template void readFloatTextSimple<Float64>(Float64 &, ReadBuffer &);
 template bool tryReadFloatTextSimple<Float32>(Float32 &, ReadBuffer &);
--- a/src/IO/readFloatText.h
+++ b/src/IO/readFloatText.h
@@ -5,10 +5,8 @@
 #include <common/shift10.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <double-conversion/double-conversion.h>
+#include <fast_float/fast_float.h>

-#if !defined(ARCADIA_BUILD)
-#    include <Common/config.h>
-#endif

 /** Methods for reading floating point numbers from text with decimal representation.
  * There are "precise", "fast" and "simple" implementations.
@@ -138,12 +136,56 @@ bool assertOrParseNaN(ReadBuffer & buf)

 /// Some garbage may be successfully parsed, examples: '--1' parsed as '1'.
 template <typename T, typename ReturnType>
-ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf)
+ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & in)
 {
-    static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
+    static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextFastFloatImpl must be float or double");
+    static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590
+
    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

-    if (buf.eof())
+    /// Fast path
+
+    char * initial_position = in.position();
+    auto res = fast_float::from_chars(initial_position, in.buffer().end(), x);
+    in.position() += res.ptr - initial_position;
+
+    /// Slow path
+
+    if (unlikely(!in.hasPendingData()))
+    {
+        String buffer;
+
+        while (true)
+        {
+            if (!in.hasPendingData())
+            {
+                buffer.insert(buffer.end(), initial_position, in.position());
+
+                if (in.next())
+                {
+                    initial_position = in.buffer().begin();
+                }
+                else
+                {
+                    break;
+                }
+            }
+
+            if (isWhitespaceASCII(*in.position()))
+            {
+                buffer.insert(buffer.end(), initial_position, in.position());
+                break;
+            }
+            else
+            {
+                ++in.position();
+            }
+        }
+
+        res = fast_float::from_chars(buffer.data(), buffer.data() + buffer.size(), x);
+    }
+
+    if (unlikely(res.ec != std::errc()))
    {
        if constexpr (throw_exception)
            throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
@@ -151,124 +193,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf)
            return ReturnType(false);
    }

-    /// We use special code to read denormals (inf, nan), because we support slightly more variants that double-conversion library does:
-    /// Example: inf and Infinity.
-
-    bool negative = false;
-
-    while (true)
-    {
-        switch (*buf.position())
-        {
-            case '+':
-                continue;
-
-            case '-':
-            {
-                negative = true;
-                ++buf.position();
-                continue;
-            }
-
-            case 'i': [[fallthrough]];
-            case 'I':
-            {
-                if (assertOrParseInfinity<throw_exception>(buf))
-                {
-                    x = std::numeric_limits<T>::infinity();
-                    if (negative)
-                        x = -x;
-                    return ReturnType(true);
-                }
-                return ReturnType(false);
-            }
-
-            case 'n': [[fallthrough]];
-            case 'N':
-            {
-                if (assertOrParseNaN<throw_exception>(buf))
-                {
-                    x = std::numeric_limits<T>::quiet_NaN();
-                    if (negative)
-                        x = -x;
-                    return ReturnType(true);
-                }
-                return ReturnType(false);
-            }
-
-            default:
-                break;
-        }
-        break;
-    }
-
-    static const double_conversion::StringToDoubleConverter converter(
-        double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
-        0, 0, nullptr, nullptr);
-
-    /// Fast path (avoid copying) if the buffer have at least MAX_LENGTH bytes.
-    static constexpr int MAX_LENGTH = 316;
-
-    if (buf.position() + MAX_LENGTH <= buf.buffer().end())
-    {
-        int num_processed_characters = 0;
-
-        if constexpr (std::is_same_v<T, double>)
-            x = converter.StringToDouble(buf.position(), buf.buffer().end() - buf.position(), &num_processed_characters);
-        else
-            x = converter.StringToFloat(buf.position(), buf.buffer().end() - buf.position(), &num_processed_characters);
-
-        if (num_processed_characters < 0)
-        {
-            if constexpr (throw_exception)
-                throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
-            else
-                return ReturnType(false);
-        }
-
-        buf.position() += num_processed_characters;
-
-        if (negative)
-            x = -x;
-        return ReturnType(true);
-    }
-    else
-    {
-        /// Slow path. Copy characters that may be present in floating point number to temporary buffer.
-
-        char tmp_buf[MAX_LENGTH];
-        int num_copied_chars = 0;
-
-        while (!buf.eof() && num_copied_chars < MAX_LENGTH)
-        {
-            char c = *buf.position();
-            if (!(isNumericASCII(c) || c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E'))
-                break;
-
-            tmp_buf[num_copied_chars] = c;
-            ++buf.position();
-            ++num_copied_chars;
-        }
-
-        int num_processed_characters = 0;
-
-        if constexpr (std::is_same_v<T, double>)
-            x = converter.StringToDouble(tmp_buf, num_copied_chars, &num_processed_characters);
-        else
-            x = converter.StringToFloat(tmp_buf, num_copied_chars, &num_processed_characters);
-
-        if (num_processed_characters < num_copied_chars)
-        {
-            if constexpr (throw_exception)
-                throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
-            else
-                return ReturnType(false);
-        }
-
-        if (negative)
-            x = -x;
-        return ReturnType(true);
-    }
+    return ReturnType(true);
 }


@@ -477,72 +402,6 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
    return ReturnType(true);
 }

-#ifdef USE_FAST_FLOAT
-#include <fast_float/fast_float.h>
-
-template <typename T, typename ReturnType>
-ReturnType readFloatTextWithFastFloatImpl(T & x, ReadBuffer & in)
-{
-    static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextFastFloatImpl must be float or double");
-    static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590
-
-    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
-
-    /// Fast path
-
-    char * initial_position = in.position();
-    auto res = fast_float::from_chars(initial_position, in.buffer().end(), x);
-    in.position() += res.ptr - initial_position;
-
-    /// Slow path
-
-    if (unlikely(!in.hasPendingData()))
-    {
-        String buffer;
-
-        while (true)
-        {
-            if (!in.hasPendingData())
-            {
-                buffer.insert(buffer.end(), initial_position, in.position());
-
-                if (in.next())
-                {
-                    initial_position = in.buffer().begin();
-                }
-                else
-                {
-                    break;
-                }
-            }
-
-            if (isWhitespaceASCII(*in.position()))
-            {
-                buffer.insert(buffer.end(), initial_position, in.position());
-                break;
-            }
-            else
-            {
-                ++in.position();
-            }
-        }
-
-        res = fast_float::from_chars(buffer.data(), buffer.data() + buffer.size(), x);
-    }
-
-    if (unlikely(res.ec != std::errc()))
-    {
-        if constexpr (throw_exception)
-            throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
-        else
-            return ReturnType(false);
-    }
-
-    return ReturnType(true);
-}
-
-#endif
-
 template <typename T, typename ReturnType>
 ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
 {
@@ -643,18 +502,8 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
    return ReturnType(true);
 }

-#ifdef USE_FAST_FLOAT
-template <typename T> void readFloatTextWithFastFloat(T & x, ReadBuffer & in) { readFloatTextWithFastFloatImpl<T, void>(x, in); }
-template <typename T> bool tryReadFloatTextWithFastFloat(T & x, ReadBuffer & in) { return readFloatTextWithFastFloatImpl<T, bool>(x, in); }
-#endif
-
-#ifdef USE_FAST_FLOAT
-template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextWithFastFloat(x, in); }
-template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return tryReadFloatTextWithFastFloat(x, in); }
-#else
 template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
 template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
-#endif

 template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); }
 template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); }
--- a/src/IO/tests/read_float_perf.cpp
+++ b/src/IO/tests/read_float_perf.cpp
@@ -74,9 +74,6 @@ try
    if (method == 1) loop<T, readFloatTextPrecise>(in, out);
    if (method == 2) loop<T, readFloatTextFast>(in, out);
    if (method == 3) loop<T, readFloatTextSimple>(in, out);
-    #ifdef USE_FAST_FLOAT
-    if (method == 4) loop<T, readFloatTextWithFastFloat>(in, out);
-    #endif

    return 0;
 }