diff --git a/cmake/find/fast_float.cmake b/cmake/find/fast_float.cmake index 61437a32efc..4dd539730b8 100644 --- a/cmake/find/fast_float.cmake +++ b/cmake/find/fast_float.cmake @@ -12,4 +12,4 @@ if (USE_FAST_FLOAT) set(FAST_FLOAT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/") endif () -message(STATUS "Using fast_float=${USE_FAST_FLOAT}") \ No newline at end of file +message(STATUS "Using fast_float=${USE_FAST_FLOAT}") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e5f7842eb75..45e21bb95db 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -405,7 +405,7 @@ endif() if (USE_FAST_FLOAT) target_link_libraries (clickhouse_common_io PRIVATE fast_float) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${FAST_FLOAT_INCLUDE_DIR}) + target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${FAST_FLOAT_INCLUDE_DIR}) endif() if (USE_ORC) diff --git a/src/IO/readFloatText.cpp b/src/IO/readFloatText.cpp index 12ac0e80c88..da3d7ab1c4a 100644 --- a/src/IO/readFloatText.cpp +++ b/src/IO/readFloatText.cpp @@ -57,6 +57,13 @@ template void readFloatTextFast(Float64 &, ReadBuffer &); template bool tryReadFloatTextFast(Float32 &, ReadBuffer &); template bool tryReadFloatTextFast(Float64 &, ReadBuffer &); +#ifdef USE_FAST_FLOAT +template void readFloatTextWithFastFloat(Float32 &, ReadBuffer &); +template void readFloatTextWithFastFloat(Float64 &, ReadBuffer &); +template bool tryReadFloatTextWithFastFloat(Float32 &, ReadBuffer &); +template bool tryReadFloatTextWithFastFloat(Float64 &, ReadBuffer &); +#endif + template void readFloatTextSimple(Float32 &, ReadBuffer &); template void readFloatTextSimple(Float64 &, ReadBuffer &); template bool tryReadFloatTextSimple(Float32 &, ReadBuffer &); diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index d126dd52ef8..326456d3892 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -6,6 +6,9 @@ #include #include +#if !defined(ARCADIA_BUILD) +# 
include +#endif /** Methods for reading floating point numbers from text with decimal representation. * There are "precise", "fast" and "simple" implementations. @@ -474,6 +477,72 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) return ReturnType(true); } +#ifdef USE_FAST_FLOAT +#include + +template +ReturnType readFloatTextWithFastFloatImpl(T & x, ReadBuffer & in) +{ + static_assert(std::is_same_v || std::is_same_v, "Argument for readFloatTextFastFloatImpl must be float or double"); + static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590 + + static constexpr bool throw_exception = std::is_same_v; + + /// Fast path: parse straight from the bytes left in the current buffer and advance the position past what fast_float consumed. + + char *initial_position = in.position(); + auto res = fast_float::from_chars(initial_position, in.buffer().end(), x); + in.position() += res.ptr - initial_position; + + /// Slow path: the current buffer is exhausted, so the number may continue in the next one; gather the bytes already consumed plus those that follow, up to the first ASCII whitespace or until in.next() returns false, and parse the gathered text again. + + if (unlikely(!in.hasPendingData())) + { + /// TODO: Optimize: readFloatTextPreciseImpl has MAX_LENGTH, so an array of MAX_LENGTH could be used here instead of String. + String buffer; + + while (true) + { + if (!in.hasPendingData()) + { + buffer.insert(buffer.end(), initial_position, in.position()); + + if (in.next()) + { + initial_position = in.buffer().begin(); + } + else + { + break; + } + } + + if (isWhitespaceASCII(*in.position())) + { + buffer.insert(buffer.end(), initial_position, in.position()); + break; + } + else + { + ++in.position(); + } + } + + res = fast_float::from_chars(buffer.data(), buffer.data() + buffer.size(), x); + } + + if (unlikely(res.ec != std::errc())) + { + if constexpr (throw_exception) + throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); + else + return ReturnType(false); + } + + return ReturnType(true); +} + +#endif template ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf) @@ -582,14 +651,23 @@ template bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { ret template void readFloatTextFast(T & x, ReadBuffer & in) { 
readFloatTextFastImpl(x, in); } template bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl(x, in); } +#ifdef USE_FAST_FLOAT +template void readFloatTextWithFastFloat(T & x, ReadBuffer & in) { readFloatTextWithFastFloatImpl(x, in); } +template bool tryReadFloatTextWithFastFloat(T & x, ReadBuffer & in) { return readFloatTextWithFastFloatImpl(x, in); } +#endif + template void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl(x, in); } template bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl(x, in); } /// Implementation that is selected as default. +#ifdef USE_FAST_FLOAT +template void readFloatText(T & x, ReadBuffer & in) { readFloatTextWithFastFloat(x, in); } +template bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextWithFastFloat(x, in); } +#else template void readFloatText(T & x, ReadBuffer & in) { readFloatTextFast(x, in); } template bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); } - +#endif } diff --git a/src/IO/tests/CMakeLists.txt b/src/IO/tests/CMakeLists.txt index 1b758e12d4b..da4d330f0a9 100644 --- a/src/IO/tests/CMakeLists.txt +++ b/src/IO/tests/CMakeLists.txt @@ -7,11 +7,6 @@ target_link_libraries (read_buffer_perf PRIVATE clickhouse_common_io) add_executable (read_float_perf read_float_perf.cpp) target_link_libraries (read_float_perf PRIVATE clickhouse_common_io) -if (USE_FAST_FLOAT) - target_link_libraries (read_float_perf PRIVATE fast_float) - target_include_directories (read_float_perf SYSTEM BEFORE PRIVATE ${FAST_FLOAT_INCLUDE_DIR}) -endif() - add_executable (write_buffer write_buffer.cpp) target_link_libraries (write_buffer PRIVATE clickhouse_common_io) diff --git a/src/IO/tests/read_float_perf.cpp b/src/IO/tests/read_float_perf.cpp index ebb96dd0037..2e1d818cf97 100644 --- a/src/IO/tests/read_float_perf.cpp +++ b/src/IO/tests/read_float_perf.cpp @@ -13,10 +13,6 @@ #include #include -#if 
!defined(ARCADIA_BUILD) -# include -#endif - /** How to test: # Prepare data @@ -42,45 +38,6 @@ $ for i in {1..10}; do echo $i; time ./read_float_perf 2 < numbers$i.tsv; done using namespace DB; -#ifdef USE_FAST_FLOAT -#include - -template -ReturnType readFloatTextFastFloatImpl(T & x, ReadBuffer & in) -{ - static_assert(std::is_same_v || std::is_same_v, "Argument for readFloatTextImpl must be float or double"); - static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590 - - static constexpr bool throw_exception = std::is_same_v; - - String buff; - - /// TODO: Optimize - /// Currently fast_float interface need begin and end - /// ReadBuffers current begin end can have only part of data - while (!in.eof() && (isAlphaNumericASCII(*in.position()) || (*in.position() == '.'))) { - buff += *in.position(); - ++in.position(); - } - - std::cerr << buff << std::endl; - - auto res = fast_float::from_chars(buff.data(), buff.data() + buff.size(), x); - - if (res.ec != std::errc()) - { - if constexpr (throw_exception) - throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); - else - return ReturnType(false); - } - - return ReturnType(true); -} - -#endif - - template void NO_INLINE loop(ReadBuffer & in, WriteBuffer & out) { @@ -118,7 +75,7 @@ try if (method == 2) loop(in, out); if (method == 3) loop(in, out); #ifdef USE_FAST_FLOAT - if (method == 4) loop(in, out); + if (method == 4) loop(in, out); #endif return 0;