Fast float updated implementation

This commit is contained in:
Maksim Kita 2020-11-08 22:06:36 +03:00
parent 7d97f23db4
commit 87c50602bc
6 changed files with 89 additions and 52 deletions

View File

@ -12,4 +12,4 @@ if (USE_FAST_FLOAT)
set(FAST_FLOAT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/")
endif ()
message(STATUS "Using fast_float=${USE_FAST_FLOAT}")
message(STATUS "Using fast_float=${USE_FAST_FLOAT}")

View File

@ -405,7 +405,7 @@ endif()
if (USE_FAST_FLOAT)
target_link_libraries (clickhouse_common_io PRIVATE fast_float)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${FAST_FLOAT_INCLUDE_DIR})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${FAST_FLOAT_INCLUDE_DIR})
endif()
if (USE_ORC)

View File

@ -57,6 +57,13 @@ template void readFloatTextFast<Float64>(Float64 &, ReadBuffer &);
/// Explicit instantiations of the bool-returning "fast" reader for both supported float widths.
template bool tryReadFloatTextFast<Float32>(Float32 &, ReadBuffer &);
template bool tryReadFloatTextFast<Float64>(Float64 &, ReadBuffer &);
/// The fast_float-backed readers exist only when the build defines USE_FAST_FLOAT,
/// so their explicit instantiations are guarded by the same macro.
#ifdef USE_FAST_FLOAT
template void readFloatTextWithFastFloat<Float32>(Float32 &, ReadBuffer &);
template void readFloatTextWithFastFloat<Float64>(Float64 &, ReadBuffer &);
template bool tryReadFloatTextWithFastFloat<Float32>(Float32 &, ReadBuffer &);
template bool tryReadFloatTextWithFastFloat<Float64>(Float64 &, ReadBuffer &);
#endif
/// Explicit instantiations of the "simple" readers.
template void readFloatTextSimple<Float32>(Float32 &, ReadBuffer &);
template void readFloatTextSimple<Float64>(Float64 &, ReadBuffer &);
template bool tryReadFloatTextSimple<Float32>(Float32 &, ReadBuffer &);

View File

@ -6,6 +6,9 @@
#include <Common/StringUtils/StringUtils.h>
#include <double-conversion/double-conversion.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
/** Methods for reading floating point numbers from text with decimal representation.
* There are "precise", "fast" and "simple" implementations.
@ -474,6 +477,72 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
return ReturnType(true);
}
#ifdef USE_FAST_FLOAT
#include <fast_float/fast_float.h>
template <typename T, typename ReturnType>
ReturnType readFloatTextWithFastFloatImpl(T & x, ReadBuffer & in)
{
static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextFastFloatImpl must be float or double");
static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
/// Fast path
char *initial_position = in.position();
auto res = fast_float::from_chars(initial_position, in.buffer().end(), x);
in.position() += res.ptr - initial_position;
/// Slow path
if (unlikely(!in.hasPendingData()))
{
/// TODO: Optimize in readFloatTextPreciseImpl there is MAX_LENGTH and array with MAX_LEGNTH can be used
String buffer;
while (true)
{
if (!in.hasPendingData())
{
buffer.insert(buffer.end(), initial_position, in.position());
if (in.next())
{
initial_position = in.buffer().begin();
}
else
{
break;
}
}
if (isWhitespaceASCII(*in.position()))
{
buffer.insert(buffer.end(), initial_position, in.position());
break;
}
else
{
++in.position();
}
}
res = fast_float::from_chars(buffer.data(), buffer.data() + buffer.size(), x);
}
if (unlikely(res.ec != std::errc()))
{
if constexpr (throw_exception)
throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
return ReturnType(true);
}
#endif
template <typename T, typename ReturnType>
ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
@ -582,14 +651,23 @@ template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { ret
/// Entry point of the "fast" implementation: forwards to readFloatTextFastImpl with ReturnType = void.
template <typename T>
void readFloatTextFast(T & x, ReadBuffer & in)
{
    readFloatTextFastImpl<T, void>(x, in);
}

/// Same as readFloatTextFast, but instantiates the implementation with ReturnType = bool and returns its result.
template <typename T>
bool tryReadFloatTextFast(T & x, ReadBuffer & in)
{
    return readFloatTextFastImpl<T, bool>(x, in);
}
#ifdef USE_FAST_FLOAT
/// fast_float-backed reader: forwards to readFloatTextWithFastFloatImpl with ReturnType = void,
/// which reports a parse failure by throwing.
template <typename T>
void readFloatTextWithFastFloat(T & x, ReadBuffer & in)
{
    readFloatTextWithFastFloatImpl<T, void>(x, in);
}

/// fast_float-backed reader: forwards to readFloatTextWithFastFloatImpl with ReturnType = bool,
/// which reports a parse failure by returning false.
template <typename T>
bool tryReadFloatTextWithFastFloat(T & x, ReadBuffer & in)
{
    return readFloatTextWithFastFloatImpl<T, bool>(x, in);
}
#endif
/// Entry point of the "simple" implementation: forwards to readFloatTextSimpleImpl with ReturnType = void.
template <typename T>
void readFloatTextSimple(T & x, ReadBuffer & in)
{
    readFloatTextSimpleImpl<T, void>(x, in);
}

/// Same as readFloatTextSimple, but instantiates the implementation with ReturnType = bool and returns its result.
template <typename T>
bool tryReadFloatTextSimple(T & x, ReadBuffer & in)
{
    return readFloatTextSimpleImpl<T, bool>(x, in);
}
/// Implementation that is selected as default.
#ifdef USE_FAST_FLOAT
/// Default reader when USE_FAST_FLOAT is defined: delegates to the fast_float-backed implementation.
template <typename T>
void readFloatText(T & x, ReadBuffer & in)
{
    readFloatTextWithFastFloat(x, in);
}

/// Default bool-returning reader when USE_FAST_FLOAT is defined.
template <typename T>
bool tryReadFloatText(T & x, ReadBuffer & in)
{
    return tryReadFloatTextWithFastFloat(x, in);
}
#else
/// Default reader when USE_FAST_FLOAT is not defined: delegates to the "fast" implementation.
template <typename T>
void readFloatText(T & x, ReadBuffer & in)
{
    readFloatTextFast(x, in);
}

/// Default bool-returning reader when USE_FAST_FLOAT is not defined.
template <typename T>
bool tryReadFloatText(T & x, ReadBuffer & in)
{
    return tryReadFloatTextFast(x, in);
}
#endif
}

View File

@ -7,11 +7,6 @@ target_link_libraries (read_buffer_perf PRIVATE clickhouse_common_io)
add_executable (read_float_perf read_float_perf.cpp)
target_link_libraries (read_float_perf PRIVATE clickhouse_common_io)
if (USE_FAST_FLOAT)
target_link_libraries (read_float_perf PRIVATE fast_float)
target_include_directories (read_float_perf SYSTEM BEFORE PRIVATE ${FAST_FLOAT_INCLUDE_DIR})
endif()
add_executable (write_buffer write_buffer.cpp)
target_link_libraries (write_buffer PRIVATE clickhouse_common_io)

View File

@ -13,10 +13,6 @@
#include <IO/WriteBufferFromFileDescriptor.h>
#include <Compression/CompressedReadBuffer.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
/** How to test:
# Prepare data
@ -42,45 +38,6 @@ $ for i in {1..10}; do echo $i; time ./read_float_perf 2 < numbers$i.tsv; done
using namespace DB;
#ifdef USE_FAST_FLOAT
#include <fast_float/fast_float.h>
/** Benchmark-local reader: copies the next float-looking token out of `in` and parses it with fast_float.
  *
  * Template parameters:
  *   T          - float or double (enforced by static_assert).
  *   ReturnType - with `void` a failed parse throws
  *                Exception("Cannot read floating point value", CANNOT_PARSE_NUMBER);
  *                otherwise the function returns false on failure and true on success.
  *
  * The token consists of ASCII letters and digits, '.', '+' and '-', so that negative numbers
  * and signed exponents ("1e-5") are collected in full. The read position is left on the first
  * character that does not belong to the token.
  */
template <typename T, typename ReturnType>
ReturnType readFloatTextFastFloatImpl(T & x, ReadBuffer & in)
{
    static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextFastFloatImpl must be float or double");
    static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590

    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

    /// TODO: Optimize.
    /// fast_float needs a contiguous [begin, end) range, while the data currently available
    /// in the ReadBuffer may hold only a part of the number, so the token is copied first.
    String buff;
    while (!in.eof())
    {
        const char c = *in.position();
        if (!(isAlphaNumericASCII(c) || c == '.' || c == '+' || c == '-'))
            break;

        buff += c;
        ++in.position();
    }

    /// No per-value debug output here: this function runs inside a performance benchmark loop.
    auto res = fast_float::from_chars(buff.data(), buff.data() + buff.size(), x);

    if (res.ec != std::errc())
    {
        if constexpr (throw_exception)
            throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
        else
            return ReturnType(false);
    }

    return ReturnType(true);
}
#endif
template <typename T, void F(T&, ReadBuffer&)>
void NO_INLINE loop(ReadBuffer & in, WriteBuffer & out)
{
@ -118,7 +75,7 @@ try
if (method == 2) loop<T, readFloatTextFast>(in, out);
if (method == 3) loop<T, readFloatTextSimple>(in, out);
#ifdef USE_FAST_FLOAT
if (method == 4) loop<T, readFloatTextFastFloatImpl>(in, out);
if (method == 4) loop<T, readFloatTextWithFastFloat>(in, out);
#endif
return 0;