Use fast_float by default

This commit is contained in:
Maksim Kita 2020-12-05 19:45:22 +03:00
parent 42f2243fd4
commit b6bfb1cf20
6 changed files with 51 additions and 216 deletions

2
.gitmodules vendored
View File

@ -211,4 +211,4 @@
url = https://github.com/ClickHouse-Extras/dragonbox.git url = https://github.com/ClickHouse-Extras/dragonbox.git
[submodule "contrib/fast_float"] [submodule "contrib/fast_float"]
path = contrib/fast_float path = contrib/fast_float
url = https://github.com/lemire/fast_float url = https://github.com/fastfloat/fast_float

View File

@ -403,10 +403,7 @@ if (USE_MSGPACK)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR})
endif() endif()
if (USE_FAST_FLOAT) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${FAST_FLOAT_INCLUDE_DIR})
target_link_libraries (clickhouse_common_io PRIVATE fast_float)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${FAST_FLOAT_INCLUDE_DIR})
endif()
if (USE_ORC) if (USE_ORC)
dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES}) dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES})

View File

@ -14,4 +14,3 @@
#cmakedefine01 USE_GRPC #cmakedefine01 USE_GRPC
#cmakedefine01 USE_STATS #cmakedefine01 USE_STATS
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_FAST_FLOAT

View File

@ -57,13 +57,6 @@ template void readFloatTextFast<Float64>(Float64 &, ReadBuffer &);
template bool tryReadFloatTextFast<Float32>(Float32 &, ReadBuffer &); template bool tryReadFloatTextFast<Float32>(Float32 &, ReadBuffer &);
template bool tryReadFloatTextFast<Float64>(Float64 &, ReadBuffer &); template bool tryReadFloatTextFast<Float64>(Float64 &, ReadBuffer &);
/// Explicit instantiations of the fast_float-backed readers for Float32 and Float64.
/// USE_FAST_FLOAT is declared with #cmakedefine01, so it is always defined (as 0 or 1):
/// the guard has to test its value, `#ifdef` would be true even when the feature is off.
#if USE_FAST_FLOAT
template void readFloatTextWithFastFloat<Float32>(Float32 &, ReadBuffer &);
template void readFloatTextWithFastFloat<Float64>(Float64 &, ReadBuffer &);
template bool tryReadFloatTextWithFastFloat<Float32>(Float32 &, ReadBuffer &);
template bool tryReadFloatTextWithFastFloat<Float64>(Float64 &, ReadBuffer &);
#endif
template void readFloatTextSimple<Float32>(Float32 &, ReadBuffer &); template void readFloatTextSimple<Float32>(Float32 &, ReadBuffer &);
template void readFloatTextSimple<Float64>(Float64 &, ReadBuffer &); template void readFloatTextSimple<Float64>(Float64 &, ReadBuffer &);
template bool tryReadFloatTextSimple<Float32>(Float32 &, ReadBuffer &); template bool tryReadFloatTextSimple<Float32>(Float32 &, ReadBuffer &);

View File

@ -5,10 +5,8 @@
#include <common/shift10.h> #include <common/shift10.h>
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <double-conversion/double-conversion.h> #include <double-conversion/double-conversion.h>
#include <fast_float/fast_float.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
/** Methods for reading floating point numbers from text with decimal representation. /** Methods for reading floating point numbers from text with decimal representation.
* There are "precise", "fast" and "simple" implementations. * There are "precise", "fast" and "simple" implementations.
@ -138,12 +136,56 @@ bool assertOrParseNaN(ReadBuffer & buf)
/// Some garbage may be successfully parsed, examples: '--1' parsed as '1'. /// Some garbage may be successfully parsed, examples: '--1' parsed as '1'.
template <typename T, typename ReturnType> template <typename T, typename ReturnType>
ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & in)
{ {
static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double"); static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextFastFloatImpl must be float or double");
static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
if (buf.eof()) /// Fast path
char * initial_position = in.position();
auto res = fast_float::from_chars(initial_position, in.buffer().end(), x);
in.position() += res.ptr - initial_position;
/// Slow path
if (unlikely(!in.hasPendingData()))
{
String buffer;
while (true)
{
if (!in.hasPendingData())
{
buffer.insert(buffer.end(), initial_position, in.position());
if (in.next())
{
initial_position = in.buffer().begin();
}
else
{
break;
}
}
if (isWhitespaceASCII(*in.position()))
{
buffer.insert(buffer.end(), initial_position, in.position());
break;
}
else
{
++in.position();
}
}
res = fast_float::from_chars(buffer.data(), buffer.data() + buffer.size(), x);
}
if (unlikely(res.ec != std::errc()))
{ {
if constexpr (throw_exception) if constexpr (throw_exception)
throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
@ -151,124 +193,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf)
return ReturnType(false); return ReturnType(false);
} }
/// We use special code to read denormals (inf, nan), because we support slightly more variants that double-conversion library does: return ReturnType(true);
/// Example: inf and Infinity.
bool negative = false;
while (true)
{
switch (*buf.position())
{
case '+':
continue;
case '-':
{
negative = true;
++buf.position();
continue;
}
case 'i': [[fallthrough]];
case 'I':
{
if (assertOrParseInfinity<throw_exception>(buf))
{
x = std::numeric_limits<T>::infinity();
if (negative)
x = -x;
return ReturnType(true);
}
return ReturnType(false);
}
case 'n': [[fallthrough]];
case 'N':
{
if (assertOrParseNaN<throw_exception>(buf))
{
x = std::numeric_limits<T>::quiet_NaN();
if (negative)
x = -x;
return ReturnType(true);
}
return ReturnType(false);
}
default:
break;
}
break;
}
static const double_conversion::StringToDoubleConverter converter(
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
0, 0, nullptr, nullptr);
/// Fast path (avoid copying) if the buffer have at least MAX_LENGTH bytes.
static constexpr int MAX_LENGTH = 316;
if (buf.position() + MAX_LENGTH <= buf.buffer().end())
{
int num_processed_characters = 0;
if constexpr (std::is_same_v<T, double>)
x = converter.StringToDouble(buf.position(), buf.buffer().end() - buf.position(), &num_processed_characters);
else
x = converter.StringToFloat(buf.position(), buf.buffer().end() - buf.position(), &num_processed_characters);
if (num_processed_characters < 0)
{
if constexpr (throw_exception)
throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
buf.position() += num_processed_characters;
if (negative)
x = -x;
return ReturnType(true);
}
else
{
/// Slow path. Copy characters that may be present in floating point number to temporary buffer.
char tmp_buf[MAX_LENGTH];
int num_copied_chars = 0;
while (!buf.eof() && num_copied_chars < MAX_LENGTH)
{
char c = *buf.position();
if (!(isNumericASCII(c) || c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E'))
break;
tmp_buf[num_copied_chars] = c;
++buf.position();
++num_copied_chars;
}
int num_processed_characters = 0;
if constexpr (std::is_same_v<T, double>)
x = converter.StringToDouble(tmp_buf, num_copied_chars, &num_processed_characters);
else
x = converter.StringToFloat(tmp_buf, num_copied_chars, &num_processed_characters);
if (num_processed_characters < num_copied_chars)
{
if constexpr (throw_exception)
throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
else
return ReturnType(false);
}
if (negative)
x = -x;
return ReturnType(true);
}
} }
@ -477,72 +402,6 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
return ReturnType(true); return ReturnType(true);
} }
/// USE_FAST_FLOAT is declared with #cmakedefine01, so it is always defined (as 0 or 1).
/// Test its value: `#ifdef` would pull in fast_float even when the build has it switched off.
#if USE_FAST_FLOAT

#include <fast_float/fast_float.h>

/** Reads a floating point number in decimal text form from `in` with fast_float::from_chars.
  *
  * T          - float or double (enforced by static_assert).
  * ReturnType - void: a parse failure throws Exception with ErrorCodes::CANNOT_PARSE_NUMBER;
  *              any other type: a parse failure returns ReturnType(false).
  *              Success returns ReturnType(true).
  *
  * The parsed value is stored in `x`. On the fast path `in.position()` is moved to where
  * from_chars stopped; on the slow path it is moved to the first ASCII whitespace character
  * or to the point where `in.next()` reported no more data.
  */
template <typename T, typename ReturnType>
ReturnType readFloatTextWithFastFloatImpl(T & x, ReadBuffer & in)
{
    static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextWithFastFloatImpl must be float or double");
    static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590

    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

    /// Fast path: parse straight from the bytes between the current position and the buffer end.
    char * initial_position = in.position();
    auto res = fast_float::from_chars(initial_position, in.buffer().end(), x);
    in.position() += res.ptr - initial_position;

    /// Slow path: nothing is left pending after the fast path, so the number presumably
    /// may continue in the next chunk. Gather the token into a string and parse it again.
    if (unlikely(!in.hasPendingData()))
    {
        String buffer;

        while (true)
        {
            if (!in.hasPendingData())
            {
                /// Keep what was scanned in the current chunk before requesting more data.
                buffer.insert(buffer.end(), initial_position, in.position());

                /// NOTE(review): assumes next() loads the following chunk and returns false
                /// when the stream is exhausted - confirm against ReadBuffer.
                if (in.next())
                {
                    initial_position = in.buffer().begin();
                }
                else
                {
                    break;
                }
            }

            /// NOTE(review): the token ends only at ASCII whitespace, so input such as "1.5,2"
            /// is consumed in full here although from_chars below stops after "1.5";
            /// the position is not moved back afterwards. Verify with the callers' formats.
            if (isWhitespaceASCII(*in.position()))
            {
                buffer.insert(buffer.end(), initial_position, in.position());
                break;
            }
            else
            {
                ++in.position();
            }
        }

        res = fast_float::from_chars(buffer.data(), buffer.data() + buffer.size(), x);
    }

    if (unlikely(res.ec != std::errc()))
    {
        if constexpr (throw_exception)
            throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
        else
            return ReturnType(false);
    }

    return ReturnType(true);
}

#endif
template <typename T, typename ReturnType> template <typename T, typename ReturnType>
ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf) ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
{ {
@ -643,18 +502,8 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
return ReturnType(true); return ReturnType(true);
} }
/// Entry points for the fast_float-based parser.
/// USE_FAST_FLOAT is declared with #cmakedefine01, so it is always defined (as 0 or 1):
/// the guard tests its value, because `#ifdef` would be true even when the feature is off.
#if USE_FAST_FLOAT

/// Parses a value of type T from `in` into `x`; the ReturnType = void form of the
/// implementation reports a parse failure by throwing.
template <typename T>
void readFloatTextWithFastFloat(T & x, ReadBuffer & in)
{
    readFloatTextWithFastFloatImpl<T, void>(x, in);
}

/// Same as above, but reports the outcome through the returned bool instead of throwing.
template <typename T>
bool tryReadFloatTextWithFastFloat(T & x, ReadBuffer & in)
{
    return readFloatTextWithFastFloatImpl<T, bool>(x, in);
}

#endif
#ifdef USE_FAST_FLOAT
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextWithFastFloat(x, in); }
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return tryReadFloatTextWithFastFloat(x, in); }
#else
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); } template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); } template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
#endif
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); } template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); }
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); } template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); }

View File

@ -74,9 +74,6 @@ try
if (method == 1) loop<T, readFloatTextPrecise>(in, out); if (method == 1) loop<T, readFloatTextPrecise>(in, out);
if (method == 2) loop<T, readFloatTextFast>(in, out); if (method == 2) loop<T, readFloatTextFast>(in, out);
if (method == 3) loop<T, readFloatTextSimple>(in, out); if (method == 3) loop<T, readFloatTextSimple>(in, out);
#ifdef USE_FAST_FLOAT
if (method == 4) loop<T, readFloatTextWithFastFloat>(in, out);
#endif
return 0; return 0;
} }