mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge pull request #64641 from azat/fix-float-inference
Fix type inference for float (in case of small buffer)
This commit is contained in:
commit
085c406f1f
@ -879,11 +879,11 @@ namespace
|
||||
}
|
||||
|
||||
template <bool is_json>
|
||||
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings)
|
||||
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional)
|
||||
{
|
||||
if (is_json || settings.try_infer_exponent_floats)
|
||||
return tryReadFloatText(value, buf);
|
||||
return tryReadFloatTextNoExponent(value, buf);
|
||||
return tryReadFloatTextExt(value, buf, has_fractional);
|
||||
return tryReadFloatTextExtNoExponent(value, buf, has_fractional);
|
||||
}
|
||||
|
||||
template <bool is_json>
|
||||
@ -893,46 +893,31 @@ namespace
|
||||
return nullptr;
|
||||
|
||||
Float64 tmp_float;
|
||||
bool has_fractional;
|
||||
if (settings.try_infer_integers)
|
||||
{
|
||||
/// If we read from String, we can do it in a more efficient way.
|
||||
if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf))
|
||||
{
|
||||
/// Remember the pointer to the start of the number to rollback to it.
|
||||
char * number_start = buf.position();
|
||||
Int64 tmp_int;
|
||||
bool read_int = tryReadIntText(tmp_int, buf);
|
||||
/// If we reached eof, it cannot be float (it requires no less data than integer)
|
||||
if (buf.eof())
|
||||
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;
|
||||
|
||||
char * int_end = buf.position();
|
||||
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
|
||||
buf.position() = number_start;
|
||||
char * number_start = buf.position();
|
||||
|
||||
bool read_uint = false;
|
||||
char * uint_end = nullptr;
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
if (!read_int)
|
||||
{
|
||||
UInt64 tmp_uint;
|
||||
read_uint = tryReadIntText(tmp_uint, buf);
|
||||
/// If we reached eof, it cannot be float (it requires no less data than integer)
|
||||
if (buf.eof())
|
||||
return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;
|
||||
|
||||
uint_end = buf.position();
|
||||
buf.position() = number_start;
|
||||
}
|
||||
|
||||
if (tryReadFloat<is_json>(tmp_float, buf, settings))
|
||||
{
|
||||
if (read_int && buf.position() == int_end)
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
if (read_uint && buf.position() == uint_end)
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
/// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e'
|
||||
/// But, for now it is true
|
||||
if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional) && has_fractional)
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
|
||||
Int64 tmp_int;
|
||||
buf.position() = number_start;
|
||||
if (tryReadIntText(tmp_int, buf))
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
UInt64 tmp_uint;
|
||||
buf.position() = number_start;
|
||||
if (tryReadIntText(tmp_uint, buf))
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
@ -942,36 +927,22 @@ namespace
|
||||
/// and then as float.
|
||||
PeekableReadBuffer peekable_buf(buf);
|
||||
PeekableReadBufferCheckpoint checkpoint(peekable_buf);
|
||||
Int64 tmp_int;
|
||||
bool read_int = tryReadIntText(tmp_int, peekable_buf);
|
||||
auto * int_end = peekable_buf.position();
|
||||
peekable_buf.rollbackToCheckpoint(true);
|
||||
|
||||
bool read_uint = false;
|
||||
char * uint_end = nullptr;
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
if (!read_int)
|
||||
{
|
||||
PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
|
||||
UInt64 tmp_uint;
|
||||
read_uint = tryReadIntText(tmp_uint, peekable_buf);
|
||||
uint_end = peekable_buf.position();
|
||||
peekable_buf.rollbackToCheckpoint(true);
|
||||
}
|
||||
|
||||
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings))
|
||||
{
|
||||
/// Float parsing reads no fewer bytes than integer parsing,
|
||||
/// so position of the buffer is either the same, or further.
|
||||
/// If it's the same, then it's integer.
|
||||
if (read_int && peekable_buf.position() == int_end)
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
if (read_uint && peekable_buf.position() == uint_end)
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings, has_fractional) && has_fractional)
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
peekable_buf.rollbackToCheckpoint(/* drop= */ false);
|
||||
|
||||
Int64 tmp_int;
|
||||
if (tryReadIntText(tmp_int, peekable_buf))
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
peekable_buf.rollbackToCheckpoint(/* drop= */ true);
|
||||
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
UInt64 tmp_uint;
|
||||
if (tryReadIntText(tmp_uint, peekable_buf))
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
}
|
||||
else if (tryReadFloat<is_json>(tmp_float, buf, settings))
|
||||
else if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional))
|
||||
{
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
@ -1004,7 +975,8 @@ namespace
|
||||
buf.position() = buf.buffer().begin();
|
||||
|
||||
Float64 tmp;
|
||||
if (tryReadFloat<is_json>(tmp, buf, settings) && buf.eof())
|
||||
bool has_fractional;
|
||||
if (tryReadFloat<is_json>(tmp, buf, settings, has_fractional) && buf.eof())
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
|
||||
return nullptr;
|
||||
|
@ -320,11 +320,13 @@ static inline void readUIntTextUpToNSignificantDigits(T & x, ReadBuffer & buf)
|
||||
|
||||
|
||||
template <typename T, typename ReturnType, bool allow_exponent = true>
|
||||
ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in, bool & has_fractional)
|
||||
{
|
||||
static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
|
||||
static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII");
|
||||
|
||||
has_fractional = false;
|
||||
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
bool negative = false;
|
||||
@ -377,6 +379,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
|
||||
if (checkChar('.', in))
|
||||
{
|
||||
has_fractional = true;
|
||||
auto after_point_count = in.count();
|
||||
|
||||
while (!in.eof() && *in.position() == '0')
|
||||
@ -394,6 +397,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
{
|
||||
if (checkChar('e', in) || checkChar('E', in))
|
||||
{
|
||||
has_fractional = true;
|
||||
if (in.eof())
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
@ -420,10 +424,14 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
}
|
||||
|
||||
if (after_point)
|
||||
{
|
||||
x += static_cast<T>(shift10(after_point, after_point_exponent));
|
||||
}
|
||||
|
||||
if (exponent)
|
||||
{
|
||||
x = static_cast<T>(shift10(x, exponent));
|
||||
}
|
||||
|
||||
if (negative)
|
||||
x = -x;
|
||||
@ -590,8 +598,16 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
|
||||
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
|
||||
|
||||
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); }
|
||||
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in)
|
||||
{
|
||||
bool has_fractional;
|
||||
readFloatTextFastImpl<T, void>(x, in, has_fractional);
|
||||
}
|
||||
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in)
|
||||
{
|
||||
bool has_fractional;
|
||||
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
|
||||
}
|
||||
|
||||
template <typename T> void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl<T, void>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl<T, bool>(x, in); }
|
||||
@ -603,6 +619,21 @@ template <typename T> void readFloatText(T & x, ReadBuffer & in) { readFloatText
|
||||
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); }
|
||||
|
||||
/// Don't read exponent part of the number.
|
||||
template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool, false>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in)
|
||||
{
|
||||
bool has_fractional;
|
||||
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
|
||||
}
|
||||
|
||||
/// With a @has_fractional flag
|
||||
/// Used for input_format_try_infer_integers
|
||||
template <typename T> bool tryReadFloatTextExt(T & x, ReadBuffer & in, bool & has_fractional)
|
||||
{
|
||||
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
|
||||
}
|
||||
template <typename T> bool tryReadFloatTextExtNoExponent(T & x, ReadBuffer & in, bool & has_fractional)
|
||||
{
|
||||
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,15 @@
|
||||
Int64
|
||||
x Nullable(Int64)
|
||||
x Nullable(Int64)
|
||||
x Nullable(Int64)
|
||||
Float64
|
||||
x Nullable(Float64)
|
||||
x Nullable(Float64)
|
||||
x Nullable(Float64)
|
||||
x Nullable(Float64)
|
||||
Float64.explicit File
|
||||
x Nullable(Float64)
|
||||
Float64.pipe
|
||||
x Nullable(Float64)
|
||||
Float64.default max_read_buffer_size
|
||||
x Nullable(Float64)
|
32
tests/queries/0_stateless/03170_float_schema_inference_small_block.sh
Executable file
32
tests/queries/0_stateless/03170_float_schema_inference_small_block.sh
Executable file
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
# do not fallback to float always
|
||||
echo "Int64"
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1}'
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1}'
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : -1}'
|
||||
|
||||
echo "Float64"
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1.1}'
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1.1}'
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1.111}'
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1.111}'
|
||||
|
||||
# this is required because clickhouse-local previously did not interpret
|
||||
# --max_read_buffer_size for fds [1]
|
||||
#
|
||||
# [1]: https://github.com/ClickHouse/ClickHouse/pull/64532
|
||||
echo "Float64.explicit File"
|
||||
tmp_path=$(mktemp "$CUR_DIR/03170_float_schema_inference_small_block.json.XXXXXX")
|
||||
trap 'rm -f $tmp_path' EXIT
|
||||
cat > "$tmp_path" <<<'{"x" : 1.111}'
|
||||
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' --file "$tmp_path"
|
||||
|
||||
echo "Float64.pipe"
|
||||
echo '{"x" : 1.1}' | $CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"'
|
||||
echo "Float64.default max_read_buffer_size"
|
||||
echo '{"x" : 1.1}' | $CLICKHOUSE_LOCAL --storage_file_read_method read --input-format JSONEachRow 'desc "table"'
|
Loading…
Reference in New Issue
Block a user