mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #64641 from azat/fix-float-inference
Fix type inference for float (in case of small buffer)
This commit is contained in:
commit
085c406f1f
@ -879,11 +879,11 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <bool is_json>
|
template <bool is_json>
|
||||||
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings)
|
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional)
|
||||||
{
|
{
|
||||||
if (is_json || settings.try_infer_exponent_floats)
|
if (is_json || settings.try_infer_exponent_floats)
|
||||||
return tryReadFloatText(value, buf);
|
return tryReadFloatTextExt(value, buf, has_fractional);
|
||||||
return tryReadFloatTextNoExponent(value, buf);
|
return tryReadFloatTextExtNoExponent(value, buf, has_fractional);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool is_json>
|
template <bool is_json>
|
||||||
@ -893,46 +893,31 @@ namespace
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
Float64 tmp_float;
|
Float64 tmp_float;
|
||||||
|
bool has_fractional;
|
||||||
if (settings.try_infer_integers)
|
if (settings.try_infer_integers)
|
||||||
{
|
{
|
||||||
/// If we read from String, we can do it in a more efficient way.
|
/// If we read from String, we can do it in a more efficient way.
|
||||||
if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf))
|
if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf))
|
||||||
{
|
{
|
||||||
/// Remember the pointer to the start of the number to rollback to it.
|
/// Remember the pointer to the start of the number to rollback to it.
|
||||||
char * number_start = buf.position();
|
|
||||||
Int64 tmp_int;
|
|
||||||
bool read_int = tryReadIntText(tmp_int, buf);
|
|
||||||
/// If we reached eof, it cannot be float (it requires no less data than integer)
|
|
||||||
if (buf.eof())
|
|
||||||
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;
|
|
||||||
|
|
||||||
char * int_end = buf.position();
|
|
||||||
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
|
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
|
||||||
buf.position() = number_start;
|
char * number_start = buf.position();
|
||||||
|
|
||||||
bool read_uint = false;
|
/// NOTE: this relies on tryReadFloat() being equivalent to tryReadIntText() + parsing of '.'/'e', and will break if that stops holding.
|
||||||
char * uint_end = nullptr;
|
/// But, for now it is true
|
||||||
/// In case of Int64 overflow we can try to infer UInt64.
|
if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional) && has_fractional)
|
||||||
if (!read_int)
|
|
||||||
{
|
|
||||||
UInt64 tmp_uint;
|
|
||||||
read_uint = tryReadIntText(tmp_uint, buf);
|
|
||||||
/// If we reached eof, it cannot be float (it requires no less data than integer)
|
|
||||||
if (buf.eof())
|
|
||||||
return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;
|
|
||||||
|
|
||||||
uint_end = buf.position();
|
|
||||||
buf.position() = number_start;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tryReadFloat<is_json>(tmp_float, buf, settings))
|
|
||||||
{
|
|
||||||
if (read_int && buf.position() == int_end)
|
|
||||||
return std::make_shared<DataTypeInt64>();
|
|
||||||
if (read_uint && buf.position() == uint_end)
|
|
||||||
return std::make_shared<DataTypeUInt64>();
|
|
||||||
return std::make_shared<DataTypeFloat64>();
|
return std::make_shared<DataTypeFloat64>();
|
||||||
}
|
|
||||||
|
Int64 tmp_int;
|
||||||
|
buf.position() = number_start;
|
||||||
|
if (tryReadIntText(tmp_int, buf))
|
||||||
|
return std::make_shared<DataTypeInt64>();
|
||||||
|
|
||||||
|
/// In case of Int64 overflow we can try to infer UInt64.
|
||||||
|
UInt64 tmp_uint;
|
||||||
|
buf.position() = number_start;
|
||||||
|
if (tryReadIntText(tmp_uint, buf))
|
||||||
|
return std::make_shared<DataTypeUInt64>();
|
||||||
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -942,36 +927,22 @@ namespace
|
|||||||
/// and then as float.
|
/// and then as float.
|
||||||
PeekableReadBuffer peekable_buf(buf);
|
PeekableReadBuffer peekable_buf(buf);
|
||||||
PeekableReadBufferCheckpoint checkpoint(peekable_buf);
|
PeekableReadBufferCheckpoint checkpoint(peekable_buf);
|
||||||
Int64 tmp_int;
|
|
||||||
bool read_int = tryReadIntText(tmp_int, peekable_buf);
|
|
||||||
auto * int_end = peekable_buf.position();
|
|
||||||
peekable_buf.rollbackToCheckpoint(true);
|
|
||||||
|
|
||||||
bool read_uint = false;
|
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings, has_fractional) && has_fractional)
|
||||||
char * uint_end = nullptr;
|
|
||||||
/// In case of Int64 overflow we can try to infer UInt64.
|
|
||||||
if (!read_int)
|
|
||||||
{
|
|
||||||
PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
|
|
||||||
UInt64 tmp_uint;
|
|
||||||
read_uint = tryReadIntText(tmp_uint, peekable_buf);
|
|
||||||
uint_end = peekable_buf.position();
|
|
||||||
peekable_buf.rollbackToCheckpoint(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings))
|
|
||||||
{
|
|
||||||
/// Float parsing reads no fewer bytes than integer parsing,
|
|
||||||
/// so position of the buffer is either the same, or further.
|
|
||||||
/// If it's the same, then it's integer.
|
|
||||||
if (read_int && peekable_buf.position() == int_end)
|
|
||||||
return std::make_shared<DataTypeInt64>();
|
|
||||||
if (read_uint && peekable_buf.position() == uint_end)
|
|
||||||
return std::make_shared<DataTypeUInt64>();
|
|
||||||
return std::make_shared<DataTypeFloat64>();
|
return std::make_shared<DataTypeFloat64>();
|
||||||
}
|
peekable_buf.rollbackToCheckpoint(/* drop= */ false);
|
||||||
|
|
||||||
|
Int64 tmp_int;
|
||||||
|
if (tryReadIntText(tmp_int, peekable_buf))
|
||||||
|
return std::make_shared<DataTypeInt64>();
|
||||||
|
peekable_buf.rollbackToCheckpoint(/* drop= */ true);
|
||||||
|
|
||||||
|
/// In case of Int64 overflow we can try to infer UInt64.
|
||||||
|
UInt64 tmp_uint;
|
||||||
|
if (tryReadIntText(tmp_uint, peekable_buf))
|
||||||
|
return std::make_shared<DataTypeUInt64>();
|
||||||
}
|
}
|
||||||
else if (tryReadFloat<is_json>(tmp_float, buf, settings))
|
else if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional))
|
||||||
{
|
{
|
||||||
return std::make_shared<DataTypeFloat64>();
|
return std::make_shared<DataTypeFloat64>();
|
||||||
}
|
}
|
||||||
@ -1004,7 +975,8 @@ namespace
|
|||||||
buf.position() = buf.buffer().begin();
|
buf.position() = buf.buffer().begin();
|
||||||
|
|
||||||
Float64 tmp;
|
Float64 tmp;
|
||||||
if (tryReadFloat<is_json>(tmp, buf, settings) && buf.eof())
|
bool has_fractional;
|
||||||
|
if (tryReadFloat<is_json>(tmp, buf, settings, has_fractional) && buf.eof())
|
||||||
return std::make_shared<DataTypeFloat64>();
|
return std::make_shared<DataTypeFloat64>();
|
||||||
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -320,11 +320,13 @@ static inline void readUIntTextUpToNSignificantDigits(T & x, ReadBuffer & buf)
|
|||||||
|
|
||||||
|
|
||||||
template <typename T, typename ReturnType, bool allow_exponent = true>
|
template <typename T, typename ReturnType, bool allow_exponent = true>
|
||||||
ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in, bool & has_fractional)
|
||||||
{
|
{
|
||||||
static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
|
static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
|
||||||
static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII");
|
static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII");
|
||||||
|
|
||||||
|
has_fractional = false;
|
||||||
|
|
||||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||||
|
|
||||||
bool negative = false;
|
bool negative = false;
|
||||||
@ -377,6 +379,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
|||||||
|
|
||||||
if (checkChar('.', in))
|
if (checkChar('.', in))
|
||||||
{
|
{
|
||||||
|
has_fractional = true;
|
||||||
auto after_point_count = in.count();
|
auto after_point_count = in.count();
|
||||||
|
|
||||||
while (!in.eof() && *in.position() == '0')
|
while (!in.eof() && *in.position() == '0')
|
||||||
@ -394,6 +397,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
|||||||
{
|
{
|
||||||
if (checkChar('e', in) || checkChar('E', in))
|
if (checkChar('e', in) || checkChar('E', in))
|
||||||
{
|
{
|
||||||
|
has_fractional = true;
|
||||||
if (in.eof())
|
if (in.eof())
|
||||||
{
|
{
|
||||||
if constexpr (throw_exception)
|
if constexpr (throw_exception)
|
||||||
@ -420,10 +424,14 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (after_point)
|
if (after_point)
|
||||||
|
{
|
||||||
x += static_cast<T>(shift10(after_point, after_point_exponent));
|
x += static_cast<T>(shift10(after_point, after_point_exponent));
|
||||||
|
}
|
||||||
|
|
||||||
if (exponent)
|
if (exponent)
|
||||||
|
{
|
||||||
x = static_cast<T>(shift10(x, exponent));
|
x = static_cast<T>(shift10(x, exponent));
|
||||||
|
}
|
||||||
|
|
||||||
if (negative)
|
if (negative)
|
||||||
x = -x;
|
x = -x;
|
||||||
@ -590,8 +598,16 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
|
|||||||
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
|
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
|
||||||
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
|
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
|
||||||
|
|
||||||
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); }
|
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in)
|
||||||
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); }
|
{
|
||||||
|
bool has_fractional;
|
||||||
|
readFloatTextFastImpl<T, void>(x, in, has_fractional);
|
||||||
|
}
|
||||||
|
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in)
|
||||||
|
{
|
||||||
|
bool has_fractional;
|
||||||
|
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T> void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl<T, void>(x, in); }
|
template <typename T> void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl<T, void>(x, in); }
|
||||||
template <typename T> bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl<T, bool>(x, in); }
|
template <typename T> bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl<T, bool>(x, in); }
|
||||||
@ -603,6 +619,21 @@ template <typename T> void readFloatText(T & x, ReadBuffer & in) { readFloatText
|
|||||||
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); }
|
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); }
|
||||||
|
|
||||||
/// Don't read exponent part of the number.
|
/// Don't read exponent part of the number.
|
||||||
template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool, false>(x, in); }
|
template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in)
|
||||||
|
{
|
||||||
|
bool has_fractional;
|
||||||
|
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// With a @has_fractional flag
|
||||||
|
/// Used for input_format_try_infer_integers
|
||||||
|
template <typename T> bool tryReadFloatTextExt(T & x, ReadBuffer & in, bool & has_fractional)
|
||||||
|
{
|
||||||
|
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
|
||||||
|
}
|
||||||
|
template <typename T> bool tryReadFloatTextExtNoExponent(T & x, ReadBuffer & in, bool & has_fractional)
|
||||||
|
{
|
||||||
|
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,15 @@
|
|||||||
|
Int64
|
||||||
|
x Nullable(Int64)
|
||||||
|
x Nullable(Int64)
|
||||||
|
x Nullable(Int64)
|
||||||
|
Float64
|
||||||
|
x Nullable(Float64)
|
||||||
|
x Nullable(Float64)
|
||||||
|
x Nullable(Float64)
|
||||||
|
x Nullable(Float64)
|
||||||
|
Float64.explicit File
|
||||||
|
x Nullable(Float64)
|
||||||
|
Float64.pipe
|
||||||
|
x Nullable(Float64)
|
||||||
|
Float64.default max_read_buffer_size
|
||||||
|
x Nullable(Float64)
|
32
tests/queries/0_stateless/03170_float_schema_inference_small_block.sh
Executable file
32
tests/queries/0_stateless/03170_float_schema_inference_small_block.sh
Executable file
@ -0,0 +1,32 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CUR_DIR"/../shell_config.sh
|
||||||
|
|
||||||
|
# do not always fall back to float
|
||||||
|
echo "Int64"
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1}'
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1}'
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : -1}'
|
||||||
|
|
||||||
|
echo "Float64"
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1.1}'
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1.1}'
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : 1.111}'
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' <<<'{"x" : +1.111}'
|
||||||
|
|
||||||
|
# this is required because clickhouse-local previously did not honor
|
||||||
|
# --max_read_buffer_size for fds [1]
|
||||||
|
#
|
||||||
|
# [1]: https://github.com/ClickHouse/ClickHouse/pull/64532
|
||||||
|
echo "Float64.explicit File"
|
||||||
|
tmp_path=$(mktemp "$CUR_DIR/03170_float_schema_inference_small_block.json.XXXXXX")
|
||||||
|
trap 'rm -f $tmp_path' EXIT
|
||||||
|
cat > "$tmp_path" <<<'{"x" : 1.111}'
|
||||||
|
$CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"' --file "$tmp_path"
|
||||||
|
|
||||||
|
echo "Float64.pipe"
|
||||||
|
echo '{"x" : 1.1}' | $CLICKHOUSE_LOCAL --storage_file_read_method read --max_read_buffer_size 1 --input-format JSONEachRow 'desc "table"'
|
||||||
|
echo "Float64.default max_read_buffer_size"
|
||||||
|
echo '{"x" : 1.1}' | $CLICKHOUSE_LOCAL --storage_file_read_method read --input-format JSONEachRow 'desc "table"'
|
Loading…
Reference in New Issue
Block a user