mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 17:41:59 +00:00
Fix NaN deserialization for Quoted escaping rule
This commit is contained in:
parent
049b2c0c14
commit
4f136cb30c
@ -394,14 +394,67 @@ template<typename ReturnType>
|
||||
ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
|
||||
const SerializationPtr & nested)
|
||||
{
|
||||
return safeDeserialize<ReturnType>(column, *nested,
|
||||
[&istr]
|
||||
if (istr.eof() || (*istr.position() != 'N' && *istr.position() != 'n'))
|
||||
{
|
||||
return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr);
|
||||
},
|
||||
/// This is not null, surely.
|
||||
return safeDeserialize<ReturnType>(column, *nested,
|
||||
[] { return false; },
|
||||
[&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); });
|
||||
}
|
||||
|
||||
/// Check if we have enough data in buffer to check if it's a null.
|
||||
if (istr.available() >= 4)
|
||||
{
|
||||
auto check_for_null = [&istr]()
|
||||
{
|
||||
auto * pos = istr.position();
|
||||
if (checkStringCaseInsensitive("NULL", istr))
|
||||
return true;
|
||||
istr.position() = pos;
|
||||
return false;
|
||||
};
|
||||
auto deserialize_nested = [&nested, &settings, &istr] (IColumn & nested_column)
|
||||
{
|
||||
nested->deserializeTextQuoted(nested_column, istr, settings);
|
||||
};
|
||||
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
|
||||
}
|
||||
|
||||
/// We don't have enough data in buffer to check if it's a NULL
|
||||
/// and we cannot check it just by one symbol (otherwise we won't be able
|
||||
/// to differentiate for example NULL and NaN for float)
|
||||
/// Use PeekableReadBuffer to make a checkpoint before checking
|
||||
/// null and roll back if the check failed.
|
||||
PeekableReadBuffer buf(istr, true);
|
||||
auto check_for_null = [&buf]()
|
||||
{
|
||||
buf.setCheckpoint();
|
||||
SCOPE_EXIT(buf.dropCheckpoint());
|
||||
if (checkStringCaseInsensitive("NULL", buf))
|
||||
return true;
|
||||
|
||||
buf.rollbackToCheckpoint();
|
||||
return false;
|
||||
};
|
||||
|
||||
auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column)
|
||||
{
|
||||
nested->deserializeTextQuoted(nested_column, buf, settings);
|
||||
/// Check that we don't have any unread data in PeekableReadBuffer's own memory.
|
||||
if (likely(!buf.hasUnreadData()))
|
||||
return;
|
||||
|
||||
/// We have some unread data in PeekableReadBuffer's own memory.
|
||||
/// It can happen only if there is an unquoted string instead of a number.
|
||||
throw DB::ParsingException(
|
||||
ErrorCodes::CANNOT_READ_ALL_DATA,
|
||||
"Error while parsing Nullable: got an unquoted string {} instead of a number",
|
||||
String(buf.position(), std::min(10ul, buf.available())));
|
||||
};
|
||||
|
||||
return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
|
||||
}
|
||||
|
||||
|
||||
void SerializationNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
|
@ -205,7 +205,7 @@ void CustomSeparatedRowInputFormat::syncAfterError()
|
||||
|
||||
bool CustomSeparatedRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
|
||||
{
|
||||
return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first firld", ignore_spaces);
|
||||
return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first field", ignore_spaces);
|
||||
}
|
||||
|
||||
bool CustomSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
|
||||
|
12
tests/queries/0_stateless/02130_parse_quoted_null.reference
Normal file
12
tests/queries/0_stateless/02130_parse_quoted_null.reference
Normal file
@ -0,0 +1,12 @@
|
||||
\N 1
|
||||
nan 2
|
||||
42.42 3
|
||||
\N 4
|
||||
\N 5
|
||||
\N 6
|
||||
\N 7
|
||||
nan 8
|
||||
nan 9
|
||||
nan 10
|
||||
nan 11
|
||||
OK
|
56
tests/queries/0_stateless/02130_parse_quoted_null.sh
Executable file
56
tests/queries/0_stateless/02130_parse_quoted_null.sh
Executable file
@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-parallel
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
|
||||
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
|
||||
DATA_FILE=$USER_FILES_PATH/test_02130.data
|
||||
SELECT_QUERY="select * from file('test_02130.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'"
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "drop table if exists test_02130"
|
||||
$CLICKHOUSE_CLIENT -q "create table test_02130 (x Nullable(Float64), y Nullable(UInt64)) engine=Memory()"
|
||||
|
||||
echo -e "null\t1" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY"
|
||||
|
||||
echo -e "nan\t2" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY"
|
||||
|
||||
echo -e "42.42\t3" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY"
|
||||
|
||||
echo -e "null\t4" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1
|
||||
|
||||
echo -e "null\t5" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2
|
||||
|
||||
echo -e "null\t6" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3
|
||||
|
||||
echo -e "null\t7" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4
|
||||
|
||||
echo -e "nan\t8" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1
|
||||
|
||||
echo -e "nan\t9" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2
|
||||
|
||||
echo -e "nan\t10" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3
|
||||
|
||||
echo -e "nan\t11" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4
|
||||
|
||||
echo -e "42\tnan" > $DATA_FILE
|
||||
$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL'
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "select * from test_02130 order by y"
|
||||
$CLICKHOUSE_CLIENT -q "drop table test_02130"
|
||||
|
||||
rm $DATA_FILE
|
Loading…
Reference in New Issue
Block a user