mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Merge pull request #49960 from Avogar/fix-tsv-nullable-parsing
Fix possible Logical error on bad Nullable parsing for text formats
This commit is contained in:
commit
0e346c78ae
@ -355,6 +355,9 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
|
||||
/// It can happen only if there is a string instead of a number
|
||||
/// or if someone uses tab or LF in TSV null_representation.
|
||||
/// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
|
||||
/// We also should delete incorrectly deserialized value from nested column.
|
||||
nested_column.popBack(1);
|
||||
|
||||
if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
|
||||
throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
|
||||
"containing '\\t' or '\\n' may not work correctly for large input.");
|
||||
@ -447,6 +450,8 @@ ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, Re
|
||||
|
||||
/// We have some unread data in PeekableReadBuffer own memory.
|
||||
/// It can happen only if there is an unquoted string instead of a number.
|
||||
/// We also should delete incorrectly deserialized value from nested column.
|
||||
nested_column.popBack(1);
|
||||
throw DB::ParsingException(
|
||||
ErrorCodes::CANNOT_READ_ALL_DATA,
|
||||
"Error while parsing Nullable: got an unquoted string {} instead of a number",
|
||||
@ -579,6 +584,9 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
|
||||
/// It can happen only if there is an unquoted string instead of a number
|
||||
/// or if someone uses csv delimiter, LF or CR in CSV null representation.
|
||||
/// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
|
||||
/// We also should delete incorrectly deserialized value from nested column.
|
||||
nested_column.popBack(1);
|
||||
|
||||
if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos
|
||||
|| null_representation.find('\n') != std::string::npos)
|
||||
throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "CSV custom null representation containing "
|
||||
|
@ -44,7 +44,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
|
||||
format_settings_,
|
||||
std::make_unique<CustomSeparatedFormatReader>(*buf_, ignore_spaces_, format_settings_),
|
||||
format_settings_.custom.try_detect_header)
|
||||
, buf(std::move(buf_))
|
||||
, buf(std::move(buf_)), ignore_spaces(ignore_spaces_)
|
||||
{
|
||||
/// In case of CustomSeparatedWithNames(AndTypes) formats and enabled setting input_format_with_names_use_header we don't know
|
||||
/// the exact number of columns in data (because it can contain unknown columns). So, if field_delimiter and row_after_delimiter are
|
||||
|
@ -0,0 +1,3 @@
|
||||
1 \N
|
||||
1 \N
|
||||
1 \N
|
15
tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.sh
Executable file
15
tests/queries/0_stateless/02751_text_formats_bad_nullable_parsing.sh
Executable file
@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# NOTE: this sh wrapper is required because of shell_config
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "drop table if exists test"
|
||||
$CLICKHOUSE_CLIENT -q "create table test (x UInt32, y Nullable(UInt32)) engine=MergeTree order by x"
|
||||
$CLICKHOUSE_CLIENT -q "select '1\t\\\N\n2\t\\\' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=1 format TSV"
|
||||
$CLICKHOUSE_CLIENT -q "select '1,\\\N\n2,\\\' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=1 format CSV"
|
||||
$CLICKHOUSE_CLIENT -q "select '1\tNULL\n2\tN' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=2, format_custom_escaping_rule='Quoted' format CustomSeparated"
|
||||
$CLICKHOUSE_CLIENT -q "select * from test"
|
||||
$CLICKHOUSE_CLIENT -q "drop table test";
|
Loading…
Reference in New Issue
Block a user