From 7a4637c6452bd8411f242c78cbf8a63a501efd98 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Apr 2021 23:54:03 +0300 Subject: [PATCH 1/3] Fix deserialization of empty string without newline at end of TSV format --- src/DataTypes/Serializations/SerializationNullable.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 87805c53aa9..146aef67cd5 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -224,12 +224,9 @@ ReturnType SerializationNullable::deserializeTextEscapedImpl(IColumn & column, R { /// Little tricky, because we cannot discriminate null from first character. - if (istr.eof()) - throw ParsingException("Unexpected end of stream, while parsing value of Nullable type", ErrorCodes::CANNOT_READ_ALL_DATA); - - /// This is not null, surely. - if (*istr.position() != '\\') + if (istr.eof() || *istr.position() != '\\') /// Some data types can deserialize absense of data (e.g. empty string), so eof is ok. { + /// This is not null, surely. return safeDeserialize(column, *nested, [] { return false; }, [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextEscaped(nested_column, istr, settings); }); From 47a12a7b0b1760bf6c86565a3a3afddce9e2a891 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Apr 2021 00:14:13 +0300 Subject: [PATCH 2/3] Add a test --- .../01786_nullable_string_tsv_at_eof.reference | 6 ++++++ .../0_stateless/01786_nullable_string_tsv_at_eof.sh | 12 ++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.reference create mode 100755 tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.sh diff --git a/tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.reference b/tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.reference new file mode 100644 index 00000000000..35b388bbafb --- /dev/null +++ b/tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.reference @@ -0,0 +1,6 @@ +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.sh b/tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.sh new file mode 100755 index 00000000000..f0a663ae409 --- /dev/null +++ b/tests/queries/0_stateless/01786_nullable_string_tsv_at_eof.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +printf '1\t' | $CLICKHOUSE_LOCAL --query="SELECT * FROM table" --structure='a String, b String' +printf '1\t' | $CLICKHOUSE_LOCAL --input_format_null_as_default 0 --query="SELECT * FROM table" --structure='a String, b String' +printf '1\t' | $CLICKHOUSE_LOCAL --input_format_null_as_default 1 --query="SELECT * FROM table" --structure='a String, b String' +printf '1\t' | $CLICKHOUSE_LOCAL --query="SELECT * FROM table" --structure='a String, b Nullable(String)' +printf '1\t' | $CLICKHOUSE_LOCAL --input_format_null_as_default 0 --query="SELECT * FROM table" --structure='a String, b Nullable(String)' +printf '1\t' | $CLICKHOUSE_LOCAL --input_format_null_as_default 1 --query="SELECT * FROM table" --structure='a Nullable(String), b Nullable(String)' From 2de808ec5acc6d306d7470f4bacc8b3cd19ce578 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 3 Apr 2021 12:50:57 +0300 Subject: [PATCH 3/3] Update SerializationNullable.cpp --- src/DataTypes/Serializations/SerializationNullable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 146aef67cd5..4de2b08c043 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -224,7 +224,7 @@ ReturnType SerializationNullable::deserializeTextEscapedImpl(IColumn & column, R { /// Little tricky, because we cannot discriminate null from first character. - if (istr.eof() || *istr.position() != '\\') /// Some data types can deserialize absense of data (e.g. empty string), so eof is ok. + if (istr.eof() || *istr.position() != '\\') /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok. { /// This is not null, surely. return safeDeserialize(column, *nested,