From a2c83ac82e98da717f6105c147fdcc76f378a6bd Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 7 Oct 2019 19:08:07 +0300 Subject: [PATCH] add tests --- .../Formats/Impl/CSVRowInputFormat.cpp | 4 +- .../Impl/JSONEachRowRowInputFormat.cpp | 2 +- .../Formats/Impl/TSKVRowInputFormat.cpp | 4 +- .../Impl/TabSeparatedRowInputFormat.cpp | 8 ++-- .../Formats/Impl/TemplateRowInputFormat.cpp | 9 ++-- .../Formats/Impl/TemplateRowInputFormat.h | 1 + .../queries/0_stateless/00301_csv.reference | 4 -- dbms/tests/queries/0_stateless/00301_csv.sh | 11 ----- .../00937_template_output_format.sh | 1 + .../00938_template_input_format.sh | 1 + ...01015_insert_values_parametrized.reference | 1 + .../01015_insert_values_parametrized.sh | 5 +- .../01016_input_null_as_default.reference | 25 ++++++++++ .../01016_input_null_as_default.sh | 47 +++++++++++++++++++ ...nput_defaults_for_omitted_fields.reference | 4 ++ ...01017_input_defaults_for_omitted_fields.sh | 14 ++++++ 16 files changed, 114 insertions(+), 27 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01016_input_null_as_default.reference create mode 100755 dbms/tests/queries/0_stateless/01016_input_null_as_default.sh create mode 100644 dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.reference create mode 100755 dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.sh diff --git a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 595184180d3..6f2097fdef2 100644 --- a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -209,7 +209,7 @@ bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext /// it doesn't have to check it. 
bool have_default_columns = have_always_default_columns; - ext.read_columns.assign(column_indexes_for_input_fields.size(), true); + ext.read_columns.assign(read_columns.size(), true); const auto delimiter = format_settings.csv.delimiter; for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) { @@ -372,6 +372,8 @@ bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bo const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n' || *in.position() == '\r'); + /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default + /// only one empty or NULL column will be expected if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end)) { diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index d5c061a3d47..e49f9315887 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -242,7 +242,7 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi auto & header = getPort().getHeader(); /// Fill non-visited columns with the default values. 
for (size_t i = 0; i < num_columns; ++i) - if (!read_columns[i]) + if (!seen_columns[i]) header.getByPosition(i).type->insertDefaultInto(*columns[i]); /// return info about defaults set diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 571d09fc31b..60df642836c 100644 --- a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -139,7 +139,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex seen_columns[index] = read_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; if (format_settings.null_as_default && !type->isNullable()) - read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type); + read_columns[index] = DataTypeNullable::deserializeTextEscaped(*columns[index], in, format_settings, type); else header.getByPosition(index).type->deserializeAsTextEscaped(*columns[index], in, format_settings); } @@ -181,7 +181,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// Fill in the not met columns with default values. 
for (size_t i = 0; i < num_columns; ++i) - if (!read_columns[i]) + if (!seen_columns[i]) header.getByPosition(i).type->insertDefaultInto(*columns[i]); /// return info about defaults set diff --git a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 8aa24e35aec..b57fc51f183 100644 --- a/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -176,7 +176,7 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens updateDiagnosticInfo(); - ext.read_columns.assign(column_indexes_for_input_fields.size(), true); + ext.read_columns.assign(read_columns.size(), true); for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) { const auto & column_index = column_indexes_for_input_fields[file_column]; @@ -214,7 +214,7 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column) { - const bool at_delimiter = !in.eof() && *in.position() == '\t'; + const bool at_delimiter = !is_last_file_column && !in.eof() && *in.position() == '\t'; const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n'); if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -222,8 +222,8 @@ bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & return false; } else if (format_settings.null_as_default && !type->isNullable()) - return DataTypeNullable::deserializeTextCSV(column, in, format_settings, type); - type->deserializeAsTextCSV(column, in, format_settings); + return DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type); + type->deserializeAsTextEscaped(column, in, format_settings); return true; } diff --git 
a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 16ebf5957ac..b77ec5417b0 100644 --- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -70,6 +70,10 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer column_in_format[col_idx] = true; } } + + for (size_t i = 0; i < header_.columns(); ++i) + if (!column_in_format[i]) + always_default_columns.push_back(i); } void TemplateRowInputFormat::readPrefix() @@ -178,9 +182,8 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension skipSpaces(); assertString(row_format.delimiters.back(), buf); - for (size_t i = 0; i < columns.size(); ++i) - if (!extra.read_columns[i]) - data_types[i]->insertDefaultInto(*columns[i]); + for (const auto & idx : always_default_columns) + data_types[idx]->insertDefaultInto(*columns[idx]); return true; } diff --git a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h index c449a2d0889..7b62347c37d 100644 --- a/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/dbms/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -57,6 +57,7 @@ private: size_t format_data_idx; bool end_of_stream = false; + std::vector<size_t> always_default_columns; char default_csv_delimiter; }; diff --git a/dbms/tests/queries/0_stateless/00301_csv.reference b/dbms/tests/queries/0_stateless/00301_csv.reference index 92cb50c0727..a9351f91f70 100644 --- a/dbms/tests/queries/0_stateless/00301_csv.reference +++ b/dbms/tests/queries/0_stateless/00301_csv.reference @@ -4,10 +4,6 @@ Hello "world" 789 2016-01-03 Hello\n world 100 2016-01-04 default 1 2019-06-19 default-eof 1 2019-06-19 -0 1 42 2019-07-22 -1 world 3 2019-07-23 -2 Hello 123 2019-06-19 -3 Hello 42 2019-06-19 2016-01-01 01:02:03 1 2016-01-02 01:02:03 2 2017-08-15 13:15:01 3 diff --git 
a/dbms/tests/queries/0_stateless/00301_csv.sh b/dbms/tests/queries/0_stateless/00301_csv.sh index c1bb6710c1f..cb0167b4e99 100755 --- a/dbms/tests/queries/0_stateless/00301_csv.sh +++ b/dbms/tests/queries/0_stateless/00301_csv.sh @@ -17,17 +17,6 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; - -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (i Int8, s String DEFAULT 'Hello', n UInt64 DEFAULT 42, d Date DEFAULT '2019-06-19') ENGINE = Memory"; -echo '\N, 1, \N, "2019-07-22" -1, world, 3, "2019-07-23" -2, \N, 123, \N -3, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO csv FORMAT CSV"; - -$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY i"; -$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; - - $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Europe/Moscow'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" diff --git a/dbms/tests/queries/0_stateless/00937_template_output_format.sh b/dbms/tests/queries/0_stateless/00937_template_output_format.sh index 239f7d672d0..0b0cba4e2bd 100755 --- a/dbms/tests/queries/0_stateless/00937_template_output_format.sh +++ b/dbms/tests/queries/0_stateless/00937_template_output_format.sh @@ -20,3 +20,4 @@ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \ format_template_rows_between_delimiter = ';\n'"; $CLICKHOUSE_CLIENT --query="DROP TABLE template"; +rm $CURDIR/00937_template_output_format_resultset.tmp $CURDIR/00937_template_output_format_row.tmp diff --git a/dbms/tests/queries/0_stateless/00938_template_input_format.sh b/dbms/tests/queries/0_stateless/00938_template_input_format.sh index 998fe195203..ca314db13de 100755 --- a/dbms/tests/queries/0_stateless/00938_template_input_format.sh +++ b/dbms/tests/queries/0_stateless/00938_template_input_format.sh @@ -50,3 +50,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV"; $CLICKHOUSE_CLIENT 
--query="DROP TABLE template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE template2"; +rm $CURDIR/00938_template_input_format_resultset.tmp $CURDIR/00938_template_input_format_row.tmp diff --git a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference index e98738050e5..c887e5feb5f 100644 --- a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference +++ b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.reference @@ -2,3 +2,4 @@ 1 worldparam [0.2,0.3] 2 testparam [0.3] 3 paramparam [] +4 evaluateparam [0.2] diff --git a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh index 2fd06535ebe..8edda6629b0 100755 --- a/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh +++ b/dbms/tests/queries/0_stateless/01015_insert_values_parametrized.sh @@ -6,12 +6,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS insert_values_parametrized"; $CLICKHOUSE_CLIENT --query="CREATE TABLE insert_values_parametrized (n UInt8, s String, a Array(Float32)) ENGINE = Memory"; -$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=1 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES +$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=1 --input_format_values_interpret_expressions=0 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES (1 + {p_n:Int8}, lower(concat('Hello', {p_s:String})), arraySort(arrayIntersect([], {p_a:Array(Nullable(Float32))}))),\ (2 + {p_n:Int8}, lower(concat('world', {p_s:String})), arraySort(arrayIntersect([0.1,0.2,0.3], {p_a:Array(Nullable(Float32))}))),\ (3 + {p_n:Int8}, lower(concat('TEST', {p_s:String})), 
arraySort(arrayIntersect([0.1,0.3,0.4], {p_a:Array(Nullable(Float32))}))),\ (4 + {p_n:Int8}, lower(concat('PaRaM', {p_s:String})), arraySort(arrayIntersect([0.5], {p_a:Array(Nullable(Float32))})))"; +$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=0 --input_format_values_interpret_expressions=1 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES \ +(5 + {p_n:Int8}, lower(concat('Evaluate', {p_s:String})), arrayIntersect([0, 0.2, 0.6], {p_a:Array(Nullable(Float32))}))" + $CLICKHOUSE_CLIENT --query="SELECT * FROM insert_values_parametrized ORDER BY n"; $CLICKHOUSE_CLIENT --query="DROP TABLE insert_values_parametrized"; diff --git a/dbms/tests/queries/0_stateless/01016_input_null_as_default.reference b/dbms/tests/queries/0_stateless/01016_input_null_as_default.reference new file mode 100644 index 00000000000..2424edf1c2e --- /dev/null +++ b/dbms/tests/queries/0_stateless/01016_input_null_as_default.reference @@ -0,0 +1,25 @@ +CSV +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +TSV +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +TSKV +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +JSONEachRow +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) +Template (Quoted) +0 1 42 2019-07-22 [10,20,30] ('default',0) +1 world 3 2019-07-23 [1,2,3] ('tuple',3.14) +2 Hello 123 2019-06-19 [] ('test',2.71828) +3 Hello 42 2019-06-19 [1,2,3] ('default',0.75) diff --git 
a/dbms/tests/queries/0_stateless/01016_input_null_as_default.sh b/dbms/tests/queries/0_stateless/01016_input_null_as_default.sh new file mode 100755 index 00000000000..139b66f50ae --- /dev/null +++ b/dbms/tests/queries/0_stateless/01016_input_null_as_default.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS null_as_default"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE null_as_default (i Int8, s String DEFAULT 'Hello', n UInt64 DEFAULT 42, d Date DEFAULT '2019-06-19', a Array(UInt8) DEFAULT [1, 2, 3], t Tuple(String, Float64) DEFAULT ('default', i / 4)) ENGINE = Memory"; + +echo 'CSV' +echo '\N, 1, \N, "2019-07-22", "[10, 20, 30]", \N +1, world, 3, "2019-07-23", \N, tuple, 3.14 +2, \N, 123, \N, "[]", test, 2.71828 +3, \N, \N, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT CSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'TSV' +echo -e '\N\t1\t\N\t2019-07-22\t[10, 20, 30]\t\N +1\tworld\t3\t2019-07-23\t\N\t('\''tuple'\'', 3.14) +2\t\N\t123\t\N\t[]\t('\''test'\'', 2.71828) +3\t\N\t\N\t\N\t\N\t\N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'TSKV' +echo -e 'i=\N\ts=1\tn=\N\td=2019-07-22\ta=[10, 20, 30]\tt=\N +i=1\ts=world\tn=3\td=2019-07-23\ta=\N\tt=('\''tuple'\'', 3.14) +i=2\ts=\N\tn=123\td=\N\ta=[]\tt=('\''test'\'', 2.71828) +i=3\ts=\N\tn=\N\td=\N\ta=\N\tt=\N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT TSKV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE 
null_as_default"; + +echo 'JSONEachRow' +echo '{"i": null, "s": "1", "n": null, "d": "2019-07-22", "a": [10, 20, 30], "t": null} +{"i": 1, "s": "world", "n": 3, "d": "2019-07-23", "a": null, "t": ["tuple", 3.14]} +{"i": 2, "s": null, "n": 123, "d": null, "a": [], "t": ["test", 2.71828]} +{"i": 3, "s": null, "n": null, "d": null, "a": null, "t": null}' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT JSONEachRow"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default"; + +echo 'Template (Quoted)' +echo 'NULL, '\''1'\'', null, '\''2019-07-22'\'', [10, 20, 30], NuLl +1, '\''world'\'', 3, '\''2019-07-23'\'', NULL, ('\''tuple'\'', 3.14) +2, null, 123, null, [], ('\''test'\'', 2.71828) +3, null, null, null, null, null' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --format_custom_escaping_rule=Quoted --format_custom_field_delimiter=', ' --query="INSERT INTO null_as_default FORMAT CustomSeparated"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i"; +$CLICKHOUSE_CLIENT --query="DROP TABLE null_as_default"; diff --git a/dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.reference b/dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.reference new file mode 100644 index 00000000000..feca2ec6484 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.reference @@ -0,0 +1,4 @@ + 1 2019-06-19 +abcd 100 2016-01-01 +default 1 2019-06-19 +default-eof 1 2019-06-19 diff --git a/dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.sh b/dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.sh new file mode 100755 index 00000000000..97eed67c24c --- /dev/null +++ b/dbms/tests/queries/0_stateless/01017_input_defaults_for_omitted_fields.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && 
pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS empty_as_default"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE empty_as_default (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; + +echo -ne 'abcd\t100\t2016-01-01 +default\t\t +\t\t +default-eof\t\t' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --query="INSERT INTO empty_as_default FORMAT TSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM empty_as_default ORDER BY s"; +$CLICKHOUSE_CLIENT --query="DROP TABLE empty_as_default";