add tests

This commit is contained in:
Alexander Tokmakov 2019-10-07 19:08:07 +03:00
parent 32b0b8272c
commit a2c83ac82e
16 changed files with 114 additions and 27 deletions

View File

@ -209,7 +209,7 @@ bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext
/// it doesn't have to check it.
bool have_default_columns = have_always_default_columns;
ext.read_columns.assign(column_indexes_for_input_fields.size(), true);
ext.read_columns.assign(read_columns.size(), true);
const auto delimiter = format_settings.csv.delimiter;
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
{
@ -372,6 +372,8 @@ bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bo
const bool at_last_column_line_end = is_last_file_column
&& (in.eof() || *in.position() == '\n' || *in.position() == '\r');
/// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default
/// only one empty or NULL column will be expected
if (format_settings.csv.empty_as_default
&& (at_delimiter || at_last_column_line_end))
{

View File

@ -242,7 +242,7 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi
auto & header = getPort().getHeader();
/// Fill non-visited columns with the default values.
for (size_t i = 0; i < num_columns; ++i)
if (!read_columns[i])
if (!seen_columns[i])
header.getByPosition(i).type->insertDefaultInto(*columns[i]);
/// return info about defaults set

View File

@ -139,7 +139,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex
seen_columns[index] = read_columns[index] = true;
const auto & type = getPort().getHeader().getByPosition(index).type;
if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type);
read_columns[index] = DataTypeNullable::deserializeTextEscaped(*columns[index], in, format_settings, type);
else
header.getByPosition(index).type->deserializeAsTextEscaped(*columns[index], in, format_settings);
}
@ -181,7 +181,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex
/// Fill in the not met columns with default values.
for (size_t i = 0; i < num_columns; ++i)
if (!read_columns[i])
if (!seen_columns[i])
header.getByPosition(i).type->insertDefaultInto(*columns[i]);
/// return info about defaults set

View File

@ -176,7 +176,7 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
updateDiagnosticInfo();
ext.read_columns.assign(column_indexes_for_input_fields.size(), true);
ext.read_columns.assign(read_columns.size(), true);
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
{
const auto & column_index = column_indexes_for_input_fields[file_column];
@ -214,7 +214,7 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column)
{
const bool at_delimiter = !in.eof() && *in.position() == '\t';
const bool at_delimiter = !is_last_file_column && !in.eof() && *in.position() == '\t';
const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n');
if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end))
{
@ -222,8 +222,8 @@ bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr &
return false;
}
else if (format_settings.null_as_default && !type->isNullable())
return DataTypeNullable::deserializeTextCSV(column, in, format_settings, type);
type->deserializeAsTextCSV(column, in, format_settings);
return DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type);
type->deserializeAsTextEscaped(column, in, format_settings);
return true;
}

View File

@ -70,6 +70,10 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, ReadBuffer
column_in_format[col_idx] = true;
}
}
for (size_t i = 0; i < header_.columns(); ++i)
if (!column_in_format[i])
always_default_columns.push_back(i);
}
void TemplateRowInputFormat::readPrefix()
@ -178,9 +182,8 @@ bool TemplateRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
skipSpaces();
assertString(row_format.delimiters.back(), buf);
for (size_t i = 0; i < columns.size(); ++i)
if (!extra.read_columns[i])
data_types[i]->insertDefaultInto(*columns[i]);
for (const auto & idx : always_default_columns)
data_types[idx]->insertDefaultInto(*columns[idx]);
return true;
}

View File

@ -57,6 +57,7 @@ private:
size_t format_data_idx;
bool end_of_stream = false;
std::vector<size_t> always_default_columns;
char default_csv_delimiter;
};

View File

@ -4,10 +4,6 @@ Hello "world" 789 2016-01-03
Hello\n world 100 2016-01-04
default 1 2019-06-19
default-eof 1 2019-06-19
0 1 42 2019-07-22
1 world 3 2019-07-23
2 Hello 123 2019-06-19
3 Hello 42 2019-06-19
2016-01-01 01:02:03 1
2016-01-02 01:02:03 2
2017-08-15 13:15:01 3

View File

@ -17,17 +17,6 @@ Hello "world", 789 ,2016-01-03
$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d";
$CLICKHOUSE_CLIENT --query="DROP TABLE csv";
$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (i Int8, s String DEFAULT 'Hello', n UInt64 DEFAULT 42, d Date DEFAULT '2019-06-19') ENGINE = Memory";
echo '\N, 1, \N, "2019-07-22"
1, world, 3, "2019-07-23"
2, \N, 123, \N
3, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO csv FORMAT CSV";
$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY i";
$CLICKHOUSE_CLIENT --query="DROP TABLE csv";
$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Europe/Moscow'), s String) ENGINE = Memory";
echo '"2016-01-01 01:02:03","1"

View File

@ -20,3 +20,4 @@ format_template_row = '$CURDIR/00937_template_output_format_row.tmp', \
format_template_rows_between_delimiter = ';\n'";
$CLICKHOUSE_CLIENT --query="DROP TABLE template";
rm $CURDIR/00937_template_output_format_resultset.tmp $CURDIR/00937_template_output_format_row.tmp

View File

@ -50,3 +50,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV";
$CLICKHOUSE_CLIENT --query="DROP TABLE template1";
$CLICKHOUSE_CLIENT --query="DROP TABLE template2";
rm $CURDIR/00938_template_input_format_resultset.tmp $CURDIR/00938_template_input_format_row.tmp

View File

@ -2,3 +2,4 @@
1 worldparam [0.2,0.3]
2 testparam [0.3]
3 paramparam []
4 evaluateparam [0.2]

View File

@ -6,12 +6,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS insert_values_parametrized";
$CLICKHOUSE_CLIENT --query="CREATE TABLE insert_values_parametrized (n UInt8, s String, a Array(Float32)) ENGINE = Memory";
$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=1 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES
$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=1 --input_format_values_interpret_expressions=0 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES
(1 + {p_n:Int8}, lower(concat('Hello', {p_s:String})), arraySort(arrayIntersect([], {p_a:Array(Nullable(Float32))}))),\
(2 + {p_n:Int8}, lower(concat('world', {p_s:String})), arraySort(arrayIntersect([0.1,0.2,0.3], {p_a:Array(Nullable(Float32))}))),\
(3 + {p_n:Int8}, lower(concat('TEST', {p_s:String})), arraySort(arrayIntersect([0.1,0.3,0.4], {p_a:Array(Nullable(Float32))}))),\
(4 + {p_n:Int8}, lower(concat('PaRaM', {p_s:String})), arraySort(arrayIntersect([0.5], {p_a:Array(Nullable(Float32))})))";
$CLICKHOUSE_CLIENT --input_format_values_deduce_templates_of_expressions=0 --input_format_values_interpret_expressions=1 --param_p_n="-1" --param_p_s="param" --param_p_a="[0.2,0.3]" --query="INSERT INTO insert_values_parametrized VALUES \
(5 + {p_n:Int8}, lower(concat('Evaluate', {p_s:String})), arrayIntersect([0, 0.2, 0.6], {p_a:Array(Nullable(Float32))}))"
$CLICKHOUSE_CLIENT --query="SELECT * FROM insert_values_parametrized ORDER BY n";
$CLICKHOUSE_CLIENT --query="DROP TABLE insert_values_parametrized";

View File

@ -0,0 +1,25 @@
CSV
0 1 42 2019-07-22 [10,20,30] ('default',0)
1 world 3 2019-07-23 [1,2,3] ('tuple',3.14)
2 Hello 123 2019-06-19 [] ('test',2.71828)
3 Hello 42 2019-06-19 [1,2,3] ('default',0.75)
TSV
0 1 42 2019-07-22 [10,20,30] ('default',0)
1 world 3 2019-07-23 [1,2,3] ('tuple',3.14)
2 Hello 123 2019-06-19 [] ('test',2.71828)
3 Hello 42 2019-06-19 [1,2,3] ('default',0.75)
TSKV
0 1 42 2019-07-22 [10,20,30] ('default',0)
1 world 3 2019-07-23 [1,2,3] ('tuple',3.14)
2 Hello 123 2019-06-19 [] ('test',2.71828)
3 Hello 42 2019-06-19 [1,2,3] ('default',0.75)
JSONEachRow
0 1 42 2019-07-22 [10,20,30] ('default',0)
1 world 3 2019-07-23 [1,2,3] ('tuple',3.14)
2 Hello 123 2019-06-19 [] ('test',2.71828)
3 Hello 42 2019-06-19 [1,2,3] ('default',0.75)
Template (Quoted)
0 1 42 2019-07-22 [10,20,30] ('default',0)
1 world 3 2019-07-23 [1,2,3] ('tuple',3.14)
2 Hello 123 2019-06-19 [] ('test',2.71828)
3 Hello 42 2019-06-19 [1,2,3] ('default',0.75)

View File

@ -0,0 +1,47 @@
#!/usr/bin/env bash
# Functional test for --input_format_null_as_default=1.
# The same four rows (keyed by i = NULL/1/2/3) are inserted through several text
# input formats, and the table is printed after each insert so the output can be
# compared section by section against the reference file.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Quoted so that a checkout path containing spaces or glob characters still works.
# $CLICKHOUSE_CLIENT below is intentionally left unquoted: it may expand to a
# command plus arguments (presumably set by shell_config.sh -- verify there).
. "$CURDIR/../shell_config.sh"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS null_as_default";
# Every column except `i` has an explicit DEFAULT; the default of `t` depends on `i`.
$CLICKHOUSE_CLIENT --query="CREATE TABLE null_as_default (i Int8, s String DEFAULT 'Hello', n UInt64 DEFAULT 42, d Date DEFAULT '2019-06-19', a Array(UInt8) DEFAULT [1, 2, 3], t Tuple(String, Float64) DEFAULT ('default', i / 4)) ENGINE = Memory";
# CSV: a non-NULL tuple is written as two separate fields (e.g. `tuple, 3.14`),
# while a NULL tuple is written as a single \N field.
echo 'CSV'
echo '\N, 1, \N, "2019-07-22", "[10, 20, 30]", \N
1, world, 3, "2019-07-23", \N, tuple, 3.14
2, \N, 123, \N, "[]", test, 2.71828
3, \N, \N, \N, \N, \N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT CSV";
$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i";
$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default";
# TSV: `echo -e` turns \t into real tabs; \N is not an echo escape and is passed through as-is.
echo 'TSV'
echo -e '\N\t1\t\N\t2019-07-22\t[10, 20, 30]\t\N
1\tworld\t3\t2019-07-23\t\N\t('\''tuple'\'', 3.14)
2\t\N\t123\t\N\t[]\t('\''test'\'', 2.71828)
3\t\N\t\N\t\N\t\N\t\N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT TSV";
$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i";
$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default";
# TSKV: same data as TSV, with each field written as name=value.
echo 'TSKV'
echo -e 'i=\N\ts=1\tn=\N\td=2019-07-22\ta=[10, 20, 30]\tt=\N
i=1\ts=world\tn=3\td=2019-07-23\ta=\N\tt=('\''tuple'\'', 3.14)
i=2\ts=\N\tn=123\td=\N\ta=[]\tt=('\''test'\'', 2.71828)
i=3\ts=\N\tn=\N\td=\N\ta=\N\tt=\N' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT TSKV";
$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i";
$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default";
# JSONEachRow: NULL is the JSON literal null; tuples are written as JSON arrays.
echo 'JSONEachRow'
echo '{"i": null, "s": "1", "n": null, "d": "2019-07-22", "a": [10, 20, 30], "t": null}
{"i": 1, "s": "world", "n": 3, "d": "2019-07-23", "a": null, "t": ["tuple", 3.14]}
{"i": 2, "s": null, "n": 123, "d": null, "a": [], "t": ["test", 2.71828]}
{"i": 3, "s": null, "n": null, "d": null, "a": null, "t": null}' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --query="INSERT INTO null_as_default FORMAT JSONEachRow";
$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i";
$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE null_as_default";
# Template (Quoted): goes through CustomSeparated with the Quoted escaping rule and
# ', ' as field delimiter; NULL is spelled in mixed case (NULL / null / NuLl) on purpose.
echo 'Template (Quoted)'
echo 'NULL, '\''1'\'', null, '\''2019-07-22'\'', [10, 20, 30], NuLl
1, '\''world'\'', 3, '\''2019-07-23'\'', NULL, ('\''tuple'\'', 3.14)
2, null, 123, null, [], ('\''test'\'', 2.71828)
3, null, null, null, null, null' | $CLICKHOUSE_CLIENT --input_format_null_as_default=1 --format_custom_escaping_rule=Quoted --format_custom_field_delimiter=', ' --query="INSERT INTO null_as_default FORMAT CustomSeparated";
$CLICKHOUSE_CLIENT --query="SELECT * FROM null_as_default ORDER BY i";
# Last section: drop the table instead of truncating it.
$CLICKHOUSE_CLIENT --query="DROP TABLE null_as_default";

View File

@ -0,0 +1,4 @@
1 2019-06-19
abcd 100 2016-01-01
default 1 2019-06-19
default-eof 1 2019-06-19

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Functional test: TSV input containing empty fields, inserted with
# --input_format_defaults_for_omitted_fields=1, then read back ordered by `s`.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Quoted so that a checkout path containing spaces or glob characters still works.
# $CLICKHOUSE_CLIENT below is intentionally left unquoted: it may expand to a
# command plus arguments (presumably set by shell_config.sh -- verify there).
. "$CURDIR/../shell_config.sh"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS empty_as_default";
$CLICKHOUSE_CLIENT --query="CREATE TABLE empty_as_default (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory";
# Four rows: one fully specified, one with empty n and d, one with all fields empty,
# and a final row with empty n and d. `echo -n` suppresses the trailing newline, so
# the final row's last (empty) field is terminated by end of input, not by '\n'.
echo -ne 'abcd\t100\t2016-01-01
default\t\t
\t\t
default-eof\t\t' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --query="INSERT INTO empty_as_default FORMAT TSV";
$CLICKHOUSE_CLIENT --query="SELECT * FROM empty_as_default ORDER BY s";
$CLICKHOUSE_CLIENT --query="DROP TABLE empty_as_default";