Add input_format_csv_ignore_extra_columns setting (prototype)

This commit is contained in:
Dmitry Kardymon 2023-06-14 10:35:36 +00:00
parent 91d794cf0a
commit ed318d1035
6 changed files with 31 additions and 1 deletions

View File

@ -835,6 +835,7 @@ class IColumn;
M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \
M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \
M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if the file has more columns than expected).", 0) \
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \
M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \

View File

@ -63,6 +63,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.csv.delimiter = settings.format_csv_delimiter;
format_settings.csv.tuple_delimiter = settings.format_csv_delimiter;
format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default;
format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns;
format_settings.csv.enum_as_number = settings.input_format_csv_enum_as_number;
format_settings.csv.null_representation = settings.format_csv_null_representation;
format_settings.csv.arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;

View File

@ -128,6 +128,7 @@ struct FormatSettings
bool allow_single_quotes = true;
bool allow_double_quotes = true;
bool empty_as_default = false;
bool ignore_extra_columns = false;
bool crlf_end_of_line = false;
bool enum_as_number = false;
bool arrays_as_nested_csv = false;

View File

@ -302,14 +302,27 @@ bool CSVFormatReader::readField(
return false;
}
/// Discards the rest of the current row once the last expected column has been read,
/// so that extra trailing columns are ignored. Does nothing unless this is the last file column
/// and csv.ignore_extra_columns is enabled. Stops at the line feed without consuming it
/// (or at end of input), leaving the row terminator for the caller to handle.
/// NOTE(review): a line feed inside a quoted extra field is treated as the end of the row - confirm this is acceptable.
auto skip_all = [&]()
{
    if (!is_last_file_column || !format_settings.csv.ignore_extra_columns)
        return;

    /// The line feed may lie beyond the currently loaded part of the buffer, so keep scanning:
    /// eof() is expected to load the next part once the current one is exhausted.
    while (!buf->eof())
    {
        buf->position() = find_first_symbols<'\n'>(buf->position(), buf->buffer().end());

        /// Position is before the end of the loaded data only if a line feed was found.
        if (buf->hasPendingData())
            break;
    }
};
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
{
/// If value is null but type is not nullable then use default value instead.
return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);
bool res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);
skip_all();
return res;
}
/// Read the column normally.
serialization->deserializeTextCSV(column, *buf, format_settings);
skip_all();
return true;
}

View File

@ -11,3 +11,7 @@ default-eof 1 2019-06-19
2016-01-01 01:02:03 NUL
2016-01-02 01:02:03 Nhello
\N \N
Hello world 1 2016-01-01
Hello world 2 2016-01-02
Hello world 3 2016-01-03
Hello world 4 2016-01-04

View File

@ -37,3 +37,13 @@ echo 'NULL, NULL
$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST";
$CLICKHOUSE_CLIENT --query="DROP TABLE csv";
# Rows carry 3, 4 (empty field after a trailing comma), 4 and 5 fields, while the table has 3 columns.
# With input_format_csv_ignore_extra_columns=1 the fields after the third one are expected to be dropped.
$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory";
echo 'Hello world, 1, 2016-01-01
Hello world, 2 ,2016-01-02,
Hello world, 3 ,2016-01-03, 2016-01-13
Hello world, 4 ,2016-01-04, 2016-01-14, 2016-01-15' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV";
$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n";
$CLICKHOUSE_CLIENT --query="DROP TABLE csv";