mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Add input_format_csv_ignore_extra_columns setting (prototype)
This commit is contained in:
parent
91d794cf0a
commit
ed318d1035
@ -835,6 +835,7 @@ class IColumn;
|
||||
M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
|
||||
M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \
|
||||
M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \
|
||||
M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if the file has more columns than expected).", 0) \
|
||||
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
|
||||
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \
|
||||
M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \
|
||||
|
@ -63,6 +63,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.delimiter = settings.format_csv_delimiter;
|
||||
format_settings.csv.tuple_delimiter = settings.format_csv_delimiter;
|
||||
format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default;
|
||||
format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns;
|
||||
format_settings.csv.enum_as_number = settings.input_format_csv_enum_as_number;
|
||||
format_settings.csv.null_representation = settings.format_csv_null_representation;
|
||||
format_settings.csv.arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
|
||||
|
@ -128,6 +128,7 @@ struct FormatSettings
|
||||
bool allow_single_quotes = true;
|
||||
bool allow_double_quotes = true;
|
||||
bool empty_as_default = false;
|
||||
bool ignore_extra_columns = false;
|
||||
bool crlf_end_of_line = false;
|
||||
bool enum_as_number = false;
|
||||
bool arrays_as_nested_csv = false;
|
||||
|
@ -302,14 +302,27 @@ bool CSVFormatReader::readField(
|
||||
return false;
|
||||
}
|
||||
|
||||
auto skip_all = [&]()
|
||||
{
|
||||
if (!is_last_file_column || !format_settings.csv.ignore_extra_columns)
|
||||
{
|
||||
return;
|
||||
}
|
||||
//std::cout << "skip !!!" << std::endl;
|
||||
buf->position() = find_first_symbols<'\n'>(buf->position(), buf->buffer().end());
|
||||
};
|
||||
if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
{
    /// If value is null but type is not nullable then use default value instead.
    bool res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);

    /// Must run before returning, otherwise extra trailing columns are never skipped on this path.
    skip_all();
    return res;
}
|
||||
|
||||
/// Read the column normally.
|
||||
serialization->deserializeTextCSV(column, *buf, format_settings);
|
||||
|
||||
skip_all();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -11,3 +11,7 @@ default-eof 1 2019-06-19
|
||||
2016-01-01 01:02:03 NUL
|
||||
2016-01-02 01:02:03 Nhello
|
||||
\N \N
|
||||
Hello world 1 2016-01-01
|
||||
Hello world 2 2016-01-02
|
||||
Hello world 3 2016-01-03
|
||||
Hello world 4 2016-01-04
|
||||
|
@ -37,3 +37,13 @@ echo 'NULL, NULL
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST";
|
||||
$CLICKHOUSE_CLIENT --query="DROP TABLE csv";
|
||||
|
||||
|
||||
# Rows may carry more fields than the table has columns; the INSERT below enables
# input_format_csv_ignore_extra_columns, so trailing extra fields should be dropped.
$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory";
|
||||
|
||||
echo 'Hello world, 1, 2016-01-01
|
||||
Hello world, 2 ,2016-01-02,
|
||||
Hello world, 3 ,2016-01-03, 2016-01-13
|
||||
Hello world, 4 ,2016-01-04, 2016-01-14, 2016-01-15' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV";
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n";
|
||||
$CLICKHOUSE_CLIENT --query="DROP TABLE csv";
|
Loading…
Reference in New Issue
Block a user