support defaults_for_omitted_fields in TSV

This commit is contained in:
Alexander Tokmakov 2019-10-04 22:44:48 +03:00
parent d936cee836
commit a90d126b93
7 changed files with 33 additions and 13 deletions

View File

@ -176,7 +176,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \ M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \
M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \ M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \
M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \ M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \
M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow format).") \ M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).") \
M(SettingBool, input_format_null_as_default, false, "For text input format initialize null fields with default values if data type of this field is not nullable") \ M(SettingBool, input_format_null_as_default, false, "For text input format initialize null fields with default values if data type of this field is not nullable") \
\ \
M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \ M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \

View File

@ -53,6 +53,7 @@ static FormatSettings getInputFormatSetting(const Settings & settings)
format_settings.template_settings.resultset_format = settings.format_template_resultset; format_settings.template_settings.resultset_format = settings.format_template_resultset;
format_settings.template_settings.row_format = settings.format_template_row; format_settings.template_settings.row_format = settings.format_template_row;
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;
format_settings.tsv.empty_as_default = settings.input_format_defaults_for_omitted_fields;
return format_settings; return format_settings;
} }

View File

@ -60,6 +60,13 @@ struct FormatSettings
Template template_settings; Template template_settings;
/// Options that apply only to the TabSeparated (TSV) input format.
struct TSV
{
    /// When set, the TSV reader inserts the column's default value for an empty field
    /// instead of parsing it. Populated from `input_format_defaults_for_omitted_fields`.
    bool empty_as_default{false};
};

/// TSV-specific options instance.
TSV tsv;
bool skip_unknown_fields = false; bool skip_unknown_fields = false;
bool with_names_use_header = false; bool with_names_use_header = false;
bool write_statistics = true; bool write_statistics = true;

View File

@ -180,16 +180,11 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
{ {
const auto & column_index = column_indexes_for_input_fields[file_column]; const auto & column_index = column_indexes_for_input_fields[file_column];
const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
if (column_index) if (column_index)
{ {
const auto & type = data_types[*column_index]; const auto & type = data_types[*column_index];
if (format_settings.null_as_default && !type->isNullable()) ext.read_columns[*column_index] = readField(*columns[*column_index], type, is_last_file_column);
ext.read_columns[*column_index] = DataTypeNullable::deserializeTextEscaped(*columns[*column_index], in, format_settings, type);
else
{
type->deserializeAsTextEscaped(*columns[*column_index], in, format_settings);
ext.read_columns[*column_index] = true;
}
} }
else else
{ {
@ -216,6 +211,22 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
return true; return true;
} }
/// Reads a single TabSeparated field from `in` into `column`.
///
/// @param column               Destination column; exactly one value is appended.
/// @param type                 Data type used to deserialize the field.
/// @param is_last_file_column  True when this field is the last one of the input row,
///                             so an empty field is followed by '\n' or end of input rather than '\t'.
/// @return false when the field was empty and the column default was inserted
///         (callers store the result in `ext.read_columns`); true when a value was
///         parsed from the input. With `null_as_default` the result is whatever
///         DataTypeNullable::deserializeTextEscaped reports
///         (presumably false when NULL was replaced by a default; verify against that helper).
bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column)
{
    /// An empty field is recognised by the reader already standing on the terminator:
    /// a tab, or - for the last field of the row - a newline or end of input.
    const bool at_delimiter = !in.eof() && *in.position() == '\t';
    const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n');

    if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end))
    {
        column.insertDefault();
        return false;
    }

    /// TabSeparated values use backslash escaping, so the *Escaped deserializers must be used here.
    /// The CSV variants apply quoting/comma rules and would misparse TSV fields.
    if (format_settings.null_as_default && !type->isNullable())
        return DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type);

    type->deserializeAsTextEscaped(column, in, format_settings);
    return true;
}
bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
{ {
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column) for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
@ -314,10 +325,8 @@ void TabSeparatedRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, I
prev_pos = in.position(); prev_pos = in.position();
if (column_indexes_for_input_fields[file_column]) if (column_indexes_for_input_fields[file_column])
{ {
if (format_settings.null_as_default && !type->isNullable()) const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type); readField(column, type, is_last_file_column);
else
type->deserializeAsTextEscaped(column, in, format_settings);
} }
else else
{ {

View File

@ -41,6 +41,8 @@ private:
std::vector<UInt8> read_columns; std::vector<UInt8> read_columns;
std::vector<size_t> columns_to_fill_with_default_values; std::vector<size_t> columns_to_fill_with_default_values;
bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
void addInputColumn(const String & column_name); void addInputColumn(const String & column_name);
void setupAllColumnsByTableSchema(); void setupAllColumnsByTableSchema();
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext); void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext);

View File

@ -227,7 +227,7 @@ Enabled by default.
## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields} ## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields}
When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow) and [CSV](../../interfaces/formats.md#csv) formats. When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats.
!!! note "Note" !!! note "Note"
When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance.

View File

@ -207,6 +207,7 @@ Ok.
- [JSONEachRow](../../interfaces/formats.md#jsoneachrow) - [JSONEachRow](../../interfaces/formats.md#jsoneachrow)
- [CSV](../../interfaces/formats.md#csv) - [CSV](../../interfaces/formats.md#csv)
- [TabSeparated](../../interfaces/formats.md#tabseparated)
!!! note "Примечание" !!! note "Примечание"
Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность. Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность.