mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-29 02:52:13 +00:00
support defaults_for_omitted_fields in TSV
This commit is contained in:
parent
d936cee836
commit
a90d126b93
@ -176,7 +176,7 @@ struct Settings : public SettingsCollection<Settings>
|
|||||||
M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \
|
M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \
|
||||||
M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \
|
M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \
|
||||||
M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \
|
M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \
|
||||||
M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow format).") \
|
M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).") \
|
||||||
M(SettingBool, input_format_null_as_default, false, "For text input format initialize null fields with default values if data type of this field is not nullable") \
|
M(SettingBool, input_format_null_as_default, false, "For text input format initialize null fields with default values if data type of this field is not nullable") \
|
||||||
\
|
\
|
||||||
M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \
|
M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \
|
||||||
|
@ -53,6 +53,7 @@ static FormatSettings getInputFormatSetting(const Settings & settings)
|
|||||||
format_settings.template_settings.resultset_format = settings.format_template_resultset;
|
format_settings.template_settings.resultset_format = settings.format_template_resultset;
|
||||||
format_settings.template_settings.row_format = settings.format_template_row;
|
format_settings.template_settings.row_format = settings.format_template_row;
|
||||||
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;
|
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;
|
||||||
|
format_settings.tsv.empty_as_default = settings.input_format_defaults_for_omitted_fields;
|
||||||
|
|
||||||
return format_settings;
|
return format_settings;
|
||||||
}
|
}
|
||||||
|
@ -60,6 +60,13 @@ struct FormatSettings
|
|||||||
|
|
||||||
Template template_settings;
|
Template template_settings;
|
||||||
|
|
||||||
|
/// Options that apply to the TabSeparated (TSV) input format.
struct TSV
|
||||||
|
{
|
||||||
|
/// When true, TabSeparatedRowInputFormat::readField inserts the column's default value
/// (and reports the field as not read) if the read position is at a tab, or, for the last
/// file column, at a newline or end of input, instead of parsing a value.
/// getInputFormatSetting fills this from settings.input_format_defaults_for_omitted_fields.
bool empty_as_default = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
TSV tsv;
|
||||||
|
|
||||||
bool skip_unknown_fields = false;
|
bool skip_unknown_fields = false;
|
||||||
bool with_names_use_header = false;
|
bool with_names_use_header = false;
|
||||||
bool write_statistics = true;
|
bool write_statistics = true;
|
||||||
|
@ -180,16 +180,11 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
|
|||||||
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
|
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
|
||||||
{
|
{
|
||||||
const auto & column_index = column_indexes_for_input_fields[file_column];
|
const auto & column_index = column_indexes_for_input_fields[file_column];
|
||||||
|
const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
|
||||||
if (column_index)
|
if (column_index)
|
||||||
{
|
{
|
||||||
const auto & type = data_types[*column_index];
|
const auto & type = data_types[*column_index];
|
||||||
if (format_settings.null_as_default && !type->isNullable())
|
ext.read_columns[*column_index] = readField(*columns[*column_index], type, is_last_file_column);
|
||||||
ext.read_columns[*column_index] = DataTypeNullable::deserializeTextEscaped(*columns[*column_index], in, format_settings, type);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
type->deserializeAsTextEscaped(*columns[*column_index], in, format_settings);
|
|
||||||
ext.read_columns[*column_index] = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -216,6 +211,22 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Reads a single tab-separated field from `in` into `column`.
///
/// @param column               Destination column; exactly one value is appended to it.
/// @param type                 Data type used to deserialize the field.
/// @param is_last_file_column  True when this is the last field of the row, in which case an
///                             empty field is followed by '\n' or end of input rather than '\t'.
/// @return true if a value was parsed from the input, false if a default value was inserted
///         because the field was empty. The caller stores this flag in ext.read_columns.
bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column)
{
    /// An empty field shows up as the read position already standing on the field terminator.
    const bool at_delimiter = !in.eof() && *in.position() == '\t';
    const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n');

    if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end))
    {
        column.insertDefault();
        return false;
    }

    /// TSV fields use the backslash-escaped text representation, not CSV quoting rules,
    /// so the Escaped deserializers must be used here (as the code this helper replaced did).
    if (format_settings.null_as_default && !type->isNullable())
        return DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type);

    type->deserializeAsTextEscaped(column, in, format_settings);
    return true;
}
|
||||||
|
|
||||||
bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
|
bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
|
||||||
{
|
{
|
||||||
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
|
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
|
||||||
@ -314,10 +325,8 @@ void TabSeparatedRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, I
|
|||||||
prev_pos = in.position();
|
prev_pos = in.position();
|
||||||
if (column_indexes_for_input_fields[file_column])
|
if (column_indexes_for_input_fields[file_column])
|
||||||
{
|
{
|
||||||
if (format_settings.null_as_default && !type->isNullable())
|
const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
|
||||||
DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type);
|
readField(column, type, is_last_file_column);
|
||||||
else
|
|
||||||
type->deserializeAsTextEscaped(column, in, format_settings);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -41,6 +41,8 @@ private:
|
|||||||
std::vector<UInt8> read_columns;
|
std::vector<UInt8> read_columns;
|
||||||
std::vector<size_t> columns_to_fill_with_default_values;
|
std::vector<size_t> columns_to_fill_with_default_values;
|
||||||
|
|
||||||
|
bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
|
||||||
|
|
||||||
void addInputColumn(const String & column_name);
|
void addInputColumn(const String & column_name);
|
||||||
void setupAllColumnsByTableSchema();
|
void setupAllColumnsByTableSchema();
|
||||||
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext);
|
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext);
|
||||||
|
@ -227,7 +227,7 @@ Enabled by default.
|
|||||||
|
|
||||||
## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields}
|
## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields}
|
||||||
|
|
||||||
When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow) and [CSV](../../interfaces/formats.md#csv) formats.
|
When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats.
|
||||||
|
|
||||||
!!! note "Note"
|
!!! note "Note"
|
||||||
When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance.
|
When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance.
|
||||||
|
@ -207,6 +207,7 @@ Ok.
|
|||||||
|
|
||||||
- [JSONEachRow](../../interfaces/formats.md#jsoneachrow)
|
- [JSONEachRow](../../interfaces/formats.md#jsoneachrow)
|
||||||
- [CSV](../../interfaces/formats.md#csv)
|
- [CSV](../../interfaces/formats.md#csv)
|
||||||
|
- [TabSeparated](../../interfaces/formats.md#tabseparated)
|
||||||
|
|
||||||
!!! note "Примечание"
|
!!! note "Примечание"
|
||||||
Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность.
|
Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность.
|
||||||
|
Loading…
Reference in New Issue
Block a user