mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
support defaults_for_omitted_fields in TSV
This commit is contained in:
parent
d936cee836
commit
a90d126b93
@ -176,7 +176,7 @@ struct Settings : public SettingsCollection<Settings>
|
||||
M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \
|
||||
M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \
|
||||
M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \
|
||||
M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow format).") \
|
||||
M(SettingBool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).") \
|
||||
M(SettingBool, input_format_null_as_default, false, "For text input format initialize null fields with default values if data type of this field is not nullable") \
|
||||
\
|
||||
M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \
|
||||
|
@ -53,6 +53,7 @@ static FormatSettings getInputFormatSetting(const Settings & settings)
|
||||
format_settings.template_settings.resultset_format = settings.format_template_resultset;
|
||||
format_settings.template_settings.row_format = settings.format_template_row;
|
||||
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;
|
||||
format_settings.tsv.empty_as_default = settings.input_format_defaults_for_omitted_fields;
|
||||
|
||||
return format_settings;
|
||||
}
|
||||
|
@ -60,6 +60,13 @@ struct FormatSettings
|
||||
|
||||
Template template_settings;
|
||||
|
||||
/// Options specific to the TSV (TabSeparated) input format; exposed through the `tsv` member.
struct TSV
|
||||
{
|
||||
/// When true, an empty input field gets the column's default value (via insertDefault)
/// instead of being parsed. Off by default; getInputFormatSetting() fills it from the
/// input_format_defaults_for_omitted_fields setting.
bool empty_as_default = false;
|
||||
};
|
||||
|
||||
TSV tsv;
|
||||
|
||||
bool skip_unknown_fields = false;
|
||||
bool with_names_use_header = false;
|
||||
bool write_statistics = true;
|
||||
|
@ -180,16 +180,11 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
|
||||
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
|
||||
{
|
||||
const auto & column_index = column_indexes_for_input_fields[file_column];
|
||||
const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
|
||||
if (column_index)
|
||||
{
|
||||
const auto & type = data_types[*column_index];
|
||||
if (format_settings.null_as_default && !type->isNullable())
|
||||
ext.read_columns[*column_index] = DataTypeNullable::deserializeTextEscaped(*columns[*column_index], in, format_settings, type);
|
||||
else
|
||||
{
|
||||
type->deserializeAsTextEscaped(*columns[*column_index], in, format_settings);
|
||||
ext.read_columns[*column_index] = true;
|
||||
}
|
||||
ext.read_columns[*column_index] = readField(*columns[*column_index], type, is_last_file_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -216,6 +211,22 @@ bool TabSeparatedRowInputFormat::readRow(MutableColumns & columns, RowReadExtens
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// Reads a single TSV field from `in` into `column`.
///
/// `type` is the data type of the destination column and `is_last_file_column` tells whether
/// this field is the last one of the row: for the last field an empty value is terminated by
/// '\n' or end of input, for any field by '\t'.
///
/// Returns false when a default value was inserted for an empty field, true when a value was
/// parsed. In the null_as_default case the result of DataTypeNullable::deserializeTextEscaped
/// is returned as is. readRow() stores the returned flag in ext.read_columns.
bool TabSeparatedRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column)
{
    const bool at_delimiter = !in.eof() && *in.position() == '\t';
    const bool at_last_column_line_end = is_last_file_column && (in.eof() || *in.position() == '\n');

    /// Nothing stands between the current position and the field terminator: the field is empty.
    if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end))
    {
        column.insertDefault();
        return false;
    }

    /// TSV values are backslash-escaped, not CSV-quoted, so the Escaped deserializers must be used
    /// here (the CSV ones would treat quotes and commas specially and leave escapes unprocessed).
    if (format_settings.null_as_default && !type->isNullable())
        return DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type);

    type->deserializeAsTextEscaped(column, in, format_settings);
    return true;
}
|
||||
|
||||
bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
|
||||
{
|
||||
for (size_t file_column = 0; file_column < column_indexes_for_input_fields.size(); ++file_column)
|
||||
@ -314,10 +325,8 @@ void TabSeparatedRowInputFormat::tryDeserializeFiled(const DataTypePtr & type, I
|
||||
prev_pos = in.position();
|
||||
if (column_indexes_for_input_fields[file_column])
|
||||
{
|
||||
if (format_settings.null_as_default && !type->isNullable())
|
||||
DataTypeNullable::deserializeTextEscaped(column, in, format_settings, type);
|
||||
else
|
||||
type->deserializeAsTextEscaped(column, in, format_settings);
|
||||
const bool is_last_file_column = file_column + 1 == column_indexes_for_input_fields.size();
|
||||
readField(column, type, is_last_file_column);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -41,6 +41,8 @@ private:
|
||||
std::vector<UInt8> read_columns;
|
||||
std::vector<size_t> columns_to_fill_with_default_values;
|
||||
|
||||
bool readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column);
|
||||
|
||||
void addInputColumn(const String & column_name);
|
||||
void setupAllColumnsByTableSchema();
|
||||
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext);
|
||||
|
@ -227,7 +227,7 @@ Enabled by default.
|
||||
|
||||
## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields}
|
||||
|
||||
When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow) and [CSV](../../interfaces/formats.md#csv) formats.
|
||||
When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats.
|
||||
|
||||
!!! note "Note"
|
||||
When this option is enabled, extended table metadata is sent from the server to the client. This consumes additional computing resources on the server and can reduce performance.
|
||||
|
@ -207,6 +207,7 @@ Ok.
|
||||
|
||||
- [JSONEachRow](../../interfaces/formats.md#jsoneachrow)
|
||||
- [CSV](../../interfaces/formats.md#csv)
|
||||
- [TabSeparated](../../interfaces/formats.md#tabseparated)
|
||||
|
||||
!!! note "Примечание"
|
||||
Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность.
|
||||
|
Loading…
Reference in New Issue
Block a user