Add setting input_format_hive_text_allow_variable_number_of_columns to control whether Hive Text input tolerates a variable number of columns

This commit is contained in:
kevinyhzou 2024-04-02 19:37:23 +08:00
parent 70e8477a48
commit 6018434f82
4 changed files with 4 additions and 1 deletion

View File

@ -1009,6 +1009,7 @@ class IColumn;
M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \
M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \
M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
M(Bool, input_format_hive_text_allow_variable_number_of_columns, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values", 0) \
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \

View File

@ -96,6 +96,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
format_settings.hive_text.allow_variable_number_of_columns = settings.input_format_hive_text_allow_variable_number_of_columns;
format_settings.custom.escaping_rule = settings.format_custom_escaping_rule;
format_settings.custom.field_delimiter = settings.format_custom_field_delimiter;
format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter;

View File

@ -176,6 +176,7 @@ struct FormatSettings
char fields_delimiter = '\x01';
char collection_items_delimiter = '\x02';
char map_keys_delimiter = '\x03';
bool allow_variable_number_of_columns = true;
Names input_field_names;
} hive_text{};

View File

@ -19,7 +19,7 @@ static FormatSettings updateFormatSettings(const FormatSettings & settings, cons
updated.date_time_input_format = FormatSettings::DateTimeInputFormat::BestEffort;
updated.defaults_for_omitted_fields = true;
updated.csv.delimiter = updated.hive_text.fields_delimiter;
updated.csv.allow_variable_number_of_columns = true;
updated.csv.allow_variable_number_of_columns = settings.hive_text.allow_variable_number_of_columns;
if (settings.hive_text.input_field_names.empty())
updated.hive_text.input_field_names = header.getNames();
return updated;