mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Allow to infer numbers from strings in CSV format
This commit is contained in:
parent
7bd6b42af2
commit
c3a76fcc08
@ -942,6 +942,7 @@ class IColumn;
|
||||
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \
|
||||
M(UInt64, input_format_max_bytes_to_read_for_schema_inference, 32 * 1024 * 1024, "The maximum bytes of data to read for automatic schema inference", 0) \
|
||||
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
|
||||
M(Bool, input_format_csv_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference in CSV format", 0) \
|
||||
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
|
||||
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
|
||||
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
|
||||
|
@ -303,8 +303,8 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
|
||||
/// Try to determine the type of value inside quotes
|
||||
auto type = tryInferDataTypeForSingleField(data, format_settings);
|
||||
|
||||
/// If we couldn't infer any type or it's a number or tuple in quotes, we determine it as a string.
|
||||
if (!type || isNumber(removeNullable(type)) || isTuple(type))
|
||||
/// If we couldn't infer any type, or it's a tuple in quotes, or it's a number and csv.try_infer_numbers_from_strings is disabled, we determine it as a string.
|
||||
if (!type || isTuple(type) || (isNumber(type) && !format_settings.csv.try_infer_numbers_from_strings))
|
||||
return std::make_shared<DataTypeString>();
|
||||
|
||||
return type;
|
||||
|
@ -74,6 +74,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
|
||||
format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns;
|
||||
format_settings.csv.use_default_on_bad_values = settings.input_format_csv_use_default_on_bad_values;
|
||||
format_settings.csv.try_infer_numbers_from_strings = settings.input_format_csv_try_infer_numbers_from_strings;
|
||||
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
|
||||
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
|
||||
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
|
||||
|
@ -164,6 +164,7 @@ struct FormatSettings
|
||||
bool allow_whitespace_or_tab_as_delimiter = false;
|
||||
bool allow_variable_number_of_columns = false;
|
||||
bool use_default_on_bad_values = false;
|
||||
bool try_infer_numbers_from_strings = true;
|
||||
} csv;
|
||||
|
||||
struct HiveText
|
||||
|
@ -0,0 +1,6 @@
|
||||
c1 Nullable(Int64)
|
||||
c2 Nullable(Float64)
|
||||
c3 Nullable(Bool)
|
||||
c1 Nullable(String)
|
||||
c2 Nullable(String)
|
||||
c3 Nullable(String)
|
@ -0,0 +1,4 @@
|
||||
set input_format_csv_try_infer_numbers_from_strings=1;
|
||||
desc format(CSV, '"42","42.42","True"');
|
||||
desc format(CSV, '"42","42.42","True"\n"abc","def","ghk"');
|
||||
|
Loading…
Reference in New Issue
Block a user