Merge pull request #69109 from Avogar/csv-bool-from-string

Don't infer Bool type from String in CSV when input_format_csv_try_infer_numbers_from_strings = 1
This commit is contained in:
Kruglov Pavel 2024-09-04 11:27:21 +00:00 committed by GitHub
commit 10893d3ea8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 15 additions and 3 deletions

View File

@ -302,8 +302,12 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet
/// Try to determine the type of value inside quotes
auto type = tryInferDataTypeForSingleField(data, format_settings);
/// If we couldn't infer any type or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string.
if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)))
/// Return String type if any of the following conditions applies:
/// - we couldn't infer any type
/// - it's a number and csv.try_infer_numbers_from_strings = 0
/// - it's a tuple and csv.try_infer_strings_from_quoted_tuples = 1
/// - it's a Bool type (we don't allow reading bool values from strings)
if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)) || isBool(type))
return std::make_shared<DataTypeString>();
return type;

View File

@ -1,6 +1,6 @@
c1 Nullable(Int64)
c2 Nullable(Float64)
c3 Nullable(Bool)
c3 Nullable(String)
c1 Nullable(String)
c2 Nullable(String)
c3 Nullable(String)

View File

@ -0,0 +1,4 @@
c1 Nullable(Int64)
c2 Nullable(Float64)
c3 Nullable(String)
42 42.42 True

View File

@ -0,0 +1,4 @@
set input_format_csv_try_infer_numbers_from_strings = 1;
desc format(CSV, '"42","42.42","True"');
select * from format(CSV, '"42","42.42","True"');