This commit is contained in:
avogar 2022-03-24 13:08:58 +00:00
parent 557edbd172
commit abc020a502
3 changed files with 13 additions and 7 deletions

View File

@ -429,6 +429,7 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
if (checkStringCaseInsensitive("NULL", buf)) if (checkStringCaseInsensitive("NULL", buf))
return std::make_shared<DataTypeNothing>(); return std::make_shared<DataTypeNothing>();
/// Number
Float64 tmp; Float64 tmp;
if (tryReadFloatText(tmp, buf)) if (tryReadFloatText(tmp, buf))
return std::make_shared<DataTypeFloat64>(); return std::make_shared<DataTypeFloat64>();

View File

@ -43,12 +43,17 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es
/// - For JSON escaping rule we can use JSON parser to parse a single field /// - For JSON escaping rule we can use JSON parser to parse a single field
/// and then convert JSON type of this field to ClickHouse type. /// and then convert JSON type of this field to ClickHouse type.
/// - For CSV escaping rule we can do the next: /// - For CSV escaping rule we can do the next:
/// - If the field is an unquoted string, then we could try to evaluate it /// - If the field is an unquoted string, then we try to parse it as a number,
/// as a constant expression, and if it fails, treat it as a String. /// and if we cannot, treat it as a String.
/// - If the field is a string in quotes, then we can try to evaluate /// - If the field is a string in quotes, then we try to use some
/// expression inside quotes as a constant expression, and if it fails or /// tweaks and heuristics to determine the type inside quotes, and if we can't or
/// the result is a number (we don't parse numbers in quotes) we treat it as a String. /// the result is a number or tuple (we don't parse numbers in quotes and don't
/// - For TSV and TSVRaw we treat each field as a String (TODO: try to use some tweaks and heuristics here) /// support tuples in CSV) we treat it as a String.
/// - If input_format_csv_use_best_effort_in_schema_inference is disabled, we
/// treat everything as a string.
/// - For TSV and TSVRaw we try to use some tweaks and heuristics to determine the type
/// of value if setting input_format_tsv_use_best_effort_in_schema_inference is enabled,
/// otherwise we treat everything as a string.
DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);
DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule);

View File

@ -112,7 +112,7 @@ struct FormatSettings
char fields_delimiter = '\x01'; char fields_delimiter = '\x01';
char collection_items_delimiter = '\x02'; char collection_items_delimiter = '\x02';
char map_keys_delimiter = '\x03'; char map_keys_delimiter = '\x03';
Names input_field_names = {""}; Names input_field_names;
} hive_text; } hive_text;
struct Custom struct Custom