Merge pull request #39186 from Avogar/numbers-schema-inference

Add new features in schema inference
This commit is contained in:
Anton Popov 2022-08-11 00:53:54 +02:00 committed by GitHub
commit 3fdf428834
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
59 changed files with 1145 additions and 285 deletions

View File

@ -705,6 +705,10 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Arrow", 0) \
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
M(Bool, input_format_json_try_infer_numbers_from_strings, true, "Try to infer numbers from string fields while schema inference", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer numbers from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \
M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \
M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified number of lines at the beginning of data in CSV format", 0) \

View File

@ -0,0 +1,178 @@
#include <DataTypes/transformTypesRecursively.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
/// Apply `transform_simple_types`/`transform_complex_types` to `types`, recursing
/// into the nested types of Array/Tuple/Map/Nullable.
///
/// The structure mirrors getLeastSupertype():
///  - If all types are the same kind of complex type (all Arrays / all Tuples of one
///    size / all Maps), recurse into their nested types first and rebuild the outer
///    types from the transformed nested ones.
///  - If at least one type is complex but not all of them match, no recursion happens;
///    only transform_complex_types is applied to the types as-is.
///  - transform_complex_types (may be empty) is applied to any set that contains a
///    complex type, after the recursive call (if any).
///  - Nullable is transparent: nested types are transformed and wrapped back.
///  - transform_simple_types is applied once no complex types remain.
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &)> transform_simple_types, std::function<void(DataTypes &)> transform_complex_types)
{
    {
        /// Arrays
        bool have_array = false;
        bool all_arrays = true;
        DataTypes nested_types;
        for (const auto & type : types)
        {
            if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(type.get()))
            {
                have_array = true;
                nested_types.push_back(type_array->getNestedType());
            }
            else
                all_arrays = false;
        }

        if (have_array)
        {
            if (all_arrays)
            {
                transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
                for (size_t i = 0; i != types.size(); ++i)
                    types[i] = std::make_shared<DataTypeArray>(nested_types[i]);
            }

            if (transform_complex_types)
                transform_complex_types(types);

            return;
        }
    }

    {
        /// Tuples
        bool have_tuple = false;
        bool all_tuples = true;
        size_t tuple_size = 0;

        /// nested_types[elem_idx] collects the elem_idx-th element type of every tuple.
        std::vector<DataTypes> nested_types;

        for (const auto & type : types)
        {
            if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
            {
                if (!have_tuple)
                {
                    tuple_size = type_tuple->getElements().size();
                    nested_types.resize(tuple_size);
                    for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
                        nested_types[elem_idx].reserve(types.size());
                }
                else if (tuple_size != type_tuple->getElements().size())
                    return; /// Tuples of different sizes cannot be transformed together.

                have_tuple = true;

                for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
                    nested_types[elem_idx].emplace_back(type_tuple->getElements()[elem_idx]);
            }
            else
                all_tuples = false;
        }

        if (have_tuple)
        {
            if (all_tuples)
            {
                /// Transform each element position across all tuples, then transpose
                /// the results back into per-tuple element lists.
                std::vector<DataTypes> transposed_nested_types(types.size());
                for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
                {
                    transformTypesRecursively(nested_types[elem_idx], transform_simple_types, transform_complex_types);
                    for (size_t i = 0; i != types.size(); ++i)
                        transposed_nested_types[i].push_back(nested_types[elem_idx][i]);
                }

                for (size_t i = 0; i != types.size(); ++i)
                    types[i] = std::make_shared<DataTypeTuple>(transposed_nested_types[i]);
            }

            if (transform_complex_types)
                transform_complex_types(types);

            return;
        }
    }

    {
        /// Maps
        bool have_maps = false;
        bool all_maps = true;
        DataTypes key_types;
        DataTypes value_types;
        key_types.reserve(types.size());
        value_types.reserve(types.size());

        for (const auto & type : types)
        {
            if (const DataTypeMap * type_map = typeid_cast<const DataTypeMap *>(type.get()))
            {
                have_maps = true;
                key_types.emplace_back(type_map->getKeyType());
                value_types.emplace_back(type_map->getValueType());
            }
            else
                all_maps = false;
        }

        if (have_maps)
        {
            if (all_maps)
            {
                transformTypesRecursively(key_types, transform_simple_types, transform_complex_types);
                transformTypesRecursively(value_types, transform_simple_types, transform_complex_types);

                for (size_t i = 0; i != types.size(); ++i)
                    types[i] = std::make_shared<DataTypeMap>(key_types[i], value_types[i]);
            }

            if (transform_complex_types)
                transform_complex_types(types);

            return;
        }
    }

    {
        /// Nullable: strip the wrapper, transform nested types, wrap back only the
        /// types that were Nullable originally.
        bool have_nullable = false;
        std::vector<UInt8> is_nullable;
        is_nullable.reserve(types.size());
        DataTypes nested_types;
        nested_types.reserve(types.size());
        for (const auto & type : types)
        {
            if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
            {
                have_nullable = true;
                is_nullable.push_back(1);
                nested_types.push_back(type_nullable->getNestedType());
            }
            else
            {
                is_nullable.push_back(0);
                nested_types.push_back(type);
            }
        }

        if (have_nullable)
        {
            transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
            for (size_t i = 0; i != types.size(); ++i)
            {
                if (is_nullable[i])
                    types[i] = makeNullable(nested_types[i]);
                else
                    types[i] = nested_types[i];
            }

            return;
        }
    }

    /// Only simple types remain here. Guard the call for consistency with
    /// transform_complex_types above: invoking an empty std::function would
    /// throw std::bad_function_call.
    if (transform_simple_types)
        transform_simple_types(types);
}
}

View File

@ -0,0 +1,17 @@
#pragma once

#include <DataTypes/IDataType.h>

#include <functional>

namespace DB
{

/// Applies custom transformation functions to the provided types recursively.
/// The implementation is similar to the function getLeastSupertype:
/// if all types are the same complex type (Array/Map/Tuple) or Nullable, the recursion
/// descends into their nested types; if the types are not all the same complex type,
/// the recursion stops at that level and the nested types are left untouched.
/// transform_simple_types is applied to the resulting simple types after all recursive calls.
/// transform_complex_types (may be an empty std::function) is applied to complex types
/// (Array/Map/Tuple) after the recursive call on their nested types.
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &)> transform_simple_types, std::function<void(DataTypes &)> transform_complex_types);

}

View File

@ -9,8 +9,12 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/getLeastSupertype.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/getLeastSupertype.h>
#include <DataTypes/transformTypesRecursively.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
@ -255,7 +259,220 @@ String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule e
return readByEscapingRule<true>(buf, escaping_rule, format_settings);
}
static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
/// Harmonize a set of types inferred for the same column/position so that a least
/// supertype can be found for them afterwards. Transformations are applied
/// recursively to the nested types of Array/Map/Tuple/Nullable
/// (see transformTypesRecursively):
///   - Int64 mixed with Float64            -> integers become Float64
///   - Date mixed with DateTime64          -> dates become DateTime64
///   - Date/DateTime64 mixed with others   -> dates/datetimes become String
/// JSON-specific (is_json == true):
///   - numbers inferred from strings mixed with strings -> such numbers become String
///   - Bool mixed with Int64/Float64       -> bools become the corresponding number type
///   - unequal Maps, or Map mixed with Object -> maps become Object('json')
/// `numbers_parsed_from_json_strings` (optional) holds raw pointers of the number
/// types that were inferred from JSON string fields; only those numbers are turned
/// back into String when mixed with strings.
void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, bool is_json, const std::unordered_set<const IDataType *> * numbers_parsed_from_json_strings = nullptr)
{
    /// Do nothing if we didn't try to infer something special.
    if (!settings.try_infer_integers && !settings.try_infer_dates && !settings.try_infer_datetimes && !is_json)
        return;

    auto transform_simple_types = [&](DataTypes & data_types)
    {
        /// If we have floats and integers convert them all to float.
        if (settings.try_infer_integers)
        {
            bool have_floats = false;
            bool have_integers = false;
            for (const auto & type : data_types)
            {
                have_floats |= isFloat(type);
                have_integers |= isInteger(type) && !isBool(type);
            }

            if (have_floats && have_integers)
            {
                for (auto & type : data_types)
                {
                    /// Exclude Bool here as well (Bool is an integer type internally):
                    /// the detection loop above ignores bools, and bools are handled
                    /// separately below according to settings.json.read_bools_as_numbers.
                    if (isInteger(type) && !isBool(type))
                        type = std::make_shared<DataTypeFloat64>();
                }
            }
        }

        /// If we have only dates and datetimes, convert dates to datetimes.
        /// If we have dates/datetimes mixed with something else, convert them back
        /// to strings. There is a special case when we inferred both Date/DateTime
        /// and Int64 from strings, for example: "arr: ["2020-01-01", "2000"]"
        /// -> Tuple(Date, Int64), so if we have Date/DateTime and something else
        /// (not only String) we should convert Date/DateTime back to String,
        /// so then we will be able to convert Int64 back to String as well.
        if (settings.try_infer_dates || settings.try_infer_datetimes)
        {
            bool have_dates = false;
            bool have_datetimes = false;
            bool all_dates_or_datetimes = true;

            for (const auto & type : data_types)
            {
                have_dates |= isDate(type);
                have_datetimes |= isDateTime64(type);
                all_dates_or_datetimes &= isDate(type) || isDateTime64(type);
            }

            if (!all_dates_or_datetimes && (have_dates || have_datetimes))
            {
                for (auto & type : data_types)
                {
                    if (isDate(type) || isDateTime64(type))
                        type = std::make_shared<DataTypeString>();
                }
            }
            else if (have_dates && have_datetimes)
            {
                for (auto & type : data_types)
                {
                    if (isDate(type))
                        type = std::make_shared<DataTypeDateTime64>(9);
                }
            }
        }

        if (!is_json)
            return;

        /// Check settings specific for JSON formats.

        /// If we have numbers and strings, convert numbers (that were inferred
        /// from strings) back to strings.
        if (settings.json.try_infer_numbers_from_strings)
        {
            bool have_strings = false;
            bool have_numbers = false;
            for (const auto & type : data_types)
            {
                have_strings |= isString(type);
                have_numbers |= isNumber(type);
            }

            if (have_strings && have_numbers)
            {
                for (auto & type : data_types)
                {
                    /// If the caller didn't provide the set, convert all numbers.
                    if (isNumber(type) && (!numbers_parsed_from_json_strings || numbers_parsed_from_json_strings->contains(type.get())))
                        type = std::make_shared<DataTypeString>();
                }
            }
        }

        if (settings.json.read_bools_as_numbers)
        {
            /// Note that have_floats and have_integers both cannot be
            /// equal to true as in one of the previous checks we convert
            /// integers to floats if we have both.
            bool have_floats = false;
            bool have_integers = false;
            bool have_bools = false;
            for (const auto & type : data_types)
            {
                have_floats |= isFloat(type);
                have_integers |= isInteger(type) && !isBool(type);
                have_bools |= isBool(type);
            }

            if (have_bools && (have_integers || have_floats))
            {
                for (auto & type : data_types)
                {
                    if (isBool(type))
                    {
                        if (have_integers)
                            type = std::make_shared<DataTypeInt64>();
                        else
                            type = std::make_shared<DataTypeFloat64>();
                    }
                }
            }
        }
    };

    auto transform_complex_types = [&](DataTypes & data_types)
    {
        if (!is_json)
            return;

        /// In JSON we convert Maps to Objects when the maps are not all equal
        /// to each other, or when there is already an Object among the types.
        bool have_maps = false;
        bool have_objects = false;
        bool are_maps_equal = true;
        DataTypePtr first_map_type;
        for (const auto & type : data_types)
        {
            if (isMap(type))
            {
                if (!have_maps)
                {
                    first_map_type = type;
                    have_maps = true;
                }
                else
                {
                    are_maps_equal &= type->equals(*first_map_type);
                }
            }
            else if (isObject(type))
            {
                have_objects = true;
            }
        }

        if (have_maps && (have_objects || !are_maps_equal))
        {
            for (auto & type : data_types)
            {
                if (isMap(type))
                    type = std::make_shared<DataTypeObject>("json", true);
            }
        }
    };

    transformTypesRecursively(types, transform_simple_types, transform_complex_types);
}
/// Public entry point: transform inferred types for the given escaping rule.
/// JSON-specific transformations are enabled only for the JSON escaping rule.
void transformInferredTypesIfNeeded(DataTypes & types, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule)
{
    const bool is_json = (escaping_rule == FormatSettings::EscapingRule::JSON);
    transformInferredTypesIfNeededImpl(types, settings, is_json);
}
/// Convenience overload for exactly two types: transforms them together and
/// writes the results back into the given references.
void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule)
{
    DataTypes both;
    both.reserve(2);
    both.push_back(first);
    both.push_back(second);
    transformInferredTypesIfNeeded(both, settings, escaping_rule);
    first = std::move(both[0]);
    second = std::move(both[1]);
}
/// Same as transformInferredTypesIfNeeded, but with JSON-specific transformations
/// always enabled; optionally restricts string conversion to numbers that were
/// parsed from JSON string fields.
void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, const std::unordered_set<const IDataType *> * numbers_parsed_from_json_strings)
{
    transformInferredTypesIfNeededImpl(types, settings, /*is_json=*/ true, numbers_parsed_from_json_strings);
}
/// Convenience overload of the JSON variant for exactly two types.
void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
{
    DataTypes both;
    both.reserve(2);
    both.push_back(first);
    both.push_back(second);
    transformInferredJSONTypesIfNeeded(both, settings);
    first = std::move(both[0]);
    second = std::move(both[1]);
}
/// Try to parse the whole field as a Date, then as a DateTime64(9), honouring the
/// corresponding settings. Returns a Nullable type on success, nullptr otherwise.
/// The field must be consumed entirely for the inference to succeed.
DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings)
{
    if (settings.try_infer_dates)
    {
        DayNum day_num;
        ReadBufferFromString date_buf(field);
        const bool whole_field_is_date = tryReadDateText(day_num, date_buf) && date_buf.eof();
        if (whole_field_is_date)
            return makeNullable(std::make_shared<DataTypeDate>());
    }

    if (settings.try_infer_datetimes)
    {
        DateTime64 date_time;
        ReadBufferFromString datetime_buf(field);
        const bool whole_field_is_datetime = tryReadDateTime64Text(date_time, 9, datetime_buf) && datetime_buf.eof();
        if (whole_field_is_datetime)
            return makeNullable(std::make_shared<DataTypeDateTime64>(9));
    }

    return nullptr;
}
static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBufferFromString & buf, const FormatSettings & settings)
{
if (buf.eof())
return nullptr;
@ -279,7 +496,7 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
else
first = false;
auto nested_type = determineDataTypeForSingleFieldImpl(buf);
auto nested_type = determineDataTypeForSingleFieldImpl(buf, settings);
if (!nested_type)
return nullptr;
@ -294,6 +511,8 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
if (nested_types.empty())
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());
transformInferredTypesIfNeeded(nested_types, settings);
auto least_supertype = tryGetLeastSupertype(nested_types);
if (!least_supertype)
return nullptr;
@ -320,7 +539,7 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
else
first = false;
auto nested_type = determineDataTypeForSingleFieldImpl(buf);
auto nested_type = determineDataTypeForSingleFieldImpl(buf, settings);
if (!nested_type)
return nullptr;
@ -355,7 +574,7 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
else
first = false;
auto key_type = determineDataTypeForSingleFieldImpl(buf);
auto key_type = determineDataTypeForSingleFieldImpl(buf, settings);
if (!key_type)
return nullptr;
@ -366,7 +585,7 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
return nullptr;
skipWhitespaceIfAny(buf);
auto value_type = determineDataTypeForSingleFieldImpl(buf);
auto value_type = determineDataTypeForSingleFieldImpl(buf, settings);
if (!value_type)
return nullptr;
@ -382,6 +601,9 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
if (key_types.empty())
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeNothing>(), std::make_shared<DataTypeNothing>());
transformInferredTypesIfNeeded(key_types, settings);
transformInferredTypesIfNeeded(value_types, settings);
auto key_least_supertype = tryGetLeastSupertype(key_types);
auto value_least_supertype = tryGetLeastSupertype(value_types);
@ -398,9 +620,11 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
if (*buf.position() == '\'')
{
++buf.position();
String field;
while (!buf.eof())
{
char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end());
field.append(buf.position(), next_pos);
buf.position() = next_pos;
if (!buf.hasPendingData())
@ -409,6 +633,7 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
if (*buf.position() == '\'')
break;
field.push_back(*buf.position());
if (*buf.position() == '\\')
++buf.position();
}
@ -417,6 +642,9 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
return nullptr;
++buf.position();
if (auto type = tryInferDateOrDateTime(field, settings))
return type;
return std::make_shared<DataTypeString>();
}
@ -430,15 +658,29 @@ static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf)
/// Number
Float64 tmp;
auto * pos_before_float = buf.position();
if (tryReadFloatText(tmp, buf))
{
if (settings.try_infer_integers)
{
auto * float_end_pos = buf.position();
buf.position() = pos_before_float;
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.position() == float_end_pos)
return std::make_shared<DataTypeInt64>();
buf.position() = float_end_pos;
}
return std::make_shared<DataTypeFloat64>();
}
return nullptr;
}
static DataTypePtr determineDataTypeForSingleField(ReadBuffer & buf)
static DataTypePtr determineDataTypeForSingleField(ReadBufferFromString & buf, const FormatSettings & settings)
{
return makeNullableRecursivelyAndCheckForNothing(determineDataTypeForSingleFieldImpl(buf));
return makeNullableRecursivelyAndCheckForNothing(determineDataTypeForSingleFieldImpl(buf, settings));
}
DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule)
@ -448,11 +690,11 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe
case FormatSettings::EscapingRule::Quoted:
{
ReadBufferFromString buf(field);
auto type = determineDataTypeForSingleField(buf);
auto type = determineDataTypeForSingleField(buf, format_settings);
return buf.eof() ? type : nullptr;
}
case FormatSettings::EscapingRule::JSON:
return JSONUtils::getDataTypeFromField(field);
return JSONUtils::getDataTypeFromField(field, format_settings);
case FormatSettings::EscapingRule::CSV:
{
if (!format_settings.csv.input_format_use_best_effort_in_schema_inference)
@ -466,9 +708,13 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe
if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"')))
{
ReadBufferFromString buf(std::string_view(field.data() + 1, field.size() - 2));
auto data = std::string_view(field.data() + 1, field.size() - 2);
if (auto date_type = tryInferDateOrDateTime(data, format_settings))
return date_type;
ReadBufferFromString buf(data);
/// Try to determine the type of value inside quotes
auto type = determineDataTypeForSingleField(buf);
auto type = determineDataTypeForSingleField(buf, format_settings);
if (!type)
return nullptr;
@ -481,6 +727,14 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe
}
/// Case when the CSV value is not in quotes. Check if it's a number, and if not, determine it as a string.
if (format_settings.try_infer_integers)
{
ReadBufferFromString buf(field);
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
return makeNullable(std::make_shared<DataTypeInt64>());
}
ReadBufferFromString buf(field);
Float64 tmp;
if (tryReadFloatText(tmp, buf) && buf.eof())
@ -500,8 +754,11 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe
if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation)
return DataTypeFactory::instance().get("Nullable(Bool)");
if (auto date_type = tryInferDateOrDateTime(field, format_settings))
return date_type;
ReadBufferFromString buf(field);
auto type = determineDataTypeForSingleField(buf);
auto type = determineDataTypeForSingleField(buf, format_settings);
if (!buf.eof())
return makeNullable(std::make_shared<DataTypeString>());

View File

@ -60,4 +60,21 @@ DataTypes determineDataTypesByEscapingRule(const std::vector<String> & fields, c
DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule);
DataTypes getDefaultDataTypeForEscapingRules(const std::vector<FormatSettings::EscapingRule> & escaping_rules);
/// Try to infer Date or Datetime from string if corresponding settings are enabled.
DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings);
/// Check if we need to transform types inferred from data and transform it if necessary.
/// It's used when we try to infer some not ordinary types from another types.
/// For example dates from strings, we should check if dates were inferred from all strings
/// in the same way and if not, transform inferred dates back to strings.
/// For example, if we have array of strings and we tried to infer dates from them,
/// to make the result type Array(Date) we should ensure that all strings were
/// successfully parsed as dates and if not, convert all dates back to strings and make the result type Array(String).
void transformInferredTypesIfNeeded(DataTypes & types, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule = FormatSettings::EscapingRule::Escaped);
void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule = FormatSettings::EscapingRule::Escaped);
/// Same as transformInferredTypesIfNeeded but takes into account settings that are special for JSON formats.
void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, const std::unordered_set<const IDataType *> * numbers_parsed_from_json_strings = nullptr);
void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings);
}

View File

@ -94,6 +94,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.json.read_bools_as_numbers = settings.input_format_json_read_bools_as_numbers;
format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
@ -165,6 +166,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.sql_insert.table_name = settings.output_format_sql_insert_table_name;
format_settings.sql_insert.use_replace = settings.output_format_sql_insert_use_replace;
format_settings.sql_insert.quote_names = settings.output_format_sql_insert_quote_names;
format_settings.try_infer_integers = settings.input_format_try_infer_integers;
format_settings.try_infer_dates = settings.input_format_try_infer_dates;
format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes;
/// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
if (format_settings.schema.is_server)

View File

@ -38,6 +38,9 @@ struct FormatSettings
UInt64 max_rows_to_read_for_schema_inference = 100;
String column_names_for_schema_inference;
bool try_infer_integers = false;
bool try_infer_dates = false;
bool try_infer_datetimes = false;
enum class DateTimeInputFormat
{
@ -142,6 +145,7 @@ struct FormatSettings
bool named_tuples_as_objects = false;
bool serialize_as_strings = false;
bool read_bools_as_numbers = true;
bool try_infer_numbers_from_strings = false;
} json;
struct

View File

@ -1,6 +1,7 @@
#include <IO/ReadHelpers.h>
#include <Formats/JSONUtils.h>
#include <Formats/ReadSchemaUtils.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferValidUTF8.h>
#include <DataTypes/Serializations/SerializationNullable.h>
@ -121,7 +122,7 @@ namespace JSONUtils
}
template <class Element>
DataTypePtr getDataTypeFromFieldImpl(const Element & field)
DataTypePtr getDataTypeFromFieldImpl(const Element & field, const FormatSettings & settings, std::unordered_set<const IDataType *> & numbers_parsed_from_json_strings)
{
if (field.isNull())
return nullptr;
@ -129,11 +130,48 @@ namespace JSONUtils
if (field.isBool())
return DataTypeFactory::instance().get("Nullable(Bool)");
if (field.isInt64() || field.isUInt64() || field.isDouble())
if (field.isInt64() || field.isUInt64())
{
if (settings.try_infer_integers)
return makeNullable(std::make_shared<DataTypeInt64>());
return makeNullable(std::make_shared<DataTypeFloat64>());
}
if (field.isDouble())
return makeNullable(std::make_shared<DataTypeFloat64>());
if (field.isString())
{
if (auto date_type = tryInferDateOrDateTime(field.getString(), settings))
return date_type;
if (!settings.json.try_infer_numbers_from_strings)
return makeNullable(std::make_shared<DataTypeString>());
ReadBufferFromString buf(field.getString());
if (settings.try_infer_integers)
{
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
{
auto type = std::make_shared<DataTypeInt64>();
numbers_parsed_from_json_strings.insert(type.get());
return makeNullable(type);
}
}
Float64 tmp;
if (tryReadFloatText(tmp, buf) && buf.eof())
{
auto type = std::make_shared<DataTypeFloat64>();
numbers_parsed_from_json_strings.insert(type.get());
return makeNullable(type);
}
return makeNullable(std::make_shared<DataTypeString>());
}
if (field.isArray())
{
@ -145,20 +183,32 @@ namespace JSONUtils
DataTypes nested_data_types;
/// If this array contains fields with different types we will treat it as Tuple.
bool is_tuple = false;
bool are_types_the_same = true;
for (const auto element : array)
{
auto type = getDataTypeFromFieldImpl(element);
auto type = getDataTypeFromFieldImpl(element, settings, numbers_parsed_from_json_strings);
if (!type)
return nullptr;
if (!nested_data_types.empty() && type->getName() != nested_data_types.back()->getName())
is_tuple = true;
if (!nested_data_types.empty() && !type->equals(*nested_data_types.back()))
are_types_the_same = false;
nested_data_types.push_back(std::move(type));
}
if (is_tuple)
if (!are_types_the_same)
{
auto nested_types_copy = nested_data_types;
transformInferredJSONTypesIfNeeded(nested_types_copy, settings, &numbers_parsed_from_json_strings);
are_types_the_same = true;
for (size_t i = 1; i < nested_types_copy.size(); ++i)
are_types_the_same &= nested_types_copy[i]->equals(*nested_types_copy[i - 1]);
if (are_types_the_same)
nested_data_types = std::move(nested_types_copy);
}
if (!are_types_the_same)
return std::make_shared<DataTypeTuple>(nested_data_types);
return std::make_shared<DataTypeArray>(nested_data_types.back());
@ -167,38 +217,35 @@ namespace JSONUtils
if (field.isObject())
{
auto object = field.getObject();
DataTypePtr value_type;
bool is_object = false;
DataTypes value_types;
bool have_object_value = false;
for (const auto key_value_pair : object)
{
auto type = getDataTypeFromFieldImpl(key_value_pair.second);
auto type = getDataTypeFromFieldImpl(key_value_pair.second, settings, numbers_parsed_from_json_strings);
if (!type)
continue;
if (isObject(type))
{
is_object = true;
have_object_value = true;
break;
}
if (!value_type)
{
value_type = type;
}
else if (!value_type->equals(*type))
{
is_object = true;
break;
}
value_types.push_back(type);
}
if (is_object)
if (value_types.empty())
return nullptr;
transformInferredJSONTypesIfNeeded(value_types, settings, &numbers_parsed_from_json_strings);
bool are_types_equal = true;
for (size_t i = 1; i < value_types.size(); ++i)
are_types_equal &= value_types[i]->equals(*value_types[0]);
if (have_object_value || !are_types_equal)
return std::make_shared<DataTypeObject>("json", true);
if (value_type)
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_type);
return nullptr;
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_types[0]);
}
throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"};
@ -215,18 +262,19 @@ namespace JSONUtils
#endif
}
DataTypePtr getDataTypeFromField(const String & field)
DataTypePtr getDataTypeFromField(const String & field, const FormatSettings & settings)
{
auto [parser, element] = getJSONParserAndElement();
bool parsed = parser.parse(field, element);
if (!parsed)
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object here: {}", field);
return getDataTypeFromFieldImpl(element);
std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
return getDataTypeFromFieldImpl(element, settings, numbers_parsed_from_json_strings);
}
template <class Extractor, const char opening_bracket, const char closing_bracket>
static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor)
static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, const FormatSettings & settings, bool /*json_strings*/, Extractor & extractor)
{
String line = readJSONEachRowLineIntoStringImpl<opening_bracket, closing_bracket>(in);
auto [parser, element] = getJSONParserAndElement();
@ -238,8 +286,9 @@ namespace JSONUtils
DataTypes data_types;
data_types.reserve(fields.size());
std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
for (const auto & field : fields)
data_types.push_back(getDataTypeFromFieldImpl(field));
data_types.push_back(getDataTypeFromFieldImpl(field, settings, numbers_parsed_from_json_strings));
/// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings.
/// Should we try to parse data inside strings somehow in this case?
@ -284,11 +333,11 @@ namespace JSONUtils
std::vector<String> column_names;
};
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings)
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings)
{
JSONEachRowFieldsExtractor extractor;
auto data_types
= determineColumnDataTypesFromJSONEachRowDataImpl<JSONEachRowFieldsExtractor, '{', '}'>(in, json_strings, extractor);
= determineColumnDataTypesFromJSONEachRowDataImpl<JSONEachRowFieldsExtractor, '{', '}'>(in, settings, json_strings, extractor);
NamesAndTypesList result;
for (size_t i = 0; i != extractor.column_names.size(); ++i)
result.emplace_back(extractor.column_names[i], data_types[i]);
@ -313,10 +362,10 @@ namespace JSONUtils
}
};
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings)
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings)
{
JSONCompactEachRowFieldsExtractor extractor;
return determineColumnDataTypesFromJSONEachRowDataImpl<JSONCompactEachRowFieldsExtractor, '[', ']'>(in, json_strings, extractor);
return determineColumnDataTypesFromJSONEachRowDataImpl<JSONCompactEachRowFieldsExtractor, '[', ']'>(in, settings, json_strings, extractor);
}

View File

@ -22,16 +22,16 @@ namespace JSONUtils
/// Parse JSON from a string and convert its type to a ClickHouse type. Make the result type always Nullable.
/// JSON array with different nested types is treated as Tuple.
/// If cannot convert (for example when field contains null), return nullptr.
DataTypePtr getDataTypeFromField(const String & field);
DataTypePtr getDataTypeFromField(const String & field, const FormatSettings & settings);
/// Read row in JSONEachRow format and try to determine type for each field.
/// Return list of names and types.
/// If cannot determine the type of some field, return nullptr for it.
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings);
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings);
/// Read row in JSONCompactEachRow format and try to determine type for each field.
/// If cannot determine the type of some field, return nullptr for it.
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings);
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, bool json_strings);
bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf);

View File

@ -88,8 +88,13 @@ ColumnsDescription readSchemaFromFormat(
catch (...)
{
auto exception_message = getCurrentExceptionMessage(false);
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file: {}. You can specify the structure manually", format_name, exception_message);
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file:\n{}\nYou can specify the structure manually",
format_name,
exception_message);
}
++iterations;
if (is_eof)

View File

@ -1,5 +1,6 @@
#include <Processors/Formats/ISchemaReader.h>
#include <Formats/ReadSchemaUtils.h>
#include <Formats/EscapingRuleUtils.h>
#include <DataTypes/DataTypeString.h>
#include <boost/algorithm/string.hpp>
@ -17,35 +18,38 @@ namespace ErrorCodes
void chooseResultColumnType(
DataTypePtr & type,
const DataTypePtr & new_type,
CommonDataTypeChecker common_type_checker,
DataTypePtr & new_type,
std::function<void(DataTypePtr &, DataTypePtr &)> transform_types_if_needed,
const DataTypePtr & default_type,
const String & column_name,
size_t row)
{
if (!type)
{
type = new_type;
return;
}
if (!new_type || type->equals(*new_type))
return;
transform_types_if_needed(type, new_type);
if (type->equals(*new_type))
return;
/// If the new type and the previous type for this column are different,
/// we will use default type if we have it or throw an exception.
if (new_type && !type->equals(*new_type))
if (default_type)
type = default_type;
else
{
DataTypePtr common_type;
if (common_type_checker)
common_type = common_type_checker(type, new_type);
if (common_type)
type = common_type;
else if (default_type)
type = default_type;
else
throw Exception(
ErrorCodes::TYPE_MISMATCH,
"Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}",
type->getName(),
column_name,
row,
new_type->getName());
throw Exception(
ErrorCodes::TYPE_MISMATCH,
"Automatically defined type {} for column {} in row {} differs from type defined by previous rows: {}",
type->getName(),
column_name,
row,
new_type->getName());
}
}
@ -63,8 +67,8 @@ void checkResultColumnTypeAndAppend(NamesAndTypesList & result, DataTypePtr & ty
result.emplace_back(name, type);
}
IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings)
: ISchemaReader(in_)
IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: ISchemaReader(in_), format_settings(format_settings_)
{
if (!format_settings.column_names_for_schema_inference.empty())
{
@ -79,14 +83,14 @@ IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & form
}
}
IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_)
: IRowSchemaReader(in_, format_settings)
IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_)
: IRowSchemaReader(in_, format_settings_)
{
default_type = default_type_;
}
IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, const DataTypes & default_types_)
: IRowSchemaReader(in_, format_settings)
IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, const DataTypes & default_types_)
: IRowSchemaReader(in_, format_settings_)
{
default_types = default_types_;
}
@ -116,7 +120,8 @@ NamesAndTypesList IRowSchemaReader::readSchema()
if (!new_data_types[i])
continue;
chooseResultColumnType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), rows_read);
auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type, i); };
chooseResultColumnType(data_types[i], new_data_types[i], transform_types_if_needed, getDefaultType(i), std::to_string(i + 1), rows_read);
}
}
@ -156,8 +161,13 @@ DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const
return nullptr;
}
IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_)
: ISchemaReader(in_), default_type(default_type_)
void IRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
{
transformInferredTypesIfNeeded(type, new_type, format_settings);
}
IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_)
: ISchemaReader(in_), format_settings(format_settings_), default_type(default_type_)
{
}
@ -181,6 +191,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
names_order.push_back(name);
}
auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type); };
for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read)
{
auto new_names_and_types = readRowAndGetNamesAndDataTypes(eof);
@ -188,7 +199,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
/// We reached eof.
break;
for (const auto & [name, new_type] : new_names_and_types)
for (auto & [name, new_type] : new_names_and_types)
{
auto it = names_to_types.find(name);
/// If we didn't see this column before, just add it.
@ -200,7 +211,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
}
auto & type = it->second;
chooseResultColumnType(type, new_type, common_type_checker, default_type, name, rows_read);
chooseResultColumnType(type, new_type, transform_types_if_needed, default_type, name, rows_read);
}
}
@ -219,4 +230,9 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
return result;
}
void IRowWithNamesSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
{
transformInferredTypesIfNeeded(type, new_type, format_settings);
}
}

View File

@ -53,8 +53,6 @@ public:
NamesAndTypesList readSchema() override;
void setCommonTypeChecker(CommonDataTypeChecker checker) { common_type_checker = checker; }
protected:
/// Read one row and determine types of columns in it.
/// Return types in the same order in which the values were in the row.
@ -67,6 +65,10 @@ protected:
void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; }
size_t getNumRowsRead() const override { return rows_read; }
FormatSettings format_settings;
virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx);
private:
DataTypePtr getDefaultType(size_t column) const;
@ -74,7 +76,6 @@ private:
size_t rows_read = 0;
DataTypePtr default_type;
DataTypes default_types;
CommonDataTypeChecker common_type_checker;
std::vector<String> column_names;
};
@ -86,12 +87,10 @@ private:
class IRowWithNamesSchemaReader : public ISchemaReader
{
public:
IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_ = nullptr);
IRowWithNamesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_ = nullptr);
NamesAndTypesList readSchema() override;
bool hasStrictOrderOfColumns() const override { return false; }
void setCommonTypeChecker(CommonDataTypeChecker checker) { common_type_checker = checker; }
protected:
/// Read one row and determine types of columns in it.
/// Return list with names and types.
@ -102,11 +101,14 @@ protected:
void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; }
size_t getNumRowsRead() const override { return rows_read; }
virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type);
FormatSettings format_settings;
private:
size_t max_rows_to_read;
size_t rows_read = 0;
DataTypePtr default_type;
CommonDataTypeChecker common_type_checker;
};
/// Base class for schema inference for formats that don't need any data to
@ -122,8 +124,8 @@ public:
void chooseResultColumnType(
DataTypePtr & type,
const DataTypePtr & new_type,
CommonDataTypeChecker common_type_checker,
DataTypePtr & new_type,
std::function<void(DataTypePtr &, DataTypePtr &)> transform_types_if_needed,
const DataTypePtr & default_type,
const String & column_name,
size_t row);

View File

@ -318,6 +318,11 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes()
return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule());
}
void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
{
transformInferredTypesIfNeeded(type, new_type, format_settings, reader.getEscapingRule());
}
void registerInputFormatCustomSeparated(FormatFactory & factory)
{
for (bool ignore_spaces : {false, true})

View File

@ -97,6 +97,8 @@ public:
private:
DataTypes readRowAndGetDataTypes() override;
void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override;
PeekableReadBuffer buf;
CustomSeparatedFormatReader reader;
bool first_row = true;

View File

@ -1,5 +1,6 @@
#include <Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h>
#include <Formats/JSONUtils.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/ReadHelpers.h>
#include <base/find_symbols.h>
@ -181,13 +182,14 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase(
{
}
void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const
void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const
{
auto common_type_checker = [&](const DataTypePtr & first, const DataTypePtr & second)
auto convert_types_if_needed = [&](DataTypePtr & first, DataTypePtr & second)
{
return JSONUtils::getCommonTypeForJSONFormats(first, second, format_settings.json.read_bools_as_numbers);
DataTypes types = {first, second};
transformInferredJSONTypesIfNeeded(types, format_settings);
};
chooseResultColumnType(type, new_type, common_type_checker, nullptr, column_name, row);
chooseResultColumnType(type, new_type, convert_types_if_needed, nullptr, column_name, row);
}
NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema()
@ -260,7 +262,7 @@ DataTypePtr JSONColumnsSchemaReaderBase::readColumnAndGetDataType(const String &
}
readJSONField(field, in);
DataTypePtr field_type = JSONUtils::getDataTypeFromField(field);
DataTypePtr field_type = JSONUtils::getDataTypeFromField(field, format_settings);
chooseResulType(column_type, field_type, column_name, rows_read);
++rows_read;
}

View File

@ -83,7 +83,7 @@ private:
DataTypePtr readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read);
/// Choose result type for column from two inferred types from different rows.
void chooseResulType(DataTypePtr & type, const DataTypePtr & new_type, const String & column_name, size_t row) const;
void chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const;
const FormatSettings format_settings;
std::unique_ptr<JSONColumnsReaderBase> reader;

View File

@ -6,6 +6,7 @@
#include <Formats/FormatFactory.h>
#include <Formats/verbosePrintString.h>
#include <Formats/JSONUtils.h>
#include <Formats/EscapingRuleUtils.h>
#include <Formats/registerWithNamesAndTypes.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/Serializations/SerializationNullable.h>
@ -187,11 +188,6 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(
: FormatWithNamesAndTypesSchemaReader(in_, format_settings_, with_names_, with_types_, &reader)
, reader(in_, yield_strings_, format_settings_)
{
bool allow_bools_as_numbers = format_settings_.json.read_bools_as_numbers;
setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second)
{
return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers);
});
}
DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes()
@ -210,7 +206,12 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes()
if (in.eof())
return {};
return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, reader.yieldStrings());
return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, format_settings, reader.yieldStrings());
}
void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
{
transformInferredJSONTypesIfNeeded(type, new_type, format_settings);
}
void registerInputFormatJSONCompactEachRow(FormatFactory & factory)

View File

@ -80,6 +80,8 @@ public:
private:
DataTypes readRowAndGetDataTypes() override;
void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override;
JSONCompactEachRowFormatReader reader;
bool first_row = true;
};

View File

@ -3,6 +3,7 @@
#include <Processors/Formats/Impl/JSONEachRowRowInputFormat.h>
#include <Formats/JSONUtils.h>
#include <Formats/EscapingRuleUtils.h>
#include <Formats/FormatFactory.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/Serializations/SerializationNullable.h>
@ -306,18 +307,12 @@ void JSONEachRowRowInputFormat::readSuffix()
assertEOF(*in);
}
JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings)
: IRowWithNamesSchemaReader(in_)
JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings_)
: IRowWithNamesSchemaReader(in_, format_settings_)
, json_strings(json_strings_)
{
bool allow_bools_as_numbers = format_settings.json.read_bools_as_numbers;
setCommonTypeChecker([allow_bools_as_numbers](const DataTypePtr & first, const DataTypePtr & second)
{
return JSONUtils::getCommonTypeForJSONFormats(first, second, allow_bools_as_numbers);
});
}
NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof)
{
if (first_row)
@ -350,7 +345,12 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool &
return {};
}
return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, json_strings);
return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, json_strings);
}
void JSONEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
{
transformInferredJSONTypesIfNeeded(type, new_type, format_settings);
}
void registerInputFormatJSONEachRow(FormatFactory & factory)

View File

@ -92,6 +92,7 @@ public:
private:
NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override;
void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
bool json_strings;
bool first_row = true;

View File

@ -402,7 +402,7 @@ void MySQLDumpRowInputFormat::skipField()
}
MySQLDumpSchemaReader::MySQLDumpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: IRowSchemaReader(in_, format_settings_), format_settings(format_settings_), table_name(format_settings_.mysql_dump.table_name)
: IRowSchemaReader(in_, format_settings_), table_name(format_settings_.mysql_dump.table_name)
{
}

View File

@ -35,7 +35,6 @@ private:
NamesAndTypesList readSchema() override;
DataTypes readRowAndGetDataTypes() override;
const FormatSettings format_settings;
String table_name;
};

View File

@ -133,7 +133,6 @@ RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings &
buf,
format_settings_,
getDefaultDataTypeForEscapingRule(format_settings_.regexp.escaping_rule))
, format_settings(format_settings_)
, field_extractor(format_settings)
, buf(in_)
{
@ -157,6 +156,12 @@ DataTypes RegexpSchemaReader::readRowAndGetDataTypes()
return data_types;
}
void RegexpSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t)
{
transformInferredTypesIfNeeded(type, new_type, format_settings, format_settings.regexp.escaping_rule);
}
void registerInputFormatRegexp(FormatFactory & factory)
{
factory.registerInputFormat("Regexp", [](

View File

@ -81,8 +81,10 @@ public:
private:
DataTypes readRowAndGetDataTypes() override;
void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override;
using EscapingRule = FormatSettings::EscapingRule;
const FormatSettings format_settings;
RegexpFieldExtractor field_extractor;
PeekableReadBuffer buf;
};

View File

@ -214,8 +214,7 @@ void TSKVRowInputFormat::resetParser()
}
TSKVSchemaReader::TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: IRowWithNamesSchemaReader(in_, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped))
, format_settings(format_settings_)
: IRowWithNamesSchemaReader(in_, format_settings_, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped))
{
}

View File

@ -61,7 +61,6 @@ public:
private:
NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override;
const FormatSettings format_settings;
bool first_row = true;
};

View File

@ -458,7 +458,6 @@ TemplateSchemaReader::TemplateSchemaReader(
, buf(in_)
, format(format_)
, row_format(row_format_)
, format_settings(format_settings_)
, format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings)
{
setColumnNames(row_format.column_names);
@ -494,6 +493,11 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes()
return data_types;
}
void TemplateSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx)
{
transformInferredTypesIfNeeded(type, new_type, format_settings, row_format.escaping_rules[column_idx]);
}
static ParsedTemplateFormatString fillResultSetFormat(const FormatSettings & settings)
{
ParsedTemplateFormatString resultset_format;

View File

@ -121,10 +121,11 @@ public:
DataTypes readRowAndGetDataTypes() override;
private:
void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx) override;
PeekableReadBuffer buf;
const ParsedTemplateFormatString format;
const ParsedTemplateFormatString row_format;
FormatSettings format_settings;
TemplateFormatReader format_reader;
bool first_row = true;
};

View File

@ -567,7 +567,7 @@ void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_)
}
ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: IRowSchemaReader(buf, format_settings_), buf(in_), format_settings(format_settings_)
: IRowSchemaReader(buf, format_settings_), buf(in_)
{
}

View File

@ -103,7 +103,6 @@ private:
DataTypes readRowAndGetDataTypes() override;
PeekableReadBuffer buf;
const FormatSettings format_settings;
ParserExpression parser;
bool first_row = true;
bool end_of_data = false;

View File

@ -229,12 +229,12 @@ void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_)
FormatWithNamesAndTypesSchemaReader::FormatWithNamesAndTypesSchemaReader(
ReadBuffer & in_,
const FormatSettings & format_settings,
const FormatSettings & format_settings_,
bool with_names_,
bool with_types_,
FormatWithNamesAndTypesReader * format_reader_,
DataTypePtr default_type_)
: IRowSchemaReader(in_, format_settings, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_)
: IRowSchemaReader(in_, format_settings_, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_)
{
}

View File

@ -123,7 +123,7 @@ class FormatWithNamesAndTypesSchemaReader : public IRowSchemaReader
public:
FormatWithNamesAndTypesSchemaReader(
ReadBuffer & in,
const FormatSettings & format_settings,
const FormatSettings & format_settings_,
bool with_names_,
bool with_types_,
FormatWithNamesAndTypesReader * format_reader_,

View File

@ -537,7 +537,7 @@ def test_schema_inference_with_globs(started_cluster):
)
result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/data*.jsoncompacteachrow')")
assert result.strip() == "c1\tNullable(Float64)"
assert result.strip() == "c1\tNullable(Int64)"
result = node1.query(
f"select * from hdfs('hdfs://hdfs1:9000/data*.jsoncompacteachrow')"

View File

@ -1307,7 +1307,7 @@ def test_schema_inference_from_globs(started_cluster):
result = instance.query(
f"desc url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}')"
)
assert result.strip() == "c1\tNullable(Float64)"
assert result.strip() == "c1\tNullable(Int64)"
result = instance.query(
f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{url_filename}')"
@ -1317,7 +1317,7 @@ def test_schema_inference_from_globs(started_cluster):
result = instance.query(
f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test*.jsoncompacteachrow')"
)
assert result.strip() == "c1\tNullable(Float64)"
assert result.strip() == "c1\tNullable(Int64)"
result = instance.query(
f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test*.jsoncompacteachrow')"

View File

@ -3,6 +3,6 @@
Tuple(k1 Nullable(Int8), k2 Tuple(k3 Nullable(String), k4 Nested(k5 Nullable(Int8), k6 Nullable(Int8))), some Nullable(Int8))
{"id":"1","obj":"aaa","s":"foo"}
{"id":"2","obj":"bbb","s":"bar"}
{"map":{"k1":1,"k2":2},"obj":{"k1":1,"k2.k3":2},"map_type":"Map(String, Nullable(Float64))","obj_type":"Object(Nullable('json'))"}
{"obj":{"k1":1,"k2":2},"map":{"k1":"1","k2":"2"}}
Tuple(k1 Float64, k2 Float64)
{"map":{"k1":"1","k2":"2"},"obj":{"k1":1,"k2.k3":2},"map_type":"Map(String, Nullable(Int64))","obj_type":"Object(Nullable('json'))"}
{"obj":{"k1":"1","k2":"2"},"map":{"k1":"1","k2":"2"}}
Tuple(k1 Int64, k2 Int64)

View File

@ -1,22 +1,22 @@
TSV
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Array(Nullable(Float64))
c4 Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))
c3 Array(Nullable(Int64))
c4 Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64))
42 Some string [1,2,3,4] (1,2,3)
42 abcd [] (4,5,6)
TSVWithNames
number Nullable(Float64)
number Nullable(Int64)
string Nullable(String)
array Array(Nullable(Float64))
tuple Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))
array Array(Nullable(Int64))
tuple Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64))
42 Some string [1,2,3,4] (1,2,3)
42 abcd [] (4,5,6)
CSV
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c4 Array(Nullable(Float64))
c4 Array(Nullable(Int64))
\N Some string [([1,2.3],'String'),([],NULL)] [1,NULL,3]
42 \N [([1,2.3],'String'),([3],'abcd')] [4,5,6]
c1 Nullable(String)
@ -28,54 +28,54 @@ c2 Nullable(String)
\N [NULL, NULL]
\N []
CSVWithNames
a Nullable(Float64)
a Nullable(Int64)
b Nullable(String)
c Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
d Array(Nullable(Float64))
d Array(Nullable(Int64))
\N Some string [([1,2.3],'String'),([],NULL)] [1,NULL,3]
42 \N [([1,2.3],'String'),([3],'abcd')] [4,5,6]
JSONCompactEachRow
c1 Nullable(Float64)
c2 Array(Tuple(Nullable(Float64), Nullable(String)))
c3 Map(String, Nullable(Float64))
c2 Array(Tuple(Nullable(Int64), Nullable(String)))
c3 Map(String, Nullable(Int64))
c4 Nullable(Bool)
42.42 [(1,'String'),(2,'abcd')] {'key':42,'key2':24} true
c1 Nullable(Float64)
c2 Array(Tuple(Nullable(Float64), Nullable(String)))
c3 Map(String, Nullable(Float64))
c1 Nullable(Int64)
c2 Array(Tuple(Nullable(Int64), Nullable(String)))
c3 Map(String, Nullable(Int64))
c4 Nullable(Bool)
\N [(1,'String'),(2,NULL)] {'key':NULL,'key2':24} \N
32 [(2,'String 2'),(3,'hello')] {'key3':4242,'key4':2424} true
JSONCompactEachRowWithNames
a Nullable(Float64)
b Array(Tuple(Nullable(Float64), Nullable(String)))
c Map(String, Nullable(Float64))
b Array(Tuple(Nullable(Int64), Nullable(String)))
c Map(String, Nullable(Int64))
d Nullable(Bool)
42.42 [(1,'String'),(2,'abcd')] {'key':42,'key2':24} true
JSONEachRow
a Nullable(Float64)
b Array(Tuple(Nullable(Float64), Nullable(String)))
c Map(String, Nullable(Float64))
b Array(Tuple(Nullable(Int64), Nullable(String)))
c Map(String, Nullable(Int64))
d Nullable(Bool)
42.42 [(1,'String'),(2,'abcd')] {'key':42,'key2':24} true
a Nullable(Float64)
b Array(Tuple(Nullable(Float64), Nullable(String)))
c Map(String, Nullable(Float64))
a Nullable(Int64)
b Array(Tuple(Nullable(Int64), Nullable(String)))
c Map(String, Nullable(Int64))
d Nullable(Bool)
\N [(1,'String'),(2,NULL)] {'key':NULL,'key2':24} \N
32 [(2,'String 2'),(3,'hello')] {'key3':4242,'key4':2424} true
a Nullable(Float64)
a Nullable(Int64)
b Nullable(String)
c Array(Nullable(Float64))
c Array(Nullable(Int64))
1 s1 []
2 \N [2]
\N \N []
\N \N []
\N \N [3]
TSKV
a Nullable(Float64)
a Nullable(Int64)
b Nullable(String)
c Array(Nullable(Float64))
c Array(Nullable(Int64))
1 s1 []
2 } [2]
\N \N []
@ -84,77 +84,77 @@ c Array(Nullable(Float64))
Values
c1 Nullable(Float64)
c2 Nullable(String)
c3 Array(Nullable(Float64))
c4 Tuple(Nullable(Float64), Nullable(String))
c5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))
c3 Array(Nullable(Int64))
c4 Tuple(Nullable(Int64), Nullable(String))
c5 Tuple(Array(Nullable(Int64)), Array(Tuple(Nullable(Int64), Nullable(String))))
42.42 Some string [1,2,3] (1,'2') ([1,2],[(3,'4'),(5,'6')])
c1 Nullable(Float64)
c2 Nullable(String)
c3 Array(Nullable(Float64))
c4 Tuple(Nullable(Float64), Nullable(Float64))
c5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))
c3 Array(Nullable(Int64))
c4 Tuple(Nullable(Int64), Nullable(Int64))
c5 Tuple(Array(Nullable(Int64)), Array(Tuple(Nullable(Int64), Nullable(String))))
42.42 \N [1,NULL,3] (1,NULL) ([1,2],[(3,'4'),(5,'6')])
\N Some string [10] (1,2) ([],[])
Regexp
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Nullable(String)
42 Some string 1 [([1, 2, 3], String 1), ([], String 1)]
2 Some string 2 [([4, 5, 6], String 2), ([], String 2)]
312 Some string 3 [([1, 2, 3], String 2), ([], String 2)]
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42 Some string 1 [([1,2,3],'String 1'),([],'String 1')]
3 Some string 2 [([3,5,1],'String 2'),([],'String 2')]
244 Some string 3 [([],'String 3'),([],'String 3')]
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42 Some string 1 [([1,2,3],'String 1'),([],'String 1')]
2 Some string 2 [([],'String 2'),([],'String 2')]
43 Some string 3 [([1,5,3],'String 3'),([],'String 3')]
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
52 Some string 2 [([],'String 2'),([1],'String 2')]
24 Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
CustomSeparated
c1 Nullable(Float64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
42 Some string 2 [([],'String 2'),([],'String 2')]
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
c1 Nullable(Float64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
42 Some string 2 [([],'String 2'),([],'String 2')]
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
c1 Nullable(Float64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
42 Some string 2 [([],'String 2'),([],'String 2')]
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
Template
column_1 Nullable(Float64)
column_2 Nullable(String)
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
column_3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
42 Some string 2 [([],'String 2'),([],'String 2')]
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
column_1 Nullable(Float64)
column_2 Nullable(String)
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
column_3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
42 Some string 2 [([],'String 2'),([],'String 2')]
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
column_1 Nullable(Float64)
column_2 Nullable(String)
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
column_3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
42 Some string 2 [([],'String 2'),([],'String 2')]
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]

View File

@ -39,9 +39,9 @@ World 123
Hello 111
World 123
1 2 [1,2,3] [['abc'],[],['d','e']]
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Array(Nullable(Float64))
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Array(Nullable(Int64))
c4 Array(Array(Nullable(String)))
Hello 111
World 123
@ -49,4 +49,4 @@ Hello 111
Hello 131
World 123
a Nullable(String)
b Nullable(Float64)
b Nullable(Int64)

View File

@ -1,4 +1,4 @@
x Nullable(Float64)
x Nullable(Int64)
0
1
2
@ -9,7 +9,7 @@ x Nullable(Float64)
7
8
9
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
1 2 3

View File

@ -1,3 +1,3 @@
CREATE TABLE default.test\n(\n `x` Nullable(Float64),\n `y` Nullable(String)\n)\nENGINE = File(\'JSONEachRow\', \'data.jsonl\')
CREATE TABLE default.test\n(\n `x` Nullable(Int64),\n `y` Nullable(String)\n)\nENGINE = File(\'JSONEachRow\', \'data.jsonl\')
OK
OK

View File

@ -1,6 +1,6 @@
a Nullable(Float64)
a Nullable(Int64)
b Nullable(String)
c Array(Nullable(Float64))
c Array(Nullable(Int64))
1 s1 []
2 } [2]
\N \N []

View File

@ -1,8 +1,8 @@
x Nullable(String)
y Nullable(Float64)
y Nullable(Int64)
x Nullable(String)
y Nullable(Float64)
y Nullable(Int64)
x Nullable(String)
y Nullable(Float64)
y Nullable(Int64)
x Nullable(String)
y Nullable(Float64)
y Nullable(Int64)

View File

@ -22,24 +22,24 @@
1 2 3
4 5 6
7 8 9
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 UInt32
c2 UInt32
c3 UInt32
c1 UInt32
c2 UInt32
c3 UInt32
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 UInt32
c2 UInt32
c3 UInt32

View File

@ -1,21 +1,21 @@
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 UInt64
c2 UInt64
c3 UInt64
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 UInt64
c2 UInt64
c3 UInt64
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 UInt64
c2 UInt64
c3 UInt64

View File

@ -1,8 +1,8 @@
TSV
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Array(Nullable(Float64))
c4 Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))
c3 Array(Nullable(Int64))
c4 Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64))
42 Some string [1,2,3,4] (1,2,3)
42 abcd [] (4,5,6)
c1 Nullable(String)
@ -70,8 +70,8 @@ c1 Nullable(String)
CSV
c1 Nullable(String)
c2 Nullable(String)
c3 Array(Nullable(Float64))
c4 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)))
c3 Array(Nullable(Int64))
c4 Array(Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)))
42 Some string [1,2,3,4] [(1,2,3)]
42\\ abcd [] [(4,5,6)]
c1 Nullable(String)
@ -101,7 +101,7 @@ c1 Nullable(String)
(1, 2, 3)
c1 Nullable(String)
123.123
c1 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)))
c1 Array(Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)))
[(1,2,3)]
c1 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)))
c1 Array(Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)))
[(1,2,3)]

View File

@ -1,32 +1,32 @@
a Nullable(Float64)
a Nullable(Int64)
b Nullable(String)
c Array(Nullable(Float64))
c Array(Nullable(Int64))
1 s1 []
2 } [2]
\N \N []
\N \N []
\N \N [3]
b Nullable(Float64)
a Nullable(Float64)
c Nullable(Float64)
e Nullable(Float64)
b Nullable(Int64)
a Nullable(Int64)
c Nullable(Int64)
e Nullable(Int64)
1 \N \N \N
\N 2 3 \N
\N \N \N \N
\N \N \N 3
3 3 1 \N
a Nullable(Float64)
a Nullable(Int64)
b Nullable(String)
c Array(Nullable(Float64))
c Array(Nullable(Int64))
1 s1 []
2 \N [2]
\N \N []
\N \N []
\N \N [3]
b Nullable(Float64)
a Nullable(Float64)
c Nullable(Float64)
e Nullable(Float64)
b Nullable(Int64)
a Nullable(Int64)
c Nullable(Int64)
e Nullable(Int64)
1 \N \N \N
\N 2 3 \N
\N \N \N \N

View File

@ -7,6 +7,12 @@ x Nullable(Float64)
x Nullable(Float64)
1
0.42
x Array(Nullable(Float64))
[1,0]
[0.42]
x Array(Array(Nullable(Int64)))
[[1,2,3],[1,0],[1,1,0]]
[[1,2,3]]
c1 Nullable(Bool)
true
false
@ -16,3 +22,9 @@ c1 Nullable(Float64)
c1 Nullable(Float64)
1
0.42
c1 Array(Nullable(Float64))
[1,0]
[0.42]
c1 Array(Array(Nullable(Int64)))
[[1,2,3],[1,0],[1,1,0]]
[[1,2,3]]

View File

@ -27,6 +27,16 @@ echo -e '{"x" : true}
$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONEachRow')"
$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONEachRow')"
echo -e '{"x" : [true, false]}
{"x" : [0.42]}' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONEachRow')"
$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONEachRow')"
echo -e '{"x" : [[1, 2, 3], [true, false], [1, true, false]]}
{"x" : [[1, 2, 3]]}' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONEachRow')"
$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONEachRow')"
echo -e '[true]
[false]' > $DATA_FILE
@ -43,5 +53,14 @@ echo -e '[true]
$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONCompactEachRow')"
$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONCompactEachRow')"
echo -e '[[true, false]]
[[0.42]]' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONCompactEachRow')"
$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONCompactEachRow')"
echo -e '[[[1, 2, 3], [true, false], [1, true, false]]]
[[[1, 2, 3]]]' > $DATA_FILE
$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'JSONCompactEachRow')"
$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'JSONCompactEachRow')"
rm $DATA_FILE

View File

@ -1,2 +1,5 @@
x Object(Nullable(\'json\'))
x Object(Nullable(\'json\'))
x Array(Object(Nullable(\'json\')))
x Array(Object(Nullable(\'json\')))
x Tuple(Map(String, Nullable(String)), Map(String, Array(Nullable(Int64))), Array(Nullable(Int64)))

View File

@ -1,3 +1,6 @@
-- Tags: no-fasttest
desc format(JSONEachRow, '{"x" : {"a" : "Some string"}}, {"x" : {"b" : [1, 2, 3]}}, {"x" : {"c" : {"d" : 10}}}');
desc format(JSONEachRow, '{"x" : {"a" : "Some string"}}, {"x" : {"b" : [1, 2, 3], "c" : {"42" : 42}}}');
desc format(JSONEachRow, '{"x" : [{"a" : "Some string"}]}, {"x" : [{"b" : [1, 2, 3]}]}');
desc format(JSONEachRow, '{"x" : [{"a" : "Some string"}, {"b" : [1, 2, 3]}]}');
desc format(JSONEachRow, '{"x" : [{"a" : "Some string"}, {"b" : [1, 2, 3]}, [1, 2, 3]]}');

View File

@ -130,8 +130,8 @@ x Nullable(Int32)
x Nullable(Int32)
1
dump7
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -139,8 +139,8 @@ c2 Nullable(Float64)
4 \N
5 \N
6 7
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -148,15 +148,15 @@ c2 Nullable(Float64)
4 \N
5 \N
6 7
c1 Nullable(Float64)
c1 Nullable(Int64)
1
2
3
c1 Nullable(Float64)
c1 Nullable(Int64)
1
dump8
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -164,8 +164,8 @@ c2 Nullable(Float64)
4 \N
5 \N
6 7
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -174,8 +174,8 @@ c2 Nullable(Float64)
5 \N
6 7
dump9
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -183,8 +183,8 @@ c2 Nullable(Float64)
4 \N
5 \N
6 7
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -193,8 +193,8 @@ c2 Nullable(Float64)
5 \N
6 7
dump10
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -202,8 +202,8 @@ c2 Nullable(Float64)
4 \N
5 \N
6 7
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -212,8 +212,8 @@ c2 Nullable(Float64)
5 \N
6 7
dump11
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -221,8 +221,8 @@ c2 Nullable(Float64)
4 \N
5 \N
6 7
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
1 \N
2 \N
3 \N
@ -265,8 +265,8 @@ color Nullable(String)
price Nullable(Int32)
apple red 42
dump14
x Nullable(Float64)
y Nullable(Float64)
x Nullable(Int64)
y Nullable(Int64)
1 \N
2 \N
3 \N
@ -274,8 +274,8 @@ y Nullable(Float64)
4 \N
5 \N
6 7
x Nullable(Float64)
y Nullable(Float64)
x Nullable(Int64)
y Nullable(Int64)
1 \N
2 \N
3 \N
@ -283,15 +283,15 @@ y Nullable(Float64)
4 \N
5 \N
6 7
x Nullable(Float64)
x Nullable(Int64)
1
2
3
x Nullable(Float64)
x Nullable(Int64)
1
dump15
x Nullable(Float64)
y Nullable(Float64)
x Nullable(Int64)
y Nullable(Int64)
1 \N
2 \N
3 \N
@ -299,8 +299,8 @@ y Nullable(Float64)
4 \N
5 \N
6 7
x Nullable(Float64)
y Nullable(Float64)
x Nullable(Int64)
y Nullable(Int64)
1 \N
2 \N
3 \N
@ -308,10 +308,10 @@ y Nullable(Float64)
4 \N
5 \N
6 7
x Nullable(Float64)
x Nullable(Int64)
1
2
3
x Nullable(Float64)
y Nullable(Float64)
x Nullable(Int64)
y Nullable(Int64)
1 2

View File

@ -4,9 +4,9 @@ JSONColumns
"b": ["String", "String", "String", "String", "String"],
"c": [[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]], [[[0],"String"],[[],"gnirtS"]]]
}
a Nullable(Float64)
a Nullable(Int64)
b Nullable(String)
c Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
0 String [([],'String'),([],'gnirtS')]
1 String [([0],'String'),([0],'gnirtS')]
2 String [([0,1],'String'),([0,1],'gnirtS')]
@ -18,9 +18,9 @@ JSONCompactColumns
["String", "String", "String", "String", "String"],
[[[[],"String"],[[],"gnirtS"]], [[[0],"String"],[[0],"gnirtS"]], [[[0,1],"String"],[[0,1],"gnirtS"]], [[[],"String"],[[0,1,2],"gnirtS"]], [[[0],"String"],[[],"gnirtS"]]]
]
c1 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(String)
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
c3 Array(Tuple(Array(Nullable(Int64)), Nullable(String)))
0 String [([],'String'),([],'gnirtS')]
1 String [([0],'String'),([0],'gnirtS')]
2 String [([0,1],'String'),([0,1],'gnirtS')]
@ -74,9 +74,9 @@ JSONColumnsWithMetadata
"bytes_read": 20
}
}
b Nullable(Float64)
a Nullable(Float64)
c Nullable(Float64)
b Nullable(Int64)
a Nullable(Int64)
c Nullable(Int64)
d Nullable(String)
1 3 \N \N
2 2 \N \N
@ -89,8 +89,8 @@ OK
3
2
1
c1 Nullable(Float64)
c2 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(String)
1 1 \N
2 2 \N

View File

@ -1,5 +1,5 @@
2
4
6
x Nullable(String)
x Nullable(String)
x Nullable(Int64)
x Nullable(Int64)

View File

@ -1,14 +1,14 @@
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
0 1 2
1 2 3
2 3 4
3 4 5
4 5 6
c1 Nullable(Float64)
c2 Nullable(Float64)
c3 Nullable(Float64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
0 1 2
1 2 3
2 3 4

View File

@ -0,0 +1,60 @@
JSONEachRow
x Nullable(Date)
x Nullable(DateTime64(9))
x Nullable(DateTime64(9))
x Array(Nullable(Date))
x Array(Nullable(DateTime64(9)))
x Array(Nullable(DateTime64(9)))
x Map(String, Nullable(DateTime64(9)))
x Array(Nullable(DateTime64(9)))
x Array(Nullable(DateTime64(9)))
x Nullable(DateTime64(9))
x Array(Nullable(String))
x Nullable(String)
x Array(Nullable(String))
x Map(String, Array(Array(Nullable(String))))
CSV
c1 Nullable(Date)
c1 Nullable(DateTime64(9))
c1 Nullable(DateTime64(9))
c1 Array(Nullable(Date))
c1 Array(Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Map(String, Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Nullable(DateTime64(9))
c1 Array(Nullable(String))
c1 Nullable(String)
c1 Array(Nullable(String))
c1 Map(String, Array(Array(Nullable(String))))
TSV
c1 Nullable(Date)
c1 Nullable(DateTime64(9))
c1 Nullable(DateTime64(9))
c1 Array(Nullable(Date))
c1 Array(Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Map(String, Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Nullable(DateTime64(9))
c1 Array(Nullable(String))
c1 Nullable(String)
c1 Array(Nullable(String))
c1 Map(String, Array(Array(Nullable(String))))
Values
c1 Nullable(Date)
c1 Nullable(DateTime64(9))
c1 Nullable(DateTime64(9))
c1 Array(Nullable(Date))
c1 Array(Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Map(String, Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Array(Nullable(DateTime64(9)))
c1 Nullable(DateTime64(9))
c1 Array(Nullable(String))
c1 Nullable(String)
c1 Array(Nullable(String))
c1 Map(String, Array(Array(Nullable(String))))

View File

@ -0,0 +1,70 @@
-- Tags: no-fasttest
set input_format_try_infer_dates=1;
set input_format_try_infer_datetimes=1;
select 'JSONEachRow';
desc format(JSONEachRow, '{"x" : "2020-01-01"}');
desc format(JSONEachRow, '{"x" : "2020-01-01 00:00:00.00000"}');
desc format(JSONEachRow, '{"x" : "2020-01-01 00:00:00"}');
desc format(JSONEachRow, '{"x" : ["2020-01-01", "2020-01-02"]}');
desc format(JSONEachRow, '{"x" : ["2020-01-01", "2020-01-01 00:00:00"]}');
desc format(JSONEachRow, '{"x" : ["2020-01-01 00:00:00", "2020-01-01 00:00:00"]}');
desc format(JSONEachRow, '{"x" : {"date1" : "2020-01-01 00:00:00", "date2" : "2020-01-01"}}');
desc format(JSONEachRow, '{"x" : ["2020-01-01 00:00:00", "2020-01-01"]}\n{"x" : ["2020-01-01"]}');
desc format(JSONEachRow, '{"x" : ["2020-01-01 00:00:00"]}\n{"x" : ["2020-01-01"]}');
desc format(JSONEachRow, '{"x" : "2020-01-01 00:00:00"}\n{"x" : "2020-01-01"}');
desc format(JSONEachRow, '{"x" : ["2020-01-01 00:00:00", "Some string"]}');
desc format(JSONEachRow, '{"x" : "2020-01-01 00:00:00"}\n{"x" : "Some string"}');
desc format(JSONEachRow, '{"x" : ["2020-01-01 00:00:00", "2020-01-01"]}\n{"x" : ["2020-01-01", "Some string"]}');
desc format(JSONEachRow, '{"x" : {"key1" : [["2020-01-01 00:00:00"]], "key2" : [["2020-01-01"]]}}\n{"x" : {"key1" : [["2020-01-01"]], "key2" : [["Some string"]]}}');
select 'CSV';
desc format(CSV, '"2020-01-01"');
desc format(CSV, '"2020-01-01 00:00:00.00000"');
desc format(CSV, '"2020-01-01 00:00:00"');
desc format(CSV, '"[\'2020-01-01\', \'2020-01-02\']"');
desc format(CSV, '"[\'2020-01-01\', \'2020-01-01 00:00:00\']"');
desc format(CSV, '"[\'2020-01-01 00:00:00\', \'2020-01-01 00:00:00\']"');
desc format(CSV, '"{\'date1\' : \'2020-01-01 00:00:00\', \'date2\' : \'2020-01-01\'}"');
desc format(CSV, '"[\'2020-01-01 00:00:00\', \'2020-01-01\']"\n"[\'2020-01-01\']"');
desc format(CSV, '"[\'2020-01-01 00:00:00\']"\n"[\'2020-01-01\']"');
desc format(CSV, '"2020-01-01 00:00:00"\n"2020-01-01"');
desc format(CSV, '"[\'2020-01-01 00:00:00\', \'Some string\']"');
desc format(CSV, '"2020-01-01 00:00:00"\n"Some string"');
desc format(CSV, '"[\'2020-01-01 00:00:00\', \'2020-01-01\']"\n"[\'2020-01-01\', \'Some string\']"');
desc format(CSV, '"{\'key1\' : [[\'2020-01-01 00:00:00\']], \'key2\' : [[\'2020-01-01\']]}"\n"{\'key1\' : [[\'2020-01-01\']], \'key2\' : [[\'Some string\']]}"');
select 'TSV';
desc format(TSV, '2020-01-01');
desc format(TSV, '2020-01-01 00:00:00.00000');
desc format(TSV, '2020-01-01 00:00:00');
desc format(TSV, '[\'2020-01-01\', \'2020-01-02\']');
desc format(TSV, '[\'2020-01-01\', \'2020-01-01 00:00:00\']');
desc format(TSV, '[\'2020-01-01 00:00:00\', \'2020-01-01 00:00:00\']');
desc format(TSV, '{\'date1\' : \'2020-01-01 00:00:00\', \'date2\' : \'2020-01-01\'}');
desc format(TSV, '[\'2020-01-01 00:00:00\', \'2020-01-01\']\n[\'2020-01-01\']');
desc format(TSV, '[\'2020-01-01 00:00:00\']\n[\'2020-01-01\']');
desc format(TSV, '2020-01-01 00:00:00\n2020-01-01');
desc format(TSV, '[\'2020-01-01 00:00:00\', \'Some string\']');
desc format(TSV, '2020-01-01 00:00:00\nSome string');
desc format(TSV, '[\'2020-01-01 00:00:00\', \'2020-01-01\']\n[\'2020-01-01\', \'Some string\']');
desc format(TSV, '{\'key1\' : [[\'2020-01-01 00:00:00\']], \'key2\' : [[\'2020-01-01\']]}\n{\'key1\' : [[\'2020-01-01\']], \'key2\' : [[\'Some string\']]}');
select 'Values';
desc format(Values, '(\'2020-01-01\')');
desc format(Values, '(\'2020-01-01 00:00:00.00000\')');
desc format(Values, '(\'2020-01-01 00:00:00\')');
desc format(Values, '([\'2020-01-01\', \'2020-01-02\'])');
desc format(Values, '([\'2020-01-01\', \'2020-01-01 00:00:00\'])');
desc format(Values, '([\'2020-01-01 00:00:00\', \'2020-01-01 00:00:00\'])');
desc format(Values, '({\'date1\' : \'2020-01-01 00:00:00\', \'date2\' : \'2020-01-01\'})');
desc format(Values, '([\'2020-01-01 00:00:00\', \'2020-01-01\'])\n([\'2020-01-01\'])');
desc format(Values, '([\'2020-01-01 00:00:00\']), ([\'2020-01-01\'])');
desc format(Values, '(\'2020-01-01 00:00:00\')\n(\'2020-01-01\')');
desc format(Values, '([\'2020-01-01 00:00:00\', \'Some string\'])');
desc format(Values, '(\'2020-01-01 00:00:00\')\n(\'Some string\')');
desc format(Values, '([\'2020-01-01 00:00:00\', \'2020-01-01\'])\n([\'2020-01-01\', \'Some string\'])');
desc format(Values, '({\'key1\' : [[\'2020-01-01 00:00:00\']], \'key2\' : [[\'2020-01-01\']]})\n({\'key1\' : [[\'2020-01-01\']], \'key2\' : [[\'Some string\']]})');

View File

@ -0,0 +1,17 @@
x Nullable(Int64)
x Array(Nullable(Float64))
x Map(String, Nullable(Int64))
x Map(String, Array(Nullable(Int64)))
x Nullable(Int64)
x Array(Nullable(Int64))
x Map(String, Nullable(Int64))
x Map(String, Array(Nullable(Int64)))
x Array(Nullable(String))
x Map(String, Nullable(String))
x Map(String, Array(Nullable(String)))
x Nullable(String)
x Array(Nullable(String))
x Map(String, Nullable(String))
x Map(String, Array(Nullable(String)))
x Tuple(Nullable(Int64), Nullable(String))
x Object(Nullable(\'json\'))

View File

@ -0,0 +1,21 @@
-- Tags: no-fasttest
set input_format_json_try_infer_numbers_from_strings=1;
desc format(JSONEachRow, '{"x" : "123"}');
desc format(JSONEachRow, '{"x" : ["123", 123, 12.3]}');
desc format(JSONEachRow, '{"x" : {"k1" : "123", "k2" : 123}}');
desc format(JSONEachRow, '{"x" : {"k1" : ["123", "123"], "k2" : [123, 123]}}');
desc format(JSONEachRow, '{"x" : "123"}\n{"x" : 123}');
desc format(JSONEachRow, '{"x" : ["123", "456"]}\n{"x" : [123, 456]}');
desc format(JSONEachRow, '{"x" : {"k1" : "123"}}\n{"x" : {"k2" : 123}}');
desc format(JSONEachRow, '{"x" : {"k1" : ["123", "123"]}}\n{"x": {"k2" : [123, 123]}}');
desc format(JSONEachRow, '{"x" : ["123", "Some string"]}');
desc format(JSONEachRow, '{"x" : {"k1" : "123", "k2" : "Some string"}}');
desc format(JSONEachRow, '{"x" : {"k1" : ["123", "123"], "k2" : ["Some string"]}}');
desc format(JSONEachRow, '{"x" : "123"}\n{"x" : "Some string"}');
desc format(JSONEachRow, '{"x" : ["123", "456"]}\n{"x" : ["Some string"]}');
desc format(JSONEachRow, '{"x" : {"k1" : "123"}}\n{"x" : {"k2" : "Some string"}}');
desc format(JSONEachRow, '{"x" : {"k1" : ["123", "123"]}}\n{"x": {"k2" : ["Some string"]}}');
desc format(JSONEachRow, '{"x" : [123, "Some string"]}');
desc format(JSONEachRow, '{"x" : {"a" : 123, "b" : "Some string"}}');

View File

@ -0,0 +1,36 @@
JSONEachRow
x Nullable(Int64)
x Array(Nullable(Int64))
x Map(String, Array(Nullable(Int64)))
x Map(String, Array(Nullable(Int64)))
x Nullable(Float64)
x Nullable(Float64)
x Array(Nullable(Float64))
x Map(String, Array(Nullable(Float64)))
CSV
c1 Nullable(Int64)
c1 Array(Nullable(Int64))
c1 Map(String, Array(Nullable(Int64)))
c1 Map(String, Array(Nullable(Int64)))
c1 Nullable(Float64)
c1 Nullable(Float64)
c1 Array(Nullable(Float64))
c1 Map(String, Array(Nullable(Float64)))
TSV
c1 Nullable(Int64)
c1 Array(Nullable(Int64))
c1 Map(String, Array(Nullable(Int64)))
c1 Map(String, Array(Nullable(Int64)))
c1 Nullable(Float64)
c1 Nullable(Float64)
c1 Array(Nullable(Float64))
c1 Map(String, Array(Nullable(Float64)))
Values
c1 Nullable(Int64)
c1 Array(Nullable(Int64))
c1 Map(String, Array(Nullable(Int64)))
c1 Map(String, Array(Nullable(Int64)))
c1 Nullable(Float64)
c1 Nullable(Float64)
c1 Array(Nullable(Float64))
c1 Map(String, Array(Nullable(Float64)))

View File

@ -0,0 +1,45 @@
-- Tags: no-fasttest
set input_format_try_infer_integers=1;
select 'JSONEachRow';
desc format(JSONEachRow, '{"x" : 123}');
desc format(JSONEachRow, '{"x" : [123, 123]}');
desc format(JSONEachRow, '{"x" : {"a" : [123, 123]}}');
desc format(JSONEachRow, '{"x" : {"a" : [123, 123]}}\n{"x" : {"b" : [321, 321]}}');
desc format(JSONEachRow, '{"x" : 123}\n{"x" : 123.123}');
desc format(JSONEachRow, '{"x" : 123}\n{"x" : 1e2}');
desc format(JSONEachRow, '{"x" : [123, 123]}\n{"x" : [321.321, 312]}');
desc format(JSONEachRow, '{"x" : {"a" : [123, 123]}}\n{"x" : {"b" : [321.321, 123]}}');
select 'CSV';
desc format(CSV, '123');
desc format(CSV, '"[123, 123]"');
desc format(CSV, '"{\'a\' : [123, 123]}"');
desc format(CSV, '"{\'a\' : [123, 123]}"\n"{\'b\' : [321, 321]}"');
desc format(CSV, '123\n123.123');
desc format(CSV, '122\n1e2');
desc format(CSV, '"[123, 123]"\n"[321.321, 312]"');
desc format(CSV, '"{\'a\' : [123, 123]}"\n"{\'b\' : [321.321, 123]}"');
select 'TSV';
desc format(TSV, '123');
desc format(TSV, '[123, 123]');
desc format(TSV, '{\'a\' : [123, 123]}');
desc format(TSV, '{\'a\' : [123, 123]}\n{\'b\' : [321, 321]}');
desc format(TSV, '123\n123.123');
desc format(TSV, '122\n1e2');
desc format(TSV, '[123, 123]\n[321.321, 312]');
desc format(TSV, '{\'a\' : [123, 123]}\n{\'b\' : [321.321, 123]}');
select 'Values';
desc format(Values, '(123)');
desc format(Values, '([123, 123])');
desc format(Values, '({\'a\' : [123, 123]})');
desc format(Values, '({\'a\' : [123, 123]}), ({\'b\' : [321, 321]})');
desc format(Values, '(123), (123.123)');
desc format(Values, '(122), (1e2)');
desc format(Values, '([123, 123])\n([321.321, 312])');
desc format(Values, '({\'a\' : [123, 123]}), ({\'b\' : [321.321, 123]})');