mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Allow to parse JSON objects into Strings
This commit is contained in:
parent
9c81fd7afc
commit
d0f9bb2ec2
@ -1202,6 +1202,7 @@ SELECT * FROM json_each_row_nested
|
||||
- [input_format_import_nested_json](../operations/settings/settings.md#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`.
|
||||
- [input_format_json_read_bools_as_numbers](../operations/settings/settings.md#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_read_numbers_as_strings](../operations/settings/settings.md#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`.
|
||||
- [input_format_json_read_objects_as_strings](../operations/settings/settings.md#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`.
|
||||
- [output_format_json_quote_64bit_integers](../operations/settings/settings.md#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
|
||||
- [output_format_json_quote_64bit_floats](../operations/settings/settings.md#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
|
||||
- [output_format_json_quote_denormals](../operations/settings/settings.md#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
|
||||
|
@ -3753,6 +3753,28 @@ Allow parsing numbers as strings in JSON input formats.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings}
|
||||
|
||||
Allow parsing JSON objects as strings in JSON input formats.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
SET input_format_json_read_objects_as_strings = 1;
CREATE TABLE test (obj String) ENGINE=Memory();
|
||||
INSERT INTO test FORMAT JSONEachRow {"obj" : {"a" : 1, "b" : "Hello"}};
|
||||
SELECT * FROM test;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─obj──────────────────────┐
|
||||
│ {"a" : 1, "b" : "Hello"} │
|
||||
└──────────────────────────┘
|
||||
```
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata}
|
||||
|
||||
For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1,
|
||||
|
@ -546,7 +546,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
\
|
||||
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
|
||||
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
|
||||
M(UInt64, max_size_to_preallocate_for_aggregation, 10'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
|
||||
M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
|
||||
\
|
||||
M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \
|
||||
M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \
|
||||
@ -763,6 +763,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, input_format_json_try_infer_numbers_from_strings, true, "Try to infer numbers from string fields while schema inference", 0) \
|
||||
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
|
||||
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_objects_as_strings, false, "Allow to parse JSON objects as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_try_infer_integers, true, "Try to infer numbers from string fields while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
|
||||
|
@ -311,13 +311,20 @@ void SerializationString::serializeTextJSON(const IColumn & column, size_t row_n
|
||||
|
||||
void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
|
||||
if (settings.json.read_objects_as_strings && !istr.eof() && *istr.position() == '{')
|
||||
{
|
||||
String field;
|
||||
readJSONObjectPossiblyInvalid(field, istr);
|
||||
ReadBufferFromString buf(field);
|
||||
read(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
|
||||
}
|
||||
else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
|
||||
{
|
||||
String field;
|
||||
readJSONField(field, istr);
|
||||
Float64 tmp;
|
||||
ReadBufferFromString buf(field);
|
||||
if (tryReadFloatText(tmp, buf))
|
||||
if (tryReadFloatText(tmp, buf) && buf.eof())
|
||||
read(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
|
||||
else
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON String value here: {}", field);
|
||||
|
@ -394,6 +394,7 @@ void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings
|
||||
|
||||
bool have_maps = false;
|
||||
bool have_objects = false;
|
||||
bool have_strings = false;
|
||||
bool are_maps_equal = true;
|
||||
DataTypePtr first_map_type;
|
||||
for (const auto & type : data_types)
|
||||
@ -414,6 +415,10 @@ void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings
|
||||
{
|
||||
have_objects = true;
|
||||
}
|
||||
else if (isString(type))
|
||||
{
|
||||
/// Record that a String type was seen: the read_objects_as_strings branch further down
/// only converts Maps/Objects to String when have_strings is set.
have_strings = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (have_maps && (have_objects || !are_maps_equal))
|
||||
@ -424,6 +429,15 @@ void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings
|
||||
type = std::make_shared<DataTypeObject>("json", true);
|
||||
}
|
||||
}
|
||||
|
||||
if (settings.json.read_objects_as_strings && have_strings && (have_maps || have_objects))
|
||||
{
|
||||
for (auto & type : data_types)
|
||||
{
|
||||
if (isMap(type) || isObject(type))
|
||||
type = std::make_shared<DataTypeString>();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
transformTypesRecursively(types, transform_simple_types, transform_complex_types);
|
||||
|
@ -98,6 +98,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.json.quote_decimals = settings.output_format_json_quote_decimals;
|
||||
format_settings.json.read_bools_as_numbers = settings.input_format_json_read_bools_as_numbers;
|
||||
format_settings.json.read_numbers_as_strings = settings.input_format_json_read_numbers_as_strings;
|
||||
format_settings.json.read_objects_as_strings = settings.input_format_json_read_objects_as_strings;
|
||||
format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings;
|
||||
format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata;
|
||||
format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8;
|
||||
|
@ -152,6 +152,7 @@ struct FormatSettings
|
||||
bool serialize_as_strings = false;
|
||||
bool read_bools_as_numbers = true;
|
||||
/// Kept in sync with the default of the input_format_json_read_numbers_as_strings setting (disabled).
bool read_numbers_as_strings = false;
|
||||
/// Kept in sync with the default of the input_format_json_read_objects_as_strings setting (disabled).
bool read_objects_as_strings = false;
|
||||
bool try_infer_numbers_from_strings = false;
|
||||
bool validate_types_from_metadata = true;
|
||||
bool validate_utf8 = false;
|
||||
|
@ -235,7 +235,12 @@ namespace JSONUtils
|
||||
/// If we couldn't infer nested type and Object type is not enabled,
|
||||
/// we can't determine the type of this JSON field.
|
||||
if (!settings.json.try_infer_objects)
|
||||
{
|
||||
/// If read_objects_as_strings is enabled, we can read objects into strings.
|
||||
if (settings.json.read_objects_as_strings)
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
@ -257,7 +262,12 @@ namespace JSONUtils
|
||||
if (!are_types_equal)
|
||||
{
|
||||
if (!settings.json.try_infer_objects)
|
||||
{
|
||||
/// If read_objects_as_strings is enabled, we can read objects into strings.
|
||||
if (settings.json.read_objects_as_strings)
|
||||
return makeNullable(std::make_shared<DataTypeString>());
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_shared<DataTypeObject>("json", true);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,3 @@
|
||||
x Nullable(String)
|
||||
abc
|
||||
{"a" : 10, "b" : "abc"}
|
@ -0,0 +1,4 @@
|
||||
set input_format_json_read_objects_as_strings=1;
|
||||
desc format(JSONEachRow, '{"x" : "abc"}, {"x" : {"a" : 10, "b" : "abc"}}');
|
||||
select * from format(JSONEachRow, '{"x" : "abc"}, {"x" : {"a" : 10, "b" : "abc"}}');
|
||||
|
Loading…
Reference in New Issue
Block a user