mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge pull request #58561 from Avogar/json-bool-as-string
Allow to read Bool values into String in JSON input formats
This commit is contained in:
commit
a899f0a9ed
@ -1262,6 +1262,7 @@ SELECT * FROM json_each_row_nested
|
||||
|
||||
- [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`.
|
||||
- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_numbers) - allow parsing bools as numbers in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_read_bools_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_strings) - allow parsing bools as strings in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow parsing numbers as strings in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_read_arrays_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_arrays_as_strings) - allow parsing JSON arrays as strings in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow parsing JSON objects as strings in JSON input formats. Default value - `true`.
|
||||
|
@ -614,6 +614,26 @@ DESC format(JSONEachRow, $$
|
||||
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
##### input_format_json_read_bools_as_strings
|
||||
|
||||
Enabling this setting allows reading Bool values as strings.
|
||||
|
||||
This setting is enabled by default.
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
SET input_format_json_read_bools_as_strings = 1;
|
||||
DESC format(JSONEachRow, $$
|
||||
{"value" : true}
|
||||
{"value" : "Hello, World"}
|
||||
$$)
|
||||
```
|
||||
```response
|
||||
┌─name──┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ value │ Nullable(String) │ │ │ │ │ │
|
||||
└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
##### input_format_json_read_arrays_as_strings
|
||||
|
||||
Enabling this setting allows reading JSON array values as strings.
|
||||
|
@ -377,6 +377,12 @@ Allow parsing bools as numbers in JSON input formats.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
## input_format_json_read_bools_as_strings {#input_format_json_read_bools_as_strings}
|
||||
|
||||
Allow parsing bools as strings in JSON input formats.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
## input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings}
|
||||
|
||||
Allow parsing numbers as strings in JSON input formats.
|
||||
|
@ -983,6 +983,7 @@ class IColumn;
|
||||
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
|
||||
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
|
||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
|
||||
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
|
||||
M(Bool, input_format_json_read_numbers_as_strings, true, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||
|
@ -81,6 +81,7 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"24.1", {{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}}},
|
||||
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
|
||||
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
|
||||
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
|
||||
|
@ -335,6 +335,22 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
|
||||
{
|
||||
read(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
|
||||
}
|
||||
else if (settings.json.read_bools_as_strings && !istr.eof() && (*istr.position() == 't' || *istr.position() == 'f'))
|
||||
{
|
||||
String str_value;
|
||||
if (*istr.position() == 't')
|
||||
{
|
||||
assertString("true", istr);
|
||||
str_value = "true";
|
||||
}
|
||||
else if (*istr.position() == 'f')
|
||||
{
|
||||
assertString("false", istr);
|
||||
str_value = "false";
|
||||
}
|
||||
|
||||
read(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
|
||||
}
|
||||
else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
|
||||
{
|
||||
String field;
|
||||
|
@ -450,10 +450,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
|
||||
break;
|
||||
case FormatSettings::EscapingRule::JSON:
|
||||
result += fmt::format(
|
||||
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_objects_as_strings={}, read_numbers_as_strings={}, "
|
||||
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_bools_as_strings={}, read_objects_as_strings={}, read_numbers_as_strings={}, "
|
||||
"read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}",
|
||||
settings.json.try_infer_numbers_from_strings,
|
||||
settings.json.read_bools_as_numbers,
|
||||
settings.json.read_bools_as_strings,
|
||||
settings.json.read_objects_as_strings,
|
||||
settings.json.read_numbers_as_strings,
|
||||
settings.json.read_arrays_as_strings,
|
||||
|
@ -111,6 +111,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
|
||||
format_settings.json.quote_decimals = settings.output_format_json_quote_decimals;
|
||||
format_settings.json.read_bools_as_numbers = settings.input_format_json_read_bools_as_numbers;
|
||||
format_settings.json.read_bools_as_strings = settings.input_format_json_read_bools_as_strings;
|
||||
format_settings.json.read_numbers_as_strings = settings.input_format_json_read_numbers_as_strings;
|
||||
format_settings.json.read_objects_as_strings = settings.input_format_json_read_objects_as_strings;
|
||||
format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings;
|
||||
|
@ -204,6 +204,7 @@ struct FormatSettings
|
||||
bool ignore_unknown_keys_in_named_tuple = false;
|
||||
bool serialize_as_strings = false;
|
||||
bool read_bools_as_numbers = true;
|
||||
bool read_bools_as_strings = true;
|
||||
bool read_numbers_as_strings = true;
|
||||
bool read_objects_as_strings = true;
|
||||
bool read_arrays_as_strings = true;
|
||||
|
@ -377,6 +377,22 @@ namespace
|
||||
type_indexes.erase(TypeIndex::UInt8);
|
||||
}
|
||||
|
||||
/// If we have Bool and String types convert all numbers to String.
|
||||
/// It's applied only when setting input_format_json_read_bools_as_strings is enabled.
|
||||
void transformJSONBoolsAndStringsToString(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
if (!type_indexes.contains(TypeIndex::String) || !type_indexes.contains(TypeIndex::UInt8))
|
||||
return;
|
||||
|
||||
for (auto & type : data_types)
|
||||
{
|
||||
if (isBool(type))
|
||||
type = std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::UInt8);
|
||||
}
|
||||
|
||||
/// If we have type Nothing/Nullable(Nothing) and some other non Nothing types,
|
||||
/// convert all Nothing/Nullable(Nothing) types to the first non Nothing.
|
||||
/// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)]
|
||||
@ -628,6 +644,10 @@ namespace
|
||||
if (settings.json.read_bools_as_numbers)
|
||||
transformBoolsAndNumbersToNumbers(data_types, type_indexes);
|
||||
|
||||
/// Convert Bool to String if needed.
|
||||
if (settings.json.read_bools_as_strings)
|
||||
transformJSONBoolsAndStringsToString(data_types, type_indexes);
|
||||
|
||||
if (settings.json.try_infer_objects_as_tuples)
|
||||
mergeJSONPaths(data_types, type_indexes, settings, json_info);
|
||||
};
|
||||
|
@ -1382,8 +1382,12 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol '{}' for key '{}'",
|
||||
std::string(*buf.position(), 1), name_of_field.toString());
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}",
|
||||
String(buf.position(), std::min(buf.available(), size_t(10))),
|
||||
std::string(1, *buf.position()),
|
||||
name_of_field.empty() ? "" : " for key " + name_of_field.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1753,7 +1757,7 @@ void readQuotedField(String & s, ReadBuffer & buf)
|
||||
/// Reads a single JSON field from `buf` into `s`; any previous contents of `s` are discarded.
/// The field is consumed via skipJSONField through readParsedValueInto.
/// An empty key name is passed because no key is known at this point, so the error
/// message produced by skipJSONField omits the " for key ..." suffix.
void readJSONField(String & s, ReadBuffer & buf)
{
    s.clear();
    auto parse_func = [](ReadBuffer & in) { skipJSONField(in, ""); };
    readParsedValueInto(s, buf, parse_func);
}
|
||||
|
||||
|
@ -0,0 +1,12 @@
|
||||
true
|
||||
false
|
||||
str
|
||||
true
|
||||
false
|
||||
str
|
||||
['true','false']
|
||||
['false','true']
|
||||
['str1','str2']
|
||||
['true','false']
|
||||
['false','true']
|
||||
['str1','str2']
|
@ -0,0 +1,9 @@
|
||||
-- Checks reading JSON bool literals into String columns in JSONEachRow.
-- Explicitly enable the setting under test.
set input_format_json_read_bools_as_strings=1;
|
||||
-- Explicit 'x String' structure: rows contain true, false and a regular string.
select * from format(JSONEachRow, 'x String', '{"x" : true}, {"x" : false}, {"x" : "str"}');
|
||||
-- Same data without a structure argument, so the column type is inferred.
select * from format(JSONEachRow, '{"x" : true}, {"x" : false}, {"x" : "str"}');
|
||||
-- Truncated literal starting with 't' is expected to fail with an assertion error.
select * from format(JSONEachRow, 'x String', '{"x" : tru}'); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
|
||||
-- Truncated literal starting with 'f' is expected to fail with an assertion error.
select * from format(JSONEachRow, 'x String', '{"x" : fals}'); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
|
||||
-- Unquoted token that does not start with 't' or 'f' is expected to be rejected as incorrect data.
select * from format(JSONEachRow, 'x String', '{"x" : atru}'); -- {serverError INCORRECT_DATA}
|
||||
-- Bools nested in arrays with an explicit 'x Array(String)' structure.
select * from format(JSONEachRow, 'x Array(String)', '{"x" : [true, false]}, {"x" : [false, true]}, {"x" : ["str1", "str2"]}');
|
||||
-- Same array data without a structure argument, so the column type is inferred.
select * from format(JSONEachRow, '{"x" : [true, false]}, {"x" : [false, true]}, {"x" : ["str1", "str2"]}');
|
||||
|
Loading…
Reference in New Issue
Block a user