mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-29 11:02:08 +00:00
Allow to read Bool values into String in JSON input formats
This commit is contained in:
parent
7e6e835e2e
commit
7e5ba62017
@ -1262,6 +1262,7 @@ SELECT * FROM json_each_row_nested
|
|||||||
|
|
||||||
- [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`.
|
- [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`.
|
||||||
- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
|
- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
|
||||||
|
- [input_format_json_read_bools_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_strings) - allow to parse bools as strings in JSON input formats. Default value - `true`.
|
||||||
- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `true`.
|
- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `true`.
|
||||||
- [input_format_json_read_arrays_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_arrays_as_strings) - allow to parse JSON arrays as strings in JSON input formats. Default value - `true`.
|
- [input_format_json_read_arrays_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_arrays_as_strings) - allow to parse JSON arrays as strings in JSON input formats. Default value - `true`.
|
||||||
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `true`.
|
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `true`.
|
||||||
|
@ -614,6 +614,26 @@ DESC format(JSONEachRow, $$
|
|||||||
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
|
##### input_format_json_read_bools_as_strings
|
||||||
|
|
||||||
|
Enabling this setting allows reading Bool values as strings.
|
||||||
|
|
||||||
|
This setting is enabled by default.
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SET input_format_json_read_bools_as_strings = 1;
|
||||||
|
DESC format(JSONEachRow, $$
|
||||||
|
{"value" : true}
|
||||||
|
{"value" : "Hello, World"}
|
||||||
|
$$)
|
||||||
|
```
|
||||||
|
```response
|
||||||
|
┌─name──┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||||
|
│ value │ Nullable(String) │              │                    │         │                  │                │
|
||||||
|
└───────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||||
|
```
|
||||||
##### input_format_json_read_arrays_as_strings
|
##### input_format_json_read_arrays_as_strings
|
||||||
|
|
||||||
Enabling this setting allows reading JSON array values as strings.
|
Enabling this setting allows reading JSON array values as strings.
|
||||||
|
@ -377,6 +377,12 @@ Allow parsing bools as numbers in JSON input formats.
|
|||||||
|
|
||||||
Enabled by default.
|
Enabled by default.
|
||||||
|
|
||||||
|
## input_format_json_read_bools_as_strings {#input_format_json_read_bools_as_strings}
|
||||||
|
|
||||||
|
Allow parsing bools as strings in JSON input formats.
|
||||||
|
|
||||||
|
Enabled by default.
|
||||||
|
|
||||||
## input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings}
|
## input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings}
|
||||||
|
|
||||||
Allow parsing numbers as strings in JSON input formats.
|
Allow parsing numbers as strings in JSON input formats.
|
||||||
|
@ -983,6 +983,7 @@ class IColumn;
|
|||||||
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
|
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
|
||||||
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
|
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
|
||||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||||
|
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
|
||||||
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
|
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
|
||||||
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
|
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
|
||||||
M(Bool, input_format_json_read_numbers_as_strings, true, "Allow to parse numbers as strings in JSON input formats", 0) \
|
M(Bool, input_format_json_read_numbers_as_strings, true, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||||
|
@ -81,6 +81,7 @@ namespace SettingsChangesHistory
|
|||||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||||
{
|
{
|
||||||
|
{"24.1", {{"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}}},
|
||||||
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
|
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
|
||||||
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
|
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
|
||||||
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
|
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
|
||||||
|
@ -335,6 +335,22 @@ void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & ist
|
|||||||
{
|
{
|
||||||
read(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
|
read(column, [&](ColumnString::Chars & data) { readJSONArrayInto(data, istr); });
|
||||||
}
|
}
|
||||||
|
else if (settings.json.read_bools_as_strings && !istr.eof() && (*istr.position() == 't' || *istr.position() == 'f'))
|
||||||
|
{
|
||||||
|
String str_value;
|
||||||
|
if (*istr.position() == 't')
|
||||||
|
{
|
||||||
|
assertString("true", istr);
|
||||||
|
str_value = "true";
|
||||||
|
}
|
||||||
|
else if (*istr.position() == 'f')
|
||||||
|
{
|
||||||
|
assertString("false", istr);
|
||||||
|
str_value = "false";
|
||||||
|
}
|
||||||
|
|
||||||
|
read(column, [&](ColumnString::Chars & data) { data.insert(str_value.begin(), str_value.end()); });
|
||||||
|
}
|
||||||
else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
|
else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
|
||||||
{
|
{
|
||||||
String field;
|
String field;
|
||||||
|
@ -450,10 +450,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
|
|||||||
break;
|
break;
|
||||||
case FormatSettings::EscapingRule::JSON:
|
case FormatSettings::EscapingRule::JSON:
|
||||||
result += fmt::format(
|
result += fmt::format(
|
||||||
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_objects_as_strings={}, read_numbers_as_strings={}, "
|
", try_infer_numbers_from_strings={}, read_bools_as_numbers={}, read_bools_as_strings={}, read_objects_as_strings={}, read_numbers_as_strings={}, "
|
||||||
"read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}",
|
"read_arrays_as_strings={}, try_infer_objects_as_tuples={}, infer_incomplete_types_as_strings={}, try_infer_objects={}",
|
||||||
settings.json.try_infer_numbers_from_strings,
|
settings.json.try_infer_numbers_from_strings,
|
||||||
settings.json.read_bools_as_numbers,
|
settings.json.read_bools_as_numbers,
|
||||||
|
settings.json.read_bools_as_strings,
|
||||||
settings.json.read_objects_as_strings,
|
settings.json.read_objects_as_strings,
|
||||||
settings.json.read_numbers_as_strings,
|
settings.json.read_numbers_as_strings,
|
||||||
settings.json.read_arrays_as_strings,
|
settings.json.read_arrays_as_strings,
|
||||||
|
@ -111,6 +111,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
|||||||
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
|
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
|
||||||
format_settings.json.quote_decimals = settings.output_format_json_quote_decimals;
|
format_settings.json.quote_decimals = settings.output_format_json_quote_decimals;
|
||||||
format_settings.json.read_bools_as_numbers = settings.input_format_json_read_bools_as_numbers;
|
format_settings.json.read_bools_as_numbers = settings.input_format_json_read_bools_as_numbers;
|
||||||
|
format_settings.json.read_bools_as_strings = settings.input_format_json_read_bools_as_strings;
|
||||||
format_settings.json.read_numbers_as_strings = settings.input_format_json_read_numbers_as_strings;
|
format_settings.json.read_numbers_as_strings = settings.input_format_json_read_numbers_as_strings;
|
||||||
format_settings.json.read_objects_as_strings = settings.input_format_json_read_objects_as_strings;
|
format_settings.json.read_objects_as_strings = settings.input_format_json_read_objects_as_strings;
|
||||||
format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings;
|
format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings;
|
||||||
|
@ -204,6 +204,7 @@ struct FormatSettings
|
|||||||
bool ignore_unknown_keys_in_named_tuple = false;
|
bool ignore_unknown_keys_in_named_tuple = false;
|
||||||
bool serialize_as_strings = false;
|
bool serialize_as_strings = false;
|
||||||
bool read_bools_as_numbers = true;
|
bool read_bools_as_numbers = true;
|
||||||
|
bool read_bools_as_strings = true;
|
||||||
bool read_numbers_as_strings = true;
|
bool read_numbers_as_strings = true;
|
||||||
bool read_objects_as_strings = true;
|
bool read_objects_as_strings = true;
|
||||||
bool read_arrays_as_strings = true;
|
bool read_arrays_as_strings = true;
|
||||||
|
@ -377,6 +377,22 @@ namespace
|
|||||||
type_indexes.erase(TypeIndex::UInt8);
|
type_indexes.erase(TypeIndex::UInt8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If we have Bool and String types, convert all Bools to String.
|
||||||
|
/// It's applied only when setting input_format_json_read_bools_as_strings is enabled.
|
||||||
|
void transformJSONBoolsAndStringsToString(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||||
|
{
|
||||||
|
if (!type_indexes.contains(TypeIndex::String) || !type_indexes.contains(TypeIndex::UInt8))
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (auto & type : data_types)
|
||||||
|
{
|
||||||
|
if (isBool(type))
|
||||||
|
type = std::make_shared<DataTypeString>();
|
||||||
|
}
|
||||||
|
|
||||||
|
type_indexes.erase(TypeIndex::UInt8);
|
||||||
|
}
|
||||||
|
|
||||||
/// If we have type Nothing/Nullable(Nothing) and some other non Nothing types,
|
/// If we have type Nothing/Nullable(Nothing) and some other non Nothing types,
|
||||||
/// convert all Nothing/Nullable(Nothing) types to the first non Nothing.
|
/// convert all Nothing/Nullable(Nothing) types to the first non Nothing.
|
||||||
/// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)]
|
/// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)]
|
||||||
@ -628,6 +644,10 @@ namespace
|
|||||||
if (settings.json.read_bools_as_numbers)
|
if (settings.json.read_bools_as_numbers)
|
||||||
transformBoolsAndNumbersToNumbers(data_types, type_indexes);
|
transformBoolsAndNumbersToNumbers(data_types, type_indexes);
|
||||||
|
|
||||||
|
/// Convert Bool to String if needed.
|
||||||
|
if (settings.json.read_bools_as_strings)
|
||||||
|
transformJSONBoolsAndStringsToString(data_types, type_indexes);
|
||||||
|
|
||||||
if (settings.json.try_infer_objects_as_tuples)
|
if (settings.json.try_infer_objects_as_tuples)
|
||||||
mergeJSONPaths(data_types, type_indexes, settings, json_info);
|
mergeJSONPaths(data_types, type_indexes, settings, json_info);
|
||||||
};
|
};
|
||||||
|
@ -1382,8 +1382,12 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol '{}' for key '{}'",
|
throw Exception(
|
||||||
std::string(*buf.position(), 1), name_of_field.toString());
|
ErrorCodes::INCORRECT_DATA,
|
||||||
|
"Cannot read JSON field here: '{}'. Unexpected symbol '{}'{}",
|
||||||
|
String(buf.position(), std::min(buf.available(), size_t(10))),
|
||||||
|
std::string(1, *buf.position()),
|
||||||
|
name_of_field.empty() ? "" : " for key " + name_of_field.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1753,7 +1757,7 @@ void readQuotedField(String & s, ReadBuffer & buf)
|
|||||||
void readJSONField(String & s, ReadBuffer & buf)
|
void readJSONField(String & s, ReadBuffer & buf)
|
||||||
{
|
{
|
||||||
s.clear();
|
s.clear();
|
||||||
auto parse_func = [](ReadBuffer & in) { skipJSONField(in, "json_field"); };
|
auto parse_func = [](ReadBuffer & in) { skipJSONField(in, ""); };
|
||||||
readParsedValueInto(s, buf, parse_func);
|
readParsedValueInto(s, buf, parse_func);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,12 @@
|
|||||||
|
true
|
||||||
|
false
|
||||||
|
str
|
||||||
|
true
|
||||||
|
false
|
||||||
|
str
|
||||||
|
['true','false']
|
||||||
|
['false','true']
|
||||||
|
['str1','str2']
|
||||||
|
['true','false']
|
||||||
|
['false','true']
|
||||||
|
['str1','str2']
|
@ -0,0 +1,9 @@
|
|||||||
|
set input_format_json_read_bools_as_strings=1;
|
||||||
|
select * from format(JSONEachRow, 'x String', '{"x" : true}, {"x" : false}, {"x" : "str"}');
|
||||||
|
select * from format(JSONEachRow, '{"x" : true}, {"x" : false}, {"x" : "str"}');
|
||||||
|
select * from format(JSONEachRow, 'x String', '{"x" : tru}'); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
|
||||||
|
select * from format(JSONEachRow, 'x String', '{"x" : fals}'); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED}
|
||||||
|
select * from format(JSONEachRow, 'x String', '{"x" : atru}'); -- {serverError INCORRECT_DATA}
|
||||||
|
select * from format(JSONEachRow, 'x Array(String)', '{"x" : [true, false]}, {"x" : [false, true]}, {"x" : ["str1", "str2"]}');
|
||||||
|
select * from format(JSONEachRow, '{"x" : [true, false]}, {"x" : [false, true]}, {"x" : ["str1", "str2"]}');
|
||||||
|
|
Loading…
Reference in New Issue
Block a user