mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge pull request #55099 from Avogar/dont-infer-numbers-from-strings-by-default
Don't try to infer numbers from strings in JSON formats by default to avoid parsing errors
This commit is contained in:
commit
785df775bb
@ -1263,6 +1263,7 @@ SELECT * FROM json_each_row_nested
|
||||
- [input_format_json_read_arrays_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_arrays_as_strings) - allow to parse JSON arrays as strings in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
|
||||
- [input_format_json_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_try_infer_numbers_from_strings) - try to infer numbers from string fields during schema inference. Default value - `false`.
|
||||
- [input_format_json_try_infer_named_tuples_from_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_try_infer_named_tuples_from_objects) - try to infer named tuple from JSON objects during schema inference. Default value - `true`.
|
||||
- [input_format_json_infer_incomplete_types_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_infer_incomplete_types_as_strings) - use type String for keys that contain only Nulls or empty objects/arrays during schema inference in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
|
||||
|
@ -497,7 +497,7 @@ most likely this column contains only Nulls or empty Arrays/Maps.
|
||||
|
||||
Enabling this setting allows inferring numbers from string values.
|
||||
|
||||
This setting is enabled by default.
|
||||
This setting is disabled by default.
|
||||
|
||||
**Example:**
|
||||
|
||||
|
@ -383,6 +383,13 @@ Allow parsing numbers as strings in JSON input formats.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
## input_format_json_try_infer_numbers_from_strings {#input_format_json_try_infer_numbers_from_strings}
|
||||
|
||||
If enabled, during schema inference ClickHouse will try to infer numbers from string fields.
|
||||
It can be useful if JSON data contains quoted UInt64 numbers.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings}
|
||||
|
||||
Allow parsing JSON objects as strings in JSON input formats.
|
||||
|
@ -923,7 +923,7 @@ class IColumn;
|
||||
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
|
||||
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \
|
||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_try_infer_numbers_from_strings, true, "Try to infer numbers from string fields while schema inference", 0) \
|
||||
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \
|
||||
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
|
||||
M(Bool, input_format_json_read_numbers_as_strings, true, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
|
||||
|
@ -85,6 +85,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"},
|
||||
{"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"},
|
||||
{"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"},
|
||||
{"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"},
|
||||
{"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}},
|
||||
{"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}},
|
||||
{"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
|
||||
|
@ -1,5 +1,6 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
set input_format_json_try_infer_numbers_from_strings=1;
|
||||
insert into function file('02374_data1.jsonl') select number as x, 'str' as s from numbers(10);
|
||||
insert into function file('02374_data2.jsonl') select number as x, 'str' as s from numbers(10);
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
-- Tags: no-parallel, no-fasttest
|
||||
|
||||
system drop schema cache for file;
|
||||
|
||||
set input_format_json_try_infer_numbers_from_strings=1;
|
||||
{% for format in ['TSV', 'TSVWithNames', 'CSV', 'CSVWithNames', 'TSKV', 'CustomSeparated', 'JSONEachRow', 'JSONCompactEachRow', 'Values'] -%}
|
||||
|
||||
select '{{ format }}';
|
||||
|
@ -1,4 +1,5 @@
|
||||
-- Tags: no-parallel, no-fasttest
|
||||
set input_format_json_try_infer_numbers_from_strings=1;
|
||||
select number, 'Hello' as str, range(number) as arr from numbers(3) format JSONObjectEachRow;
|
||||
insert into function file(02417_data.jsonObjectEachRow) select number, 'Hello' as str, range(number) as arr from numbers(3) settings engine_file_truncate_on_insert=1;
|
||||
desc file(02417_data.jsonObjectEachRow);
|
||||
|
@ -1,5 +1,6 @@
|
||||
-- Tags: no-fasttest, no-parallel
|
||||
set format_json_object_each_row_column_for_object_name='name';
|
||||
set input_format_json_try_infer_numbers_from_strings=1;
|
||||
|
||||
select number, concat('name_', toString(number)) as name from numbers(3) format JSONObjectEachRow;
|
||||
select number, concat('name_', toString(number)) as name, number + 1 as x from numbers(3) format JSONObjectEachRow;
|
||||
|
@ -15,4 +15,4 @@ echo -ne '{"number" : [18446744073709551615, 10, 11]}'| $CLICKHOUSE_LOCAL --tabl
|
||||
echo -ne '{"number" : [18446744073709551615, true, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
|
||||
echo -ne '{"number" : 18446744073709551615}, {"number" : 10}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
|
||||
echo -ne '{"number" : 18446744073709551615}, {"number" : false}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
|
||||
echo -ne '{"number" : "18446744073709551615"}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
|
||||
echo -ne '{"number" : "18446744073709551615"}' | $CLICKHOUSE_LOCAL --input_format_json_try_infer_numbers_from_strings=1 --table=test --input-format=JSONEachRow -q "desc test";
|
||||
|
@ -1,3 +1,4 @@
|
||||
set input_format_json_try_infer_numbers_from_strings=1;
|
||||
desc format(JSONEachRow, '{"x" : "20000101"}');
|
||||
select * from format(JSONEachRow, '{"x" : "20000101"}');
|
||||
select * from format(JSONEachRow, '{"x" : "19000101"}');
|
||||
|
Loading…
Reference in New Issue
Block a user