From a296717e14bf43714ab921f8f5f97105104f3055 Mon Sep 17 00:00:00 2001
From: avogar
Date: Sat, 20 Jul 2024 22:57:42 +0000
Subject: [PATCH] Fix tests, add docs for setting type_json_skip_duplicated_paths

---
 docs/en/operations/settings/settings.md        |  8 +++++++-
 src/Core/SettingsChangesHistory.cpp            |  1 +
 .../Serializations/SerializationObject.cpp     |  6 +++---
 src/Formats/JSONExtractTree.cpp                |  6 +++---
 src/Formats/SchemaInferenceUtils.cpp           |  9 ++++++++-
 .../02910_object-json-crash-add-column.sql     | 16 ++++++++--------
 6 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 9ddfe985cee..720cf1ce959 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -5611,7 +5611,13 @@ Default value: `1GiB`.
 
 ## use_json_alias_for_old_object_type
 
-When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/ob) type instead of the new [JSON](../../sql-reference/data-types/json.md) type.
+When enabled, the `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/object-json.md) type instead of the new [JSON](../../sql-reference/data-types/json.md) type. This setting requires a server restart to take effect when changed.
 
 Default value: `false`.
 
+
+## type_json_skip_duplicated_paths
+
+When enabled, ClickHouse will skip duplicated paths during parsing of a [JSON](../../sql-reference/data-types/json.md) object. Only the value of the first occurrence of each path will be inserted.
+
+Default value: `false`.
 
diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index 50a99c51dcb..47555ec290f 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -83,6 +83,7 @@ static std::initializer_listsize() > prev_size)
         {
             if (!settings.json.type_json_skip_duplicated_paths)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of Object type: {}", path);
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
         }
         else
         {
@@ -613,7 +613,7 @@ void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, co
         if (dynamic_it->second->size() > prev_size)
         {
             if (!settings.json.type_json_skip_duplicated_paths)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of Object type: {}", path);
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
         }
 
         dynamic_serialization->deserializeBinary(*dynamic_it->second, istr, settings);
@@ -648,7 +648,7 @@ void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, co
         if (i != 0 && path == paths_and_values_for_shared_data[i - 1].first)
         {
             if (!settings.json.type_json_skip_duplicated_paths)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of Object type: {}", path);
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
         }
         else
         {
diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp
index e2c127b228d..680588c7825 100644
--- a/src/Formats/JSONExtractTree.cpp
+++ b/src/Formats/JSONExtractTree.cpp
@@ -1629,7 +1629,7 @@ public:
             {
                 if (!format_settings.json.type_json_skip_duplicated_paths)
                 {
-                    error = fmt::format("Duplicate path found during parsing JSON object: {}", path);
+                    error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
                     SerializationObject::restoreColumnObject(column_object, prev_size);
                     return false;
                 }
@@ -1697,7 +1697,7 @@ private:
             {
                 if (!format_settings.json.type_json_skip_duplicated_paths)
                 {
-                    error = fmt::format("Duplicate path found during parsing JSON object: {}", current_path);
+                    error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", current_path);
                     return false;
                 }
             }
@@ -1715,7 +1715,7 @@ private:
             {
                 if (!format_settings.json.type_json_skip_duplicated_paths)
                 {
-                    error = fmt::format("Duplicate path found during parsing JSON object: {}", current_path);
+                    error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", current_path);
                     return false;
                 }
             }
diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp
index d245200eefa..441e7d77d24 100644
--- a/src/Formats/SchemaInferenceUtils.cpp
+++ b/src/Formats/SchemaInferenceUtils.cpp
@@ -780,11 +780,18 @@ namespace
 
     /// Check if it's just a number, and if so, don't try to infer DateTime from it,
     /// because we can interpret this number as a timestamp and it will lead to
-    /// inferring DateTime instead of simple Int64/Float64 in some cases.
+    /// inferring DateTime instead of simple Int64 in some cases.
     if (std::all_of(field.begin(), field.end(), isNumericASCII))
         return false;
 
     ReadBufferFromString buf(field);
+    Float64 tmp_float;
+    /// Check if it's a float value, and if so, don't try to infer DateTime from it,
+    /// because it will lead to inferring DateTime instead of simple Float64 in some cases.
+    if (tryReadFloatText(tmp_float, buf) && buf.eof())
+        return false;
+
+    buf.seek(0, SEEK_SET); /// Return position to the beginning
     DateTime64 tmp;
     switch (settings.date_time_input_format)
     {
diff --git a/tests/queries/0_stateless/02910_object-json-crash-add-column.sql b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql
index b2d64be1676..0573ac928f8 100644
--- a/tests/queries/0_stateless/02910_object-json-crash-add-column.sql
+++ b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql
@@ -9,10 +9,10 @@ ORDER BY i;
 
 INSERT INTO test02910 (i, jString) SELECT 1, '{"a":"123"}';
 
-ALTER TABLE test02910 ADD COLUMN j2 Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
-ALTER TABLE test02910 ADD COLUMN j2 Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED }
-ALTER TABLE test02910 ADD COLUMN j2 Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
-ALTER TABLE test02910 ADD COLUMN j2 JSON default jString; -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910 ADD COLUMN j2 Tuple(Object('json')) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910 ADD COLUMN j2 Tuple(Float64, Object('json')); -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910 ADD COLUMN j2 Tuple(Array(Tuple(Object('json')))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910 ADD COLUMN j2 Object('json') default jString; -- { serverError SUPPORT_IS_DISABLED }
 
 -- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server.
 -- SELECT * FROM test02910;
@@ -37,10 +37,10 @@ INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111',
 INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', ['c', 'd'] FROM numbers(10);
 INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', [] FROM numbers(10);
 
-ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
-ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED }
-ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
-ALTER TABLE test02910_second ADD COLUMN `tags_json` JSON; -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Object('json')) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Float64, Object('json')); -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Array(Tuple(Object('json')))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
+ALTER TABLE test02910_second ADD COLUMN `tags_json` Object('json'); -- { serverError SUPPORT_IS_DISABLED }
 
 -- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server.
 -- SELECT * FROM test02910;
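
Illustrative usage of the new type_json_skip_duplicated_paths setting documented above (a minimal sketch, not part of the patch): the table name t_json_dup, the sample data, and the commented-out experimental-type setting are assumptions for the example, not taken from the change itself.

-- SET allow_experimental_json_type = 1; -- may be required on builds where the JSON type is still experimental
CREATE TABLE t_json_dup (json JSON) ENGINE = Memory;

-- By default a duplicated path in the input is an error, e.g.
-- "Duplicate path found during parsing JSON object: a. You can enable setting
-- type_json_skip_duplicated_paths to skip duplicated paths during insert":
INSERT INTO t_json_dup VALUES ('{"a" : 1, "a" : 2}'); -- expected to fail

-- With the setting enabled, only the value of the first occurrence of each path is kept:
SET type_json_skip_duplicated_paths = 1;
INSERT INTO t_json_dup VALUES ('{"a" : 1, "a" : 2}');
SELECT json.a FROM t_json_dup; -- expected to return 1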
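
A rough sketch of the schema inference change in SchemaInferenceUtils.cpp: a float-looking string should no longer be inferred as a DateTime. The query, the setting used, and the expected output are illustrative assumptions, not part of the patch or its tests.

-- Before the fix, a quoted value like "123.456" could be read as a fractional Unix
-- timestamp and inferred as DateTime64; with the fix it should stay a plain number.
SET input_format_json_try_infer_numbers_from_strings = 1;
DESC format(JSONEachRow, '{"value" : "123.456"}');
-- expected: value  Nullable(Float64)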