Fix tests, add docs for setting type_json_skip_duplicated_paths

This commit is contained in:
avogar 2024-07-20 22:57:42 +00:00
parent 1ce13df07c
commit a296717e14
6 changed files with 30 additions and 16 deletions

View File

@ -5611,7 +5611,13 @@ Default value: `1GiB`.
## use_json_alias_for_old_object_type
When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/ob) type instead of the new [JSON](../../sql-reference/data-types/json.md) type.
When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/object-json.md) type instead of the new [JSON](../../sql-reference/data-types/json.md) type.
This setting requires server restart to take effect when changed.
Default value: `false`.
## type_json_skip_duplicated_paths
When enabled, ClickHouse skips duplicated paths while parsing a [JSON](../../sql-reference/data-types/json.md) object: only the value of the first occurrence of each path is inserted. When disabled, a duplicated path causes parsing to fail with an error.
Default value: `false`.

View File

@ -83,6 +83,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."},
{"allow_experimental_json_type", false, false, "Add new experimental JSON type"},
{"use_json_alias_for_old_object_type", true, false, "Use JSON type alias to create new JSON type"},
{"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"}
}},
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},

View File

@ -599,7 +599,7 @@ void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, co
if (typed_column->size() > prev_size)
{
if (!settings.json.type_json_skip_duplicated_paths)
throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of Object type: {}", path);
throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
}
else
{
@ -613,7 +613,7 @@ void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, co
if (dynamic_it->second->size() > prev_size)
{
if (!settings.json.type_json_skip_duplicated_paths)
throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of Object type: {}", path);
throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
}
dynamic_serialization->deserializeBinary(*dynamic_it->second, istr, settings);
@ -648,7 +648,7 @@ void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, co
if (i != 0 && path == paths_and_values_for_shared_data[i - 1].first)
{
if (!settings.json.type_json_skip_duplicated_paths)
throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of Object type: {}", path);
throw Exception(ErrorCodes::INCORRECT_DATA, "Found duplicated path during binary deserialization of JSON type: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
}
else
{

View File

@ -1629,7 +1629,7 @@ public:
{
if (!format_settings.json.type_json_skip_duplicated_paths)
{
error = fmt::format("Duplicate path found during parsing JSON object: {}", path);
error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", path);
SerializationObject::restoreColumnObject(column_object, prev_size);
return false;
}
@ -1697,7 +1697,7 @@ private:
{
if (!format_settings.json.type_json_skip_duplicated_paths)
{
error = fmt::format("Duplicate path found during parsing JSON object: {}", current_path);
error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", current_path);
return false;
}
}
@ -1715,7 +1715,7 @@ private:
{
if (!format_settings.json.type_json_skip_duplicated_paths)
{
error = fmt::format("Duplicate path found during parsing JSON object: {}", current_path);
error = fmt::format("Duplicate path found during parsing JSON object: {}. You can enable setting type_json_skip_duplicated_paths to skip duplicated paths during insert", current_path);
return false;
}
}

View File

@ -780,11 +780,18 @@ namespace
/// Check if it's just a number, and if so, don't try to infer DateTime from it,
/// because we can interpret this number as a timestamp and it will lead to
/// inferring DateTime instead of simple Int64/Float64 in some cases.
/// inferring DateTime instead of simple Int64 in some cases.
if (std::all_of(field.begin(), field.end(), isNumericASCII))
return false;
ReadBufferFromString buf(field);
Float64 tmp_float;
/// Check if it's a float value, and if so, don't try to infer DateTime from it,
/// because it will lead to inferring DateTime instead of simple Float64 in some cases.
if (tryReadFloatText(tmp_float, buf) && buf.eof())
return false;
buf.seek(0, SEEK_SET); /// Return position to the beginning
DateTime64 tmp;
switch (settings.date_time_input_format)
{

View File

@ -9,10 +9,10 @@ ORDER BY i;
INSERT INTO test02910 (i, jString) SELECT 1, '{"a":"123"}';
ALTER TABLE test02910 ADD COLUMN j2 Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910 ADD COLUMN j2 Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910 ADD COLUMN j2 Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910 ADD COLUMN j2 JSON default jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910 ADD COLUMN j2 Tuple(Object('json')) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910 ADD COLUMN j2 Tuple(Float64, Object('json')); -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910 ADD COLUMN j2 Tuple(Array(Tuple(Object('json')))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910 ADD COLUMN j2 Object('json') default jString; -- { serverError SUPPORT_IS_DISABLED }
-- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server.
-- SELECT * FROM test02910;
@ -37,10 +37,10 @@ INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111',
INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', ['c', 'd'] FROM numbers(10);
INSERT INTO test02910_second SELECT number, number, '2023-10-28 11:11:11.11111', [] FROM numbers(10);
ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(JSON) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Float64, JSON); -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Array(Tuple(JSON))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910_second ADD COLUMN `tags_json` JSON; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Object('json')) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Float64, Object('json')); -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910_second ADD COLUMN `tags_json` Tuple(Array(Tuple(Object('json')))) DEFAULT jString; -- { serverError SUPPORT_IS_DISABLED }
ALTER TABLE test02910_second ADD COLUMN `tags_json` Object('json'); -- { serverError SUPPORT_IS_DISABLED }
-- If we would allow adding a column with dynamic subcolumns the subsequent select would crash the server.
-- SELECT * FROM test02910;