Merge pull request #46725 from azat/input_format_null_as_default-improvement

Handle input_format_null_as_default for nested types
This commit is contained in:
Kruglov Pavel 2023-02-23 14:55:05 +01:00 committed by GitHub
commit cec282d3e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 95 additions and 28 deletions

View File

@ -1,4 +1,5 @@
#include <DataTypes/Serializations/SerializationArray.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationNamed.h>
#include <DataTypes/DataTypeArray.h>
@ -510,7 +511,10 @@ void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeTextJSON(nested_column, istr, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextJSONImpl(nested_column, istr, settings, nested);
else
nested->deserializeTextJSON(nested_column, istr, settings);
}, false);
}

View File

@ -1,4 +1,5 @@
#include <DataTypes/Serializations/SerializationMap.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeMap.h>
#include <Common/StringUtils/StringUtils.h>
@ -211,7 +212,10 @@ void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, istr,
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
{
subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextJSONImpl(subcolumn, buf, settings, subcolumn_serialization);
else
subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings);
});
}

View File

@ -219,13 +219,9 @@ static ReturnType safeDeserialize(
/// Deserialize value into non-nullable column. In case of NULL, insert default value and return false.
template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, typename std::enable_if_t<std::is_same_v<ReturnType, bool>, ReturnType>* = nullptr>
static ReturnType safeDeserialize(
IColumn & column, const ISerialization & nested,
IColumn & column, const ISerialization &,
CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
{
assert(!dynamic_cast<ColumnNullable *>(&column));
assert(!dynamic_cast<const SerializationNullable *>(&nested));
UNUSED(nested);
bool insert_default = check_for_null();
if (insert_default)
column.insertDefault();

View File

@ -1,4 +1,5 @@
#include <DataTypes/Serializations/SerializationTuple.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/Serializations/SerializationInfoTuple.h>
#include <DataTypes/DataTypeTuple.h>
#include <Core/Field.h>
@ -231,9 +232,13 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
seen_elements[element_pos] = 1;
auto & element_column = extractElementColumn(column, element_pos);
try
{
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextJSONImpl(element_column, istr, settings, elems[element_pos]);
else
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
}
catch (Exception & e)
{

View File

@ -3,11 +3,11 @@ INCORRECT_DATA
NOT_FOUND_COLUMN_IN_BLOCK
(1)
{
"row_1": {"type":"CreateEvent","actor":{"login":"foobar"},"repo":{"name":"ClickHouse\/ClickHouse"},"created_at":"2023-01-26 10:48:02","payload":{"updated_at":"1970-01-01 00:00:00","action":"","comment":{"id":"0","path":"","position":0,"line":0,"user":{"login":""},"diff_hunk":"","original_position":0,"commit_id":"","original_commit_id":""},"review":{"body":"","author_association":"","state":""},"ref":"backport","ref_type":"branch","issue":{"number":0,"title":"","labels":[],"state":"","locked":0,"assignee":{"login":""},"assignees":[],"comment":"","closed_at":"1970-01-01 00:00:00"},"pull_request":{"merged_at":null,"merge_commit_sha":"","requested_reviewers":[],"requested_teams":[],"head":{"ref":"","sha":""},"base":{"ref":"","sha":""},"merged":0,"mergeable":0,"rebaseable":0,"mergeable_state":"","merged_by":null,"review_comments":0,"maintainer_can_modify":0,"commits":0,"additions":0,"deletions":0,"changed_files":0},"size":0,"distinct_size":0,"member":{"login":""},"release":{"tag_name":"","name":""}}}
"row_1": {"type":"CreateEvent","actor":{"login":"foobar"},"repo":{"name":"ClickHouse\/ClickHouse"},"created_at":"2023-01-26 10:48:02","payload":{"updated_at":"1970-01-01 00:00:00","action":"","comment":{"id":"0","path":"","position":0,"line":0,"user":{"login":""},"diff_hunk":"","original_position":0,"commit_id":"","original_commit_id":""},"review":{"body":"","author_association":"","state":""},"ref":"backport","ref_type":"branch","issue":{"number":0,"title":"","labels":[],"state":"","locked":0,"assignee":{"login":""},"assignees":[],"comment":"","closed_at":"1970-01-01 00:00:00"},"pull_request":{"merged_at":"1970-01-01 00:00:00","merge_commit_sha":"","requested_reviewers":[],"requested_teams":[],"head":{"ref":"","sha":""},"base":{"ref":"","sha":""},"merged":0,"mergeable":0,"rebaseable":0,"mergeable_state":"","merged_by":{"login":""},"review_comments":0,"maintainer_can_modify":0,"commits":0,"additions":0,"deletions":0,"changed_files":0},"size":0,"distinct_size":0,"member":{"login":""},"release":{"tag_name":"","name":""}}}
}
{
"row_1": {"labels":[],"merged_by":"<not_merged>"},
"row_1": {"labels":[],"merged_by":""},
"row_2": {"labels":[],"merged_by":"foobar"},
"row_3": {"labels":[],"merged_by":"<not_merged>"},
"row_4": {"labels":["backport"],"merged_by":"<not_merged>"}
"row_3": {"labels":[],"merged_by":""},
"row_4": {"labels":["backport"],"merged_by":""}
}

View File

@ -60,7 +60,7 @@ gharchive_structure=(
closed_at DateTime('UTC')
),
pull_request Tuple(
merged_at Nullable(DateTime('UTC')),
merged_at DateTime('UTC'),
merge_commit_sha String,
requested_reviewers Nested(
login String
@ -80,16 +80,9 @@ gharchive_structure=(
mergeable UInt8,
rebaseable UInt8,
mergeable_state String,
merged_by Nullable(String),
/* NOTE: correct type is Tuple, however Tuple cannot be Nullable,
* so you still have to use Nullable(String) and rely on
* input_format_json_read_objects_as_strings, but see also
* https://github.com/ClickHouse/ClickHouse/issues/36464
*/
/* merged_by Tuple(
* login String
* ),
*/
merged_by Tuple(
login String
),
review_comments UInt32,
maintainer_can_modify UInt8,
commits UInt32,
@ -122,12 +115,10 @@ EOL
# NOTE: due to [1] dot.dot notation needs allow_experimental_analyzer=1 (enabled below); without it only tupleElement() can be used
#
# [1]: https://github.com/ClickHouse/ClickHouse/issues/24607
$CLICKHOUSE_LOCAL "${gharchive_settings[@]}" --structure="${gharchive_structure[*]}" -q "
WITH
tupleElement(tupleElement(payload, 'pull_request'), 'merged_by') AS merged_by_
$CLICKHOUSE_LOCAL --allow_experimental_analyzer=1 "${gharchive_settings[@]}" --structure="${gharchive_structure[*]}" -q "
SELECT
tupleElement(tupleElement(tupleElement(payload, 'issue'), 'labels'), 'name') AS labels,
if(merged_by_ IS NULL, '<not_merged>', JSONExtractString(merged_by_, 'login')) AS merged_by
payload.issue.labels.name AS labels,
payload.pull_request.merged_by.login AS merged_by
FROM table
" <<EOL
{"type":"PullRequestEvent","actor":{"login":"foobar"},"repo":{"name":"ClickHouse/ClickHouse"},"payload":{"ref":"backport","ref_type":"branch","pull_request":{"merged_by":null}}}

View File

@ -0,0 +1,42 @@
-- { echo }
--- ensure that input_format_null_as_default allow writes to Nullable columns too
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1;
((('root')))
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1;
(((NULL)))
--- tuple
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0;
((('root')))
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1;
((('root')))
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=0;
((('')))
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=1;
((('')))
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED }
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1;
((('')))
--- map
select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0;
{'pull_request':{'merged_by':{'login':'root'}}}
select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1;
{'pull_request':{'merged_by':{'login':'root'}}}
select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=0;
{}
select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=1;
{}
select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED }
select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1;
{'pull_request':{'merged_by':{}}}
--- array
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=0;
['root']
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=1;
['root']
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=0;
[]
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=1;
[]
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=0; -- { serverError CANNOT_READ_ARRAY_FROM_TEXT }
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=1;
[]

View File

@ -0,0 +1,25 @@
-- { echo }
--- ensure that input_format_null_as_default allow writes to Nullable columns too
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1;
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login Nullable(String))))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1;
--- tuple
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0;
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1;
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=0;
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {}}') settings input_format_null_as_default=1;
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED }
select * from format(JSONEachRow, 'payload Tuple(pull_request Tuple(merged_by Tuple(login String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1;
--- map
select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=0;
select * from format(JSONEachRow, '{"payload" : {"pull_request": {"merged_by": {"login": "root"}}}}') settings input_format_null_as_default=1;
select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=0;
select * from format(JSONEachRow, 'payload Map(String, String)', '{"payload" : {}}') settings input_format_null_as_default=1;
select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=0; -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED }
select * from format(JSONEachRow, 'payload Map(String, Map(String, Map(String, String)))', '{"payload" : {"pull_request": {"merged_by": null}}}') settings input_format_null_as_default=1;
--- array
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=0;
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : ["root"]}') settings input_format_null_as_default=1;
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=0;
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : []}') settings input_format_null_as_default=1;
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=0; -- { serverError CANNOT_READ_ARRAY_FROM_TEXT }
select * from format(JSONEachRow, 'payload Array(String)', '{"payload" : null}') settings input_format_null_as_default=1;