Merge pull request #47650 from Avogar/nested-arrays-protobuf

Allow reading/writing nested arrays in Protobuf with only root field name as column name
This commit is contained in:
Alexey Milovidov 2023-03-18 22:18:01 +03:00 committed by GitHub
commit 8821f45a20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 0 deletions

View File

@ -3401,7 +3401,41 @@ namespace
const auto & array_data_type = assert_cast<const DataTypeArray &>(*data_type);
if (!allow_repeat)
{
/// Case of nested Arrays. Nested Array can be a message with one repeated field.
/// For example, we have a column `arr Array(Array(UInt32))` and the following proto schema:
/// message Message {
/// message NestedArray {
/// repeated uint32 nested = 2;
/// }
/// repeated NestedArray arr = 1;
/// }
if (field_descriptor.message_type() && field_descriptor.message_type()->field_count() == 1)
{
Names column_names = {field_descriptor.message_type()->field(0)->name()};
DataTypes data_types = {data_type};
/// Try to serialize as a nested message.
std::vector<size_t> used_column_indices;
auto message_serializer = buildMessageSerializerImpl(
1,
column_names.data(),
data_types.data(),
*field_descriptor.message_type(),
/* with_length_delimiter = */ false,
google_wrappers_special_treatment,
&field_descriptor,
used_column_indices,
/* columns_are_reordered_outside = */ false,
/* check_nested_while_filling_missing_columns = */ false);
if (!message_serializer)
return nullptr;
return message_serializer;
}
throwFieldNotRepeated(field_descriptor, column_name);
}
auto nested_serializer = buildFieldSerializer(column_name, array_data_type.getNestedType(), field_descriptor,
/* allow_repeat = */ false, // We do our repeating now, so for nested type we forget about the repeating.

View File

@ -0,0 +1 @@
[[[42,42],[],[42]],[[],[42],[42,42,42,42]]]

View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Tags: no-fasttest

# Round-trip check for a three-level nested array through the Protobuf format.
#
# The first clickhouse-local invocation writes a single row whose column `a`
# is an Array(Array(Array(...))) literal, serialized as Protobuf with message
# `ABC` from the 00825_protobuf_format_array_3dim schema. Its stdout is piped
# into a second clickhouse-local invocation that parses the bytes with the same
# schema into a table with structure `a Array(Array(Array(Int32)))` and prints
# the rows back.
#
# NOTE(review): the `# Tags:` line above is presumably consumed by the test
# runner; keep it directly below the shebang — confirm before moving it.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR="${CURDIR}/format_schemas"

# shellcheck source=../shell_config.sh
. "${CURDIR}/../shell_config.sh"

# "<schema file path>:<message name>", shared by the writer and the reader.
schema_spec="${SCHEMADIR}/00825_protobuf_format_array_3dim:ABC"

# Query that emits the nested array in Protobuf format.
write_query="select [[[42, 42], [], [42]], [[], [42], [42, 42, 42, 42]]] as a format Protobuf settings format_schema = '${schema_spec}'"

# $CLICKHOUSE_LOCAL is intentionally left unquoted, exactly as before, so that
# any arguments it carries are still split into separate words.
$CLICKHOUSE_LOCAL -q "${write_query}" |
    $CLICKHOUSE_LOCAL \
        --input-format Protobuf \
        --format_schema="${schema_spec}" \
        --structure="a Array(Array(Array(Int32)))" \
        -q "select * from table"