mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Backport #70697 to 24.8: Fix infinite recursion when inferring a proto schema with skip unsupported fields enabled
This commit is contained in:
parent
d2acaba718
commit
f4d546fd2e
@ -3725,20 +3725,14 @@ namespace
|
||||
const google::protobuf::FieldDescriptor * field_descriptor,
|
||||
bool skip_unsupported_fields,
|
||||
bool allow_repeat,
|
||||
std::unordered_set<const google::protobuf::FieldDescriptor *> & pending_resolution)
|
||||
std::unordered_set<const google::protobuf::FieldDescriptor *> & unresolved_descriptors)
|
||||
{
|
||||
if (pending_resolution.contains(field_descriptor))
|
||||
{
|
||||
if (skip_unsupported_fields)
|
||||
return std::nullopt;
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouse doesn't support type recursion ({})", field_descriptor->full_name());
|
||||
}
|
||||
pending_resolution.emplace(field_descriptor);
|
||||
SCOPE_EXIT({ pending_resolution.erase(field_descriptor); });
|
||||
|
||||
chassert(unresolved_descriptors.contains(field_descriptor));
|
||||
if (allow_repeat && field_descriptor->is_map())
|
||||
{
|
||||
auto name_and_type = getNameAndDataTypeFromField(field_descriptor, skip_unsupported_fields, false);
|
||||
/// We don't add the same unresolved descriptor again since we are trying to re-resolve it and put it under a Tuple
|
||||
auto name_and_type
|
||||
= getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, false, unresolved_descriptors);
|
||||
if (!name_and_type)
|
||||
return std::nullopt;
|
||||
const auto * tuple_type = assert_cast<const DataTypeTuple *>(name_and_type->type.get());
|
||||
@ -3747,7 +3741,9 @@ namespace
|
||||
|
||||
if (allow_repeat && field_descriptor->is_repeated())
|
||||
{
|
||||
auto name_and_type = getNameAndDataTypeFromField(field_descriptor, skip_unsupported_fields, false);
|
||||
/// We don't add the same unresolved descriptor again since we are trying to re-resolve it and put it under an Array
|
||||
auto name_and_type
|
||||
= getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, false, unresolved_descriptors);
|
||||
if (!name_and_type)
|
||||
return std::nullopt;
|
||||
return NameAndTypePair{name_and_type->name, std::make_shared<DataTypeArray>(name_and_type->type)};
|
||||
@ -3814,10 +3810,21 @@ namespace
|
||||
else if (message_descriptor->field_count() == 1)
|
||||
{
|
||||
const auto * nested_field_descriptor = message_descriptor->field(0);
|
||||
auto nested_name_and_type
|
||||
= getNameAndDataTypeFromFieldRecursive(nested_field_descriptor, skip_unsupported_fields, true, pending_resolution);
|
||||
if (auto p = unresolved_descriptors.emplace(nested_field_descriptor); !p.second)
|
||||
{
|
||||
if (skip_unsupported_fields)
|
||||
return std::nullopt;
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"ClickHouse doesn't support type recursion ({})",
|
||||
nested_field_descriptor->full_name());
|
||||
}
|
||||
|
||||
auto nested_name_and_type = getNameAndDataTypeFromFieldRecursive(
|
||||
nested_field_descriptor, skip_unsupported_fields, true, unresolved_descriptors);
|
||||
if (!nested_name_and_type)
|
||||
return std::nullopt;
|
||||
unresolved_descriptors.erase(nested_field_descriptor);
|
||||
return NameAndTypePair{field_descriptor->name() + "_" + nested_name_and_type->name, nested_name_and_type->type};
|
||||
}
|
||||
else
|
||||
@ -3826,10 +3833,20 @@ namespace
|
||||
Strings nested_names;
|
||||
for (int i = 0; i != message_descriptor->field_count(); ++i)
|
||||
{
|
||||
if (auto p = unresolved_descriptors.emplace(message_descriptor->field(i)); !p.second)
|
||||
{
|
||||
if (skip_unsupported_fields)
|
||||
continue;
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"ClickHouse doesn't support type recursion ({})",
|
||||
message_descriptor->field(i)->full_name());
|
||||
}
|
||||
auto nested_name_and_type = getNameAndDataTypeFromFieldRecursive(
|
||||
message_descriptor->field(i), skip_unsupported_fields, true, pending_resolution);
|
||||
message_descriptor->field(i), skip_unsupported_fields, true, unresolved_descriptors);
|
||||
if (!nested_name_and_type)
|
||||
continue;
|
||||
unresolved_descriptors.erase(message_descriptor->field(i));
|
||||
nested_types.push_back(nested_name_and_type->type);
|
||||
nested_names.push_back(nested_name_and_type->name);
|
||||
}
|
||||
@ -3848,8 +3865,9 @@ namespace
|
||||
const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat = true)
|
||||
{
|
||||
/// Keep track of the fields that are pending resolution to avoid recursive types, which are unsupported
|
||||
std::unordered_set<const google::protobuf::FieldDescriptor *> pending_resolution{};
|
||||
return getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, allow_repeat, pending_resolution);
|
||||
std::unordered_set<const google::protobuf::FieldDescriptor *> unresolved_descriptors{};
|
||||
unresolved_descriptors.emplace(field_descriptor);
|
||||
return getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, allow_repeat, unresolved_descriptors);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,19 @@
|
||||
1
|
||||
Row 1:
|
||||
──────
|
||||
name: fields
|
||||
type: Map(String, Tuple(
|
||||
null_value Enum8('NULL_VALUE' = 0),
|
||||
number_value Float64,
|
||||
string_value String,
|
||||
bool_value UInt8,
|
||||
list_value_values Array(Tuple(
|
||||
null_value Enum8('NULL_VALUE' = 0),
|
||||
number_value Float64,
|
||||
string_value String,
|
||||
bool_value UInt8))))
|
||||
default_type:
|
||||
default_expression:
|
||||
comment:
|
||||
codec_expression:
|
||||
ttl_expression:
|
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
# Directory containing the .proto schema passed through format_schema below.
SCHEMADIR="$CUR_DIR/format_schemas"
|
||||
# Skipping of unsupported fields disabled (=0): stdout and stderr are piped together (|&) and
# grep -c prints how many lines mention BAD_ARGUMENTS.
$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') FORMAT Vertical SETTINGS format_schema='$SCHEMADIR/03252_recursive_type.proto:Struct', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=0" |& grep -c BAD_ARGUMENTS
|
||||
# Skipping of unsupported fields enabled (=1): print the DESCRIBE result for the same Struct schema.
$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') FORMAT Vertical SETTINGS format_schema='$SCHEMADIR/03252_recursive_type.proto:Struct', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=1"
|
@ -0,0 +1,62 @@
|
||||
// Protocol Buffers - Google's data interchange format
|
||||
// Copyright 2008 Google Inc. All rights reserved.
|
||||
// https://developers.google.com/protocol-buffers/
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
// Based on Google's struct.proto (see above license)
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
// A map from string keys to Value messages. Value in turn has a Struct-typed
// field (struct_value), so Struct is a recursive type.
message Struct {
|
||||
map<string, Value> fields = 1;
|
||||
}
|
||||
|
||||
// A value holding one of the alternatives of the `kind` oneof.
// struct_value refers back to Struct and list_value refers to ListValue
// (a repeated Value), so both alternatives make this type recursive.
message Value {
|
||||
oneof kind {
|
||||
NullValue null_value = 1;
|
||||
double number_value = 2;
|
||||
string string_value = 3;
|
||||
bool bool_value = 4;
|
||||
Struct struct_value = 5;
|
||||
ListValue list_value = 6;
|
||||
}
|
||||
}
|
||||
|
||||
// Enum with the single value NULL_VALUE; used as the type of Value.null_value.
enum NullValue {
|
||||
NULL_VALUE = 0;
|
||||
}
|
||||
|
||||
// A repeated list of Value messages; referenced from Value.list_value,
// which closes a second recursion cycle (Value -> ListValue -> Value).
message ListValue {
|
||||
repeated Value values = 1;
|
||||
}
|
||||
|
||||
// A message with a string `event` field and a Struct-typed `payload` field.
message Message {
|
||||
string event = 1;
|
||||
Struct payload = 2;
|
||||
}
|
Loading…
Reference in New Issue
Block a user