mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #55974 from Avogar/fix-protobuf-auto-schema
Fix autogenerated Protobuf schema with fields with underscore
This commit is contained in:
commit
754ab9fa6c
@ -126,7 +126,7 @@ String prepareNullableAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypeP
|
||||
String prepareTupleAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent)
|
||||
{
|
||||
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*data_type);
|
||||
auto nested_names_and_types = getCollectedTupleElements(tuple_type);
|
||||
auto nested_names_and_types = getCollectedTupleElements(tuple_type, false, "CapnProto");
|
||||
|
||||
String struct_name = getSchemaMessageName(column_name);
|
||||
startStruct(buf, struct_name, indent);
|
||||
@ -222,7 +222,7 @@ String prepareAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & dat
|
||||
|
||||
void StructureToCapnProtoSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_)
|
||||
{
|
||||
auto names_and_types = collectNested(names_and_types_);
|
||||
auto names_and_types = collectNested(names_and_types_, true, "CapnProto");
|
||||
writeCapnProtoHeader(buf);
|
||||
startStruct(buf, getSchemaMessageName(message_name), 0);
|
||||
|
||||
|
@ -4,6 +4,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Error codes referenced by this translation unit. Only the declaration lives here;
/// the definition is provided elsewhere in the project.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}
|
||||
|
||||
namespace StructureToFormatSchemaUtils
|
||||
{
|
||||
|
||||
@ -57,27 +62,34 @@ String getSchemaMessageName(const String & column_name)
|
||||
|
||||
namespace
|
||||
{
|
||||
std::pair<String, String> splitName(const String & name)
|
||||
std::pair<String, String> splitName(const String & name, bool allow_split_by_underscore)
|
||||
{
|
||||
const auto * begin = name.data();
|
||||
const auto * end = name.data() + name.size();
|
||||
const auto * it = find_first_symbols<'_', '.'>(begin, end);
|
||||
const char * it = nullptr;
|
||||
if (allow_split_by_underscore)
|
||||
it = find_first_symbols<'_', '.'>(begin, end);
|
||||
else
|
||||
it = find_first_symbols<'.'>(begin, end);
|
||||
String first = String(begin, it);
|
||||
String second = it == end ? "" : String(it + 1, end);
|
||||
return {std::move(first), std::move(second)};
|
||||
}
|
||||
}
|
||||
|
||||
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types)
|
||||
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types, bool allow_split_by_underscore, const String & format_name)
|
||||
{
|
||||
/// Find all columns with dots '.' or underscores '_' and move them into a tuple.
|
||||
/// Find all columns with dots '.' or underscores '_' (if allowed) and move them into a tuple.
|
||||
/// For example if we have columns 'a.b UInt32, a.c UInt32, x_y String' we will
|
||||
/// change it to 'a Tuple(b UInt32, c UInt32), x Tuple(y String)'
|
||||
NamesAndTypesList result;
|
||||
std::unordered_map<String, NamesAndTypesList> nested;
|
||||
for (const auto & [name, type] : names_and_types)
|
||||
{
|
||||
auto [field_name, nested_name] = splitName(name);
|
||||
auto [field_name, nested_name] = splitName(name, allow_split_by_underscore);
|
||||
if (isdigit(field_name[0]))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format {} doesn't support field names that starts with a digit: '{}'", format_name, field_name);
|
||||
|
||||
if (nested_name.empty())
|
||||
result.emplace_back(name, type);
|
||||
else
|
||||
@ -90,7 +102,7 @@ NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types)
|
||||
return result;
|
||||
}
|
||||
|
||||
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type)
|
||||
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type, bool allow_split_by_underscore, const String & format_name)
|
||||
{
|
||||
const auto & nested_types = tuple_type.getElements();
|
||||
Names nested_names;
|
||||
@ -109,7 +121,7 @@ NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type)
|
||||
for (size_t i = 0; i != nested_names.size(); ++i)
|
||||
result.emplace_back(nested_names[i], nested_types[i]);
|
||||
|
||||
return collectNested(result);
|
||||
return collectNested(result, allow_split_by_underscore, format_name);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -19,9 +19,9 @@ namespace StructureToFormatSchemaUtils
|
||||
|
||||
String getSchemaMessageName(const String & column_name);
|
||||
|
||||
/// Groups columns whose names contain a '.' (or a '_' when `allow_split_by_underscore`
/// is true) into tuples keyed by the part of the name before the separator.
/// `format_name` is used in the BAD_ARGUMENTS exception that is thrown when a
/// resulting field name starts with a digit.
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types, bool allow_split_by_underscore, const String & format_name);
|
||||
|
||||
/// Builds a names-and-types list from the elements of `tuple_type` and passes it through
/// collectNested() with the same `allow_split_by_underscore` and `format_name` arguments.
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type, bool allow_split_by_underscore, const String & format_name);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -105,7 +105,7 @@ String prepareArrayAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr &
|
||||
String prepareTupleAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent)
|
||||
{
|
||||
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*data_type);
|
||||
auto nested_names_and_types = getCollectedTupleElements(tuple_type);
|
||||
auto nested_names_and_types = getCollectedTupleElements(tuple_type, false, "Protobuf");
|
||||
|
||||
String message_name = getSchemaMessageName(column_name);
|
||||
startMessage(buf, message_name, indent);
|
||||
@ -202,7 +202,7 @@ String prepareAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data
|
||||
|
||||
void StructureToProtobufSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_)
|
||||
{
|
||||
auto names_and_types = collectNested(names_and_types_);
|
||||
auto names_and_types = collectNested(names_and_types_, false, "Protobuf");
|
||||
writeProtobufHeader(buf);
|
||||
startMessage(buf, getSchemaMessageName(message_name), 0);
|
||||
size_t field_index = 1;
|
||||
|
@ -0,0 +1,7 @@
|
||||
|
||||
message Message
|
||||
{
|
||||
uint32 col_1 = 1;
|
||||
}
|
||||
1
|
||||
1
|
17
tests/queries/0_stateless/02905_structure_to_schema_bad_names.sh
Executable file
17
tests/queries/0_stateless/02905_structure_to_schema_bad_names.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
# Tags: no-fasttest

# Checks schema autogeneration for column names containing separators:
#   * a column named 'col_1' must be written to the Protobuf schema as a plain field;
#   * a column named 'col.1' must be rejected with BAD_ARGUMENTS for both Protobuf and CapnProto.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

SCHEMA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-schema

# Write the autogenerated schema to a file and print it starting from its second line.
$CLICKHOUSE_LOCAL -q "select 42 as col_1 format Protobuf settings output_format_schema='$SCHEMA_FILE.proto'" > /dev/null
tail -n +2 "$SCHEMA_FILE.proto"

# grep -c prints the number of output lines that mention BAD_ARGUMENTS.
$CLICKHOUSE_LOCAL -q "select 42 as \`col.1\` format Protobuf" 2>&1 | grep -c -F "BAD_ARGUMENTS"
$CLICKHOUSE_LOCAL -q "select 42 as \`col.1\` format CapnProto" 2>&1 | grep -c -F "BAD_ARGUMENTS"

# The glob stays outside the quotes so it still expands.
rm "$SCHEMA_FILE"*
|
||||
|
Loading…
Reference in New Issue
Block a user