mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #55974 from Avogar/fix-protobuf-auto-schema
Fix autogenerated Protobuf schema with fields with underscore
This commit is contained in:
commit
754ab9fa6c
@ -126,7 +126,7 @@ String prepareNullableAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypeP
|
|||||||
String prepareTupleAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent)
|
String prepareTupleAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent)
|
||||||
{
|
{
|
||||||
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*data_type);
|
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*data_type);
|
||||||
auto nested_names_and_types = getCollectedTupleElements(tuple_type);
|
auto nested_names_and_types = getCollectedTupleElements(tuple_type, false, "CapnProto");
|
||||||
|
|
||||||
String struct_name = getSchemaMessageName(column_name);
|
String struct_name = getSchemaMessageName(column_name);
|
||||||
startStruct(buf, struct_name, indent);
|
startStruct(buf, struct_name, indent);
|
||||||
@ -222,7 +222,7 @@ String prepareAndGetCapnProtoTypeName(WriteBuffer & buf, const DataTypePtr & dat
|
|||||||
|
|
||||||
void StructureToCapnProtoSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_)
|
void StructureToCapnProtoSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_)
|
||||||
{
|
{
|
||||||
auto names_and_types = collectNested(names_and_types_);
|
auto names_and_types = collectNested(names_and_types_, true, "CapnProto");
|
||||||
writeCapnProtoHeader(buf);
|
writeCapnProtoHeader(buf);
|
||||||
startStruct(buf, getSchemaMessageName(message_name), 0);
|
startStruct(buf, getSchemaMessageName(message_name), 0);
|
||||||
|
|
||||||
|
@ -4,6 +4,11 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
|
}
|
||||||
|
|
||||||
namespace StructureToFormatSchemaUtils
|
namespace StructureToFormatSchemaUtils
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -57,27 +62,34 @@ String getSchemaMessageName(const String & column_name)
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
std::pair<String, String> splitName(const String & name)
|
std::pair<String, String> splitName(const String & name, bool allow_split_by_underscore)
|
||||||
{
|
{
|
||||||
const auto * begin = name.data();
|
const auto * begin = name.data();
|
||||||
const auto * end = name.data() + name.size();
|
const auto * end = name.data() + name.size();
|
||||||
const auto * it = find_first_symbols<'_', '.'>(begin, end);
|
const char * it = nullptr;
|
||||||
|
if (allow_split_by_underscore)
|
||||||
|
it = find_first_symbols<'_', '.'>(begin, end);
|
||||||
|
else
|
||||||
|
it = find_first_symbols<'.'>(begin, end);
|
||||||
String first = String(begin, it);
|
String first = String(begin, it);
|
||||||
String second = it == end ? "" : String(it + 1, end);
|
String second = it == end ? "" : String(it + 1, end);
|
||||||
return {std::move(first), std::move(second)};
|
return {std::move(first), std::move(second)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types)
|
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types, bool allow_split_by_underscore, const String & format_name)
|
||||||
{
|
{
|
||||||
/// Find all columns with dots '.' or underscores '_' and move them into a tuple.
|
/// Find all columns with dots '.' or underscores '_' (if allowed) and move them into a tuple.
|
||||||
/// For example if we have columns 'a.b UInt32, a.c UInt32, x_y String' we will
|
/// For example if we have columns 'a.b UInt32, a.c UInt32, x_y String' we will
|
||||||
/// change it to 'a Tuple(b UInt32, c UInt32), x Tuple(y String)'
|
/// change it to 'a Tuple(b UInt32, c UInt32), x Tuple(y String)' (x_y is split only if allow_split_by_underscore is true, otherwise it stays a plain column).
|
||||||
NamesAndTypesList result;
|
NamesAndTypesList result;
|
||||||
std::unordered_map<String, NamesAndTypesList> nested;
|
std::unordered_map<String, NamesAndTypesList> nested;
|
||||||
for (const auto & [name, type] : names_and_types)
|
for (const auto & [name, type] : names_and_types)
|
||||||
{
|
{
|
||||||
auto [field_name, nested_name] = splitName(name);
|
auto [field_name, nested_name] = splitName(name, allow_split_by_underscore);
|
||||||
|
if (isdigit(field_name[0]))
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format {} doesn't support field names that starts with a digit: '{}'", format_name, field_name);
|
||||||
|
|
||||||
if (nested_name.empty())
|
if (nested_name.empty())
|
||||||
result.emplace_back(name, type);
|
result.emplace_back(name, type);
|
||||||
else
|
else
|
||||||
@ -90,7 +102,7 @@ NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type)
|
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type, bool allow_split_by_underscore, const String & format_name)
|
||||||
{
|
{
|
||||||
const auto & nested_types = tuple_type.getElements();
|
const auto & nested_types = tuple_type.getElements();
|
||||||
Names nested_names;
|
Names nested_names;
|
||||||
@ -109,7 +121,7 @@ NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type)
|
|||||||
for (size_t i = 0; i != nested_names.size(); ++i)
|
for (size_t i = 0; i != nested_names.size(); ++i)
|
||||||
result.emplace_back(nested_names[i], nested_types[i]);
|
result.emplace_back(nested_names[i], nested_types[i]);
|
||||||
|
|
||||||
return collectNested(result);
|
return collectNested(result, allow_split_by_underscore, format_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -19,9 +19,9 @@ namespace StructureToFormatSchemaUtils
|
|||||||
|
|
||||||
String getSchemaMessageName(const String & column_name);
|
String getSchemaMessageName(const String & column_name);
|
||||||
|
|
||||||
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types);
|
NamesAndTypesList collectNested(const NamesAndTypesList & names_and_types, bool allow_split_by_underscore, const String & format_name);
|
||||||
|
|
||||||
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type);
|
NamesAndTypesList getCollectedTupleElements(const DataTypeTuple & tuple_type, bool allow_split_by_underscore, const String & format_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -105,7 +105,7 @@ String prepareArrayAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr &
|
|||||||
String prepareTupleAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent)
|
String prepareTupleAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data_type, const String & column_name, size_t indent)
|
||||||
{
|
{
|
||||||
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*data_type);
|
const auto & tuple_type = assert_cast<const DataTypeTuple &>(*data_type);
|
||||||
auto nested_names_and_types = getCollectedTupleElements(tuple_type);
|
auto nested_names_and_types = getCollectedTupleElements(tuple_type, false, "Protobuf");
|
||||||
|
|
||||||
String message_name = getSchemaMessageName(column_name);
|
String message_name = getSchemaMessageName(column_name);
|
||||||
startMessage(buf, message_name, indent);
|
startMessage(buf, message_name, indent);
|
||||||
@ -202,7 +202,7 @@ String prepareAndGetProtobufTypeName(WriteBuffer & buf, const DataTypePtr & data
|
|||||||
|
|
||||||
void StructureToProtobufSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_)
|
void StructureToProtobufSchema::writeSchema(WriteBuffer & buf, const String & message_name, const NamesAndTypesList & names_and_types_)
|
||||||
{
|
{
|
||||||
auto names_and_types = collectNested(names_and_types_);
|
auto names_and_types = collectNested(names_and_types_, false, "Protobuf");
|
||||||
writeProtobufHeader(buf);
|
writeProtobufHeader(buf);
|
||||||
startMessage(buf, getSchemaMessageName(message_name), 0);
|
startMessage(buf, getSchemaMessageName(message_name), 0);
|
||||||
size_t field_index = 1;
|
size_t field_index = 1;
|
||||||
|
@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
message Message
|
||||||
|
{
|
||||||
|
uint32 col_1 = 1;
|
||||||
|
}
|
||||||
|
1
|
||||||
|
1
|
17
tests/queries/0_stateless/02905_structure_to_schema_bad_names.sh
Executable file
17
tests/queries/0_stateless/02905_structure_to_schema_bad_names.sh
Executable file
@ -0,0 +1,17 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Tags: no-fasttest
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
# shellcheck source=../shell_config.sh
|
||||||
|
. "$CURDIR"/../shell_config.sh
|
||||||
|
|
||||||
|
SCHEMA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME-schema
|
||||||
|
|
||||||
|
$CLICKHOUSE_LOCAL -q "select 42 as col_1 format Protobuf settings output_format_schema='$SCHEMA_FILE.proto'" > /dev/null
|
||||||
|
tail -n +2 $SCHEMA_FILE.proto
|
||||||
|
|
||||||
|
$CLICKHOUSE_LOCAL -q "select 42 as \`col.1\` format Protobuf" 2>&1 | grep -c -F "BAD_ARGUMENTS"
|
||||||
|
$CLICKHOUSE_LOCAL -q "select 42 as \`col.1\` format CapnProto" 2>&1 | grep -c -F "BAD_ARGUMENTS"
|
||||||
|
|
||||||
|
rm $SCHEMA_FILE*
|
||||||
|
|
Loading…
Reference in New Issue
Block a user