#pragma once #include #if USE_PROTOBUF #include #include #include #include #include #include #include namespace google { namespace protobuf { class Descriptor; class FieldDescriptor; } } namespace DB { namespace ProtobufColumnMatcher { struct DefaultTraits { using MessageData = boost::blank; using FieldData = boost::blank; }; template struct Message; /// Represents a field in a protobuf message. template struct Field { const google::protobuf::FieldDescriptor * field_descriptor = nullptr; /// Same as field_descriptor->number(). UInt32 field_number = 0; /// Index of a column; either 'column_index' or 'nested_message' is set. size_t column_index = -1; std::unique_ptr> nested_message; typename Traits::FieldData data; }; /// Represents a protobuf message. template struct Message { std::vector> fields; /// Points to the parent message if this is a nested message. Message * parent = nullptr; size_t index_in_parent = -1; typename Traits::MessageData data; }; /// Utility function finding matching columns for each protobuf field. template static std::unique_ptr> matchColumns( const std::vector & column_names, const google::protobuf::Descriptor * message_type); namespace details { void throwNoCommonColumns(); class ColumnNameMatcher { public: ColumnNameMatcher(const std::vector & column_names); size_t findColumn(const String & field_name); private: std::unordered_map column_name_to_index_map; std::vector column_usage; }; template std::unique_ptr> matchColumnsRecursive( ColumnNameMatcher & name_matcher, const google::protobuf::Descriptor * message_type, const String & field_name_prefix) { auto message = std::make_unique>(); for (int i = 0; i != message_type->field_count(); ++i) { const google::protobuf::FieldDescriptor * field_descriptor = message_type->field(i); if ((field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_MESSAGE) || (field_descriptor->type() == google::protobuf::FieldDescriptor::TYPE_GROUP)) { auto nested_message = matchColumnsRecursive( name_matcher, field_descriptor->message_type(), field_name_prefix + field_descriptor->name() + "."); if (nested_message) { message->fields.emplace_back(); auto & current_field = message->fields.back(); current_field.field_number = field_descriptor->number(); current_field.field_descriptor = field_descriptor; current_field.nested_message = std::move(nested_message); current_field.nested_message->parent = message.get(); } } else { size_t column_index = name_matcher.findColumn(field_name_prefix + field_descriptor->name()); if (column_index != static_cast(-1)) { message->fields.emplace_back(); auto & current_field = message->fields.back(); current_field.field_number = field_descriptor->number(); current_field.field_descriptor = field_descriptor; current_field.column_index = column_index; } } } if (message->fields.empty()) return nullptr; // Columns should be sorted by field_number, it's necessary for writing protobufs and useful reading protobufs. std::sort(message->fields.begin(), message->fields.end(), [](const Field & left, const Field & right) { return left.field_number < right.field_number; }); for (size_t i = 0; i != message->fields.size(); ++i) { auto & field = message->fields[i]; if (field.nested_message) field.nested_message->index_in_parent = i; } return message; } } template static std::unique_ptr> matchColumns( const std::vector & column_names, const google::protobuf::Descriptor * message_type) { details::ColumnNameMatcher name_matcher(column_names); auto message = details::matchColumnsRecursive(name_matcher, message_type, ""); if (!message) details::throwNoCommonColumns(); return message; } } } #endif