Merge pull request #31988 from vitlibar/fix-skipping-columns-while-writing-protobuf

Fix skipping columns while writing protobuf
This commit is contained in:
Vitaly Baranov 2021-12-05 18:01:11 +03:00 committed by GitHub
commit d709782088
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 542 additions and 193 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -15,7 +15,7 @@ class ProtobufWriter;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
using DataTypes = std::vector<DataTypePtr>;
class WriteBuffer;
/// Utility class, does all the work for serialization in the Protobuf format.
class ProtobufSerializer
@@ -30,6 +30,8 @@ public:
virtual void readRow(size_t row_num) = 0;
virtual void insertDefaults(size_t row_num) = 0;
virtual void describeTree(WriteBuffer & out, size_t indent) const = 0;
static std::unique_ptr<ProtobufSerializer> create(
const Strings & column_names,
const DataTypes & data_types,

View File

@@ -0,0 +1,27 @@
e4048ead-30a2-45e5-90be-2af1c7137523 dummy [1] [50639] [58114] [[5393]] [[1]] [[3411]] [[17811]] [[(10,20)]]
Binary representation:
00000000 44 0a 24 65 34 30 34 38 65 61 64 2d 33 30 61 32 |D.$e4048ead-30a2|
00000010 2d 34 35 65 35 2d 39 30 62 65 2d 32 61 66 31 63 |-45e5-90be-2af1c|
00000020 37 31 33 37 35 32 33 62 1c 10 01 18 cf 8b 03 20 |7137523b....... |
00000030 82 c6 03 5a 10 28 01 30 91 2a 40 93 8b 01 52 05 |...Z.(.0.*@...R.|
00000040 4d 00 00 a0 41 |M...A|
00000045
MESSAGE #1 AT 0x00000001
identifier: "e4048ead-30a2-45e5-90be-2af1c7137523"
modules {
module_id: 1
supply: 50639
temp: 58114
nodes {
node_id: 1
opening_time: 5393
current: 17811
coords {
y: 20
}
}
}
Binary representation is as expected

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# https://github.com/ClickHouse/ClickHouse/issues/31160

# Checks writing in the Protobuf format when some table columns have no
# matching field in the schema and therefore must be skipped. The table below
# has `unused1`, `modules_nodes.closing_time_time` and `modules_nodes.coords.x`,
# none of which appear in the decoded `UpdateMessage` output.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
SCHEMADIR=$CURDIR/format_schemas
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Stop at the first failing command, including a failure inside a pipeline.
set -eo pipefail

# Run the client: create the table, insert a single row and print it as text.
$CLICKHOUSE_CLIENT --multiquery <<EOF
DROP TABLE IF EXISTS table_skipped_column_in_nested_00825;
CREATE TABLE table_skipped_column_in_nested_00825 (
identifier UUID,
unused1 String,
modules Nested (
module_id UInt32,
supply UInt32,
temp UInt32
),
modules_nodes Nested (
opening_time Array(UInt32),
node_id Array(UInt32),
closing_time_time Array(UInt32),
current Array(UInt32),
coords Nested (
x Float32,
y Float64
)
)
) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO table_skipped_column_in_nested_00825 VALUES ('e4048ead-30a2-45e5-90be-2af1c7137523', 'dummy', [1], [50639], [58114], [[5393]], [[1]], [[3411]], [[17811]], [[(10, 20)]]);
SELECT * FROM table_skipped_column_in_nested_00825;
EOF

# Dump the same row in the Protobuf format into a temporary file next to the test.
BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_skipped_column_in_nested.XXXXXX.binary")
$CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema = '$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" > "$BINARY_FILE_PATH"

# Check the output in the protobuf format
echo
# "$CURDIR" is quoted so that a checkout path containing spaces or glob
# characters is not split or expanded before the helper is executed.
"$CURDIR"/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage" --input "$BINARY_FILE_PATH"

# Check the input in the protobuf format (now the table contains the same data twice).
# NOTE(review): this round-trip check is currently disabled; the reason is not stated here.
#echo
#$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" < "$BINARY_FILE_PATH"
#$CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825"

# Clean up the temporary file and the table.
rm "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "DROP TABLE table_skipped_column_in_nested_00825"

View File

@@ -0,0 +1,29 @@
// Schema with a three-level nesting of messages:
// UpdateMessage -> Module -> ModuleNode -> Coords.
// NOTE(review): presumably paired with a test table that has extra columns
// (`unused1`, `closing_time_time`, `coords.x`) which have no matching field
// here — confirm against the test script that references `UpdateMessage`.
syntax = "proto3";
// Top-level message; one instance carries an identifier and its modules.
message UpdateMessage {
string identifier = 1;
// Deliberately left commented out, so the message has no `unused1` field.
//string unused1 = 100;
// One element of the repeated `modules` field.
message Module {
uint32 module_id = 2;
uint32 supply = 3;
uint32 temp = 4;
// One element of the repeated `nodes` field of a module.
message ModuleNode {
uint32 node_id = 5;
uint32 opening_time = 6;
uint32 closing_time = 7; // The column in the table is named `closing_time_time`
uint32 current = 8;
// Coordinates of a node; only `y` is declared.
message Coords {
// Left commented out, so the message has no `x` field. Field numbers are
// scoped per message, so 8 here would not clash with `ModuleNode.current`.
//float x = 8;
float y = 9;
}
Coords coords = 10;
}
repeated ModuleNode nodes = 11;
}
repeated Module modules = 12;
}