From a452d037efe71a7b9d200ad42c06267279176d99 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 28 May 2020 18:35:35 +0300 Subject: [PATCH 1/2] Fix handling bad data while reading in Protobuf format. --- src/Formats/ProtobufReader.cpp | 29 ++++++++++++++++++++--------- src/Formats/ProtobufReader.h | 10 +++++----- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/Formats/ProtobufReader.cpp b/src/Formats/ProtobufReader.cpp index d8302771002..d76b048c1e9 100644 --- a/src/Formats/ProtobufReader.cpp +++ b/src/Formats/ProtobufReader.cpp @@ -32,11 +32,12 @@ namespace BITS32 = 5, }; - // The following condition must always be true: - // any_cursor_position < min(END_OF_VARINT, END_OF_GROUP) - // This inequation helps to check conditions in SimpleReader. - constexpr UInt64 END_OF_VARINT = static_cast(-1); - constexpr UInt64 END_OF_GROUP = static_cast(-2); + // The following conditions must always be true: + // any_cursor_position > END_OF_VARINT + // any_cursor_position > END_OF_GROUP + // These inequalities help to check conditions in ProtobufReader::SimpleReader. + constexpr Int64 END_OF_VARINT = -1; + constexpr Int64 END_OF_GROUP = -2; Int64 decodeZigZag(UInt64 n) { return static_cast((n >> 1) ^ (~(n & 1) + 1)); } @@ -77,7 +78,7 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors) if (!current_message_level) return; - UInt64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front(); + Int64 root_message_end = (current_message_level == 1) ? 
current_message_end : parent_message_ends.front(); if (cursor != root_message_end) { if (cursor < root_message_end) @@ -95,6 +96,9 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors) void ProtobufReader::SimpleReader::startNestedMessage() { assert(current_message_level >= 1); + if ((cursor > field_end) && (field_end != END_OF_GROUP)) + throwUnknownFormat(); + // Start reading a nested message which is located inside a length-delimited field // of another message. parent_message_ends.emplace_back(current_message_end); @@ -146,7 +150,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number) throwUnknownFormat(); } - if (cursor >= current_message_end) + if ((cursor >= current_message_end) && (current_message_end != END_OF_GROUP)) return false; UInt64 varint = readVarint(); @@ -196,11 +200,17 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number) bool ProtobufReader::SimpleReader::readUInt(UInt64 & value) { + if (field_end == END_OF_VARINT) + { + value = readVarint(); + field_end = cursor; + return true; + } + if (unlikely(cursor >= field_end)) return false; + value = readVarint(); - if (field_end == END_OF_VARINT) - field_end = cursor; return true; } @@ -227,6 +237,7 @@ bool ProtobufReader::SimpleReader::readFixed(T & value) { if (unlikely(cursor >= field_end)) return false; + readBinary(&value, sizeof(T)); return true; } diff --git a/src/Formats/ProtobufReader.h b/src/Formats/ProtobufReader.h index a50c2f202f0..7c3c19ba376 100644 --- a/src/Formats/ProtobufReader.h +++ b/src/Formats/ProtobufReader.h @@ -124,12 +124,12 @@ private: void ignoreGroup(); ReadBuffer & in; - UInt64 cursor; + Int64 cursor; size_t current_message_level; - UInt64 current_message_end; - std::vector parent_message_ends; - UInt64 field_end; - UInt64 last_string_pos; + Int64 current_message_end; + std::vector parent_message_ends; + Int64 field_end; + Int64 last_string_pos; }; class IConverter From 99626a5bd3fa3ba0470142793ce93faec1a0efc5 
Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 28 May 2020 20:07:47 +0300 Subject: [PATCH 2/2] Add test for handling malformed protobuf data. --- .../0_stateless/00825_protobuf_format_input.reference | 1 + tests/queries/0_stateless/00825_protobuf_format_input.sh | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00825_protobuf_format_input.reference b/tests/queries/0_stateless/00825_protobuf_format_input.reference index 884cc74c4e5..0c56bc4ebf0 100644 --- a/tests/queries/0_stateless/00825_protobuf_format_input.reference +++ b/tests/queries/0_stateless/00825_protobuf_format_input.reference @@ -8,3 +8,4 @@ a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +7495123456 0 0 2 4 3 9 +ok diff --git a/tests/queries/0_stateless/00825_protobuf_format_input.sh b/tests/queries/0_stateless/00825_protobuf_format_input.sh index d28b70bb002..1c915bc3f24 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_input.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_input.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -set -e -o pipefail +set -eo pipefail # Run the client. $CLICKHOUSE_CLIENT --multiquery <<'EOF' @@ -48,5 +48,12 @@ source $CURDIR/00825_protobuf_format_input.insh $CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;" $CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;" +# Try to input malformed data. +set +eo pipefail +echo -ne '\xe0\x80\x3f\x0b' \ + | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \ + | grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail" +set -eo pipefail + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;"