Merge pull request #11258 from vitlibar/protobuf-fix-handling-bad-data

Fix handling bad data while reading in Protobuf format.
This commit is contained in:
alexey-milovidov 2020-05-28 23:26:20 +03:00 committed by GitHub
commit fb4d272f5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 15 deletions

View File

@ -32,11 +32,12 @@ namespace
BITS32 = 5,
};
// The following condition must always be true:
// any_cursor_position < min(END_OF_VARINT, END_OF_GROUP)
// This inequation helps to check conditions in SimpleReader.
constexpr UInt64 END_OF_VARINT = static_cast<UInt64>(-1);
constexpr UInt64 END_OF_GROUP = static_cast<UInt64>(-2);
// The following conditions must always be true:
// any_cursor_position > END_OF_VARINT
// any_cursor_position > END_OF_GROUP
// Those inequations helps checking conditions in ProtobufReader::SimpleReader.
constexpr Int64 END_OF_VARINT = -1;
constexpr Int64 END_OF_GROUP = -2;
Int64 decodeZigZag(UInt64 n) { return static_cast<Int64>((n >> 1) ^ (~(n & 1) + 1)); }
@ -77,7 +78,7 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
if (!current_message_level)
return;
UInt64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front();
Int64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front();
if (cursor != root_message_end)
{
if (cursor < root_message_end)
@ -95,6 +96,9 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
void ProtobufReader::SimpleReader::startNestedMessage()
{
assert(current_message_level >= 1);
if ((cursor > field_end) && (field_end != END_OF_GROUP))
throwUnknownFormat();
// Start reading a nested message which is located inside a length-delimited field
// of another message.
parent_message_ends.emplace_back(current_message_end);
@ -146,7 +150,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
throwUnknownFormat();
}
if (cursor >= current_message_end)
if ((cursor >= current_message_end) && (current_message_end != END_OF_GROUP))
return false;
UInt64 varint = readVarint();
@ -196,11 +200,17 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
bool ProtobufReader::SimpleReader::readUInt(UInt64 & value)
{
if (field_end == END_OF_VARINT)
{
value = readVarint();
field_end = cursor;
return true;
}
if (unlikely(cursor >= field_end))
return false;
value = readVarint();
if (field_end == END_OF_VARINT)
field_end = cursor;
return true;
}
@ -227,6 +237,7 @@ bool ProtobufReader::SimpleReader::readFixed(T & value)
{
if (unlikely(cursor >= field_end))
return false;
readBinary(&value, sizeof(T));
return true;
}

View File

@ -124,12 +124,12 @@ private:
void ignoreGroup();
ReadBuffer & in;
UInt64 cursor;
Int64 cursor;
size_t current_message_level;
UInt64 current_message_end;
std::vector<UInt64> parent_message_ends;
UInt64 field_end;
UInt64 last_string_pos;
Int64 current_message_end;
std::vector<Int64> parent_message_ends;
Int64 field_end;
Int64 last_string_pos;
};
class IConverter

View File

@ -8,3 +8,4 @@ a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +7495123456
0 0
2 4
3 9
ok

View File

@ -3,7 +3,7 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
set -e -o pipefail
set -eo pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
@ -48,5 +48,12 @@ source $CURDIR/00825_protobuf_format_input.insh
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;"
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;"
# Try to input malformed data.
set +eo pipefail
echo -ne '\xe0\x80\x3f\x0b' \
| $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \
| grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail"
set -eo pipefail
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;"