Merge pull request #11258 from vitlibar/protobuf-fix-handling-bad-data

Fix handling bad data while reading in Protobuf format.
This commit is contained in:
alexey-milovidov 2020-05-28 23:26:20 +03:00 committed by GitHub
commit fb4d272f5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 15 deletions

View File

@ -32,11 +32,12 @@ namespace
BITS32 = 5, BITS32 = 5,
}; };
// The following condition must always be true: // The following conditions must always be true:
// any_cursor_position < min(END_OF_VARINT, END_OF_GROUP) // any_cursor_position > END_OF_VARINT
// This inequation helps to check conditions in SimpleReader. // any_cursor_position > END_OF_GROUP
constexpr UInt64 END_OF_VARINT = static_cast<UInt64>(-1); // Those inequations helps checking conditions in ProtobufReader::SimpleReader.
constexpr UInt64 END_OF_GROUP = static_cast<UInt64>(-2); constexpr Int64 END_OF_VARINT = -1;
constexpr Int64 END_OF_GROUP = -2;
Int64 decodeZigZag(UInt64 n) { return static_cast<Int64>((n >> 1) ^ (~(n & 1) + 1)); } Int64 decodeZigZag(UInt64 n) { return static_cast<Int64>((n >> 1) ^ (~(n & 1) + 1)); }
@ -77,7 +78,7 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
if (!current_message_level) if (!current_message_level)
return; return;
UInt64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front(); Int64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front();
if (cursor != root_message_end) if (cursor != root_message_end)
{ {
if (cursor < root_message_end) if (cursor < root_message_end)
@ -95,6 +96,9 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors)
void ProtobufReader::SimpleReader::startNestedMessage() void ProtobufReader::SimpleReader::startNestedMessage()
{ {
assert(current_message_level >= 1); assert(current_message_level >= 1);
if ((cursor > field_end) && (field_end != END_OF_GROUP))
throwUnknownFormat();
// Start reading a nested message which is located inside a length-delimited field // Start reading a nested message which is located inside a length-delimited field
// of another message. // of another message.
parent_message_ends.emplace_back(current_message_end); parent_message_ends.emplace_back(current_message_end);
@ -146,7 +150,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
throwUnknownFormat(); throwUnknownFormat();
} }
if (cursor >= current_message_end) if ((cursor >= current_message_end) && (current_message_end != END_OF_GROUP))
return false; return false;
UInt64 varint = readVarint(); UInt64 varint = readVarint();
@ -196,11 +200,17 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number)
bool ProtobufReader::SimpleReader::readUInt(UInt64 & value) bool ProtobufReader::SimpleReader::readUInt(UInt64 & value)
{ {
if (field_end == END_OF_VARINT)
{
value = readVarint();
field_end = cursor;
return true;
}
if (unlikely(cursor >= field_end)) if (unlikely(cursor >= field_end))
return false; return false;
value = readVarint(); value = readVarint();
if (field_end == END_OF_VARINT)
field_end = cursor;
return true; return true;
} }
@ -227,6 +237,7 @@ bool ProtobufReader::SimpleReader::readFixed(T & value)
{ {
if (unlikely(cursor >= field_end)) if (unlikely(cursor >= field_end))
return false; return false;
readBinary(&value, sizeof(T)); readBinary(&value, sizeof(T));
return true; return true;
} }

View File

@ -124,12 +124,12 @@ private:
void ignoreGroup(); void ignoreGroup();
ReadBuffer & in; ReadBuffer & in;
UInt64 cursor; Int64 cursor;
size_t current_message_level; size_t current_message_level;
UInt64 current_message_end; Int64 current_message_end;
std::vector<UInt64> parent_message_ends; std::vector<Int64> parent_message_ends;
UInt64 field_end; Int64 field_end;
UInt64 last_string_pos; Int64 last_string_pos;
}; };
class IConverter class IConverter

View File

@ -8,3 +8,4 @@ a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +7495123456
0 0 0 0
2 4 2 4
3 9 3 9
ok

View File

@ -3,7 +3,7 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh . $CURDIR/../shell_config.sh
set -e -o pipefail set -eo pipefail
# Run the client. # Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF' $CLICKHOUSE_CLIENT --multiquery <<'EOF'
@ -48,5 +48,12 @@ source $CURDIR/00825_protobuf_format_input.insh
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;" $CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;"
$CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;" $CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;"
# Try to input malformed data.
set +eo pipefail
echo -ne '\xe0\x80\x3f\x0b' \
| $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \
| grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail"
set -eo pipefail
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;"