make read(...) compatible with buggy versions

This commit is contained in:
Alexander Tokmakov 2022-11-17 18:54:07 +01:00
parent 415eaff8d5
commit 35f677d7d8

View File

@ -29,6 +29,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
extern const int TOO_LARGE_STRING_SIZE;
}
/** Aggregate functions that store one of passed values.
@ -461,7 +462,7 @@ struct Compatibility
auto res = column.getDataAt(n);
/// ColumnString always reserves extra byte for null-character after string.
/// But getDataAt returns StringRef without the null-character. Let's add it.
chassert(res.data[res.size] == 0);
chassert(res.data[res.size] == '\0');
++res.size;
return res;
}
@ -471,7 +472,7 @@ struct Compatibility
/// String already has terminating null-character.
/// But insertData will add another one unconditionally. Trim existing null-character to avoid duplication.
chassert(0 < length);
chassert(pos[length - 1] == 0);
chassert(pos[length - 1] == '\0');
column.insertData(pos, length - 1);
}
};
@ -505,16 +506,22 @@ public:
}
private:
const char * getData() const
char * getDataMutable()
{
return size <= MAX_SMALL_STRING_SIZE ? small_data : large_data;
}
StringRef getStringRef() const
const char * getData() const
{
const char * data_ptr = size <= MAX_SMALL_STRING_SIZE ? small_data : large_data;
/// It must always be terminated with null-character
chassert(0 < size);
chassert(getData()[size - 1] == 0);
chassert(data_ptr[size - 1] == '\0');
return data_ptr;
}
StringRef getStringRef() const
{
return StringRef(getData(), size);
}
@ -534,13 +541,32 @@ public:
buf.write(getData(), size);
}
void allocateLargeDataIfNeeded(Int32 size_to_reserve, Arena * arena)
{
if (capacity < size_to_reserve)
{
capacity = static_cast<Int32>(roundUpToPowerOfTwoOrZero(size_to_reserve));
/// It might happen if the size was too big and the rounded value does not fit a size_t
if (unlikely(capacity < size_to_reserve))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", size_to_reserve);
/// Don't free large_data here.
large_data = arena->alloc(capacity);
}
}
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena)
{
Int32 rhs_size;
readBinary(rhs_size, buf);
if (rhs_size >= 0)
if (rhs_size < 0)
{
/// Don't free large_data here.
size = rhs_size;
return;
}
if (rhs_size <= MAX_SMALL_STRING_SIZE)
{
/// Don't free large_data here.
@ -552,22 +578,38 @@ public:
}
else
{
if (capacity < rhs_size)
{
capacity = static_cast<UInt32>(roundUpToPowerOfTwoOrZero(rhs_size));
/// Don't free large_data here.
large_data = arena->alloc(capacity);
}
/// Reserve one byte more for null-character
Int32 rhs_size_to_reserve = rhs_size + 1;
allocateLargeDataIfNeeded(rhs_size_to_reserve, arena);
size = rhs_size;
buf.readStrict(large_data, size);
}
}
else
/// Check if the string we read is null-terminated (getDataMutable does not have the assertion)
if (0 < size && getDataMutable()[size - 1] == '\0')
return;
/// It's not null-terminated, but it must be (for historical reasons). There are two variants:
/// - The value was serialized by one of the incompatible versions of ClickHouse. We had some range of versions
/// that used to serialize SingleValueDataString without terminating '\0'. Let's just append it.
/// - An attacker sent crafted data. Sanitize it and append '\0'.
/// In all other cases the string must be already null-terminated.
/// NOTE We cannot add '\0' unconditionally, because it will be duplicated.
/// NOTE It's possible that a string that actually ends with '\0' was written by one of the incompatible versions.
/// Unfortunately, we cannot distinguish it from normal string written by normal version.
/// So such strings will be trimmed.
if (size == MAX_SMALL_STRING_SIZE)
{
/// Don't free large_data here.
size = rhs_size;
/// Special case: We have to move value to large_data
allocateLargeDataIfNeeded(size + 1, arena);
memcpy(large_data, small_data, size);
}
/// We have enough space to append
getDataMutable()[size] = '\0';
++size;
}
/// Assuming to.has()
@ -585,13 +627,7 @@ public:
}
else
{
if (capacity < value_size)
{
/// Don't free large_data here.
capacity = static_cast<Int32>(roundUpToPowerOfTwoOrZero(value_size));
large_data = arena->alloc(capacity);
}
allocateLargeDataIfNeeded(value_size, arena);
size = value_size;
memcpy(large_data, value.data, size);
}