make read(...) compatible with buggy versions

This commit is contained in:
Alexander Tokmakov 2022-11-17 18:54:07 +01:00
parent 415eaff8d5
commit 35f677d7d8

View File

@ -29,6 +29,7 @@ namespace ErrorCodes
{ {
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int TOO_LARGE_STRING_SIZE;
} }
/** Aggregate functions that store one of passed values. /** Aggregate functions that store one of passed values.
@ -461,7 +462,7 @@ struct Compatibility
auto res = column.getDataAt(n); auto res = column.getDataAt(n);
/// ColumnString always reserves extra byte for null-character after string. /// ColumnString always reserves extra byte for null-character after string.
/// But getDataAt returns StringRef without the null-character. Let's add it. /// But getDataAt returns StringRef without the null-character. Let's add it.
chassert(res.data[res.size] == 0); chassert(res.data[res.size] == '\0');
++res.size; ++res.size;
return res; return res;
} }
@ -471,7 +472,7 @@ struct Compatibility
/// String already has terminating null-character. /// String already has terminating null-character.
/// But insertData will add another one unconditionally. Trim existing null-character to avoid duplication. /// But insertData will add another one unconditionally. Trim existing null-character to avoid duplication.
chassert(0 < length); chassert(0 < length);
chassert(pos[length - 1] == 0); chassert(pos[length - 1] == '\0');
column.insertData(pos, length - 1); column.insertData(pos, length - 1);
} }
}; };
@ -505,16 +506,22 @@ public:
} }
private: private:
const char * getData() const char * getDataMutable()
{ {
return size <= MAX_SMALL_STRING_SIZE ? small_data : large_data; return size <= MAX_SMALL_STRING_SIZE ? small_data : large_data;
} }
StringRef getStringRef() const const char * getData() const
{ {
const char * data_ptr = size <= MAX_SMALL_STRING_SIZE ? small_data : large_data;
/// It must always be terminated with null-character /// It must always be terminated with null-character
chassert(0 < size); chassert(0 < size);
chassert(getData()[size - 1] == 0); chassert(data_ptr[size - 1] == '\0');
return data_ptr;
}
StringRef getStringRef() const
{
return StringRef(getData(), size); return StringRef(getData(), size);
} }
@ -534,13 +541,32 @@ public:
buf.write(getData(), size); buf.write(getData(), size);
} }
void allocateLargeDataIfNeeded(Int32 size_to_reserve, Arena * arena)
{
if (capacity < size_to_reserve)
{
capacity = static_cast<Int32>(roundUpToPowerOfTwoOrZero(size_to_reserve));
/// It might happen if the size was too big and the rounded value does not fit a size_t
if (unlikely(capacity < size_to_reserve))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", size_to_reserve);
/// Don't free large_data here.
large_data = arena->alloc(capacity);
}
}
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena) void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena)
{ {
Int32 rhs_size; Int32 rhs_size;
readBinary(rhs_size, buf); readBinary(rhs_size, buf);
if (rhs_size >= 0) if (rhs_size < 0)
{ {
/// Don't free large_data here.
size = rhs_size;
return;
}
if (rhs_size <= MAX_SMALL_STRING_SIZE) if (rhs_size <= MAX_SMALL_STRING_SIZE)
{ {
/// Don't free large_data here. /// Don't free large_data here.
@ -552,22 +578,38 @@ public:
} }
else else
{ {
if (capacity < rhs_size) /// Reserve one byte more for null-character
{ Int32 rhs_size_to_reserve = rhs_size + 1;
capacity = static_cast<UInt32>(roundUpToPowerOfTwoOrZero(rhs_size)); allocateLargeDataIfNeeded(rhs_size_to_reserve, arena);
/// Don't free large_data here.
large_data = arena->alloc(capacity);
}
size = rhs_size; size = rhs_size;
buf.readStrict(large_data, size); buf.readStrict(large_data, size);
} }
}
else /// Check if the string we read is null-terminated (getDataMutable does not have the assertion)
if (0 < size && getDataMutable()[size - 1] == '\0')
return;
/// It's not null-terminated, but it must be (for historical reasons). There are two variants:
/// - The value was serialized by one of the incompatible versions of ClickHouse. We had some range of versions
/// that used to serialize SingleValueDataString without terminating '\0'. Let's just append it.
/// - An attacker sent crafted data. Sanitize it and append '\0'.
/// In all other cases the string must be already null-terminated.
/// NOTE We cannot add '\0' unconditionally, because it will be duplicated.
/// NOTE It's possible that a string that actually ends with '\0' was written by one of the incompatible versions.
/// Unfortunately, we cannot distinguish it from normal string written by normal version.
/// So such strings will be trimmed.
if (size == MAX_SMALL_STRING_SIZE)
{ {
/// Don't free large_data here. /// Special case: We have to move value to large_data
size = rhs_size; allocateLargeDataIfNeeded(size + 1, arena);
memcpy(large_data, small_data, size);
} }
/// We have enough space to append
getDataMutable()[size] = '\0';
++size;
} }
/// Assuming to.has() /// Assuming to.has()
@ -585,13 +627,7 @@ public:
} }
else else
{ {
if (capacity < value_size) allocateLargeDataIfNeeded(value_size, arena);
{
/// Don't free large_data here.
capacity = static_cast<Int32>(roundUpToPowerOfTwoOrZero(value_size));
large_data = arena->alloc(capacity);
}
size = value_size; size = value_size;
memcpy(large_data, value.data, size); memcpy(large_data, value.data, size);
} }