mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 17:12:03 +00:00
Allow var-int encoded 64-bit integers with MSB=1
Resolves: #51486 Until now, it was illegal to encode 64-bit (unsigned) integers with MSB=1, i.e. values > (1ULL<<63) - 1, as var-int. In more detail, the var-int code used by ClickHouse server and client spent at most 9 bytes per value such that 9 * 7 = 63 bits could be encoded. Some 3rd party clients (e.g. Rust clickhouse-rs) had the same limitation, whereas other clients understand the full range (Python clickhouse-driver). PRs #47608 and #48628 added sanity checks as asserts or exceptions during var-int encoding on the server side. This was considered okay as such huge integers so far occurred only during testing (usually fuzzing) but not in practice. Issue #51486 is a new fuzzing issue where the exception thrown from the sanity check led to a half-baked progress packet and as a result, a logical error / server crash. The only fix which is not another bandaid is to allow the full range in var-int coding. Clients will have to allow the full range too, a note will be added to the changelog. (the alternative was to create another protocol version but as var-int is used all over the place this was considered infeasible) Review note: this is the relevant commit.
This commit is contained in:
parent
3f744c1e14
commit
271297823a
@ -6,7 +6,6 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void throwReadAfterEOF()
|
||||
@ -14,12 +13,4 @@ void throwReadAfterEOF()
|
||||
throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof");
|
||||
}
|
||||
|
||||
void throwValueTooLargeForVarIntEncoding(UInt64 x)
|
||||
{
|
||||
/// Under practical circumstances, we should virtually never end up here but AST Fuzzer manages to create superlarge input integers
|
||||
/// which trigger this exception. Intentionally not throwing LOGICAL_ERROR or calling abort() or [ch]assert(false), so AST Fuzzer
|
||||
/// can swallow the exception and continue to run.
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is too large for VarInt encoding", x);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -13,73 +13,59 @@ namespace DB
|
||||
/// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding.
|
||||
|
||||
[[noreturn]] void throwReadAfterEOF();
|
||||
[[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x);
|
||||
|
||||
|
||||
/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under
|
||||
/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the
|
||||
/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python))
|
||||
constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1;
|
||||
|
||||
inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
|
||||
{
|
||||
if (x > VAR_UINT_MAX) [[unlikely]]
|
||||
throwValueTooLargeForVarIntEncoding(x);
|
||||
|
||||
for (size_t i = 0; i < 9; ++i)
|
||||
while (x > 0x7F)
|
||||
{
|
||||
uint8_t byte = x & 0x7F;
|
||||
if (x > 0x7F)
|
||||
byte |= 0x80;
|
||||
uint8_t byte = 0x80 | (x & 0x7F);
|
||||
|
||||
ostr.nextIfAtEnd();
|
||||
*ostr.position() = byte;
|
||||
++ostr.position();
|
||||
|
||||
x >>= 7;
|
||||
if (!x)
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t final_byte = static_cast<uint8_t>(x);
|
||||
|
||||
ostr.nextIfAtEnd();
|
||||
*ostr.position() = final_byte;
|
||||
++ostr.position();
|
||||
}
|
||||
|
||||
inline void writeVarUInt(UInt64 x, std::ostream & ostr)
|
||||
{
|
||||
if (x > VAR_UINT_MAX) [[unlikely]]
|
||||
throwValueTooLargeForVarIntEncoding(x);
|
||||
|
||||
for (size_t i = 0; i < 9; ++i)
|
||||
while (x > 0x7F)
|
||||
{
|
||||
uint8_t byte = x & 0x7F;
|
||||
if (x > 0x7F)
|
||||
byte |= 0x80;
|
||||
|
||||
uint8_t byte = 0x80 | (x & 0x7F);
|
||||
ostr.put(byte);
|
||||
|
||||
x >>= 7;
|
||||
if (!x)
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t final_byte = static_cast<uint8_t>(x);
|
||||
ostr.put(final_byte);
|
||||
}
|
||||
|
||||
inline char * writeVarUInt(UInt64 x, char * ostr)
|
||||
{
|
||||
if (x > VAR_UINT_MAX) [[unlikely]]
|
||||
throwValueTooLargeForVarIntEncoding(x);
|
||||
|
||||
for (size_t i = 0; i < 9; ++i)
|
||||
while (x > 0x7F)
|
||||
{
|
||||
uint8_t byte = x & 0x7F;
|
||||
if (x > 0x7F)
|
||||
byte |= 0x80;
|
||||
uint8_t byte = 0x80 | (x & 0x7F);
|
||||
|
||||
*ostr = byte;
|
||||
++ostr;
|
||||
|
||||
x >>= 7;
|
||||
if (!x)
|
||||
return ostr;
|
||||
}
|
||||
|
||||
uint8_t final_byte = static_cast<uint8_t>(x);
|
||||
|
||||
*ostr = final_byte;
|
||||
++ostr;
|
||||
|
||||
return ostr;
|
||||
}
|
||||
|
||||
@ -101,7 +87,7 @@ template <bool check_eof>
|
||||
inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
|
||||
{
|
||||
x = 0;
|
||||
for (size_t i = 0; i < 9; ++i)
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
{
|
||||
if constexpr (check_eof)
|
||||
if (istr.eof()) [[unlikely]]
|
||||
@ -120,7 +106,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
|
||||
|
||||
inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
|
||||
{
|
||||
if (istr.buffer().end() - istr.position() >= 9)
|
||||
if (istr.buffer().end() - istr.position() >= 10)
|
||||
return impl::readVarUInt<false>(x, istr);
|
||||
return impl::readVarUInt<true>(x, istr);
|
||||
}
|
||||
@ -128,7 +114,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr)
|
||||
inline void readVarUInt(UInt64 & x, std::istream & istr)
|
||||
{
|
||||
x = 0;
|
||||
for (size_t i = 0; i < 9; ++i)
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
{
|
||||
UInt64 byte = istr.get();
|
||||
x |= (byte & 0x7F) << (7 * i);
|
||||
@ -143,7 +129,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
|
||||
const char * end = istr + size;
|
||||
|
||||
x = 0;
|
||||
for (size_t i = 0; i < 9; ++i)
|
||||
for (size_t i = 0; i < 10; ++i)
|
||||
{
|
||||
if (istr == end) [[unlikely]]
|
||||
throwReadAfterEOF();
|
||||
@ -220,7 +206,8 @@ inline size_t getLengthOfVarUInt(UInt64 x)
|
||||
: (x < (1ULL << 42) ? 6
|
||||
: (x < (1ULL << 49) ? 7
|
||||
: (x < (1ULL << 56) ? 8
|
||||
: 9)))))));
|
||||
: (x < (1ULL << 63) ? 9
|
||||
: 10))))))));
|
||||
}
|
||||
|
||||
|
||||
|
@ -1905,17 +1905,18 @@ void TCPHandler::sendData(const Block & block)
|
||||
{
|
||||
initBlockOutput(block);
|
||||
|
||||
auto prev_bytes_written_out = out->count();
|
||||
auto prev_bytes_written_compressed_out = state.maybe_compressed_out->count();
|
||||
size_t prev_bytes_written_out = out->count();
|
||||
size_t prev_bytes_written_compressed_out = state.maybe_compressed_out->count();
|
||||
|
||||
try
|
||||
{
|
||||
/// For testing hedged requests
|
||||
if (unknown_packet_in_send_data)
|
||||
{
|
||||
constexpr UInt64 marker = (1ULL<<63) - 1;
|
||||
--unknown_packet_in_send_data;
|
||||
if (unknown_packet_in_send_data == 0)
|
||||
writeVarUInt(VAR_UINT_MAX, *out);
|
||||
writeVarUInt(marker, *out);
|
||||
}
|
||||
|
||||
writeVarUInt(Protocol::Server::Data, *out);
|
||||
|
4
tests/queries/0_stateless/02812_large_varints.sql
Normal file
4
tests/queries/0_stateless/02812_large_varints.sql
Normal file
@ -0,0 +1,4 @@
|
||||
-- 64-bit integers with MSB set (i.e. values > (1ULL<<63) - 1) could for historical/compat reasons not be serialized as var-ints (issue #51486).
|
||||
-- These two queries internally produce such big values, run them to be sure no bad things happen.
|
||||
SELECT topKWeightedState(65535)(now(), -2) FORMAT Null;
|
||||
SELECT number FROM numbers(toUInt64(-1)) limit 10 Format Null;
|
Loading…
Reference in New Issue
Block a user