mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 01:51:59 +00:00
Merge pull request #48740 from ClickHouse/rs/varint-sanity
VarInt coding: Always perform sanity check
This commit is contained in:
commit
75887c30f5
@ -94,19 +94,15 @@ inline char parseEscapeSequence(char c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// These functions are located in VarInt.h
|
/// Function throwReadAfterEOF is located in VarInt.h
|
||||||
/// inline void throwReadAfterEOF()
|
|
||||||
|
|
||||||
|
|
||||||
inline void readChar(char & x, ReadBuffer & buf)
|
inline void readChar(char & x, ReadBuffer & buf)
|
||||||
{
|
{
|
||||||
if (!buf.eof())
|
if (buf.eof()) [[unlikely]]
|
||||||
{
|
|
||||||
x = *buf.position();
|
|
||||||
++buf.position();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throwReadAfterEOF();
|
throwReadAfterEOF();
|
||||||
|
x = *buf.position();
|
||||||
|
++buf.position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -256,7 +252,7 @@ inline void readBoolText(bool & x, ReadBuffer & buf)
|
|||||||
|
|
||||||
inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false)
|
inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case = false)
|
||||||
{
|
{
|
||||||
if (buf.eof())
|
if (buf.eof()) [[unlikely]]
|
||||||
throwReadAfterEOF();
|
throwReadAfterEOF();
|
||||||
|
|
||||||
switch (*buf.position())
|
switch (*buf.position())
|
||||||
@ -311,7 +307,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf)
|
|||||||
|
|
||||||
bool negative = false;
|
bool negative = false;
|
||||||
UnsignedT res{};
|
UnsignedT res{};
|
||||||
if (buf.eof())
|
if (buf.eof()) [[unlikely]]
|
||||||
{
|
{
|
||||||
if constexpr (throw_exception)
|
if constexpr (throw_exception)
|
||||||
throwReadAfterEOF();
|
throwReadAfterEOF();
|
||||||
@ -486,14 +482,14 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)
|
|||||||
throwReadAfterEOF();
|
throwReadAfterEOF();
|
||||||
};
|
};
|
||||||
|
|
||||||
if (unlikely(buf.eof()))
|
if (buf.eof()) [[unlikely]]
|
||||||
return on_error();
|
return on_error();
|
||||||
|
|
||||||
if (is_signed_v<T> && *buf.position() == '-')
|
if (is_signed_v<T> && *buf.position() == '-')
|
||||||
{
|
{
|
||||||
++buf.position();
|
++buf.position();
|
||||||
negative = true;
|
negative = true;
|
||||||
if (unlikely(buf.eof()))
|
if (buf.eof()) [[unlikely]]
|
||||||
return on_error();
|
return on_error();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1247,7 +1243,7 @@ inline void readDoubleQuoted(LocalDateTime & x, ReadBuffer & buf)
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
inline void readCSVSimple(T & x, ReadBuffer & buf)
|
inline void readCSVSimple(T & x, ReadBuffer & buf)
|
||||||
{
|
{
|
||||||
if (buf.eof())
|
if (buf.eof()) [[unlikely]]
|
||||||
throwReadAfterEOF();
|
throwReadAfterEOF();
|
||||||
|
|
||||||
char maybe_quote = *buf.position();
|
char maybe_quote = *buf.position();
|
||||||
|
25
src/IO/VarInt.cpp
Normal file
25
src/IO/VarInt.cpp
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#include <IO/VarInt.h>
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
||||||
|
extern const int BAD_ARGUMENTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
void throwReadAfterEOF()
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof");
|
||||||
|
}
|
||||||
|
|
||||||
|
void throwValueTooLargeForVarIntEncoding(UInt64 x)
|
||||||
|
{
|
||||||
|
/// Under practical circumstances, we should virtually never end up here but AST Fuzzer manages to create superlarge input integers
|
||||||
|
/// which trigger this exception. Intentionally not throwing LOGICAL_ERROR or calling abort() or [ch]assert(false), so AST Fuzzer
|
||||||
|
/// can swallow the exception and continue to run.
|
||||||
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is too large for VarInt encoding", x);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -9,12 +9,6 @@
|
|||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int ATTEMPT_TO_READ_AFTER_EOF;
|
|
||||||
extern const int BAD_ARGUMENTS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding.
|
/// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding.
|
||||||
|
|
||||||
@ -23,27 +17,21 @@ void writeVarUInt(UInt64 x, std::ostream & ostr);
|
|||||||
void writeVarUInt(UInt64 x, WriteBuffer & ostr);
|
void writeVarUInt(UInt64 x, WriteBuffer & ostr);
|
||||||
char * writeVarUInt(UInt64 x, char * ostr);
|
char * writeVarUInt(UInt64 x, char * ostr);
|
||||||
|
|
||||||
/// NOTE: Due to historical reasons, only values up to (1<<63)-1 can be safely encoded/decoded (bigger values are not idempotent under
|
|
||||||
/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the
|
|
||||||
/// same limitation, others support the full 64-bit range, e.g. clickhouse-driver (Python))
|
|
||||||
constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1;
|
|
||||||
|
|
||||||
/// Read UInt64, written in variable length format (base128)
|
/// Read UInt64, written in variable length format (base128)
|
||||||
void readVarUInt(UInt64 & x, std::istream & istr);
|
void readVarUInt(UInt64 & x, std::istream & istr);
|
||||||
void readVarUInt(UInt64 & x, ReadBuffer & istr);
|
void readVarUInt(UInt64 & x, ReadBuffer & istr);
|
||||||
const char * readVarUInt(UInt64 & x, const char * istr, size_t size);
|
const char * readVarUInt(UInt64 & x, const char * istr, size_t size);
|
||||||
|
|
||||||
|
/// Get the length of a variable-length-encoded integer
|
||||||
/// Get the length of UInt64 in VarUInt format
|
|
||||||
size_t getLengthOfVarUInt(UInt64 x);
|
size_t getLengthOfVarUInt(UInt64 x);
|
||||||
|
|
||||||
/// Get the Int64 length in VarInt format
|
|
||||||
size_t getLengthOfVarInt(Int64 x);
|
size_t getLengthOfVarInt(Int64 x);
|
||||||
|
|
||||||
|
[[noreturn]] void throwReadAfterEOF();
|
||||||
|
[[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x);
|
||||||
|
|
||||||
/// Write Int64 in variable length format (base128)
|
/// Write Int64 in variable length format (base128)
|
||||||
template <typename OUT>
|
template <typename Out>
|
||||||
inline void writeVarInt(Int64 x, OUT & ostr)
|
inline void writeVarInt(Int64 x, Out & ostr)
|
||||||
{
|
{
|
||||||
writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
|
writeVarUInt(static_cast<UInt64>((x << 1) ^ (x >> 63)), ostr);
|
||||||
}
|
}
|
||||||
@ -55,8 +43,8 @@ inline char * writeVarInt(Int64 x, char * ostr)
|
|||||||
|
|
||||||
|
|
||||||
/// Read Int64, written in variable length format (base128)
|
/// Read Int64, written in variable length format (base128)
|
||||||
template <typename IN>
|
template <typename In>
|
||||||
inline void readVarInt(Int64 & x, IN & istr)
|
inline void readVarInt(Int64 & x, In & istr)
|
||||||
{
|
{
|
||||||
readVarUInt(*reinterpret_cast<UInt64*>(&x), istr);
|
readVarUInt(*reinterpret_cast<UInt64*>(&x), istr);
|
||||||
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
|
x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1);
|
||||||
@ -70,21 +58,6 @@ inline const char * readVarInt(Int64 & x, const char * istr, size_t size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void writeVarT(UInt64 x, std::ostream & ostr) { writeVarUInt(x, ostr); }
|
|
||||||
inline void writeVarT(Int64 x, std::ostream & ostr) { writeVarInt(x, ostr); }
|
|
||||||
inline void writeVarT(UInt64 x, WriteBuffer & ostr) { writeVarUInt(x, ostr); }
|
|
||||||
inline void writeVarT(Int64 x, WriteBuffer & ostr) { writeVarInt(x, ostr); }
|
|
||||||
inline char * writeVarT(UInt64 x, char * & ostr) { return writeVarUInt(x, ostr); }
|
|
||||||
inline char * writeVarT(Int64 x, char * & ostr) { return writeVarInt(x, ostr); }
|
|
||||||
|
|
||||||
inline void readVarT(UInt64 & x, std::istream & istr) { readVarUInt(x, istr); }
|
|
||||||
inline void readVarT(Int64 & x, std::istream & istr) { readVarInt(x, istr); }
|
|
||||||
inline void readVarT(UInt64 & x, ReadBuffer & istr) { readVarUInt(x, istr); }
|
|
||||||
inline void readVarT(Int64 & x, ReadBuffer & istr) { readVarInt(x, istr); }
|
|
||||||
inline const char * readVarT(UInt64 & x, const char * istr, size_t size) { return readVarUInt(x, istr, size); }
|
|
||||||
inline const char * readVarT(Int64 & x, const char * istr, size_t size) { return readVarInt(x, istr, size); }
|
|
||||||
|
|
||||||
|
|
||||||
/// For [U]Int32, [U]Int16, size_t.
|
/// For [U]Int32, [U]Int16, size_t.
|
||||||
|
|
||||||
inline void readVarUInt(UInt32 & x, ReadBuffer & istr)
|
inline void readVarUInt(UInt32 & x, ReadBuffer & istr)
|
||||||
@ -124,12 +97,6 @@ inline void readVarUInt(T & x, ReadBuffer & istr)
|
|||||||
x = tmp;
|
x = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
[[noreturn]] inline void throwReadAfterEOF()
|
|
||||||
{
|
|
||||||
throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof");
|
|
||||||
}
|
|
||||||
|
|
||||||
template <bool fast>
|
template <bool fast>
|
||||||
inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr)
|
inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr)
|
||||||
{
|
{
|
||||||
@ -137,10 +104,10 @@ inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr)
|
|||||||
for (size_t i = 0; i < 9; ++i)
|
for (size_t i = 0; i < 9; ++i)
|
||||||
{
|
{
|
||||||
if constexpr (!fast)
|
if constexpr (!fast)
|
||||||
if (istr.eof())
|
if (istr.eof()) [[unlikely]]
|
||||||
throwReadAfterEOF();
|
throwReadAfterEOF();
|
||||||
|
|
||||||
UInt64 byte = *istr.position(); /// NOLINT
|
UInt64 byte = *istr.position();
|
||||||
++istr.position();
|
++istr.position();
|
||||||
x |= (byte & 0x7F) << (7 * i);
|
x |= (byte & 0x7F) << (7 * i);
|
||||||
|
|
||||||
@ -177,10 +144,10 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
|
|||||||
x = 0;
|
x = 0;
|
||||||
for (size_t i = 0; i < 9; ++i)
|
for (size_t i = 0; i < 9; ++i)
|
||||||
{
|
{
|
||||||
if (istr == end)
|
if (istr == end) [[unlikely]]
|
||||||
throwReadAfterEOF();
|
throwReadAfterEOF();
|
||||||
|
|
||||||
UInt64 byte = *istr; /// NOLINT
|
UInt64 byte = *istr;
|
||||||
++istr;
|
++istr;
|
||||||
x |= (byte & 0x7F) << (7 * i);
|
x |= (byte & 0x7F) << (7 * i);
|
||||||
|
|
||||||
@ -191,20 +158,16 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
|
|||||||
return istr;
|
return istr;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[noreturn]] inline void throwValueTooLargeForVarIntEncodingException(UInt64 x)
|
/// NOTE: Due to historical reasons, only values up to (1<<63)-1 can be safely encoded/decoded (bigger values are not idempotent under
|
||||||
{
|
/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the
|
||||||
/// Under practical circumstances, we should virtually never end up here but AST Fuzzer manages to create superlarge input integers
|
/// same limitation, others support the full 64-bit range, e.g. clickhouse-driver (Python))
|
||||||
/// which trigger this exception. Intentionally not throwing LOGICAL_ERROR or calling abort() or [ch]assert(false), so AST Fuzzer
|
constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1;
|
||||||
/// can swallow the exception and continue to run.
|
|
||||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is too large for VarInt encoding", x);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
|
inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
|
||||||
{
|
{
|
||||||
#ifndef NDEBUG
|
if (x > VAR_UINT_MAX) [[unlikely]]
|
||||||
if (x > VAR_UINT_MAX)
|
throwValueTooLargeForVarIntEncoding(x);
|
||||||
throwValueTooLargeForVarIntEncodingException(x);
|
|
||||||
#endif
|
|
||||||
for (size_t i = 0; i < 9; ++i)
|
for (size_t i = 0; i < 9; ++i)
|
||||||
{
|
{
|
||||||
uint8_t byte = x & 0x7F;
|
uint8_t byte = x & 0x7F;
|
||||||
@ -224,10 +187,9 @@ inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
|
|||||||
|
|
||||||
inline void writeVarUInt(UInt64 x, std::ostream & ostr)
|
inline void writeVarUInt(UInt64 x, std::ostream & ostr)
|
||||||
{
|
{
|
||||||
#ifndef NDEBUG
|
if (x > VAR_UINT_MAX) [[unlikely]]
|
||||||
if (x > VAR_UINT_MAX)
|
throwValueTooLargeForVarIntEncoding(x);
|
||||||
throwValueTooLargeForVarIntEncodingException(x);
|
|
||||||
#endif
|
|
||||||
for (size_t i = 0; i < 9; ++i)
|
for (size_t i = 0; i < 9; ++i)
|
||||||
{
|
{
|
||||||
uint8_t byte = x & 0x7F;
|
uint8_t byte = x & 0x7F;
|
||||||
@ -245,10 +207,9 @@ inline void writeVarUInt(UInt64 x, std::ostream & ostr)
|
|||||||
|
|
||||||
inline char * writeVarUInt(UInt64 x, char * ostr)
|
inline char * writeVarUInt(UInt64 x, char * ostr)
|
||||||
{
|
{
|
||||||
#ifndef NDEBUG
|
if (x > VAR_UINT_MAX) [[unlikely]]
|
||||||
if (x > VAR_UINT_MAX)
|
throwValueTooLargeForVarIntEncoding(x);
|
||||||
throwValueTooLargeForVarIntEncodingException(x);
|
|
||||||
#endif
|
|
||||||
for (size_t i = 0; i < 9; ++i)
|
for (size_t i = 0; i < 9; ++i)
|
||||||
{
|
{
|
||||||
uint8_t byte = x & 0x7F;
|
uint8_t byte = x & 0x7F;
|
||||||
|
Loading…
Reference in New Issue
Block a user