Merge pull request #48154 from azat/varuint-v2

Add sanity checks for writing number in variable length format (resubmit)
This commit is contained in:
Robert Schulze 2023-03-31 10:59:21 +02:00 committed by GitHub
commit eb93ec35f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 45 additions and 15 deletions

View File

@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If
The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-14
## Install Clang-15
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like:
``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-14 main" >> /etc/apt/sources.list
sudo apt-get install clang-14
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-15 main" >> /etc/apt/sources.list
sudo apt-get install clang-15
```
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1
cd ClickHouse
mkdir build-darwin
cd build-darwin
CC=clang-14 CXX=clang++-14 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
CC=clang-15 CXX=clang++-15 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
ninja
```

View File

@ -28,13 +28,13 @@ void ProgressValues::read(ReadBuffer & in, UInt64 server_revision)
void ProgressValues::write(WriteBuffer & out, UInt64 client_revision) const
{
writeVarUInt(read_rows, out);
writeVarUInt(read_bytes, out);
writeVarUInt(total_rows_to_read, out);
writeVarUIntOverflow(read_rows, out);
writeVarUIntOverflow(read_bytes, out);
writeVarUIntOverflow(total_rows_to_read, out);
if (client_revision >= DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO)
{
writeVarUInt(written_rows, out);
writeVarUInt(written_bytes, out);
writeVarUIntOverflow(written_rows, out);
writeVarUIntOverflow(written_bytes, out);
}
if (client_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_SERVER_QUERY_TIME_IN_PROGRESS)
{

View File

@ -2,6 +2,7 @@
#include <iostream>
#include <base/types.h>
#include <base/defines.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
@ -14,12 +15,32 @@ namespace ErrorCodes
}
/** Write UInt64 in variable length format (base128) NOTE Only up to 2^63 - 1 are supported. */
/** Variable-Length Quantity (VLQ) Base-128 compression
*
* NOTE: Due to historical reasons, only up to 1<<63-1 are supported, which
* cannot be changed without breaking the backward compatibility.
* Also some drivers may support full 1<<64 range (i.e. python -
* clickhouse-driver), while others has the same limitations as ClickHouse
* (i.e. Rust - clickhouse-rs).
* So implementing VLQ for the whole 1<<64 range will require different set of
* helpers.
*/
constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1;
/** Write UInt64 in variable length format (base128) */
void writeVarUInt(UInt64 x, std::ostream & ostr);
void writeVarUInt(UInt64 x, WriteBuffer & ostr);
char * writeVarUInt(UInt64 x, char * ostr);
/** Write UInt64 in variable length format, wrap the value to VAR_UINT_MAX if it exceed VAR_UINT_MAX (to bypass sanity check) */
template <typename ...Args>
auto writeVarUIntOverflow(UInt64 x, Args && ... args)
{
return writeVarUInt(std::min(x, VAR_UINT_MAX), std::forward<Args>(args)...);
}
/** Read UInt64, written in variable length format (base128) */
void readVarUInt(UInt64 & x, std::istream & istr);
void readVarUInt(UInt64 & x, ReadBuffer & istr);
@ -186,6 +207,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size)
inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
{
chassert(x <= VAR_UINT_MAX);
for (size_t i = 0; i < 9; ++i)
{
uint8_t byte = x & 0x7F;
@ -205,6 +227,7 @@ inline void writeVarUInt(UInt64 x, WriteBuffer & ostr)
inline void writeVarUInt(UInt64 x, std::ostream & ostr)
{
chassert(x <= VAR_UINT_MAX);
for (size_t i = 0; i < 9; ++i)
{
uint8_t byte = x & 0x7F;
@ -222,6 +245,7 @@ inline void writeVarUInt(UInt64 x, std::ostream & ostr)
inline char * writeVarUInt(UInt64 x, char * ostr)
{
chassert(x <= VAR_UINT_MAX);
for (size_t i = 0; i < 9; ++i)
{
uint8_t byte = x & 0x7F;

View File

@ -18,26 +18,32 @@ int main(int argc, char ** argv)
}
DB::UInt64 x = DB::parse<UInt64>(argv[1]);
std::cout << std::hex << std::showbase << "Input: " << x << std::endl;
Poco::HexBinaryEncoder hex(std::cout);
DB::writeVarUInt(x, hex);
std::cout << "writeVarUIntOverflow(std::ostream): 0x";
DB::writeVarUIntOverflow(x, hex);
std::cout << std::endl;
std::string s;
{
DB::WriteBufferFromString wb(s);
DB::writeVarUInt(x, wb);
DB::writeVarUIntOverflow(x, wb);
wb.next();
}
std::cout << "writeVarUIntOverflow(WriteBuffer): 0x";
hex << s;
std::cout << std::endl;
s.clear();
s.resize(9);
s.resize(DB::writeVarUInt(x, s.data()) - s.data());
s.resize(DB::writeVarUIntOverflow(x, s.data()) - s.data());
std::cout << "writeVarUIntOverflow(char *): 0x";
hex << s;
std::cout << std::endl;
@ -46,7 +52,7 @@ int main(int argc, char ** argv)
DB::ReadBufferFromString rb(s);
DB::readVarUInt(y, rb);
std::cerr << "x: " << x << ", y: " << y << std::endl;
std::cerr << "Input: " << x << ", readVarUInt(writeVarUIntOverflow()): " << y << std::endl;
return 0;
}

View File

@ -1895,7 +1895,7 @@ void TCPHandler::sendData(const Block & block)
{
--unknown_packet_in_send_data;
if (unknown_packet_in_send_data == 0)
writeVarUInt(UInt64(-1), *out);
writeVarUInt(VAR_UINT_MAX, *out);
}
writeVarUInt(Protocol::Server::Data, *out);