Workaround for bug in zlib-ng; using zlib level 3 by default

This commit is contained in:
Alexey Milovidov 2020-01-05 09:39:06 +03:00
parent 33bba81f47
commit 9a6531fc1c
8 changed files with 69 additions and 38 deletions

View File

@ -5,6 +5,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ZLIB_DEFLATE_FAILED;
}
ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
std::unique_ptr<WriteBuffer> out_,
CompressionMethod compression_method,
@ -84,6 +90,21 @@ void ZlibDeflatingWriteBuffer::finish()
next();
/// https://github.com/zlib-ng/zlib-ng/issues/494
do
{
out->nextIfAtEnd();
zstr.next_out = reinterpret_cast<unsigned char *>(out->position());
zstr.avail_out = out->buffer().end() - out->position();
int rc = deflate(&zstr, Z_FULL_FLUSH);
out->position() = out->buffer().end() - zstr.avail_out;
if (rc != Z_OK)
throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
}
while (zstr.avail_out == 0);
while (true)
{
out->nextIfAtEnd();

View File

@ -10,11 +10,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ZLIB_DEFLATE_FAILED;
}
/// Performs compression using zlib library and writes compressed data to out_ WriteBuffer.
class ZlibDeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{

View File

@ -78,7 +78,4 @@ add_executable (parse_date_time_best_effort parse_date_time_best_effort.cpp)
target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io)
add_executable (zlib_ng_bug zlib_ng_bug.cpp)
target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY})
if(NOT USE_INTERNAL_POCO_LIBRARY)
target_include_directories(zlib_ng_bug SYSTEM BEFORE PRIVATE ${Poco_INCLUDE_DIRS})
endif()
target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY} ${ZLIB_LIBRARY})

View File

@ -1,32 +1,50 @@
#include <Poco/FileStream.h>
#include <Poco/NullStream.h>
#include <Poco/StreamCopier.h>
#include <Poco/DeflatingStream.h>
#include <unistd.h>
#include <vector>
#include <stdexcept>
#include <zlib.h>
/** This script reproduces the bug in zlib-ng library.
* Put the following content to "data.bin" file:
abcdefghijklmn!@Aab#AAabcdefghijklmn$%
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
* There are two lines. First line make sense. Second line contains padding to make file size large enough.
* Compile with
* cmake -D SANITIZE=address
* and run:
#pragma GCC diagnostic ignored "-Wold-style-cast"
./zlib_ng_bug data2.bin
=================================================================
==204952==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6310000147ff at pc 0x000000596d7a bp 0x7ffd139edd50 sp 0x7ffd139edd48
READ of size 1 at 0x6310000147ff thread T0
*/
int main(int argc, char ** argv)
/// https://github.com/zlib-ng/zlib-ng/issues/494
int main(int, char **)
{
using namespace Poco;
std::vector<unsigned char> in(1048576);
std::vector<unsigned char> out(1048576);
std::string filename(argc >= 2 ? argv[1] : "data.bin");
FileInputStream istr(filename);
NullOutputStream ostr;
DeflatingOutputStream deflater(ostr, DeflatingStreamBuf::STREAM_GZIP);
StreamCopier::copyStream(istr, deflater);
ssize_t in_size = read(STDIN_FILENO, in.data(), 1048576);
if (in_size < 0)
throw std::runtime_error("Cannot read");
in.resize(in_size);
z_stream zstr{};
if (Z_OK != deflateInit2(&zstr, 1, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY))
throw std::runtime_error("Cannot deflateInit2");
zstr.next_in = in.data();
zstr.avail_in = in.size();
zstr.next_out = out.data();
zstr.avail_out = out.size();
while (zstr.avail_in > 0)
if (Z_OK != deflate(&zstr, Z_NO_FLUSH))
throw std::runtime_error("Cannot deflate");
while (true)
{
int rc = deflate(&zstr, Z_FINISH);
if (rc == Z_STREAM_END)
break;
if (rc != Z_OK)
throw std::runtime_error("Cannot finish deflate");
}
deflateEnd(&zstr);
if (ssize_t(zstr.total_out) != write(STDOUT_FILENO, out.data(), zstr.total_out))
throw std::runtime_error("Cannot write");
return 0;
}

View File

@ -291,7 +291,7 @@ public:
* INSERT data; SELECT *; last SELECT returns only insert_data
*/
storage.table_fd_was_used = true;
write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique<WriteBufferFromFileDescriptor>(storage.table_fd), compression_method, 1);
write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique<WriteBufferFromFileDescriptor>(storage.table_fd), compression_method, 3);
}
else
{
@ -299,7 +299,7 @@ public:
throw Exception("Table '" + storage.table_name + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED);
write_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromFile>(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT),
compression_method, 1);
compression_method, 3);
}
writer = FormatFactory::instance().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), context);

View File

@ -112,7 +112,7 @@ public:
const CompressionMethod compression_method)
: sample_block(sample_block_)
{
write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique<WriteBufferFromHDFS>(uri), compression_method, 1);
write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique<WriteBufferFromHDFS>(uri), compression_method, 3);
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
}

View File

@ -99,7 +99,7 @@ namespace
: sample_block(sample_block_)
{
write_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromS3>(client, bucket, key, min_upload_part_size), compression_method, 1);
std::make_unique<WriteBufferFromS3>(client, bucket, key, min_upload_part_size), compression_method, 3);
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
}

View File

@ -120,7 +120,7 @@ namespace
{
write_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, timeouts),
compression_method, 1);
compression_method, 3);
writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
}