From 9a6531fc1cbf7c2f4075a16abb232a645ef9d35e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 5 Jan 2020 09:39:06 +0300 Subject: [PATCH] Workaround for bug in zlib-ng; using zlib level 3 by default --- dbms/src/IO/ZlibDeflatingWriteBuffer.cpp | 21 ++++++++ dbms/src/IO/ZlibDeflatingWriteBuffer.h | 5 -- dbms/src/IO/tests/CMakeLists.txt | 5 +- dbms/src/IO/tests/zlib_ng_bug.cpp | 66 +++++++++++++++--------- dbms/src/Storages/StorageFile.cpp | 4 +- dbms/src/Storages/StorageHDFS.cpp | 2 +- dbms/src/Storages/StorageS3.cpp | 2 +- dbms/src/Storages/StorageURL.cpp | 2 +- 8 files changed, 69 insertions(+), 38 deletions(-) diff --git a/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp b/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp index c4d7fac56a6..8efe96877e4 100644 --- a/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp +++ b/dbms/src/IO/ZlibDeflatingWriteBuffer.cpp @@ -5,6 +5,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ZLIB_DEFLATE_FAILED; +} + + ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer( std::unique_ptr out_, CompressionMethod compression_method, @@ -84,6 +90,21 @@ void ZlibDeflatingWriteBuffer::finish() next(); + /// https://github.com/zlib-ng/zlib-ng/issues/494 + do + { + out->nextIfAtEnd(); + zstr.next_out = reinterpret_cast(out->position()); + zstr.avail_out = out->buffer().end() - out->position(); + + int rc = deflate(&zstr, Z_FULL_FLUSH); + out->position() = out->buffer().end() - zstr.avail_out; + + if (rc != Z_OK) + throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED); + } + while (zstr.avail_out == 0); + while (true) { out->nextIfAtEnd(); diff --git a/dbms/src/IO/ZlibDeflatingWriteBuffer.h b/dbms/src/IO/ZlibDeflatingWriteBuffer.h index 86eee1cffe5..f9df8f8157b 100644 --- a/dbms/src/IO/ZlibDeflatingWriteBuffer.h +++ b/dbms/src/IO/ZlibDeflatingWriteBuffer.h @@ -10,11 +10,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int ZLIB_DEFLATE_FAILED; -} - /// Performs compression using zlib library and writes compressed data to out_ WriteBuffer. class ZlibDeflatingWriteBuffer : public BufferWithOwnMemory { diff --git a/dbms/src/IO/tests/CMakeLists.txt b/dbms/src/IO/tests/CMakeLists.txt index 38802718dd1..40defb50470 100644 --- a/dbms/src/IO/tests/CMakeLists.txt +++ b/dbms/src/IO/tests/CMakeLists.txt @@ -78,7 +78,4 @@ add_executable (parse_date_time_best_effort parse_date_time_best_effort.cpp) target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io) add_executable (zlib_ng_bug zlib_ng_bug.cpp) -target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY}) -if(NOT USE_INTERNAL_POCO_LIBRARY) - target_include_directories(zlib_ng_bug SYSTEM BEFORE PRIVATE ${Poco_INCLUDE_DIRS}) -endif() +target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY} ${ZLIB_LIBRARY}) diff --git a/dbms/src/IO/tests/zlib_ng_bug.cpp b/dbms/src/IO/tests/zlib_ng_bug.cpp index 8b94b4e49d2..e9b3c448b88 100644 --- a/dbms/src/IO/tests/zlib_ng_bug.cpp +++ b/dbms/src/IO/tests/zlib_ng_bug.cpp @@ -1,32 +1,50 @@ -#include -#include -#include -#include +#include +#include +#include +#include -/** This script reproduces the bug in zlib-ng library. - * Put the following content to "data.bin" file: -abcdefghijklmn!@Aab#AAabcdefghijklmn$% -xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - * There are two lines. First line make sense. Second line contains padding to make file size large enough. - * Compile with - * cmake -D SANITIZE=address - * and run: +#pragma GCC diagnostic ignored "-Wold-style-cast" -./zlib_ng_bug data2.bin -================================================================= -==204952==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6310000147ff at pc 0x000000596d7a bp 0x7ffd139edd50 sp 0x7ffd139edd48 -READ of size 1 at 0x6310000147ff thread T0 - */ -int main(int argc, char ** argv) +/// https://github.com/zlib-ng/zlib-ng/issues/494 +int main(int, char **) { - using namespace Poco; + std::vector in(1048576); + std::vector out(1048576); - std::string filename(argc >= 2 ? argv[1] : "data.bin"); - FileInputStream istr(filename); - NullOutputStream ostr; - DeflatingOutputStream deflater(ostr, DeflatingStreamBuf::STREAM_GZIP); - StreamCopier::copyStream(istr, deflater); + ssize_t in_size = read(STDIN_FILENO, in.data(), 1048576); + if (in_size < 0) + throw std::runtime_error("Cannot read"); + in.resize(in_size); + + z_stream zstr{}; + if (Z_OK != deflateInit2(&zstr, 1, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY)) + throw std::runtime_error("Cannot deflateInit2"); + + zstr.next_in = in.data(); + zstr.avail_in = in.size(); + zstr.next_out = out.data(); + zstr.avail_out = out.size(); + + while (zstr.avail_in > 0) + if (Z_OK != deflate(&zstr, Z_NO_FLUSH)) + throw std::runtime_error("Cannot deflate"); + + while (true) + { + int rc = deflate(&zstr, Z_FINISH); + + if (rc == Z_STREAM_END) + break; + + if (rc != Z_OK) + throw std::runtime_error("Cannot finish deflate"); + } + + deflateEnd(&zstr); + + if (ssize_t(zstr.total_out) != write(STDOUT_FILENO, out.data(), zstr.total_out)) + throw std::runtime_error("Cannot write"); return 0; } diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 53c213eb1eb..2a193de0eee 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -291,7 +291,7 @@ public: * INSERT data; SELECT *; last SELECT returns only insert_data */ storage.table_fd_was_used = true; - write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(storage.table_fd), compression_method, 1); + write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(storage.table_fd), compression_method, 3); } else { @@ -299,7 +299,7 @@ public: throw Exception("Table '" + storage.table_name + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED); write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT), - compression_method, 1); + compression_method, 3); } writer = FormatFactory::instance().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), context); diff --git a/dbms/src/Storages/StorageHDFS.cpp b/dbms/src/Storages/StorageHDFS.cpp index 1f6ca6d4893..8e5db910092 100644 --- a/dbms/src/Storages/StorageHDFS.cpp +++ b/dbms/src/Storages/StorageHDFS.cpp @@ -112,7 +112,7 @@ public: const CompressionMethod compression_method) : sample_block(sample_block_) { - write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri), compression_method, 1); + write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri), compression_method, 3); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); } diff --git a/dbms/src/Storages/StorageS3.cpp b/dbms/src/Storages/StorageS3.cpp index e848eb655a0..14732a291b1 100644 --- a/dbms/src/Storages/StorageS3.cpp +++ b/dbms/src/Storages/StorageS3.cpp @@ -99,7 +99,7 @@ namespace : sample_block(sample_block_) { write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(client, bucket, key, min_upload_part_size), compression_method, 1); + std::make_unique(client, bucket, key, min_upload_part_size), compression_method, 3); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); } diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index c8deeb24bc1..efe15dc1928 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -120,7 +120,7 @@ namespace { write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_POST, timeouts), - compression_method, 1); + compression_method, 3); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); }