ClickHouse/src/IO/Bzip2ReadBuffer.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

135 lines
3.3 KiB
C++
Raw Normal View History

#include "config.h"
2021-08-06 23:55:04 +00:00
#if USE_BZIP2
# include <IO/Bzip2ReadBuffer.h>
2021-10-27 23:10:39 +00:00
# include <bzlib.h>
2021-08-06 23:55:04 +00:00
namespace DB
{
/// Error codes used by this file; only declared here (`extern`), defined elsewhere.
namespace ErrorCodes
{
    /// Thrown when libbzip2 fails to (re)initialize or decode a stream.
    extern const int BZIP2_STREAM_DECODER_FAILED;
    /// Thrown when the compressed input ends before the bzip2 stream is complete.
    extern const int UNEXPECTED_END_OF_FILE;
}
/// Owns the libbzip2 decompression state (`bz_stream`): the stream is initialized
/// in the constructor and released in the destructor.
class Bzip2ReadBuffer::Bzip2StateWrapper
{
public:
    /// Zeroes the stream and initializes a decompressor on it.
    /// Throws BZIP2_STREAM_DECODER_FAILED if libbzip2 returns anything but BZ_OK.
    /// NOTE: the message text says "encoder" although a decoder is initialized;
    /// the wording is kept unchanged on purpose.
    Bzip2StateWrapper()
    {
        memset(&stream, 0, sizeof(stream));

        int ret = BZ2_bzDecompressInit(&stream, 0, 0);
        if (ret != BZ_OK)
            throw Exception(
                ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
                "bzip2 stream encoder init failed: error code: {}",
                ret);
    }

    /// Releases the decompressor; the return code is intentionally ignored
    /// because a destructor must not throw.
    ~Bzip2StateWrapper()
    {
        BZ2_bzDecompressEnd(&stream);
    }

    /// The stream is released exactly once in the destructor, so copies
    /// (which would release the same state twice) are forbidden.
    Bzip2StateWrapper(const Bzip2StateWrapper &) = delete;
    Bzip2StateWrapper & operator=(const Bzip2StateWrapper &) = delete;

    /// Tears the decompressor down and sets it up again so that a following
    /// bzip2 stream can be decoded. The output cursor (`next_out` / `avail_out`)
    /// is carried over to the fresh stream; the input cursor is reset to zero
    /// and must be set again by the caller.
    /// Throws BZIP2_STREAM_DECODER_FAILED if either libbzip2 call fails.
    void reinitialize()
    {
        /// Remember where output was being written before the state is wiped.
        auto avail_out = stream.avail_out;
        auto * next_out = stream.next_out;

        int ret = BZ2_bzDecompressEnd(&stream);
        if (ret != BZ_OK)
            throw Exception(
                ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
                "bzip2 stream encoder reinit decompress end failed: error code: {}",
                ret);

        memset(&stream, 0, sizeof(stream));

        ret = BZ2_bzDecompressInit(&stream, 0, 0);
        if (ret != BZ_OK)
            throw Exception(
                ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
                "bzip2 stream encoder reinit failed: error code: {}",
                ret);

        stream.avail_out = avail_out;
        stream.next_out = next_out;
    }

    bz_stream stream;
};
/// Wraps `in_` as a source of bzip2-compressed data.
/// `in_`, `buf_size`, `existing_memory` and `alignment` are forwarded unchanged to
/// CompressedReadBufferWrapper; a fresh libbzip2 decompression state is created
/// (which throws if libbzip2 cannot initialize) and the end-of-data flag starts cleared.
Bzip2ReadBuffer::Bzip2ReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment)
    : CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment)
    , bz(std::make_unique<Bzip2StateWrapper>())
    , eof_flag(false)
{
}
/// Defaulted here, in the file where Bzip2StateWrapper is fully defined, so the
/// `bz` smart pointer can destroy it (presumably the header only forward-declares
/// the wrapper — confirm against Bzip2ReadBuffer.h).
Bzip2ReadBuffer::~Bzip2ReadBuffer() = default;
bool Bzip2ReadBuffer::nextImpl()
{
2021-12-30 04:47:34 +00:00
if (eof_flag)
2021-08-06 23:55:04 +00:00
return false;
int ret;
do
2021-08-06 23:55:04 +00:00
{
if (!bz->stream.avail_in)
{
in->nextIfAtEnd();
bz->stream.avail_in = static_cast<unsigned>(in->buffer().end() - in->position());
bz->stream.next_in = in->position();
}
2021-08-06 23:55:04 +00:00
bz->stream.avail_out = static_cast<unsigned>(internal_buffer.size());
bz->stream.next_out = internal_buffer.begin();
2021-08-06 23:55:04 +00:00
ret = BZ2_bzDecompress(&bz->stream);
in->position() = in->buffer().end() - bz->stream.avail_in;
2022-10-04 00:10:04 +00:00
if (ret == BZ_STREAM_END && !in->eof())
{
bz->reinitialize();
bz->stream.avail_in = static_cast<unsigned>(in->buffer().end() - in->position());
2022-10-04 00:10:04 +00:00
bz->stream.next_in = in->position();
ret = BZ_OK;
}
}
while (bz->stream.avail_out == internal_buffer.size() && ret == BZ_OK && !in->eof());
2021-08-06 23:55:04 +00:00
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
2022-10-04 00:10:04 +00:00
if (ret == BZ_STREAM_END && in->eof())
2021-08-06 23:55:04 +00:00
{
2022-10-04 00:10:04 +00:00
eof_flag = true;
return !working_buffer.empty();
2021-08-06 23:55:04 +00:00
}
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 stream decoder failed: error code: {}",
ret);
Fix endless loop for truncated bzip2 Stress test (thread) found an issue [1]: elapsed: 2094.356407533 query: SELECT count(), max(x) FROM file thread_ids: [2927,11494,7502] current_database: test_19 Stacktrace for 7502: 0 __syscall () at ../base/glibc-compatibility/musl/x86_64/syscall.s:14 1 0x000000001ba033b8 in clock_nanosleep (clk=1, flags=1, req=0x7fbb8adaed68, rem=0x0) at ../base/glibc-compatibility/musl/clock_nanosleep.c:21 2 0x0000000018e25b84 in sleepForNanoseconds (nanoseconds=<optimized out>) at ../base/common/sleep.cpp:48 3 0x0000000009a47143 in DB::injection (yield_probability=0, migrate_probability=0, sleep_probability=0.10000000000000001, sleep_time_us=100000) at ../src/Common/ThreadFuzzer.cpp:193 4 DB::ThreadFuzzer::signalHandler () at ../src/Common/ThreadFuzzer.cpp:204 5 0x00000000098e1447 in __tsan::CallUserSignalHandler(__tsan::ThreadState*, bool, bool, bool, int, __sanitizer::__sanitizer_siginfo*, void*) () 6 0x00000000098d7f99 in __tsan::ProcessPendingSignals(__tsan::ThreadState*) () 7 0x00000000098e57de in pread () 8 0x00000000099fab84 in DB::ReadBufferFromFileDescriptor::nextImpl (this=0x7b38010ddb00) at ../src/IO/ReadBufferFromFileDescriptor.cpp:62 9 0x0000000009ab0cf3 in DB::ReadBuffer::next (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:62 10 DB::ReadBuffer::nextIfAtEnd (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:78 11 DB::Bzip2ReadBuffer::nextImpl (this=0x7b20032f4800) at ../src/IO/Bzip2ReadBuffer.cpp:59 It looks like bz2 is not capable to read/fail partially written archive: 2021.09.03 00:52:36.744818 [ 3553 ] {90419c0a-c410-42ea-ba76-7672ff5ae5ec} <Debug> executeQuery: (from [::1]:54714) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') CREATE TABLE file (x UInt64) ENGINE = File(TSV, 'test_19/bz2.tsv.bz2') 2021.09.03 00:52:41.255409 [ 2927 ] {9c7b4e21-cc37-438d-9c13-f9dc01efd30c} <Debug> executeQuery: (from [::1]:54750) (comment: 
'/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') TRUNCATE TABLE file 2021.09.03 00:52:43.762791 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Debug> executeQuery: (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') INSERT INTO file SELECT * FROM numbers(1000000) 2021.09.03 00:52:54.939333 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Error> executeQuery: Code: 241. DB::Exception: Memory limit (total) exceeded: would use 63.07 GiB (attempt to allocate chunk of 4194364 bytes), maximum: 62.94 GiB. (MEMORY_LIMIT_EXCEEDED) (version 21.10.1.7982) (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') (in query: INSERT INTO file SELECT * FROM numbers(1000000)), Stack trace (when copying this message, always include the lines below): 2021.09.03 00:53:08.505015 [ 2927 ] {2bc7444e-c0d6-462e-9d6a-567f70b01697} <Debug> executeQuery: (from [::1]:54842) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') SELECT count(), max(x) FROM file [1]: https://clickhouse-test-reports.s3.yandex.net/28474/6d5f01a56bb1715c47de8444bfc85b39228f3081/stress_test_(thread).html#fail1
2021-09-03 07:35:11 +00:00
if (in->eof())
{
2021-12-30 04:47:34 +00:00
eof_flag = true;
Fix endless loop for truncated bzip2 Stress test (thread) found an issue [1]: elapsed: 2094.356407533 query: SELECT count(), max(x) FROM file thread_ids: [2927,11494,7502] current_database: test_19 Stacktrace for 7502: 0 __syscall () at ../base/glibc-compatibility/musl/x86_64/syscall.s:14 1 0x000000001ba033b8 in clock_nanosleep (clk=1, flags=1, req=0x7fbb8adaed68, rem=0x0) at ../base/glibc-compatibility/musl/clock_nanosleep.c:21 2 0x0000000018e25b84 in sleepForNanoseconds (nanoseconds=<optimized out>) at ../base/common/sleep.cpp:48 3 0x0000000009a47143 in DB::injection (yield_probability=0, migrate_probability=0, sleep_probability=0.10000000000000001, sleep_time_us=100000) at ../src/Common/ThreadFuzzer.cpp:193 4 DB::ThreadFuzzer::signalHandler () at ../src/Common/ThreadFuzzer.cpp:204 5 0x00000000098e1447 in __tsan::CallUserSignalHandler(__tsan::ThreadState*, bool, bool, bool, int, __sanitizer::__sanitizer_siginfo*, void*) () 6 0x00000000098d7f99 in __tsan::ProcessPendingSignals(__tsan::ThreadState*) () 7 0x00000000098e57de in pread () 8 0x00000000099fab84 in DB::ReadBufferFromFileDescriptor::nextImpl (this=0x7b38010ddb00) at ../src/IO/ReadBufferFromFileDescriptor.cpp:62 9 0x0000000009ab0cf3 in DB::ReadBuffer::next (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:62 10 DB::ReadBuffer::nextIfAtEnd (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:78 11 DB::Bzip2ReadBuffer::nextImpl (this=0x7b20032f4800) at ../src/IO/Bzip2ReadBuffer.cpp:59 It looks like bz2 is not capable to read/fail partially written archive: 2021.09.03 00:52:36.744818 [ 3553 ] {90419c0a-c410-42ea-ba76-7672ff5ae5ec} <Debug> executeQuery: (from [::1]:54714) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') CREATE TABLE file (x UInt64) ENGINE = File(TSV, 'test_19/bz2.tsv.bz2') 2021.09.03 00:52:41.255409 [ 2927 ] {9c7b4e21-cc37-438d-9c13-f9dc01efd30c} <Debug> executeQuery: (from [::1]:54750) (comment: 
'/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') TRUNCATE TABLE file 2021.09.03 00:52:43.762791 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Debug> executeQuery: (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') INSERT INTO file SELECT * FROM numbers(1000000) 2021.09.03 00:52:54.939333 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Error> executeQuery: Code: 241. DB::Exception: Memory limit (total) exceeded: would use 63.07 GiB (attempt to allocate chunk of 4194364 bytes), maximum: 62.94 GiB. (MEMORY_LIMIT_EXCEEDED) (version 21.10.1.7982) (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') (in query: INSERT INTO file SELECT * FROM numbers(1000000)), Stack trace (when copying this message, always include the lines below): 2021.09.03 00:53:08.505015 [ 2927 ] {2bc7444e-c0d6-462e-9d6a-567f70b01697} <Debug> executeQuery: (from [::1]:54842) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') SELECT count(), max(x) FROM file [1]: https://clickhouse-test-reports.s3.yandex.net/28474/6d5f01a56bb1715c47de8444bfc85b39228f3081/stress_test_(thread).html#fail1
2021-09-03 07:35:11 +00:00
throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of bzip2 archive");
}
2021-08-06 23:55:04 +00:00
return true;
}
}
#endif