2021-10-27 23:10:39 +00:00
|
|
|
#include <Common/config.h>
|
2021-08-06 23:55:04 +00:00
|
|
|
|
|
|
|
#if USE_BZIP2
|
|
|
|
# include <IO/Bzip2ReadBuffer.h>
|
2021-10-27 23:10:39 +00:00
|
|
|
# include <bzlib.h>
|
2021-08-06 23:55:04 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int BZIP2_STREAM_DECODER_FAILED;
|
Fix endless loop for truncated bzip2
Stress test (thread) found an issue [1]:
elapsed: 2094.356407533
query: SELECT count(), max(x) FROM file
thread_ids: [2927,11494,7502]
current_database: test_19
Stacktrace for 7502:
0 __syscall () at ../base/glibc-compatibility/musl/x86_64/syscall.s:14
1 0x000000001ba033b8 in clock_nanosleep (clk=1, flags=1, req=0x7fbb8adaed68, rem=0x0) at ../base/glibc-compatibility/musl/clock_nanosleep.c:21
2 0x0000000018e25b84 in sleepForNanoseconds (nanoseconds=<optimized out>) at ../base/common/sleep.cpp:48
3 0x0000000009a47143 in DB::injection (yield_probability=0, migrate_probability=0, sleep_probability=0.10000000000000001, sleep_time_us=100000) at ../src/Common/ThreadFuzzer.cpp:193
4 DB::ThreadFuzzer::signalHandler () at ../src/Common/ThreadFuzzer.cpp:204
5 0x00000000098e1447 in __tsan::CallUserSignalHandler(__tsan::ThreadState*, bool, bool, bool, int, __sanitizer::__sanitizer_siginfo*, void*) ()
6 0x00000000098d7f99 in __tsan::ProcessPendingSignals(__tsan::ThreadState*) ()
7 0x00000000098e57de in pread ()
8 0x00000000099fab84 in DB::ReadBufferFromFileDescriptor::nextImpl (this=0x7b38010ddb00) at ../src/IO/ReadBufferFromFileDescriptor.cpp:62
9 0x0000000009ab0cf3 in DB::ReadBuffer::next (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:62
10 DB::ReadBuffer::nextIfAtEnd (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:78
11 DB::Bzip2ReadBuffer::nextImpl (this=0x7b20032f4800) at ../src/IO/Bzip2ReadBuffer.cpp:59
It looks like bz2 is able neither to read nor to fail on a partially written archive:
2021.09.03 00:52:36.744818 [ 3553 ] {90419c0a-c410-42ea-ba76-7672ff5ae5ec} <Debug> executeQuery: (from [::1]:54714) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') CREATE TABLE file (x UInt64) ENGINE = File(TSV, 'test_19/bz2.tsv.bz2')
2021.09.03 00:52:41.255409 [ 2927 ] {9c7b4e21-cc37-438d-9c13-f9dc01efd30c} <Debug> executeQuery: (from [::1]:54750) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') TRUNCATE TABLE file
2021.09.03 00:52:43.762791 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Debug> executeQuery: (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') INSERT INTO file SELECT * FROM numbers(1000000)
2021.09.03 00:52:54.939333 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Error> executeQuery: Code: 241. DB::Exception: Memory limit (total) exceeded: would use 63.07 GiB (attempt to allocate chunk of 4194364 bytes), maximum: 62.94 GiB. (MEMORY_LIMIT_EXCEEDED) (version 21.10.1.7982) (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') (in query: INSERT INTO file SELECT * FROM numbers(1000000)), Stack trace (when copying this message, always include the lines below):
2021.09.03 00:53:08.505015 [ 2927 ] {2bc7444e-c0d6-462e-9d6a-567f70b01697} <Debug> executeQuery: (from [::1]:54842) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') SELECT count(), max(x) FROM file
[1]: https://clickhouse-test-reports.s3.yandex.net/28474/6d5f01a56bb1715c47de8444bfc85b39228f3081/stress_test_(thread).html#fail1
2021-09-03 07:35:11 +00:00
|
|
|
extern const int UNEXPECTED_END_OF_FILE;
|
2021-08-06 23:55:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
class Bzip2ReadBuffer::Bzip2StateWrapper
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Bzip2StateWrapper()
|
|
|
|
{
|
|
|
|
memset(&stream, 0, sizeof(stream));
|
|
|
|
|
|
|
|
int ret = BZ2_bzDecompressInit(&stream, 0, 0);
|
|
|
|
|
|
|
|
if (ret != BZ_OK)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
|
|
|
"bzip2 stream encoder init failed: error code: {}",
|
|
|
|
ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
~Bzip2StateWrapper()
|
|
|
|
{
|
|
|
|
BZ2_bzDecompressEnd(&stream);
|
|
|
|
}
|
|
|
|
|
|
|
|
bz_stream stream;
|
|
|
|
};
|
|
|
|
|
|
|
|
Bzip2ReadBuffer::Bzip2ReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char *existing_memory, size_t alignment)
|
|
|
|
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
|
|
|
|
, in(std::move(in_))
|
|
|
|
, bz(std::make_unique<Bzip2StateWrapper>())
|
|
|
|
, eof(false)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Defaulted in this translation unit. Presumably kept out of the header because
/// Bzip2StateWrapper is only defined in this file, and the `bz` member that owns it
/// needs the complete type to be destroyed - confirm against the header.
Bzip2ReadBuffer::~Bzip2ReadBuffer() = default;
|
|
|
|
|
|
|
|
bool Bzip2ReadBuffer::nextImpl()
|
|
|
|
{
|
|
|
|
if (eof)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!bz->stream.avail_in)
|
|
|
|
{
|
|
|
|
in->nextIfAtEnd();
|
|
|
|
bz->stream.avail_in = in->buffer().end() - in->position();
|
|
|
|
bz->stream.next_in = in->position();
|
|
|
|
}
|
|
|
|
|
|
|
|
bz->stream.avail_out = internal_buffer.size();
|
|
|
|
bz->stream.next_out = internal_buffer.begin();
|
|
|
|
|
|
|
|
int ret = BZ2_bzDecompress(&bz->stream);
|
|
|
|
|
|
|
|
in->position() = in->buffer().end() - bz->stream.avail_in;
|
|
|
|
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
|
|
|
|
|
|
|
|
if (ret == BZ_STREAM_END)
|
|
|
|
{
|
|
|
|
if (in->eof())
|
|
|
|
{
|
|
|
|
eof = true;
|
|
|
|
return !working_buffer.empty();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
|
|
|
"bzip2 decoder finished, but input stream has not exceeded: error code: {}", ret);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret != BZ_OK)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
|
|
|
|
"bzip2 stream decoder failed: error code: {}",
|
|
|
|
ret);
|
|
|
|
|
Fix endless loop for truncated bzip2
Stress test (thread) found an issue [1]:
elapsed: 2094.356407533
query: SELECT count(), max(x) FROM file
thread_ids: [2927,11494,7502]
current_database: test_19
Stacktrace for 7502:
0 __syscall () at ../base/glibc-compatibility/musl/x86_64/syscall.s:14
1 0x000000001ba033b8 in clock_nanosleep (clk=1, flags=1, req=0x7fbb8adaed68, rem=0x0) at ../base/glibc-compatibility/musl/clock_nanosleep.c:21
2 0x0000000018e25b84 in sleepForNanoseconds (nanoseconds=<optimized out>) at ../base/common/sleep.cpp:48
3 0x0000000009a47143 in DB::injection (yield_probability=0, migrate_probability=0, sleep_probability=0.10000000000000001, sleep_time_us=100000) at ../src/Common/ThreadFuzzer.cpp:193
4 DB::ThreadFuzzer::signalHandler () at ../src/Common/ThreadFuzzer.cpp:204
5 0x00000000098e1447 in __tsan::CallUserSignalHandler(__tsan::ThreadState*, bool, bool, bool, int, __sanitizer::__sanitizer_siginfo*, void*) ()
6 0x00000000098d7f99 in __tsan::ProcessPendingSignals(__tsan::ThreadState*) ()
7 0x00000000098e57de in pread ()
8 0x00000000099fab84 in DB::ReadBufferFromFileDescriptor::nextImpl (this=0x7b38010ddb00) at ../src/IO/ReadBufferFromFileDescriptor.cpp:62
9 0x0000000009ab0cf3 in DB::ReadBuffer::next (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:62
10 DB::ReadBuffer::nextIfAtEnd (this=0x7b38010ddb00) at ../src/IO/ReadBuffer.h:78
11 DB::Bzip2ReadBuffer::nextImpl (this=0x7b20032f4800) at ../src/IO/Bzip2ReadBuffer.cpp:59
It looks like bz2 is not capable to read/fail partially written archive:
2021.09.03 00:52:36.744818 [ 3553 ] {90419c0a-c410-42ea-ba76-7672ff5ae5ec} <Debug> executeQuery: (from [::1]:54714) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') CREATE TABLE file (x UInt64) ENGINE = File(TSV, 'test_19/bz2.tsv.bz2')
2021.09.03 00:52:41.255409 [ 2927 ] {9c7b4e21-cc37-438d-9c13-f9dc01efd30c} <Debug> executeQuery: (from [::1]:54750) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') TRUNCATE TABLE file
2021.09.03 00:52:43.762791 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Debug> executeQuery: (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') INSERT INTO file SELECT * FROM numbers(1000000)
2021.09.03 00:52:54.939333 [ 2927 ] {82857e3d-f7cc-453d-82a4-e8b8fb82ae0e} <Error> executeQuery: Code: 241. DB::Exception: Memory limit (total) exceeded: would use 63.07 GiB (attempt to allocate chunk of 4194364 bytes), maximum: 62.94 GiB. (MEMORY_LIMIT_EXCEEDED) (version 21.10.1.7982) (from [::1]:54758) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') (in query: INSERT INTO file SELECT * FROM numbers(1000000)), Stack trace (when copying this message, always include the lines below):
2021.09.03 00:53:08.505015 [ 2927 ] {2bc7444e-c0d6-462e-9d6a-567f70b01697} <Debug> executeQuery: (from [::1]:54842) (comment: '/usr/share/clickhouse-test/queries/0_stateless/01059_storage_file_compression.sh') SELECT count(), max(x) FROM file
[1]: https://clickhouse-test-reports.s3.yandex.net/28474/6d5f01a56bb1715c47de8444bfc85b39228f3081/stress_test_(thread).html#fail1
2021-09-03 07:35:11 +00:00
|
|
|
if (in->eof())
|
|
|
|
{
|
|
|
|
eof = true;
|
|
|
|
throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of bzip2 archive");
|
|
|
|
}
|
|
|
|
|
2021-08-06 23:55:04 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|