2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ZlibInflatingReadBuffer.h>
|
2017-01-07 16:11:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2020-02-25 18:10:48 +00:00
|
|
|
namespace ErrorCodes
{
    /// Error codes raised by this file. They are only declared here (`extern`);
    /// the definitions live in another translation unit.
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ZLIB_INFLATE_FAILED;
}
|
2017-01-07 16:11:30 +00:00
|
|
|
|
|
|
|
/// Takes ownership of the compressed source `in_` and initializes a zlib inflate stream for it.
///
/// Parameters:
///  * in_                - buffer with compressed bytes; moved into CompressedReadBufferWrapper.
///  * compression_method - CompressionMethod::Gzip asks zlib to expect a gzip wrapper;
///                         any other value leaves the window size at plain 15.
///  * buf_size, existing_memory, alignment - forwarded unchanged to CompressedReadBufferWrapper.
///
/// Throws:
///  * ARGUMENT_OUT_OF_BOUND if buf_size is greater than max_buffer_size;
///  * ZLIB_INFLATE_FAILED if inflateInit2 returns anything but Z_OK.
ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
    std::unique_ptr<ReadBuffer> in_,
    CompressionMethod compression_method,
    size_t buf_size,
    char * existing_memory,
    size_t alignment)
    : CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment)
    , eof_flag(false)
{
    /// Reject oversized buffers up front, before touching the zlib stream.
    if (buf_size > max_buffer_size)
        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
                        "Zlib does not support decompression with buffer size greater than {}, got buffer size: {}",
                        max_buffer_size, buf_size);

    /// Null allocator hooks: per the zlib manual this selects zlib's default allocation routines.
    zstr.zalloc = nullptr;
    zstr.zfree = nullptr;
    zstr.opaque = nullptr;

    /// No input or output is attached yet; nextImpl() sets these pointers before calling inflate().
    zstr.next_in = nullptr;
    zstr.avail_in = 0;
    zstr.next_out = nullptr;
    zstr.avail_out = 0;

    /// windowBits for inflateInit2: 15 is the base window size, adding 16 requests gzip decoding (see the zlib manual).
    constexpr int base_window_bits = 15;
    constexpr int gzip_window_bits_addend = 16;
    const int window_bits = (compression_method == CompressionMethod::Gzip)
        ? base_window_bits + gzip_window_bits_addend
        : base_window_bits;

    const int init_status = inflateInit2(&zstr, window_bits);
    if (init_status != Z_OK)
        throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateInit2 failed: {}; zlib version: {}.", zError(init_status), ZLIB_VERSION);
}
|
|
|
|
|
|
|
|
/// Tears down the zlib stream that the constructor set up with inflateInit2.
ZlibInflatingReadBuffer::~ZlibInflatingReadBuffer()
{
    /// The status code is deliberately discarded: nothing here acts on it.
    static_cast<void>(inflateEnd(&zstr));
}
|
|
|
|
|
|
|
|
bool ZlibInflatingReadBuffer::nextImpl()
|
|
|
|
{
|
2021-08-26 09:15:10 +00:00
|
|
|
/// Need do-while loop to prevent situation, when
|
2021-08-26 08:33:10 +00:00
|
|
|
/// eof was not reached, but working buffer became empty (when nothing was decompressed in current iteration)
|
2021-08-25 18:23:17 +00:00
|
|
|
/// (this happens with compression algorithms, same idea is implemented in ZstdInflatingReadBuffer)
|
2021-08-25 16:56:19 +00:00
|
|
|
do
|
2017-01-07 16:11:30 +00:00
|
|
|
{
|
2021-08-25 18:23:17 +00:00
|
|
|
/// if we already found eof, we shouldn't do anything
|
2021-12-30 04:47:34 +00:00
|
|
|
if (eof_flag)
|
2021-08-25 16:56:19 +00:00
|
|
|
return false;
|
2017-01-07 16:11:30 +00:00
|
|
|
|
2021-08-25 18:23:17 +00:00
|
|
|
/// if there is no available bytes in zstr, move ptr to next available data
|
2021-08-25 16:56:19 +00:00
|
|
|
if (!zstr.avail_in)
|
2017-01-07 16:11:30 +00:00
|
|
|
{
|
2021-08-25 16:56:19 +00:00
|
|
|
in->nextIfAtEnd();
|
|
|
|
zstr.next_in = reinterpret_cast<unsigned char *>(in->position());
|
2023-03-20 20:08:56 +00:00
|
|
|
zstr.avail_in = static_cast<BufferSizeType>(std::min(in->buffer().end() - in->position(), static_cast<Int64>(max_buffer_size)));
|
2017-01-07 16:11:30 +00:00
|
|
|
}
|
2023-03-20 20:08:56 +00:00
|
|
|
|
2021-08-25 18:23:17 +00:00
|
|
|
/// init output bytes (place, where decompressed data will be)
|
2021-08-25 16:56:19 +00:00
|
|
|
zstr.next_out = reinterpret_cast<unsigned char *>(internal_buffer.begin());
|
2023-03-20 20:08:56 +00:00
|
|
|
zstr.avail_out = static_cast<BufferSizeType>(internal_buffer.size());
|
2021-08-25 16:56:19 +00:00
|
|
|
|
2023-03-20 20:08:56 +00:00
|
|
|
size_t old_total_in = zstr.total_in;
|
2021-08-25 16:56:19 +00:00
|
|
|
int rc = inflate(&zstr, Z_NO_FLUSH);
|
|
|
|
|
2021-08-25 18:23:17 +00:00
|
|
|
/// move in stream on place, where reading stopped
|
2023-03-20 20:08:56 +00:00
|
|
|
size_t bytes_read = zstr.total_in - old_total_in;
|
|
|
|
in->position() += bytes_read;
|
|
|
|
|
2021-08-25 18:23:17 +00:00
|
|
|
/// change size of working buffer (it's size equal to internal_buffer size without unused uncompressed values)
|
2021-08-25 16:56:19 +00:00
|
|
|
working_buffer.resize(internal_buffer.size() - zstr.avail_out);
|
|
|
|
|
2021-08-26 08:33:24 +00:00
|
|
|
/// If end was reached, it can be end of file or end of part (for example, chunk)
|
2021-08-25 16:56:19 +00:00
|
|
|
if (rc == Z_STREAM_END)
|
2017-01-07 16:11:30 +00:00
|
|
|
{
|
2021-08-26 09:15:10 +00:00
|
|
|
/// if it is end of file, remember this and return
|
2021-08-25 18:23:17 +00:00
|
|
|
/// * true if we can work with working buffer (we still have something to read, so next must return true)
|
|
|
|
/// * false if there is no data in working buffer
|
2021-08-25 16:56:19 +00:00
|
|
|
if (in->eof())
|
|
|
|
{
|
2021-12-30 04:47:34 +00:00
|
|
|
eof_flag = true;
|
2021-08-25 16:56:19 +00:00
|
|
|
return !working_buffer.empty();
|
|
|
|
}
|
2021-08-25 18:23:17 +00:00
|
|
|
/// If it is not end of file, we need to reset zstr and return true, because we still have some data to read
|
2021-08-25 16:56:19 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
rc = inflateReset(&zstr);
|
|
|
|
if (rc != Z_OK)
|
2021-08-26 12:43:01 +00:00
|
|
|
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc));
|
2021-08-25 16:56:19 +00:00
|
|
|
return true;
|
|
|
|
}
|
2017-01-07 16:11:30 +00:00
|
|
|
}
|
2023-03-20 20:08:56 +00:00
|
|
|
|
2021-08-25 18:23:17 +00:00
|
|
|
/// If it is not end and not OK, something went wrong, throw exception
|
2021-08-25 16:56:19 +00:00
|
|
|
if (rc != Z_OK)
|
2023-03-20 20:08:56 +00:00
|
|
|
throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflate failed: {}", zError(rc));
|
2017-01-07 16:11:30 +00:00
|
|
|
}
|
2021-08-25 16:56:19 +00:00
|
|
|
while (working_buffer.empty());
|
2017-01-07 16:11:30 +00:00
|
|
|
|
2021-08-25 18:23:17 +00:00
|
|
|
/// if code reach this section, working buffer is not empty, so we have some data to process
|
2017-01-07 16:11:30 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|