2020-11-17 16:02:10 +00:00
|
|
|
#include <IO/ZstdInflatingReadBuffer.h>
|
2023-04-27 02:36:50 +00:00
|
|
|
#include <IO/WithFileName.h>
|
2023-04-07 22:54:05 +00:00
|
|
|
#include <zstd_errors.h>
|
2020-11-17 16:02:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Error codes used in this file. Only declared here (`extern`); the definition lives elsewhere.
namespace ErrorCodes
{
    extern const int ZSTD_DECODER_FAILED;
}
|
|
|
|
|
2022-06-18 12:55:35 +00:00
|
|
|
/// Creates a ZSTD decompression context for reading compressed data from `in_`.
///
/// `in_`, `buf_size`, `existing_memory` and `alignment` are forwarded unchanged to CompressedReadBufferWrapper.
/// `zstd_window_log_max` is applied to the context as the ZSTD_d_windowLogMax parameter.
///
/// Throws Exception with ErrorCodes::ZSTD_DECODER_FAILED if the context cannot be created
/// or if ZSTD rejects the window log parameter.
ZstdInflatingReadBuffer::ZstdInflatingReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char * existing_memory, size_t alignment, int zstd_window_log_max)
    : CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment)
{
    dctx = ZSTD_createDCtx();
    input = {nullptr, 0, 0};
    output = {nullptr, 0, 0};

    if (dctx == nullptr)
    {
        throw Exception(ErrorCodes::ZSTD_DECODER_FAILED, "zstd_stream_decoder init failed: zstd version: {}", ZSTD_VERSION_STRING);
    }

    size_t ret = ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, zstd_window_log_max);
    if (ZSTD_isError(ret))
    {
        /// The destructor is not invoked for an object whose constructor throws,
        /// so the already created context has to be released here to avoid a leak.
        ZSTD_freeDCtx(dctx);
        dctx = nullptr;
        throw Exception(ErrorCodes::ZSTD_DECODER_FAILED, "zstd_stream_decoder init failed: {}", ZSTD_getErrorName(ret));
    }
}
|
|
|
|
|
|
|
|
/// Releases the ZSTD decompression context held in `dctx`.
ZstdInflatingReadBuffer::~ZstdInflatingReadBuffer()
{
    /// The status returned by ZSTD_freeDCtx is intentionally discarded.
    static_cast<void>(ZSTD_freeDCtx(dctx));
}
|
|
|
|
|
|
|
|
bool ZstdInflatingReadBuffer::nextImpl()
|
|
|
|
{
|
2021-09-03 10:56:18 +00:00
|
|
|
do
|
2020-11-17 16:02:10 +00:00
|
|
|
{
|
2021-09-03 10:56:18 +00:00
|
|
|
// If it is known that end of file was reached, return false
|
2021-12-30 04:47:34 +00:00
|
|
|
if (eof_flag)
|
2021-09-03 10:56:18 +00:00
|
|
|
return false;
|
2020-11-17 16:02:10 +00:00
|
|
|
|
2021-09-03 10:56:18 +00:00
|
|
|
/// If end was reached, get next part
|
|
|
|
if (input.pos >= input.size)
|
|
|
|
{
|
|
|
|
in->nextIfAtEnd();
|
|
|
|
input.src = reinterpret_cast<unsigned char *>(in->position());
|
|
|
|
input.pos = 0;
|
|
|
|
input.size = in->buffer().end() - in->position();
|
|
|
|
}
|
2020-11-17 16:02:10 +00:00
|
|
|
|
2021-09-03 10:56:18 +00:00
|
|
|
/// fill output
|
|
|
|
output.dst = reinterpret_cast<unsigned char *>(internal_buffer.begin());
|
|
|
|
output.size = internal_buffer.size();
|
|
|
|
output.pos = 0;
|
2020-11-17 16:02:10 +00:00
|
|
|
|
2021-09-03 10:56:18 +00:00
|
|
|
/// Decompress data and check errors.
|
|
|
|
size_t ret = ZSTD_decompressStream(dctx, &output, &input);
|
2023-04-07 22:54:05 +00:00
|
|
|
if (ZSTD_getErrorCode(ret))
|
|
|
|
{
|
2021-09-03 10:56:18 +00:00
|
|
|
throw Exception(
|
2023-04-07 22:54:05 +00:00
|
|
|
ErrorCodes::ZSTD_DECODER_FAILED,
|
2023-04-27 02:36:50 +00:00
|
|
|
"ZSTD stream decoding failed: error '{}'{}; ZSTD version: {}{}",
|
2023-04-07 22:54:05 +00:00
|
|
|
ZSTD_getErrorName(ret),
|
|
|
|
ZSTD_error_frameParameter_windowTooLarge == ret
|
|
|
|
? ". You can increase the maximum window size with the 'zstd_window_log_max' setting in ClickHouse. Example: 'SET zstd_window_log_max = 31'"
|
|
|
|
: "",
|
2023-04-27 02:36:50 +00:00
|
|
|
ZSTD_VERSION_STRING,
|
|
|
|
getExceptionEntryWithFileName(*in));
|
2023-04-07 22:54:05 +00:00
|
|
|
}
|
2020-11-17 16:02:10 +00:00
|
|
|
|
2021-09-03 11:18:19 +00:00
|
|
|
/// Check that something has changed after decompress (input or output position)
|
2021-09-21 14:29:05 +00:00
|
|
|
assert(in->eof() || output.pos > 0 || in->position() < in->buffer().begin() + input.pos);
|
2021-09-03 11:18:19 +00:00
|
|
|
|
2021-09-03 10:56:18 +00:00
|
|
|
/// move position to the end of read data
|
|
|
|
in->position() = in->buffer().begin() + input.pos;
|
|
|
|
working_buffer.resize(output.pos);
|
|
|
|
|
|
|
|
/// If end of file is reached, fill eof variable and return true if there is some data in buffer, otherwise return false
|
|
|
|
if (in->eof())
|
|
|
|
{
|
2021-12-30 04:47:34 +00:00
|
|
|
eof_flag = true;
|
2021-09-03 10:56:18 +00:00
|
|
|
return !working_buffer.empty();
|
|
|
|
}
|
2021-07-15 14:58:58 +00:00
|
|
|
/// It is possible, that input buffer is not at eof yet, but nothing was decompressed in current iteration.
|
|
|
|
/// But there are cases, when such behaviour is not allowed - i.e. if input buffer is not eof, then
|
|
|
|
/// it has to be guaranteed that working_buffer is not empty. So if it is empty, continue.
|
2021-09-03 10:56:18 +00:00
|
|
|
} while (output.pos == 0);
|
2020-11-17 16:02:10 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-11-17 18:36:18 +00:00
|
|
|
}
|