ClickHouse/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

203 lines
6.4 KiB
C++
Raw Normal View History

2021-09-21 14:29:05 +00:00
#include <IO/ZstdDeflatingAppendableWriteBuffer.h>
#include <Common/Exception.h>
#include <IO/ReadBufferFromFile.h>
2021-09-21 14:29:05 +00:00
namespace DB
{
/// Error codes referenced in this translation unit.
/// Only declared here (`extern`); the definition lives elsewhere.
namespace ErrorCodes
{
/// Passed to every Exception thrown by the zstd calls in this file.
extern const int ZSTD_ENCODER_FAILED;
}
/// Creates the buffer and its zstd compression context.
///
/// @param out_               Destination buffer that receives the compressed stream; ownership is taken.
/// @param compression_level  Value passed to zstd as ZSTD_c_compressionLevel.
/// @param buf_size           Forwarded to BufferWithOwnMemory.
/// @param existing_memory    Forwarded to BufferWithOwnMemory.
/// @param alignment          Forwarded to BufferWithOwnMemory.
///
/// Throws Exception(ZSTD_ENCODER_FAILED) if the context cannot be created or configured.
ZstdDeflatingAppendableWriteBuffer::ZstdDeflatingAppendableWriteBuffer(
    std::unique_ptr<WriteBuffer> out_,
    int compression_level,
    size_t buf_size,
    char * existing_memory,
    size_t alignment)
    : BufferWithOwnMemory(buf_size, existing_memory, alignment)
    , out(std::move(out_))
{
    cctx = ZSTD_createCCtx();
    if (cctx == nullptr)
        throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING);

    size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level);
    if (ZSTD_isError(ret))
    {
        /// The destructor is not invoked when a constructor throws,
        /// so the context must be released here or it would leak.
        ZSTD_freeCCtx(cctx);
        cctx = nullptr;

        /// Report the human-readable error name, like the other zstd error paths in this file.
        throw Exception(
            ErrorCodes::ZSTD_ENCODER_FAILED,
            "zstd stream encoder option setting failed: error code: {}; zstd version: {}",
            ZSTD_getErrorName(ret),
            ZSTD_VERSION_STRING);
    }

    input = {nullptr, 0, 0};
    output = {nullptr, 0, 0};
}
void ZstdDeflatingAppendableWriteBuffer::nextImpl()
{
if (!offset())
return;
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
2022-12-13 11:08:59 +00:00
//if (first_write && append_to_existing_file && isNeedToAddEmptyBlock())
//{
// addEmptyBlock();
// first_write = false;
//}
2021-09-21 14:29:05 +00:00
try
{
bool ended = false;
do
{
2021-11-10 22:58:56 +00:00
out->nextIfAtEnd();
2021-09-21 14:29:05 +00:00
2021-11-10 22:58:56 +00:00
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
2021-09-21 14:29:05 +00:00
size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_flush);
2021-09-21 14:29:05 +00:00
if (ZSTD_isError(compression_result))
throw Exception(
ErrorCodes::ZSTD_ENCODER_FAILED, "Zstd stream encoding failed: error code: {}; zstd version: {}", ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING);
first_write = false;
2021-11-10 22:58:56 +00:00
out->position() = out->buffer().begin() + output.pos;
2021-09-21 14:29:05 +00:00
bool everything_was_compressed = (input.pos == input.size);
bool everything_was_flushed = compression_result == 0;
ended = everything_was_compressed && everything_was_flushed;
} while (!ended);
}
catch (...)
{
/// Do not try to write next time after exception.
2021-11-10 22:58:56 +00:00
out->position() = out->buffer().begin();
2021-09-21 14:29:05 +00:00
throw;
}
2021-09-21 14:29:05 +00:00
}
2021-11-10 22:58:56 +00:00
/// Finalizes the stream on destruction.
///
/// finalize() may throw (finalizeImpl() rethrows compression and write errors).
/// An exception escaping a destructor terminates the program, so it is logged
/// and suppressed here. Callers that need to observe finalization errors should
/// call finalize() explicitly before the object is destroyed.
ZstdDeflatingAppendableWriteBuffer::~ZstdDeflatingAppendableWriteBuffer()
{
    try
    {
        finalize();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
2021-11-10 22:58:56 +00:00
void ZstdDeflatingAppendableWriteBuffer::finalizeImpl()
2021-09-21 14:29:05 +00:00
{
2021-11-10 22:58:56 +00:00
if (first_write)
2021-09-21 14:29:05 +00:00
{
/// To free cctx
finalizeZstd();
2021-11-10 22:58:56 +00:00
/// Nothing was written
2021-09-21 14:29:05 +00:00
}
else
{
try
{
finalizeBefore();
out->finalize();
finalizeAfter();
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
throw;
}
}
2021-09-21 14:29:05 +00:00
}
2021-11-22 11:19:26 +00:00
void ZstdDeflatingAppendableWriteBuffer::finalizeBefore()
2021-09-21 14:29:05 +00:00
{
next();
2021-11-10 22:58:56 +00:00
out->nextIfAtEnd();
2021-09-21 14:29:05 +00:00
input.src = reinterpret_cast<unsigned char *>(working_buffer.begin());
input.size = offset();
input.pos = 0;
2021-11-10 22:58:56 +00:00
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size();
output.pos = out->offset();
2021-09-21 14:29:05 +00:00
2022-05-04 16:24:28 +00:00
/// Actually we can use ZSTD_e_flush here and add empty termination
/// block on each new buffer creation for non-empty file unconditionally (without isNeedToAddEmptyBlock).
/// However ZSTD_decompressStream is able to read non-terminated frame (we use it in reader buffer),
/// but console zstd utility cannot.
2021-09-21 14:29:05 +00:00
size_t remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
while (remaining != 0)
{
if (ZSTD_isError(remaining))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "Zstd stream encoder end failed: error: '{}' zstd version: {}", ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING);
2021-09-21 14:29:05 +00:00
remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
}
2021-11-10 22:58:56 +00:00
out->position() = out->buffer().begin() + output.pos;
}
2021-11-22 11:19:26 +00:00
/// Last stage of finalization: finalizeImpl() calls it after `out->finalize()`.
/// It only releases the zstd compression context via finalizeZstd().
void ZstdDeflatingAppendableWriteBuffer::finalizeAfter()
{
finalizeZstd();
}
void ZstdDeflatingAppendableWriteBuffer::finalizeZstd()
2021-11-10 22:58:56 +00:00
{
try
{
size_t err = ZSTD_freeCCtx(cctx);
2021-11-10 22:58:56 +00:00
/// This is just in case, since it is impossible to get an error by using this wrapper.
if (unlikely(err))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(err), ZSTD_VERSION_STRING);
}
catch (...)
{
/// It is OK not to terminate under an error from ZSTD_freeCCtx()
/// since all data already written to the stream.
tryLogCurrentException(__PRETTY_FUNCTION__);
}
2021-09-21 14:29:05 +00:00
}
2022-12-15 13:09:52 +00:00
/// Appends the bytes of ZSTD_CORRECT_TERMINATION_LAST_BLOCK to `write_buffer`,
/// flushing the buffer first when the free space left in it is too small.
void ZstdDeflatingAppendableWriteBuffer::addEmptyBlock(WriteBuffer & write_buffer)
{
    /// HACK: https://github.com/facebook/zstd/issues/2090#issuecomment-620158967

    const auto & terminator = ZSTD_CORRECT_TERMINATION_LAST_BLOCK;

    const auto free_space = write_buffer.buffer().size() - write_buffer.offset();
    if (free_space < terminator.size())
        write_buffer.next();

    /// Computed only after the possible flush above, which moves the write position.
    auto * destination = write_buffer.buffer().begin() + write_buffer.offset();
    std::memcpy(destination, terminator.data(), terminator.size());

    write_buffer.position() = destination + terminator.size();
}
2022-12-15 13:09:52 +00:00
bool ZstdDeflatingAppendableWriteBuffer::isNeedToAddEmptyBlock(const std::string & file_name)
{
2022-12-15 13:09:52 +00:00
ReadBufferFromFile reader(file_name);
auto fsize = reader.getFileSize();
if (fsize > 3)
{
std::array<char, 3> result;
reader.seek(fsize - 3, SEEK_SET);
reader.readStrict(result.data(), 3);
/// If we don't have correct block in the end, then we need to add it manually.
/// NOTE: maybe we can have the same bytes in case of data corruption/unfinished write.
/// But in this case file still corrupted and we have to remove it.
return result != ZSTD_CORRECT_TERMINATION_LAST_BLOCK;
}
else if (fsize > 0)
{
throw Exception(
ErrorCodes::ZSTD_ENCODER_FAILED,
"Trying to write to non-empty file '{}' with tiny size {}. It can lead to data corruption",
file_name, fsize);
}
return false;
2021-09-21 14:29:05 +00:00
}
}