2012-01-05 10:22:02 +00:00
|
|
|
#pragma once
|
2010-06-04 18:25:25 +00:00
|
|
|
|
2012-01-05 18:35:22 +00:00
|
|
|
#include <math.h>
|
|
|
|
|
2011-06-24 20:18:09 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include <city.h>
|
2016-02-03 21:16:19 +00:00
|
|
|
|
|
|
|
#ifdef USE_QUICKLZ
|
|
|
|
#include <quicklz/quicklz_level1.h>
|
|
|
|
#endif
|
|
|
|
|
2012-01-05 18:35:22 +00:00
|
|
|
#include <lz4/lz4.h>
|
2014-03-02 05:12:51 +00:00
|
|
|
#include <lz4/lz4hc.h>
|
2015-03-09 01:15:43 +00:00
|
|
|
#include <zstd/zstd.h>
|
2012-01-05 18:35:22 +00:00
|
|
|
|
2013-12-09 00:23:17 +00:00
|
|
|
#include <DB/Common/PODArray.h>
|
2012-01-05 18:35:22 +00:00
|
|
|
#include <DB/Core/Types.h>
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
|
|
#include <DB/IO/WriteBuffer.h>
|
2011-06-27 18:22:14 +00:00
|
|
|
#include <DB/IO/BufferWithOwnMemory.h>
|
2011-06-17 21:19:39 +00:00
|
|
|
#include <DB/IO/CompressedStream.h>
|
2010-06-04 18:25:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int CANNOT_COMPRESS;
|
|
|
|
extern const int UNKNOWN_COMPRESSION_METHOD;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-06-27 18:22:14 +00:00
|
|
|
class CompressedWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
|
2010-06-04 18:25:25 +00:00
|
|
|
{
|
|
|
|
private:
|
|
|
|
WriteBuffer & out;
|
2015-03-09 01:15:43 +00:00
|
|
|
CompressionMethod method;
|
2010-06-04 18:25:25 +00:00
|
|
|
|
2013-12-09 00:23:17 +00:00
|
|
|
PODArray<char> compressed_buffer;
|
2016-03-07 04:31:10 +00:00
|
|
|
|
2016-02-03 21:16:19 +00:00
|
|
|
#ifdef USE_QUICKLZ
|
2012-02-23 23:34:05 +00:00
|
|
|
qlz_state_compress * qlz_state;
|
2016-02-21 21:57:46 +00:00
|
|
|
#else
|
2016-02-24 07:53:04 +00:00
|
|
|
void * fixed_size_padding = nullptr;
|
2016-03-07 04:31:10 +00:00
|
|
|
/// Отменяет warning unused-private-field.
|
|
|
|
void * fixed_size_padding_used() const { return fixed_size_padding; }
|
2016-02-03 21:16:19 +00:00
|
|
|
#endif
|
2011-06-17 21:19:39 +00:00
|
|
|
|
2011-06-24 21:08:26 +00:00
|
|
|
void nextImpl()
|
2010-06-04 18:25:25 +00:00
|
|
|
{
|
2011-06-27 19:34:03 +00:00
|
|
|
if (!offset())
|
|
|
|
return;
|
|
|
|
|
2011-06-27 18:22:14 +00:00
|
|
|
size_t uncompressed_size = offset();
|
2012-01-05 18:35:22 +00:00
|
|
|
size_t compressed_size = 0;
|
2014-04-08 07:31:51 +00:00
|
|
|
char * compressed_buffer_ptr = nullptr;
|
2012-01-05 18:35:22 +00:00
|
|
|
|
2015-03-09 01:15:43 +00:00
|
|
|
/** Формат сжатого блока - см. CompressedStream.h
|
2012-01-05 18:35:22 +00:00
|
|
|
*/
|
2011-06-26 21:30:59 +00:00
|
|
|
|
2012-01-05 18:35:22 +00:00
|
|
|
switch (method)
|
|
|
|
{
|
|
|
|
case CompressionMethod::QuickLZ:
|
|
|
|
{
|
2016-02-03 21:16:19 +00:00
|
|
|
#ifdef USE_QUICKLZ
|
2012-01-05 18:35:22 +00:00
|
|
|
compressed_buffer.resize(uncompressed_size + QUICKLZ_ADDITIONAL_SPACE);
|
|
|
|
|
|
|
|
compressed_size = qlz_compress(
|
|
|
|
working_buffer.begin(),
|
|
|
|
&compressed_buffer[0],
|
|
|
|
uncompressed_size,
|
2012-02-23 23:34:05 +00:00
|
|
|
qlz_state);
|
2012-01-05 18:35:22 +00:00
|
|
|
|
2014-10-05 21:59:23 +00:00
|
|
|
compressed_buffer[0] &= 3;
|
2012-01-05 18:35:22 +00:00
|
|
|
compressed_buffer_ptr = &compressed_buffer[0];
|
|
|
|
break;
|
2016-02-03 21:16:19 +00:00
|
|
|
#else
|
|
|
|
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
|
|
|
#endif
|
2012-01-05 18:35:22 +00:00
|
|
|
}
|
|
|
|
case CompressionMethod::LZ4:
|
2014-03-02 05:12:51 +00:00
|
|
|
case CompressionMethod::LZ4HC:
|
2012-01-05 18:35:22 +00:00
|
|
|
{
|
2014-10-05 21:59:23 +00:00
|
|
|
static constexpr size_t header_size = 1 + sizeof(UInt32) + sizeof(UInt32);
|
|
|
|
|
|
|
|
compressed_buffer.resize(header_size + LZ4_COMPRESSBOUND(uncompressed_size));
|
2012-01-05 18:35:22 +00:00
|
|
|
|
2015-03-09 01:15:43 +00:00
|
|
|
compressed_buffer[0] = static_cast<UInt8>(CompressionMethodByte::LZ4);
|
2012-01-05 18:35:22 +00:00
|
|
|
|
2014-03-02 05:12:51 +00:00
|
|
|
if (method == CompressionMethod::LZ4)
|
2014-10-05 21:59:23 +00:00
|
|
|
compressed_size = header_size + LZ4_compress(
|
2014-03-02 05:12:51 +00:00
|
|
|
working_buffer.begin(),
|
2014-10-05 21:59:23 +00:00
|
|
|
&compressed_buffer[header_size],
|
2014-03-02 05:12:51 +00:00
|
|
|
uncompressed_size);
|
|
|
|
else
|
2014-10-05 21:59:23 +00:00
|
|
|
compressed_size = header_size + LZ4_compressHC(
|
2014-03-02 05:12:51 +00:00
|
|
|
working_buffer.begin(),
|
2014-10-05 21:59:23 +00:00
|
|
|
&compressed_buffer[header_size],
|
2014-03-02 05:12:51 +00:00
|
|
|
uncompressed_size);
|
2012-01-05 18:35:22 +00:00
|
|
|
|
|
|
|
UInt32 compressed_size_32 = compressed_size;
|
|
|
|
UInt32 uncompressed_size_32 = uncompressed_size;
|
|
|
|
|
|
|
|
memcpy(&compressed_buffer[1], reinterpret_cast<const char *>(&compressed_size_32), sizeof(compressed_size_32));
|
|
|
|
memcpy(&compressed_buffer[5], reinterpret_cast<const char *>(&uncompressed_size_32), sizeof(uncompressed_size_32));
|
|
|
|
|
|
|
|
compressed_buffer_ptr = &compressed_buffer[0];
|
|
|
|
break;
|
|
|
|
}
|
2015-03-09 01:15:43 +00:00
|
|
|
case CompressionMethod::ZSTD:
|
|
|
|
{
|
|
|
|
static constexpr size_t header_size = 1 + sizeof(UInt32) + sizeof(UInt32);
|
|
|
|
|
|
|
|
compressed_buffer.resize(header_size + ZSTD_compressBound(uncompressed_size));
|
|
|
|
|
|
|
|
compressed_buffer[0] = static_cast<UInt8>(CompressionMethodByte::ZSTD);
|
|
|
|
|
|
|
|
size_t res = ZSTD_compress(
|
|
|
|
&compressed_buffer[header_size],
|
|
|
|
compressed_buffer.size(),
|
|
|
|
working_buffer.begin(),
|
2016-03-07 09:52:53 +00:00
|
|
|
uncompressed_size,
|
|
|
|
1);
|
2015-03-09 01:15:43 +00:00
|
|
|
|
|
|
|
if (ZSTD_isError(res))
|
|
|
|
throw Exception("Cannot compress block with ZSTD: " + std::string(ZSTD_getErrorName(res)), ErrorCodes::CANNOT_COMPRESS);
|
|
|
|
|
|
|
|
compressed_size = header_size + res;
|
|
|
|
|
|
|
|
UInt32 compressed_size_32 = compressed_size;
|
|
|
|
UInt32 uncompressed_size_32 = uncompressed_size;
|
|
|
|
|
|
|
|
memcpy(&compressed_buffer[1], reinterpret_cast<const char *>(&compressed_size_32), sizeof(compressed_size_32));
|
|
|
|
memcpy(&compressed_buffer[5], reinterpret_cast<const char *>(&uncompressed_size_32), sizeof(uncompressed_size_32));
|
|
|
|
|
|
|
|
compressed_buffer_ptr = &compressed_buffer[0];
|
|
|
|
break;
|
|
|
|
}
|
2012-01-05 18:35:22 +00:00
|
|
|
default:
|
|
|
|
throw Exception("Unknown compression method", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
|
|
|
}
|
2010-06-04 18:25:25 +00:00
|
|
|
|
2012-01-05 18:35:22 +00:00
|
|
|
uint128 checksum = CityHash128(compressed_buffer_ptr, compressed_size);
|
2011-06-24 20:18:09 +00:00
|
|
|
out.write(reinterpret_cast<const char *>(&checksum), sizeof(checksum));
|
|
|
|
|
2012-01-05 18:35:22 +00:00
|
|
|
out.write(compressed_buffer_ptr, compressed_size);
|
2011-05-05 19:10:17 +00:00
|
|
|
}
|
|
|
|
|
2011-06-26 21:30:59 +00:00
|
|
|
public:
|
2012-01-31 19:52:51 +00:00
|
|
|
CompressedWriteBuffer(
|
|
|
|
WriteBuffer & out_,
|
2015-03-09 01:15:43 +00:00
|
|
|
CompressionMethod method_ = CompressionMethod::LZ4,
|
2012-01-31 19:52:51 +00:00
|
|
|
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE)
|
2016-02-03 21:16:19 +00:00
|
|
|
: BufferWithOwnMemory<WriteBuffer>(buf_size), out(out_), method(method_)
|
|
|
|
#ifdef USE_QUICKLZ
|
|
|
|
, qlz_state(new qlz_state_compress)
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
}
|
2011-06-26 21:30:59 +00:00
|
|
|
|
2011-06-24 21:08:26 +00:00
|
|
|
/// Объём сжатых данных
|
2011-05-05 19:10:17 +00:00
|
|
|
size_t getCompressedBytes()
|
|
|
|
{
|
|
|
|
nextIfAtEnd();
|
2011-06-30 15:21:35 +00:00
|
|
|
return out.count();
|
2011-05-05 19:10:17 +00:00
|
|
|
}
|
|
|
|
|
2011-05-11 17:27:08 +00:00
|
|
|
/// Сколько несжатых байт было записано в буфер
|
2011-05-05 19:10:17 +00:00
|
|
|
size_t getUncompressedBytes()
|
2011-05-11 17:27:08 +00:00
|
|
|
{
|
|
|
|
return count();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Сколько байт находится в буфере (ещё не сжато)
|
|
|
|
size_t getRemainingBytes()
|
2011-05-05 19:10:17 +00:00
|
|
|
{
|
|
|
|
nextIfAtEnd();
|
2011-06-30 15:21:35 +00:00
|
|
|
return offset();
|
2010-06-04 18:25:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
~CompressedWriteBuffer()
|
|
|
|
{
|
2013-11-18 17:17:45 +00:00
|
|
|
try
|
|
|
|
{
|
2011-12-12 01:06:13 +00:00
|
|
|
next();
|
2013-11-18 17:17:45 +00:00
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
2013-11-18 19:18:03 +00:00
|
|
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
2013-11-18 17:17:45 +00:00
|
|
|
}
|
2012-02-23 23:34:05 +00:00
|
|
|
|
2016-02-03 21:16:19 +00:00
|
|
|
#ifdef USE_QUICKLZ
|
2016-02-24 07:53:04 +00:00
|
|
|
delete qlz_state;
|
2016-02-03 21:16:19 +00:00
|
|
|
#endif
|
2010-06-04 18:25:25 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|