2019-06-06 17:11:31 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Core/Types.h>
|
|
|
|
#include <Compression/ICompressionCodec.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-07-01 13:35:04 +00:00
|
|
|
/// Takes 64 integer values, builds a 64x64 bit matrix, transposes it and crops unused bits (most significant zeroes).
|
|
|
|
/// For example, if we have UInt8 with only 0 and 1 inside, 64xUInt8 would be compressed into 1xUInt64.
|
|
|
|
/// It detects unused bits by calculating min and max values of data part, saving them in header in compression phase.
|
|
|
|
/// There's a special case for parts of signed integers whose data crosses zero: one more bit is stored to detect the sign of the value.
|
2019-06-06 17:11:31 +00:00
|
|
|
class CompressionCodecT64 : public ICompressionCodec
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr UInt32 HEADER_SIZE = 1 + 2 * sizeof(UInt64);
|
|
|
|
static constexpr UInt32 MAX_COMPRESSED_BLOCK_SIZE = sizeof(UInt64) * 64;
|
|
|
|
|
2019-07-01 13:35:04 +00:00
|
|
|
/// There're 2 compression variants:
|
|
|
|
/// Byte - transpose bit matrix by bytes (only the last not full byte is transposed by bits). It's default.
|
|
|
|
/// Bits - full bit-transpose of the bit matrix. It uses more resources and leads to better compression with ZSTD (but worse with LZ4).
|
2019-06-25 13:59:33 +00:00
|
|
|
enum class Variant
|
|
|
|
{
|
|
|
|
Byte,
|
|
|
|
Bit
|
|
|
|
};
|
|
|
|
|
|
|
|
CompressionCodecT64(TypeIndex type_idx_, Variant variant_)
|
2019-06-06 17:11:31 +00:00
|
|
|
: type_idx(type_idx_)
|
2019-06-25 13:59:33 +00:00
|
|
|
, variant(variant_)
|
2019-06-06 17:11:31 +00:00
|
|
|
{}
|
|
|
|
|
2020-01-03 14:39:24 +00:00
|
|
|
uint8_t getMethodByte() const override;
|
2019-06-25 13:59:33 +00:00
|
|
|
String getCodecDesc() const override
|
|
|
|
{
|
2019-07-01 13:35:04 +00:00
|
|
|
return String("T64") + ((variant == Variant::Byte) ? "" : "(\'bit\')");
|
2019-06-25 13:59:33 +00:00
|
|
|
}
|
2019-06-06 17:11:31 +00:00
|
|
|
|
|
|
|
void useInfoAboutType(DataTypePtr data_type) override;
|
|
|
|
|
|
|
|
protected:
|
2020-03-08 22:08:39 +00:00
|
|
|
UInt32 doCompressData(const char * src, UInt32 src_size, char * dst) const override;
|
|
|
|
void doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const override;
|
2019-06-06 17:11:31 +00:00
|
|
|
|
|
|
|
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override
|
|
|
|
{
|
2019-06-11 14:14:45 +00:00
|
|
|
/// uncompressed_size - (uncompressed_size % (sizeof(T) * 64)) + sizeof(UInt64) * sizeof(T) + header_size
|
|
|
|
return uncompressed_size + MAX_COMPRESSED_BLOCK_SIZE + HEADER_SIZE;
|
2019-06-06 17:11:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
TypeIndex type_idx;
|
2019-06-25 13:59:33 +00:00
|
|
|
Variant variant;
|
2019-06-06 17:11:31 +00:00
|
|
|
};
|
|
|
|
|
2019-12-15 06:34:43 +00:00
|
|
|
/// Forward declaration: this header only refers to the factory by reference, so its full definition is not needed here.
class CompressionCodecFactory;
|
|
|
|
/// Declared here, defined elsewhere. Presumably registers the T64 codec in the given factory - confirm against the definition.
void registerCodecT64(CompressionCodecFactory & factory);
|
|
|
|
|
2019-06-06 17:11:31 +00:00
|
|
|
}
|