2018-10-11 02:57:48 +00:00
|
|
|
#include <lz4.h>
|
|
|
|
#include <lz4hc.h>
|
2021-05-23 01:12:30 +00:00
|
|
|
|
|
|
|
#include <Compression/ICompressionCodec.h>
|
2018-12-21 13:25:39 +00:00
|
|
|
#include <Compression/CompressionInfo.h>
|
2018-10-11 02:57:48 +00:00
|
|
|
#include <Compression/CompressionFactory.h>
|
2018-12-28 18:15:26 +00:00
|
|
|
#include <Compression/LZ4_decompress_faster.h>
|
2018-12-21 14:03:53 +00:00
|
|
|
#include <Parsers/IAST.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
2020-08-28 17:40:45 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
2021-05-23 01:12:30 +00:00
|
|
|
#include <IO/WriteBuffer.h>
|
2019-01-14 11:54:40 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2021-05-23 01:12:30 +00:00
|
|
|
#include <IO/BufferWithOwnMemory.h>
|
|
|
|
|
2019-12-18 05:12:03 +00:00
|
|
|
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
2018-10-11 02:57:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-05-23 01:12:30 +00:00
|
|
|
class CompressionCodecLZ4 : public ICompressionCodec
|
|
|
|
{
|
|
|
|
public:
|
2021-05-24 03:00:57 +00:00
|
|
|
explicit CompressionCodecLZ4();
|
2021-05-23 01:12:30 +00:00
|
|
|
|
|
|
|
uint8_t getMethodByte() const override;
|
|
|
|
|
|
|
|
UInt32 getAdditionalSizeAtTheEndOfBuffer() const override { return LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER; }
|
|
|
|
|
|
|
|
void updateHash(SipHash & hash) const override;
|
|
|
|
|
|
|
|
protected:
|
|
|
|
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
|
|
|
|
|
|
|
|
bool isCompression() const override { return true; }
|
|
|
|
bool isGenericCompression() const override { return true; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
|
|
|
|
|
|
|
|
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
|
|
|
|
|
|
|
|
mutable LZ4::PerformanceStatistics lz4_stat;
|
|
|
|
ASTPtr codec_desc;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class CompressionCodecLZ4HC : public CompressionCodecLZ4
|
|
|
|
{
|
|
|
|
public:
|
2021-05-24 03:00:57 +00:00
|
|
|
explicit CompressionCodecLZ4HC(int level_);
|
2021-05-23 01:12:30 +00:00
|
|
|
|
|
|
|
protected:
|
|
|
|
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
const int level;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2018-12-21 14:03:53 +00:00
|
|
|
/// Error codes thrown from this file. These are declarations only (`extern`);
/// the definitions are not part of this file.
namespace ErrorCodes
{
    extern const int CANNOT_COMPRESS;
    extern const int CANNOT_DECOMPRESS;
    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
    extern const int ILLEGAL_CODEC_PARAMETER;
}
|
|
|
|
|
2020-09-14 19:15:25 +00:00
|
|
|
/// Sets the codec description to "LZ4" with no arguments.
CompressionCodecLZ4::CompressionCodecLZ4()
{
    this->setCodecDescription("LZ4");
}
|
|
|
|
|
2020-09-14 19:15:25 +00:00
|
|
|
uint8_t CompressionCodecLZ4::getMethodByte() const
|
2018-10-11 02:57:48 +00:00
|
|
|
{
|
2020-09-14 19:15:25 +00:00
|
|
|
return static_cast<uint8_t>(CompressionMethodByte::LZ4);
|
2018-10-11 02:57:48 +00:00
|
|
|
}
|
|
|
|
|
2020-09-03 22:04:46 +00:00
|
|
|
/// Mixes the codec description AST (as returned by getCodecDesc()) into `hash`
/// by calling updateTreeHash on it.
void CompressionCodecLZ4::updateHash(SipHash & hash) const
{
    const auto & description = getCodecDesc();
    description->updateTreeHash(hash);
}
|
|
|
|
|
2019-01-15 14:20:34 +00:00
|
|
|
/// Returns LZ4_COMPRESSBOUND(uncompressed_size); the same value is used by
/// doCompressData as the destination capacity given to LZ4.
UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
    const UInt32 worst_case_size = LZ4_COMPRESSBOUND(uncompressed_size);
    return worst_case_size;
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
/// Compresses `source_size` bytes starting at `source` into `dest` using LZ4_compress_default.
/// The destination capacity reported to LZ4 is LZ4_COMPRESSBOUND(source_size), so `dest`
/// must provide at least that many bytes (the value getMaxCompressedDataSize returns).
/// Returns the number of bytes written to `dest`.
/// Throws Exception with ErrorCodes::CANNOT_COMPRESS if LZ4 reports failure.
UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    const auto compressed_size = LZ4_compress_default(source, dest, source_size, LZ4_COMPRESSBOUND(source_size));

    /// LZ4_compress_default signals failure by returning 0 (e.g. when the input is larger than
    /// LZ4 accepts and LZ4_COMPRESSBOUND evaluates to 0). Returning that value would present a
    /// failed compression as a valid empty result, so fail loudly like the LZ4HC codec does.
    if (compressed_size <= 0)
        throw Exception("Cannot LZ4_compress_default", ErrorCodes::CANNOT_COMPRESS);

    return static_cast<UInt32>(compressed_size);
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
2018-10-11 02:57:48 +00:00
|
|
|
{
|
2021-08-03 12:24:16 +00:00
|
|
|
bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
|
|
|
|
|
|
|
|
if (!success)
|
2021-08-03 12:27:03 +00:00
|
|
|
throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS);
|
2018-10-11 02:57:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Registers the argument-less "LZ4" codec in `factory` under the CompressionMethodByte::LZ4
/// method byte; the creator returns a fresh CompressionCodecLZ4 on every call.
void registerCodecLZ4(CompressionCodecFactory & factory)
{
    const auto method_byte = static_cast<UInt8>(CompressionMethodByte::LZ4);

    factory.registerSimpleCompressionCodec(
        "LZ4",
        method_byte,
        [&] () { return std::make_shared<CompressionCodecLZ4>(); });
}
|
|
|
|
|
2018-12-21 14:03:53 +00:00
|
|
|
/// Compresses `source_size` bytes from `source` into `dest` with LZ4_compress_HC at the
/// level stored in this codec. The destination capacity given to LZ4 is
/// LZ4_COMPRESSBOUND(source_size).
/// Returns the value reported by LZ4_compress_HC.
/// Throws Exception with ErrorCodes::CANNOT_COMPRESS if that value is 0.
UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    const auto written = LZ4_compress_HC(source, dest, source_size, LZ4_COMPRESSBOUND(source_size), level);

    if (written == 0)
        throw Exception("Cannot LZ4_compress_HC", ErrorCodes::CANNOT_COMPRESS);

    return written;
}
|
|
|
|
|
|
|
|
/// Registers the "LZ4HC" codec in `factory` (no method byte is supplied for it here).
/// The creator accepts an optional argument list:
///  - no arguments: the codec is created with level 0;
///  - exactly one argument: it must be a literal holding a UInt64, which becomes the level;
///  - more than one argument: throws ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
///  - a non-literal argument: throws ILLEGAL_CODEC_PARAMETER.
void registerCodecLZ4HC(CompressionCodecFactory & factory)
{
    /// The creator uses nothing from the enclosing scope, so it captures nothing.
    factory.registerCompressionCodec("LZ4HC", {}, [](const ASTPtr & arguments) -> CompressionCodecPtr
    {
        int level = 0;

        if (arguments && !arguments->children.empty())
        {
            if (arguments->children.size() > 1)
                throw Exception("LZ4HC codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);

            /// Bind by reference: copying the vector would copy every child pointer just to read the first one.
            const auto & children = arguments->children;
            const auto * literal = children[0]->as<ASTLiteral>();
            if (!literal)
                throw Exception("LZ4HC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);

            /// The literal is read as UInt64 and deliberately narrowed to the int the codec stores.
            level = static_cast<int>(literal->value.safeGet<UInt64>());
        }

        return std::make_shared<CompressionCodecLZ4HC>(level);
    });
}
|
|
|
|
|
|
|
|
/// Stores the level and sets the codec description to "LZ4HC" with a single literal
/// argument holding the level converted to UInt64.
CompressionCodecLZ4HC::CompressionCodecLZ4HC(int level_)
    : level(level_)
{
    const auto level_literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(level));
    setCodecDescription("LZ4HC", {level_literal});
}
|
|
|
|
|
2021-08-19 12:52:24 +00:00
|
|
|
|
|
|
|
/// Creates a CompressionCodecLZ4HC with the given level.
/// Note that, despite the function's name, the returned codec is the HC variant.
CompressionCodecPtr getCompressionCodecLZ4(int level)
{
    CompressionCodecPtr codec = std::make_shared<CompressionCodecLZ4HC>(level);
    return codec;
}
|
|
|
|
|
2018-12-21 14:03:53 +00:00
|
|
|
}
|