2018-10-11 02:57:48 +00:00
|
|
|
#include <Compression/CompressionCodecMultiple.h>
|
2018-12-21 13:25:39 +00:00
|
|
|
#include <Compression/CompressionInfo.h>
|
2018-10-11 02:57:48 +00:00
|
|
|
#include <common/unaligned.h>
|
|
|
|
#include <Compression/CompressionFactory.h>
|
2018-12-18 13:08:22 +00:00
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2018-12-19 17:20:18 +00:00
|
|
|
#include <Common/hex.h>
|
2019-01-15 11:06:00 +00:00
|
|
|
#include <sstream>
|
2018-10-11 02:57:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2018-12-18 14:34:18 +00:00
|
|
|
|
|
|
|
|
2018-12-18 13:08:22 +00:00
|
|
|
/// Error codes referenced by this file. They are only declared here (`extern`, no initializer);
/// the definitions live in another translation unit.
/// CORRUPTED_DATA is thrown by doDecompressData below.
/// NOTE(review): UNKNOWN_CODEC is not referenced in the visible part of this file - confirm whether it is still needed.
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int UNKNOWN_CODEC;
|
|
|
|
extern const int CORRUPTED_DATA;
|
|
|
|
}
|
2018-10-11 02:57:48 +00:00
|
|
|
|
|
|
|
/// Stores the given list of codecs and precomputes the textual description of the
/// composite codec: the descriptions of the individual codecs, in list order, joined by ", ".
CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs)
    : codecs(codecs)
{
    std::ostringstream description;

    bool is_first = true;
    for (const auto & nested_codec : codecs)
    {
        /// Separator goes before every element except the first one.
        if (!is_first)
            description << ", ";
        is_first = false;

        description << nested_codec->getCodecDesc();
    }

    codec_desc = description.str();
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
/// Returns the method byte of this codec: CompressionMethodByte::Multiple converted to UInt8.
UInt8 CompressionCodecMultiple::getMethodByte() const
{
    const auto method = CompressionMethodByte::Multiple;
    return static_cast<UInt8>(method);
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
/// Returns a copy of the stored description string (`codec_desc`).
/// The constructor that takes a list of codecs fills it with the nested codec descriptions joined by ", ".
String CompressionCodecMultiple::getCodecDesc() const
{
    return this->codec_desc;
}
|
|
|
|
|
2019-01-15 14:20:34 +00:00
|
|
|
/// Computes how many bytes to reserve for compressing `uncompressed_size` bytes with the whole chain.
/// The reserve size reported by each codec is used as the input size for the next codec in the list.
UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
    UInt32 payload_bound = uncompressed_size;
    for (size_t i = 0; i < codecs.size(); ++i)
        payload_bound = codecs[i]->getCompressedReserveSize(payload_bound);

    /// Header layout: one byte with the total number of codecs, then one method byte per codec.
    /// The data follows the header.
    const size_t header_size = sizeof(UInt8) + codecs.size();
    return header_size + payload_bound;
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
/// Compresses `source_size` bytes from `source` by applying every codec of the list in order,
/// each one consuming the output of the previous one.
///
/// Layout written to `dest`:
///   dest[0]                 - number of codecs (as UInt8);
///   dest[1 .. codecs.size()] - method byte of each codec, in application order;
///   the rest                - output of the last codec.
///
/// Returns the total number of bytes written to `dest`.
UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    const size_t codecs_count = codecs.size();
    const size_t header_size = 1 + codecs_count;

    /// Two buffers that trade roles after every stage: the output of one codec becomes the input of the next.
    PODArray<char> stage_output;
    PODArray<char> stage_input(source, source + source_size);

    dest[0] = static_cast<UInt8>(codecs_count);

    for (size_t idx = 0; idx < codecs_count; ++idx)
    {
        const auto & codec = codecs[idx];

        /// Method bytes start right after the count byte.
        dest[1 + idx] = codec->getMethodByte();

        stage_output.resize(codec->getCompressedReserveSize(source_size));
        const UInt32 stage_size = codec->compress(stage_input.data(), source_size, stage_output.data());

        stage_input.swap(stage_output);
        source_size = stage_size;
    }

    /// After the last swap `stage_input` holds the final compressed data (or the original data if there are no codecs).
    memcpy(dest + header_size, stage_input.data(), source_size);

    return header_size + source_size;
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const
|
2018-10-11 02:57:48 +00:00
|
|
|
{
|
2018-12-19 17:20:18 +00:00
|
|
|
UInt8 compression_methods_size = source[0];
|
2018-12-18 13:08:22 +00:00
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
PODArray<char> compressed_buf(&source[compression_methods_size + 1], &source[source_size]);
|
2018-10-11 02:57:48 +00:00
|
|
|
PODArray<char> uncompressed_buf;
|
2018-12-18 13:08:22 +00:00
|
|
|
/// Insert all data into compressed buf
|
2018-12-19 17:20:18 +00:00
|
|
|
source_size -= (compression_methods_size + 1);
|
2018-10-11 02:57:48 +00:00
|
|
|
|
2018-12-18 13:08:22 +00:00
|
|
|
for (long idx = compression_methods_size - 1; idx >= 0; --idx)
|
2018-10-11 02:57:48 +00:00
|
|
|
{
|
2018-12-19 17:20:18 +00:00
|
|
|
UInt8 compression_method = source[idx + 1];
|
2018-10-11 02:57:48 +00:00
|
|
|
const auto codec = CompressionCodecFactory::instance().get(compression_method);
|
2018-12-20 10:27:38 +00:00
|
|
|
compressed_buf.resize(compressed_buf.size() + codec->getAdditionalSizeAtTheEndOfBuffer());
|
2018-12-19 17:20:18 +00:00
|
|
|
UInt32 uncompressed_size = ICompressionCodec::readDecompressedBlockSize(compressed_buf.data());
|
|
|
|
|
2018-12-18 13:08:22 +00:00
|
|
|
if (idx == 0 && uncompressed_size != decompressed_size)
|
|
|
|
throw Exception("Wrong final decompressed size in codec Multiple, got " + toString(uncompressed_size) + ", expected " + toString(decompressed_size), ErrorCodes::CORRUPTED_DATA);
|
2018-12-19 17:20:18 +00:00
|
|
|
|
2018-12-20 10:27:38 +00:00
|
|
|
uncompressed_buf.resize(uncompressed_size + codec->getAdditionalSizeAtTheEndOfBuffer());
|
2018-12-19 17:20:18 +00:00
|
|
|
codec->decompress(compressed_buf.data(), source_size, uncompressed_buf.data());
|
2018-10-11 02:57:48 +00:00
|
|
|
uncompressed_buf.swap(compressed_buf);
|
2018-12-19 17:20:18 +00:00
|
|
|
source_size = uncompressed_size;
|
2018-10-11 02:57:48 +00:00
|
|
|
}
|
|
|
|
|
2018-12-18 13:08:22 +00:00
|
|
|
memcpy(dest, compressed_buf.data(), decompressed_size);
|
2018-10-11 02:57:48 +00:00
|
|
|
}
|
|
|
|
|
2018-12-17 08:31:59 +00:00
|
|
|
/// Registers the codec in `factory` under the name "Multiple" and the method byte
/// CompressionMethodByte::Multiple. The registered creator returns a default-constructed
/// CompressionCodecMultiple.
void registerCodecMultiple(CompressionCodecFactory & factory)
{
    /// The creator is handed over to the factory, so it must not capture anything by reference;
    /// it needs no state at all, hence the empty capture list.
    factory.registerSimpleCompressionCodec("Multiple", static_cast<UInt8>(CompressionMethodByte::Multiple), [] ()
    {
        return std::make_shared<CompressionCodecMultiple>();
    });
}
|
|
|
|
|
2018-12-13 10:25:11 +00:00
|
|
|
}
|