2018-12-28 18:15:26 +00:00
|
|
|
#include "ICompressionCodec.h"
|
|
|
|
|
2020-07-09 01:00:16 +00:00
|
|
|
#include <cassert>
|
|
|
|
|
2020-08-26 08:59:02 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/unaligned.h>
|
2020-07-09 01:00:16 +00:00
|
|
|
#include <Common/Exception.h>
|
2020-08-26 15:29:46 +00:00
|
|
|
#include <Parsers/queryToString.h>
|
2020-09-14 19:15:25 +00:00
|
|
|
#include <Parsers/ASTIdentifier.h>
|
2020-09-18 11:37:58 +00:00
|
|
|
#include <Compression/CompressionCodecMultiple.h>
|
2018-10-11 02:57:48 +00:00
|
|
|
|
2019-12-19 19:23:49 +00:00
|
|
|
|
2018-10-11 02:57:48 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes thrown by the functions in this file. They are only declared here (`extern`),
/// so their values are defined in another translation unit.
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int CANNOT_DECOMPRESS;
|
2019-12-19 19:23:49 +00:00
|
|
|
extern const int CORRUPTED_DATA;
|
2020-09-14 19:15:25 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2018-10-11 02:57:48 +00:00
|
|
|
}
|
|
|
|
|
2020-09-14 19:15:25 +00:00
|
|
|
|
|
|
|
void ICompressionCodec::setCodecDescription(const String & codec_name, const ASTs & arguments)
|
2020-08-26 08:59:02 +00:00
|
|
|
{
|
|
|
|
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
|
|
|
|
result->name = "CODEC";
|
2020-09-16 09:08:39 +00:00
|
|
|
|
2020-10-27 11:04:03 +00:00
|
|
|
/// Special case for codec Multiple, which doesn't have name. It's just list
|
2020-09-16 08:18:42 +00:00
|
|
|
/// of other codecs.
|
2020-09-14 19:15:25 +00:00
|
|
|
if (codec_name.empty())
|
2020-08-26 15:29:46 +00:00
|
|
|
{
|
2020-09-14 19:15:25 +00:00
|
|
|
ASTPtr codec_desc = std::make_shared<ASTExpressionList>();
|
|
|
|
for (const auto & argument : arguments)
|
|
|
|
codec_desc->children.push_back(argument);
|
2020-08-26 15:29:46 +00:00
|
|
|
result->arguments = codec_desc;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2020-09-14 19:15:25 +00:00
|
|
|
ASTPtr codec_desc;
|
2020-09-16 08:18:42 +00:00
|
|
|
if (arguments.empty()) /// Codec without arguments is just ASTIdentifier
|
2020-09-14 19:15:25 +00:00
|
|
|
codec_desc = std::make_shared<ASTIdentifier>(codec_name);
|
2020-09-16 08:18:42 +00:00
|
|
|
else /// Codec with arguments represented as ASTFunction
|
2020-09-14 19:15:25 +00:00
|
|
|
codec_desc = makeASTFunction(codec_name, arguments);
|
|
|
|
|
2020-08-26 15:29:46 +00:00
|
|
|
result->arguments = std::make_shared<ASTExpressionList>();
|
|
|
|
result->arguments->children.push_back(codec_desc);
|
|
|
|
}
|
2020-09-14 19:15:25 +00:00
|
|
|
|
2020-08-26 15:29:46 +00:00
|
|
|
result->children.push_back(result->arguments);
|
2020-09-14 19:15:25 +00:00
|
|
|
full_codec_desc = result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns the stored `CODEC(...)` description.
/// Throws LOGICAL_ERROR if the description has not been set (`full_codec_desc` is null).
ASTPtr ICompressionCodec::getFullCodecDesc() const
{
    if (full_codec_desc != nullptr)
        return full_codec_desc;

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Codec description is not prepared");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns the codec description without the outer `CODEC(...)` wrapper.
/// Propagates the exception from getFullCodecDesc() if the description is not prepared.
ASTPtr ICompressionCodec::getCodecDesc() const
{
    auto codec_list = getFullCodecDesc()->as<ASTFunction>()->arguments;

    /// Exactly one argument means a single codec, so return that element itself.
    if (codec_list->children.size() == 1)
        return codec_list->children[0];

    /// Otherwise there are multiple codecs; return them together as the expression list.
    return codec_list;
}
|
2018-12-19 17:20:18 +00:00
|
|
|
|
2020-09-03 22:04:46 +00:00
|
|
|
/// Returns a 64-bit hash of this codec: feeds a fresh SipHash through updateHash()
/// and returns its 64-bit result.
UInt64 ICompressionCodec::getHash() const
{
    SipHash hasher;
    updateHash(hasher);
    return hasher.get64();
}
|
|
|
|
|
2019-06-13 14:04:38 +00:00
|
|
|
/// Compresses `source_size` bytes from `source` into `dest` and writes the block header.
///
/// Layout written to `dest`:
///   byte 0      - method byte of this codec
///   bytes 1..4  - little-endian UInt32: compressed size including the header
///   bytes 5..8  - little-endian UInt32: size of the uncompressed data (`source_size`)
///   from byte `getHeaderSize()` - compressed payload produced by doCompressData()
///
/// Returns the total number of bytes written (header plus payload).
/// Both pointers must be non-null (checked with assert only).
UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const
{
    assert(source != nullptr && dest != nullptr);

    dest[0] = getMethodByte();

    const UInt8 header_size = getHeaderSize();

    /// The payload goes right after the header; the sizes are filled in once it is known.
    char * const payload = dest + header_size;
    const UInt32 payload_size = doCompressData(source, source_size, payload);
    const UInt32 total_size = payload_size + header_size;

    unalignedStoreLittleEndian<UInt32>(dest + 1, total_size);
    unalignedStoreLittleEndian<UInt32>(dest + 5, source_size);

    return total_size;
}
|
2022-04-26 18:14:09 +00:00
|
|
|
|
2022-07-06 22:34:31 +00:00
|
|
|
/// Decompresses one block (header plus payload) of `source_size` bytes from `source` into `dest`.
///
/// Throws CORRUPTED_DATA if the block is shorter than the header, and CANNOT_DECOMPRESS if
/// the method byte stored in the block differs from this codec's method byte.
/// readDecompressedBlockSize() additionally throws CORRUPTED_DATA for a zero decompressed size.
///
/// Returns the decompressed size read from the header.
/// Both pointers must be non-null (checked with assert only).
UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const
{
    assert(source != nullptr && dest != nullptr);

    const UInt8 header_size = getHeaderSize();
    if (source_size < header_size)
    {
        throw Exception(ErrorCodes::CORRUPTED_DATA,
            "Can't decompress data: the compressed data size ({}, this should include header size) "
            "is less than the header size ({})", source_size, static_cast<size_t>(header_size));
    }

    /// The first byte of the block tells which codec produced it; it must be ours.
    uint8_t expected_method = getMethodByte();
    uint8_t stored_method = source[0];
    if (stored_method != expected_method)
    {
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Can't decompress data with codec byte {} using codec with byte {}", stored_method, expected_method);
    }

    const UInt32 uncompressed_size = readDecompressedBlockSize(source);

    /// Hand only the payload (everything after the header) to the codec implementation.
    doDecompressData(source + header_size, source_size - header_size, dest, uncompressed_size);

    return uncompressed_size;
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
/// Reads the compressed block size: a little-endian UInt32 stored at offset 1 of `source`.
/// Throws CORRUPTED_DATA if the stored value is 0.
UInt32 ICompressionCodec::readCompressedBlockSize(const char * source)
{
    const UInt32 size_in_header = unalignedLoadLittleEndian<UInt32>(source + 1);
    if (size_in_header != 0)
        return size_in_header;

    throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: header is corrupt with compressed block size 0");
}
|
|
|
|
|
2018-12-19 17:20:18 +00:00
|
|
|
|
|
|
|
/// Reads the decompressed block size: a little-endian UInt32 stored at offset 5 of `source`.
/// Throws CORRUPTED_DATA if the stored value is 0.
UInt32 ICompressionCodec::readDecompressedBlockSize(const char * source)
{
    const UInt32 size_in_header = unalignedLoadLittleEndian<UInt32>(source + 5);
    if (size_in_header != 0)
        return size_in_header;

    throw Exception(ErrorCodes::CORRUPTED_DATA, "Can't decompress data: header is corrupt with decompressed block size 0");
}
|
|
|
|
|
|
|
|
|
2020-01-03 14:39:24 +00:00
|
|
|
/// Returns the method byte of a block, i.e. its very first byte, as an unsigned value.
uint8_t ICompressionCodec::readMethod(const char * source)
{
    const char first_byte = *source;
    return static_cast<uint8_t>(first_byte);
}
|
|
|
|
|
2018-10-11 02:57:48 +00:00
|
|
|
}
|