2018-12-28 18:15:26 +00:00
# include "ICompressionCodec.h"
2020-07-09 01:00:16 +00:00
# include <cassert>
2020-08-26 08:59:02 +00:00
# include <Parsers/ASTFunction.h>
2021-10-02 07:13:14 +00:00
# include <base/unaligned.h>
2020-07-09 01:00:16 +00:00
# include <Common/Exception.h>
2020-08-26 15:29:46 +00:00
# include <Parsers/queryToString.h>
2020-09-14 19:15:25 +00:00
# include <Parsers/ASTIdentifier.h>
2020-09-18 11:37:58 +00:00
# include <Compression/CompressionCodecMultiple.h>
2018-10-11 02:57:48 +00:00
2019-12-19 19:23:49 +00:00
2018-10-11 02:57:48 +00:00
namespace DB
{
namespace ErrorCodes
{
    /// Error codes thrown from this file. They are only declared here (extern, no initializer).
    extern const int LOGICAL_ERROR;
    extern const int CORRUPTED_DATA;
    extern const int CANNOT_DECOMPRESS;
}
2020-09-14 19:15:25 +00:00
void ICompressionCodec : : setCodecDescription ( const String & codec_name , const ASTs & arguments )
2020-08-26 08:59:02 +00:00
{
std : : shared_ptr < ASTFunction > result = std : : make_shared < ASTFunction > ( ) ;
result - > name = " CODEC " ;
2020-09-16 09:08:39 +00:00
2020-10-27 11:04:03 +00:00
/// Special case for codec Multiple, which doesn't have name. It's just list
2020-09-16 08:18:42 +00:00
/// of other codecs.
2020-09-14 19:15:25 +00:00
if ( codec_name . empty ( ) )
2020-08-26 15:29:46 +00:00
{
2020-09-14 19:15:25 +00:00
ASTPtr codec_desc = std : : make_shared < ASTExpressionList > ( ) ;
for ( const auto & argument : arguments )
codec_desc - > children . push_back ( argument ) ;
2020-08-26 15:29:46 +00:00
result - > arguments = codec_desc ;
}
else
{
2020-09-14 19:15:25 +00:00
ASTPtr codec_desc ;
2020-09-16 08:18:42 +00:00
if ( arguments . empty ( ) ) /// Codec without arguments is just ASTIdentifier
2020-09-14 19:15:25 +00:00
codec_desc = std : : make_shared < ASTIdentifier > ( codec_name ) ;
2020-09-16 08:18:42 +00:00
else /// Codec with arguments represented as ASTFunction
2020-09-14 19:15:25 +00:00
codec_desc = makeASTFunction ( codec_name , arguments ) ;
2020-08-26 15:29:46 +00:00
result - > arguments = std : : make_shared < ASTExpressionList > ( ) ;
result - > arguments - > children . push_back ( codec_desc ) ;
}
2020-09-14 19:15:25 +00:00
2020-08-26 15:29:46 +00:00
result - > children . push_back ( result - > arguments ) ;
2020-09-14 19:15:25 +00:00
full_codec_desc = result ;
}
/// Returns the codec description stored in `full_codec_desc`.
/// Throws LOGICAL_ERROR when the description is still null.
ASTPtr ICompressionCodec::getFullCodecDesc() const
{
    if (full_codec_desc)
        return full_codec_desc;

    throw Exception(" Codec description is not prepared ", ErrorCodes::LOGICAL_ERROR);
}
/// Returns the description without the outer CODEC wrapper: either the single
/// codec node, or the whole expression list when there is not exactly one entry.
ASTPtr ICompressionCodec::getCodecDesc() const
{
    const ASTPtr codec_list = getFullCodecDesc()->as<ASTFunction>()->arguments;
    const auto & codecs = codec_list->children;

    /// Anything other than exactly one entry: return the codecs as an expression list.
    if (codecs.size() != 1)
        return codec_list;

    /// Exactly one entry, then it's a single codec: return it directly.
    return codecs.front();
}
2018-12-19 17:20:18 +00:00
2020-09-03 22:04:46 +00:00
/// Feeds this codec into a fresh SipHash through updateHash() and returns its 64-bit value.
UInt64 ICompressionCodec::getHash() const
{
    SipHash hasher;
    updateHash(hasher);
    return hasher.get64();
}
2019-06-13 14:04:38 +00:00
/// Compresses `source_size` bytes from `source` into `dest`, prefixed by a header.
///
/// Header fields written here:
///   byte 0     - method byte of this codec;
///   bytes 1..4 - UInt32: size of the compressed data plus the header size;
///   bytes 5..8 - UInt32: `source_size` (size of the uncompressed data).
///
/// @return Total number of bytes written to `dest` (header plus compressed data).
UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) const
{
    assert(source != nullptr && dest != nullptr);

    char * const header = dest;
    header[0] = getMethodByte();

    const UInt8 header_size = getHeaderSize();

    /// The compressed payload starts right after the header.
    const UInt32 payload_size = doCompressData(source, source_size, dest + header_size);

    unalignedStore<UInt32>(header + 1, payload_size + header_size);
    unalignedStore<UInt32>(header + 5, source_size);

    return header_size + payload_size;
}
2018-12-19 17:20:18 +00:00
2019-06-13 14:04:38 +00:00
/// Decompresses a block (header followed by compressed data) from `source` into `dest`.
///
/// @param source       Start of the compressed block, including its header.
/// @param source_size  Size of the compressed block in bytes, including the header.
/// @param dest         Output buffer for the decompressed data.
/// @return Decompressed size as recorded in the block header.
/// @throws Exception(CORRUPTED_DATA)     if `source_size` is smaller than the header size,
///                                       or the header records a decompressed size of 0.
/// @throws Exception(CANNOT_DECOMPRESS)  if the method byte in the header is not this codec's.
UInt32 ICompressionCodec::decompress(const char * source, UInt32 source_size, char * dest) const
{
    assert(source != nullptr && dest != nullptr);

    UInt8 header_size = getHeaderSize();
    if (source_size < header_size)
        throw Exception(ErrorCodes::CORRUPTED_DATA, " Can't decompress data: the compressed data size ({}, this should include header size) is less than the header size ({}) ", source_size, static_cast<size_t>(header_size));

    /// Refuse to decode data that was produced by a different codec.
    uint8_t our_method = getMethodByte();
    uint8_t method = readMethod(source);
    if (method != our_method)
        throw Exception(ErrorCodes::CANNOT_DECOMPRESS, " Can't decompress data with codec byte {} using codec with byte {} ", method, our_method);

    UInt32 decompressed_size = readDecompressedBlockSize(source);

    /// The payload starts right after the header.
    doDecompressData(&source[header_size], source_size - header_size, dest, decompressed_size);

    return decompressed_size;
}
2018-12-19 17:20:18 +00:00
/// Reads the UInt32 compressed block size stored at byte offset 1 of the header at `source`.
/// Throws CORRUPTED_DATA if the stored value is 0.
UInt32 ICompressionCodec::readCompressedBlockSize(const char * source)
{
    const UInt32 size = unalignedLoad<UInt32>(source + 1);
    if (size != 0)
        return size;

    throw Exception(ErrorCodes::CORRUPTED_DATA, " Can't decompress data: header is corrupt with compressed block size 0 ");
}
2018-12-19 17:20:18 +00:00
/// Reads the UInt32 decompressed block size stored at byte offset 5 of the header at `source`.
/// Throws CORRUPTED_DATA if the stored value is 0.
UInt32 ICompressionCodec::readDecompressedBlockSize(const char * source)
{
    const UInt32 size = unalignedLoad<UInt32>(source + 5);
    if (size != 0)
        return size;

    throw Exception(ErrorCodes::CORRUPTED_DATA, " Can't decompress data: header is corrupt with decompressed block size 0 ");
}
2020-01-03 14:39:24 +00:00
uint8_t ICompressionCodec : : readMethod ( const char * source )
2018-12-19 17:20:18 +00:00
{
2020-01-03 14:39:24 +00:00
return static_cast < uint8_t > ( source [ 0 ] ) ;
2018-12-19 17:20:18 +00:00
}
2018-10-11 02:57:48 +00:00
}