2019-01-14 11:55:53 +00:00
|
|
|
#include <Compression/CompressionCodecDelta.h>
|
|
|
|
#include <Compression/CompressionInfo.h>
|
|
|
|
#include <Compression/CompressionFactory.h>
|
2019-01-14 15:10:48 +00:00
|
|
|
#include <common/unaligned.h>
|
2019-01-14 11:55:53 +00:00
|
|
|
#include <Parsers/IAST.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
|
|
|
#include <cstdlib>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes used in this file. They are only declared here (`extern`), not defined.
namespace ErrorCodes
{
    /// Raised by the encoding / decoding paths.
    extern const int CANNOT_COMPRESS;
    extern const int CANNOT_DECOMPRESS;

    /// Raised while validating the codec arguments at registration time.
    extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
    extern const int ILLEGAL_CODEC_PARAMETER;
}
|
|
|
|
|
|
|
|
/// Creates a codec whose delta elements are `delta_bytes_size_` bytes wide.
/// The value is stored as is; no validation is performed here.
CompressionCodecDelta::CompressionCodecDelta(UInt8 delta_bytes_size_) : delta_bytes_size(delta_bytes_size_) {}
|
|
|
|
|
|
|
|
/// Returns the method byte of the Delta codec (CompressionMethodByte::Delta as UInt8).
UInt8 CompressionCodecDelta::getMethodByte() const
{
    const auto method = CompressionMethodByte::Delta;
    return static_cast<UInt8>(method);
}
|
|
|
|
|
|
|
|
/// Returns the textual description of the codec: "Delta(<element width in bytes>)".
String CompressionCodecDelta::getCodecDesc() const
{
    String description = "Delta(";
    description += toString(delta_bytes_size);
    description += ")";
    return description;
}
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void compressDataForType(const char * source, UInt32 source_size, char * dest)
|
|
|
|
{
|
2019-01-14 15:10:48 +00:00
|
|
|
if (source_size % sizeof(T) != 0)
|
|
|
|
throw Exception("Cannot delta compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS);
|
|
|
|
|
2019-01-16 00:52:33 +00:00
|
|
|
T prev_src{};
|
|
|
|
const char * source_end = source + source_size;
|
|
|
|
while (source < source_end)
|
|
|
|
{
|
|
|
|
T curr_src = unalignedLoad<T>(source);
|
2019-06-28 10:42:36 +00:00
|
|
|
unalignedStore<T>(dest, curr_src - prev_src);
|
2019-01-16 00:52:33 +00:00
|
|
|
prev_src = curr_src;
|
|
|
|
|
|
|
|
source += sizeof(T);
|
|
|
|
dest += sizeof(T);
|
|
|
|
}
|
2019-01-14 11:55:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
|
|
|
|
{
|
2019-01-14 15:10:48 +00:00
|
|
|
if (source_size % sizeof(T) != 0)
|
|
|
|
throw Exception("Cannot delta decompress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_DECOMPRESS);
|
|
|
|
|
2019-01-16 00:52:33 +00:00
|
|
|
T accumulator{};
|
|
|
|
const char * source_end = source + source_size;
|
|
|
|
while (source < source_end)
|
|
|
|
{
|
|
|
|
accumulator += unalignedLoad<T>(source);
|
2019-06-28 16:21:05 +00:00
|
|
|
unalignedStore<T>(dest, accumulator);
|
2019-01-14 11:55:53 +00:00
|
|
|
|
2019-01-16 00:52:33 +00:00
|
|
|
source += sizeof(T);
|
|
|
|
dest += sizeof(T);
|
|
|
|
}
|
2019-01-14 11:55:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Delta-encodes `source` into `dest` and returns the number of bytes written.
  *
  * Output layout:
  *   byte 0 - element width in bytes (delta_bytes_size);
  *   byte 1 - number of leading bytes stored verbatim (source_size % delta_bytes_size);
  *   then the verbatim bytes, then the delta-encoded remainder.
  *
  * The result is always two bytes longer than the input.
  */
UInt32 CompressionCodecDelta::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
    /// Leading bytes that do not form a whole element; they are copied unchanged.
    const UInt8 bytes_to_skip = source_size % delta_bytes_size;

    dest[0] = delta_bytes_size;
    dest[1] = bytes_to_skip; /// unused (backward compatibility)
    memcpy(&dest[2], source, bytes_to_skip);

    /// The part of the input that is actually delta-encoded, and where it goes.
    const char * const payload = &source[bytes_to_skip];
    const UInt32 payload_size = source_size - bytes_to_skip;
    const size_t start_pos = 2 + bytes_to_skip;
    char * const encoded = &dest[start_pos];

    switch (delta_bytes_size)
    {
        case 1:
            compressDataForType<UInt8>(payload, payload_size, encoded);
            break;
        case 2:
            compressDataForType<UInt16>(payload, payload_size, encoded);
            break;
        case 4:
            compressDataForType<UInt32>(payload, payload_size, encoded);
            break;
        case 8:
            compressDataForType<UInt64>(payload, payload_size, encoded);
            break;
    }

    return 1 + 1 + source_size;
}
|
|
|
|
|
2019-08-26 14:39:49 +00:00
|
|
|
void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
|
2019-01-14 11:55:53 +00:00
|
|
|
{
|
2019-08-26 14:39:49 +00:00
|
|
|
if (source_size < 2)
|
|
|
|
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
|
|
|
|
|
2019-01-14 11:55:53 +00:00
|
|
|
UInt8 bytes_size = source[0];
|
2019-08-26 14:39:49 +00:00
|
|
|
UInt8 bytes_to_skip = uncompressed_size % bytes_size;
|
|
|
|
|
2019-08-26 16:58:40 +00:00
|
|
|
if (UInt32(2 + bytes_to_skip) > source_size)
|
2019-08-26 14:39:49 +00:00
|
|
|
throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
|
2019-01-14 11:55:53 +00:00
|
|
|
|
|
|
|
memcpy(dest, &source[2], bytes_to_skip);
|
2019-01-14 15:10:48 +00:00
|
|
|
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
|
2019-01-14 11:55:53 +00:00
|
|
|
switch (bytes_size)
|
|
|
|
{
|
|
|
|
case 1:
|
2019-01-14 15:10:48 +00:00
|
|
|
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
2019-01-14 11:55:53 +00:00
|
|
|
break;
|
|
|
|
case 2:
|
2019-01-14 15:10:48 +00:00
|
|
|
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
2019-01-14 11:55:53 +00:00
|
|
|
break;
|
|
|
|
case 4:
|
2019-01-14 15:10:48 +00:00
|
|
|
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
2019-01-14 11:55:53 +00:00
|
|
|
break;
|
|
|
|
case 8:
|
2019-01-14 15:10:48 +00:00
|
|
|
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
|
2019-01-14 11:55:53 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-02 12:51:31 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Chooses the delta element width for a column type: the in-memory size of a value
/// when the type reports a maximum value size and that size is 1, 2, 4 or 8 bytes;
/// otherwise (including a null `column_type`) the width is 1.
UInt8 getDeltaBytesSize(DataTypePtr column_type)
{
    if (!column_type || !column_type->haveMaximumSizeOfValue())
        return 1;

    switch (column_type->getSizeOfValueInMemory())
    {
        case 1:
            return 1;
        case 2:
            return 2;
        case 4:
            return 4;
        case 8:
            return 8;
        default:
            return 1;
    }
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Replaces the current element width with the one derived from `data_type`
/// by getDeltaBytesSize.
void CompressionCodecDelta::useInfoAboutType(DataTypePtr data_type)
{
    const UInt8 inferred_bytes_size = getDeltaBytesSize(data_type);
    delta_bytes_size = inferred_bytes_size;
}
|
|
|
|
|
2019-01-14 11:55:53 +00:00
|
|
|
/** Registers the "Delta" codec in `factory`.
  *
  * The registered creator builds a CompressionCodecDelta whose element width is:
  *   - the single codec argument, if one is given (must be the literal 1, 2, 4 or 8);
  *   - otherwise the width derived from the column type by getDeltaBytesSize.
  *
  * The creator throws ILLEGAL_SYNTAX_FOR_CODEC_TYPE when more than one argument is given
  * and ILLEGAL_CODEC_PARAMETER when the argument is not a literal or has an unsupported value.
  */
void registerCodecDelta(CompressionCodecFactory & factory)
{
    UInt8 method_code = static_cast<UInt8>(CompressionMethodByte::Delta);

    /// The creator is stored by the factory, so it must not capture locals of this function
    /// by reference; it needs none, hence the empty capture list.
    factory.registerCompressionCodecWithType("Delta", method_code, [](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
    {
        UInt8 delta_bytes_size = getDeltaBytesSize(column_type);

        if (arguments && !arguments->children.empty())
        {
            if (arguments->children.size() > 1)
                throw Exception("Delta codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);

            /// Bind by reference: copying the whole children vector is unnecessary.
            const auto & children = arguments->children;

            /// `as` yields a null pointer when the argument is not a literal
            /// (for example an identifier); report that instead of dereferencing it.
            const auto * literal = children[0]->as<ASTLiteral>();
            if (!literal)
                throw Exception("Delta codec argument must be an integer literal", ErrorCodes::ILLEGAL_CODEC_PARAMETER);

            size_t user_bytes_size = literal->value.safeGet<UInt64>();
            if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8)
                throw Exception("Delta value for delta codec can be 1, 2, 4 or 8, given " + toString(user_bytes_size), ErrorCodes::ILLEGAL_CODEC_PARAMETER);

            delta_bytes_size = static_cast<UInt8>(user_bytes_size);
        }

        return std::make_shared<CompressionCodecDelta>(delta_bytes_size);
    });
}
|
|
|
|
}
|