2012-01-05 10:22:02 +00:00
|
|
|
#pragma once
|
2011-06-17 21:19:39 +00:00
|
|
|
|
2017-02-07 21:26:32 +00:00
|
|
|
#include <cstdint>
|
|
|
|
|
2017-05-28 14:29:40 +00:00
|
|
|
/** Common defines */

/// 0x40000000 bytes = 2^30 bytes = 1 GiB.
/// NOTE(review): judging by the name, this is the upper bound on the size of compressed data - confirm at the use sites.
#define DBMS_MAX_COMPRESSED_SIZE 0x40000000ULL /// 1 GiB

/// NOTE(review): presumably the extra destination buffer space QuickLZ needs when compressing
///  (its manual asks for "source size + 400" bytes) - confirm at the use sites.
#define QUICKLZ_ADDITIONAL_SPACE 400

/// NOTE(review): 9 matches the header described in the block format comment further down in this file:
///  1 byte for the compression method + 4 bytes of compressed size + 4 bytes of uncompressed size
///  (the 16-byte checksum is not counted) - confirm at the use sites.
#define COMPRESSED_BLOCK_HEADER_SIZE 9
|
2012-01-05 18:35:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
{

/** Compression method.
  * The enumerator values are assigned explicitly.
  * Note that they are not the same numbers as the method byte written into a compressed block
  *  (see CompressionMethodByte below).
  */
enum class CompressionMethod
{
    QuickLZ = 0,
    LZ4 = 1,
    LZ4HC = 2,  /// The format is the same as for LZ4. The difference is only in compression.
    ZSTD = 3,   /// Experimental algorithm: https://github.com/Cyan4973/zstd
};

/** The compressed block format is as follows:
  *
  * The first 16 bytes are the checksum of all other bytes of the block. Currently only CityHash128 is used.
  * Other checksums may be provided in the future, although it will not be possible to make them different in size.
  *
  * The next byte specifies the compression algorithm. Everything after it depends on the algorithm.
  *
  * The first 4 values are compatible with QuickLZ level 1.
  * That is, if the value of this byte is < 4, it is enough to use the qlz_level1_decompress function to decompress.
  *
  * 0x00 - uncompressed data, small block. Next, one byte - compressed data size, including the header; one byte - uncompressed data size.
  * 0x01 - compressed data, QuickLZ level 1, small block. Then the same two bytes follow.
  * 0x02 - uncompressed data, large block. Then 4 bytes - compressed data size, including the header; 4 bytes - uncompressed data size.
  * 0x03 - compressed data, QuickLZ level 1, large block. Then the same 8 bytes follow.
  *
  * 0x82 - LZ4 or LZ4HC (they have the same format).
  *        Next 4 bytes - the size of the compressed data, including the header; 4 bytes - the size of the uncompressed data.
  *
  * NOTE: Why 0x82?
  * Originally only QuickLZ was used. Then LZ4 was added.
  * The high bit is set to distinguish it from QuickLZ, and the second bit is set for compatibility,
  *  so that the functions qlz_size_compressed and qlz_size_decompressed keep working.
  * Although now such compatibility is no longer relevant.
  *
  * 0x90 - ZSTD
  *
  * All sizes are little endian.
  */

/** Values of the compression algorithm byte from the format description above, for the non-QuickLZ methods.
  * LZ4 and LZ4HC share the same value, because their block format is the same.
  * QuickLZ has no enumerator here: per the description above, its blocks use the values 0x00..0x03.
  */
enum class CompressionMethodByte : uint8_t
{
    LZ4 = 0x82,
    ZSTD = 0x90,
};

}
|