/* Mirror of https://github.com/ClickHouse/ClickHouse.git
 * (synced 2024-11-05 15:21:43 +00:00) - 254 lines, 8.6 KiB, C++ */
/*
|
|
zstd_internal - common functions to include
|
|
Header File for include
|
|
Copyright (C) 2014-2016, Yann Collet.
|
|
|
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above
|
|
copyright notice, this list of conditions and the following disclaimer
|
|
in the documentation and/or other materials provided with the
|
|
distribution.
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
You can contact the author at :
|
|
- zstd homepage : https://www.zstd.net
|
|
*/
|
|
#ifndef ZSTD_CCOMMON_H_MODULE
|
|
#define ZSTD_CCOMMON_H_MODULE
|
|
|
|
/*-*************************************
|
|
* Dependencies
|
|
***************************************/
|
|
#include "mem.h"
|
|
#include "error_private.h"
|
|
#include "zstd_static.h"
|
|
|
|
|
|
/*-*************************************
|
|
* Common macros
|
|
***************************************/
|
|
/* Type-generic minimum / maximum of two expressions.
 * Each argument is expanded twice, so it must be free of side effects
 * (never pass `i++` or a call that mutates state). */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
|
|
|
|
|
|
/*-*************************************
|
|
* Common constants
|
|
***************************************/
|
|
/* Debug level of the optimal parser :
 * 3 = compression stats; 5 = check encoded sequences; 9 = full logs.
 * Defaults to 0; may be set from the build command line (e.g. -DZSTD_OPT_DEBUG=9). */
#ifndef ZSTD_OPT_DEBUG
#  define ZSTD_OPT_DEBUG 0
#endif
#include <stdio.h>   /* printf, used by the ZSTD_LOG_*() macros when ZSTD_OPT_DEBUG>=9 */
#if defined(ZSTD_OPT_DEBUG) && ZSTD_OPT_DEBUG>=9
    /* full logs : every trace macro forwards its arguments to printf() */
    #define ZSTD_LOG_PARSER(...) printf(__VA_ARGS__)
    #define ZSTD_LOG_ENCODE(...) printf(__VA_ARGS__)
    #define ZSTD_LOG_BLOCK(...) printf(__VA_ARGS__)
#else
    /* logging disabled : trace macros expand to nothing, arguments are not evaluated */
    #define ZSTD_LOG_PARSER(...)
    #define ZSTD_LOG_ENCODE(...)
    #define ZSTD_LOG_BLOCK(...)
#endif
|
|
|
|
#define ZSTD_OPT_NUM    (1<<12)      /* == 4096 */
#define ZSTD_DICT_MAGIC 0xEC30A436   /* magic number; presumably tags dictionary content - confirm against users */

#define ZSTD_REP_NUM    3
#define ZSTD_REP_INIT   ZSTD_REP_NUM       /* == 3; sizes the rep[] array of ZSTD_optimal_t */
#define ZSTD_REP_MOVE   (ZSTD_REP_NUM-1)   /* == 2 */
|
|
|
|
/* Postfix size multipliers : `64 KB` expands to `64 *(1 <<10)`.
 * The expansion starts with a bare `*`, so normal operator precedence applies
 * to whatever precedes it : write `(a+b) KB`, not `a+b KB`.
 * KB and MB are computed as int; GB as unsigned, so `4 GB` wraps to 0
 * when unsigned is 32 bits wide. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

/* Single-bit masks (value of bit N within a byte) */
#define BIT7 128
#define BIT6  64
#define BIT5  32
#define BIT4  16
#define BIT1   2
#define BIT0   1
|
|
|
|
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 12
/* 4-entry size table; presumably the byte size of the frame-content-size field
 * for each 2-bit descriptor code - confirm against the frame header code. */
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 1, 2, 8 };

#define ZSTD_BLOCKHEADERSIZE 3   /* because C standard does not allow a static const value to be defined using another static const value .... :( */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
/* Block types, numbered 0..3 in declaration order */
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
|
|
|
|
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */

#define HufLog 12

/* Four consecutive type codes (0..3); presumably the literals-section
 * encoding type - confirm against the literals coder. */
#define IS_HUF 0
#define IS_PCH 1
#define IS_RAW 2
#define IS_RLE 3

#define LONGNBSEQ 0x7F00

#define MINMATCH 3
#define EQUAL_READ32 4
#define REPCODE_STARTVALUE 1

#define Litbits  8
#define MaxLit ((1<<Litbits) - 1)   /* == 255 */
#define MaxML  52                   /* ML_bits[] and ML_defaultNorm[] hold MaxML+1 entries */
#define MaxLL  35                   /* LL_bits[] and LL_defaultNorm[] hold MaxLL+1 entries */
#define MaxOff 28                   /* OF_defaultNorm[] holds MaxOff+1 entries */
#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog    9
#define LLFSELog    9
#define OffFSELog   8

/* Four consecutive mode codes (0..3) for FSE table description */
#define FSE_ENCODING_RAW     0
#define FSE_ENCODING_RLE     1
#define FSE_ENCODING_STATIC  2
#define FSE_ENCODING_DYNAMIC 3
|
|
|
|
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
|
|
13,14,15,16 };
|
|
static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
|
|
-1,-1,-1,-1 };
|
|
static const U32 LL_defaultNormLog = 6;
|
|
|
|
static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
|
|
12,13,14,15,16 };
|
|
static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
|
|
-1,-1,-1,-1,-1 };
|
|
static const U32 ML_defaultNormLog = 6;
|
|
|
|
static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
|
|
static const U32 OF_defaultNormLog = 5;
|
|
|
|
|
|
/*-*******************************************
|
|
* Shared functions to include for inlining
|
|
*********************************************/
|
|
/* ZSTD_copy8() : copies exactly 8 bytes from `src` to `dst` (memcpy semantics : regions must not overlap). */
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
/* COPY8() : copies 8 bytes, then advances both pointers by 8.
 * `d` and `s` must be modifiable pointer lvalues.
 * Kept as a bare braced block (no do/while(0) wrapper) because ZSTD_wildcopy()
 * uses it directly as the body of a do ... while loop. */
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
|
|
|
|
/*! ZSTD_wildcopy() :
|
|
* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
|
|
#define WILDCOPY_OVERLENGTH 8
|
|
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
|
|
{
|
|
const BYTE* ip = (const BYTE*)src;
|
|
BYTE* op = (BYTE*)dst;
|
|
BYTE* const oend = op + length;
|
|
do
|
|
COPY8(op, ip)
|
|
while (op < oend);
|
|
}
|
|
|
|
/*! ZSTD_highbit() :
*   @return : index of the highest set bit of `val` (0 for bit 0 ... 31 for bit 31).
*   `val` is expected to be non-zero. For val==0 the GCC and software paths
*   return 0; the MSVC path returns the pre-initialized 0 provided
*   _BitScanReverse() leaves its output untouched - TODO confirm. */
MEM_STATIC unsigned ZSTD_highbit(U32 val)
{
#   if defined(_MSC_VER)   /* Visual */
    unsigned long r=0;
    _BitScanReverse(&r, val);
    return (unsigned)r;
#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
    /* __builtin_clz(0) is undefined : guard it so val==0 yields 0, like the software version */
    return val ? (unsigned)(31 - __builtin_clz(val)) : 0;
#   else   /* Software version */
    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
    int r;
    /* smear the highest set bit into every lower position */
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    /* the top 5 bits of the product select the table entry */
    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
    return (unsigned)r;
#   endif
}
|
|
|
|
|
|
/*-*******************************************
|
|
* Private interfaces
|
|
*********************************************/
|
|
typedef struct {
|
|
U32 off;
|
|
U32 len;
|
|
} ZSTD_match_t;
|
|
|
|
typedef struct {
|
|
U32 price;
|
|
U32 off;
|
|
U32 mlen;
|
|
U32 litlen;
|
|
U32 rep[ZSTD_REP_INIT];
|
|
} ZSTD_optimal_t;
|
|
|
|
#if ZSTD_OPT_DEBUG == 3
|
|
#include ".debug/zstd_stats.h"
|
|
#else
|
|
typedef struct { U32 unused; } ZSTD_stats_t;
|
|
MEM_STATIC void ZSTD_statsPrint(ZSTD_stats_t* stats, U32 searchLength) { (void)stats; (void)searchLength; };
|
|
MEM_STATIC void ZSTD_statsInit(ZSTD_stats_t* stats) { (void)stats; };
|
|
MEM_STATIC void ZSTD_statsResetFreqs(ZSTD_stats_t* stats) { (void)stats; };
|
|
MEM_STATIC void ZSTD_statsUpdatePrices(ZSTD_stats_t* stats, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength) { (void)stats; (void)litLength; (void)literals; (void)offset; (void)matchLength; };
|
|
#endif
|
|
|
|
typedef struct {
|
|
void* buffer;
|
|
U32* offsetStart;
|
|
U32* offset;
|
|
BYTE* offCodeStart;
|
|
BYTE* litStart;
|
|
BYTE* lit;
|
|
U16* litLengthStart;
|
|
U16* litLength;
|
|
BYTE* llCodeStart;
|
|
U16* matchLengthStart;
|
|
U16* matchLength;
|
|
BYTE* mlCodeStart;
|
|
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
|
|
U32 longLengthPos;
|
|
/* opt */
|
|
ZSTD_optimal_t* priceTable;
|
|
ZSTD_match_t* matchTable;
|
|
U32* matchLengthFreq;
|
|
U32* litLengthFreq;
|
|
U32* litFreq;
|
|
U32* offCodeFreq;
|
|
U32 matchLengthSum;
|
|
U32 matchSum;
|
|
U32 litLengthSum;
|
|
U32 litSum;
|
|
U32 offCodeSum;
|
|
U32 log2matchLengthSum;
|
|
U32 log2matchSum;
|
|
U32 log2litLengthSum;
|
|
U32 log2litSum;
|
|
U32 log2offCodeSum;
|
|
U32 factor;
|
|
ZSTD_stats_t stats;
|
|
} seqStore_t;
|
|
|
|
/* Private entry points, defined outside this header.
 * NOTE(review): behaviour is not visible from here; the summaries below are
 * inferred from names and signatures only - confirm against the definitions. */

/* Read-only access to the sequence store attached to `ctx` (presumably). */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
/* Operates on the first `nbSeq` sequences of `seqStorePtr`; presumably fills the *CodeStart tables. */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq);
/* Variant of compression start that also takes the expected source size;
 * the size_t result is presumably a zstd error code or 0 - confirm. */
size_t ZSTD_compressBegin_targetSrcSize(ZSTD_CCtx* zc, const void* dict, size_t dictSize, size_t targetSrcSize, int compressionLevel);
|
|
|
|
|
|
#endif /* ZSTD_CCOMMON_H_MODULE */
|