mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-18 21:51:57 +00:00
342 lines
14 KiB
C
342 lines
14 KiB
C
/* ******************************************************************
|
|
FSE : Finite State Entropy coder
|
|
header file for static linking (only)
|
|
Copyright (C) 2013-2015, Yann Collet
|
|
|
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above
|
|
copyright notice, this list of conditions and the following disclaimer
|
|
in the documentation and/or other materials provided with the
|
|
distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
You can contact the author at :
|
|
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
- Public forum : https://groups.google.com/forum/#!forum/lz4c
|
|
****************************************************************** */
|
|
#ifndef FSE_STATIC_H
|
|
#define FSE_STATIC_H
|
|
|
|
#if defined (__cplusplus)
|
|
extern "C" {
|
|
#endif
|
|
|
|
|
|
/* *****************************************
|
|
* Dependencies
|
|
*******************************************/
|
|
#include "fse.h"
|
|
#include "bitstream.h"
|
|
|
|
|
|
/* *****************************************
|
|
* Static allocation
|
|
*******************************************/
|
|
/* FSE buffer bounds */
|
|
/* Upper bound, in bytes, reserved for the normalized-count header. */
#define FSE_NCOUNTBOUND 512
/* Bound for one block of `size` input bytes : size + size/128.
   The argument is fully parenthesized so that expression arguments (e.g. `a | b`)
   expand with the intended precedence.
   Note : `size` is evaluated twice; do not pass an expression with side effects. */
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7))
/* Macro version, useful for static allocation */
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))
|
|
|
|
/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
|
|
/* Number of `unsigned` cells needed for a statically allocated CTable / DTable.
   Every macro argument is parenthesized so that expression arguments
   (e.g. `a | b`, `c ? x : y`) expand with the intended precedence. */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
|
|
|
|
|
|
/* *****************************************
|
|
* FSE advanced API
|
|
*******************************************/
|
|
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
|
|
/* same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */
|
|
|
|
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
|
|
/* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
|
|
|
|
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
|
/* build a fake FSE_CTable, designed to always compress the same symbolValue */
|
|
|
|
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
|
|
/* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
|
|
|
|
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
|
|
/* build a fake FSE_DTable, designed to always generate the same symbolValue */
|
|
|
|
|
|
/* *****************************************
|
|
* FSE symbol compression API
|
|
*******************************************/
|
|
/*!
|
|
This API consists of small unitary functions, which highly benefit from being inlined.
|
|
You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
|
|
Visual seems to do it automatically.
|
|
For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
|
|
If none of these solutions is applicable, include "fse.c" directly.
|
|
*/
|
|
/* Compression state, one per interleaved FSE stream.
   Filled by FSE_initCState(), advanced by FSE_encodeSymbol(), written out by FSE_flushCState(). */
typedef struct {
    ptrdiff_t   value;        /* current state value */
    const void* stateTable;   /* next-state table; the inline encoders read it as U16 cells */
    const void* symbolTT;     /* per-symbol table; read as FSE_symbolCompressionTransform cells */
    unsigned    stateLog;     /* table log; also the number of bits FSE_flushCState() writes */
} FSE_CState_t;
|
|
|
|
/* Prepares *CStatePtr for encoding with table `ct` (inline definition further down in this header). */
static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
|
|
|
|
/* Adds the bits for `symbol` to bitC via BIT_addBits() and moves *CStatePtr to its next state. Does not flush bitC. */
static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
|
|
|
|
/* Adds the final state value (stateLog bits) to bitC, then calls BIT_flushBits(). */
static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
|
|
|
|
/*!
|
|
These functions are inner components of FSE_compress_usingCTable().
|
|
They allow the creation of custom streams, mixing multiple tables and bit sources.
|
|
|
|
A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
|
|
So the first symbol you will encode is the last you will decode, like a LIFO stack.
|
|
|
|
You will need a few variables to track your CStream. They are :
|
|
|
|
FSE_CTable* ct; // Provided by FSE_buildCTable()
|
|
BIT_CStream_t bitStream; // bitStream tracking structure
|
|
FSE_CState_t state; // State tracking structure (can have several)
|
|
|
|
|
|
The first thing to do is to init bitStream and state.
|
|
size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
|
|
FSE_initCState(&state, ct);
|
|
|
|
Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
|
|
You can then encode your input data, byte after byte.
|
|
FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
|
|
Remember decoding will be done in reverse direction.
|
|
FSE_encodeSymbol(&bitStream, &state, symbol);
|
|
|
|
At any time, you can also add any bit sequence.
|
|
Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
|
|
BIT_addBits(&bitStream, bitField, nbBits);
|
|
|
|
The above methods don't commit data to memory, they just store it into local register, for speed.
|
|
Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
|
|
Writing data to memory is a manual operation, performed by the flushBits function.
|
|
BIT_flushBits(&bitStream);
|
|
|
|
Your last FSE encoding operation shall be to flush your last state value(s).
|
|
FSE_flushCState(&bitStream, &state);
|
|
|
|
Finally, you must close the bitStream.
|
|
The function returns the size of CStream in bytes.
|
|
If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
|
|
If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
|
|
size_t size = BIT_closeCStream(&bitStream);
|
|
*/
|
|
|
|
|
|
/* *****************************************
|
|
* FSE symbol decompression API
|
|
*******************************************/
|
|
/* Decompression state, one per interleaved FSE stream.
   Filled by FSE_initDState() and advanced by the FSE_decodeSymbol*() / FSE_updateState() helpers. */
typedef struct {
    size_t      state;   /* current index into the decoding table */
    const void* table;   /* decoding cells; precise table may vary, depending on U16 */
} FSE_DState_t;
|
|
|
|
|
|
/* Reads the initial state (tableLog bits) from bitD, reloads bitD, and binds *DStatePtr to table `dt`. Returns nothing. */
static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
|
|
|
|
/* Returns the symbol of the current state, then reads that state's nbBits from bitD to move to the next state. */
static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
|
|
|
|
/* Returns 1 when the state value is 0, otherwise 0. */
static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
|
|
|
|
/*!
|
|
Let's now decompose FSE_decompress_usingDTable() into its unitary components.
|
|
You will decode FSE-encoded symbols from the bitStream,
|
|
and also any other bitFields you put in, **in reverse order**.
|
|
|
|
You will need a few variables to track your bitStream. They are :
|
|
|
|
BIT_DStream_t DStream; // Stream context
|
|
FSE_DState_t DState; // State context. Multiple ones are possible
|
|
FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
|
|
|
|
The first thing to do is to init the bitStream.
|
|
errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
|
|
|
|
You should then retrieve your initial state(s)
|
|
(in reverse flushing order if you have several ones) :
|
|
FSE_initDState(&DState, &DStream, DTablePtr);
|
|
|
|
You can then decode your data, symbol after symbol.
|
|
For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
|
|
Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
|
|
unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
|
|
|
|
You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
|
|
Note : maximum allowed nbBits is 25, for 32-bits compatibility
|
|
size_t bitField = BIT_readBits(&DStream, nbBits);
|
|
|
|
All above operations only read from local register (whose size depends on size_t).
|
|
Refueling the register from memory is manually performed by the reload method.
|
|
endSignal = BIT_reloadDStream(&DStream);
|
|
|
|
BIT_reloadDStream() result tells if there is still some more data to read from DStream.
|
|
BIT_DStream_unfinished : there is still some data left into the DStream.
|
|
BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
|
|
BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
|
|
BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
|
|
|
|
When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
|
|
to properly detect the exact end of stream.
|
|
After each decoded symbol, check if DStream is fully consumed using this simple test :
|
|
BIT_reloadDStream(&DStream) >= BIT_DStream_completed
|
|
|
|
When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
|
|
Checking if DStream has reached its end is performed by :
|
|
BIT_endOfDStream(&DStream);
|
|
Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
|
|
FSE_endOfDState(&DState);
|
|
*/
|
|
|
|
|
|
/* *****************************************
|
|
* FSE unsafe API
|
|
*******************************************/
|
|
static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
|
|
/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
|
|
|
|
|
|
/* *****************************************
|
|
* Implementation of inlined functions
|
|
*******************************************/
|
|
typedef struct {
|
|
int deltaFindState;
|
|
U32 deltaNbBits;
|
|
} FSE_symbolCompressionTransform; /* total 8 bytes */
|
|
|
|
/*! FSE_initCState() :
    Binds `statePtr` to compression table `ct`.
    tableLog is read from the start of `ct` with MEM_read16().
    The next-state table starts two U16 cells after the start of `ct`;
    the symbol transform table starts 1 + (tableLog ? 2^(tableLog-1) : 1) U32 cells after it.
    The initial state value is 1<<tableLog. */
MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
{
    const void* const header = ct;
    const U32 tableLog = MEM_read16(header);
    const U16* const firstStateCell = (const U16*)header + 2;
    const U32* const firstTransformCell = (const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);

    statePtr->stateLog   = tableLog;
    statePtr->value      = (ptrdiff_t)1 << tableLog;
    statePtr->stateTable = firstStateCell;
    statePtr->symbolTT   = firstTransformCell;
}
|
|
|
|
/*! FSE_initCState2() :
    Same as FSE_initCState(), but then replaces the initial state value with one
    derived from the transform of `symbol` :
    nbBitsOut is deltaNbBits rounded to the nearest multiple of 1<<16 (then >> 16),
    and the state is looked up in the next-state table from (nbBitsOut<<16) - deltaNbBits.
    No bits are written by this function. */
MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
{
    FSE_initCState(statePtr, ct);

    {   const FSE_symbolCompressionTransform* const transforms =
                (const FSE_symbolCompressionTransform*)(statePtr->symbolTT);
        const FSE_symbolCompressionTransform tt = transforms[symbol];
        const U16* const nextStates = (const U16*)(statePtr->stateTable);
        U32 const nbBitsOut = (U32)((tt.deltaNbBits + (1<<15)) >> 16);
        ptrdiff_t const seed = (ptrdiff_t)((nbBitsOut << 16) - tt.deltaNbBits);

        statePtr->value = nextStates[(seed >> nbBitsOut) + tt.deltaFindState];
    }
}
|
|
|
|
/*! FSE_encodeSymbol() :
    Encodes one `symbol` :
    - computes the number of bits to emit from the current state and the symbol's deltaNbBits,
    - hands the current state value to BIT_addBits() with that bit count,
    - replaces the state with the next-state table entry for (state >> nbBitsOut) + deltaFindState.
    The bit container is not flushed here; the caller decides when to call BIT_flushBits(). */
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
{
    const FSE_symbolCompressionTransform* const transforms =
            (const FSE_symbolCompressionTransform*)(statePtr->symbolTT);
    const FSE_symbolCompressionTransform tt = transforms[symbol];
    const U16* const nextStates = (const U16*)(statePtr->stateTable);
    U32 const nbBitsOut = (U32)((statePtr->value + tt.deltaNbBits) >> 16);

    BIT_addBits(bitC, statePtr->value, nbBitsOut);
    statePtr->value = nextStates[(statePtr->value >> nbBitsOut) + tt.deltaFindState];
}
|
|
|
|
/*! FSE_flushCState() :
    Terminates one state : adds its final value (stateLog bits) to the stream,
    then flushes the bit container with BIT_flushBits(). */
MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
{
    ptrdiff_t const finalState = statePtr->value;
    unsigned const nbStateBits = statePtr->stateLog;

    BIT_addBits(bitC, finalState, nbStateBits);
    BIT_flushBits(bitC);
}
|
|
|
|
/*<===== Decompression =====>*/
|
|
|
|
typedef struct {
|
|
U16 tableLog;
|
|
U16 fastMode;
|
|
} FSE_DTableHeader; /* sizeof U32 */
|
|
|
|
/* One decoding table cell, indexed by the current state value. */
typedef struct {
    unsigned short newState;   /* base of the next state; the bits read from the stream are added to it */
    unsigned char  symbol;     /* symbol emitted for this state */
    unsigned char  nbBits;     /* number of bits to read from the stream when leaving this state */
} FSE_decode_t;   /* size == U32 */
|
|
|
|
/*! FSE_initDState() :
    Reads the initial state (tableLog bits, taken from the DTable header) from `bitD`,
    reloads the bit container, and points the state at the decoding cells,
    which start one cell after the header. */
MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
{
    const void* const rawTable = dt;
    const FSE_DTableHeader* const header = (const FSE_DTableHeader*)rawTable;

    DStatePtr->state = BIT_readBits(bitD, header->tableLog);
    BIT_reloadDStream(bitD);
    DStatePtr->table = &dt[1];
}
|
|
|
|
/*! FSE_peekSymbol() :
    Returns the symbol attached to the current state, without consuming any bit
    and without changing the state. */
MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
{
    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
    return cells[DStatePtr->state].symbol;
}
|
|
|
|
/*! FSE_updateState() :
    Moves to the next state without returning a symbol :
    reads the current cell's nbBits from `bitD` and adds them to the cell's newState. */
MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
{
    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
    FSE_decode_t const cell = cells[DStatePtr->state];
    U32 const bitsToRead = cell.nbBits;
    size_t const extra = BIT_readBits(bitD, bitsToRead);

    DStatePtr->state = cell.newState + extra;
}
|
|
|
|
/*! FSE_decodeSymbol() :
    Decodes one symbol : returns the symbol of the current cell, after reading
    that cell's nbBits from `bitD` (with BIT_readBits()) to compute the next state. */
MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
{
    const FSE_decode_t* const cells = (const FSE_decode_t*)(DStatePtr->table);
    FSE_decode_t const cell = cells[DStatePtr->state];
    BYTE const decoded = cell.symbol;
    U32 const bitsToRead = cell.nbBits;
    size_t const extra = BIT_readBits(bitD, bitsToRead);

    DStatePtr->state = cell.newState + extra;
    return decoded;
}
|
|
|
|
/*! FSE_decodeSymbolFast() :
|
|
unsafe, only works if no symbol has a probability > 50% */
|
|
MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
|
|
{
|
|
FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
|
|
U32 const nbBits = DInfo.nbBits;
|
|
BYTE const symbol = DInfo.symbol;
|
|
size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
|
|
|
|
DStatePtr->state = DInfo.newState + lowBits;
|
|
return symbol;
|
|
}
|
|
|
|
MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
|
{
|
|
return DStatePtr->state == 0;
|
|
}
|
|
|
|
|
|
#if defined (__cplusplus)
|
|
}
|
|
#endif
|
|
|
|
#endif /* FSE_STATIC_H */
|