mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-13 11:04:10 +00:00
260 lines
11 KiB
C
260 lines
11 KiB
C
/* ******************************************************************
|
|
FSE : Finite State Entropy coder
|
|
header file for static linking (only)
|
|
Copyright (C) 2013-2015, Yann Collet
|
|
|
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above
|
|
copyright notice, this list of conditions and the following disclaimer
|
|
in the documentation and/or other materials provided with the
|
|
distribution.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
You can contact the author at :
|
|
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
- Public forum : https://groups.google.com/forum/#!forum/lz4c
|
|
****************************************************************** */
|
|
#pragma once
|
|
|
|
#if defined (__cplusplus)
|
|
extern "C" {
|
|
#endif
|
|
|
|
|
|
/******************************************
|
|
* FSE API compatible with DLL
|
|
******************************************/
|
|
#include "fse.h"
|
|
|
|
|
|
/******************************************
|
|
* Static allocation
|
|
******************************************/
|
|
/* Header size allowance (in bytes) added by FSE_COMPRESSBOUND(). */
#define FSE_MAX_HEADERSIZE 512
/* Buffer size bound computed as size + size/128 + FSE_MAX_HEADERSIZE.
   Macro can be useful for static allocation.
   The argument is fully parenthesized so that any expression may be passed;
   note that it is evaluated twice, so avoid arguments with side effects. */
#define FSE_COMPRESSBOUND(size) ((size) + ((size)>>7) + FSE_MAX_HEADERSIZE)
|
|
/* You can statically allocate a CTable or DTable as a table of unsigned using the macros below.
   Each macro yields the number of unsigned cells required.
   Arguments are fully parenthesized so that any integer expression may be passed. */
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
|
|
|
|
|
|
/******************************************
|
|
* Error Management
|
|
******************************************/
|
|
/* X-macro listing every FSE status code exactly once, in enum order.
   ITEM is expanded once per identifier; FSE_GENERATE_ENUM below turns the list into the enum.
   Each continuation backslash must be the last character on its line. */
#define FSE_LIST_ERRORS(ITEM) \
    ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
    ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
    ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong) \
    ITEM(FSE_ERROR_corruptionDetected) \
    ITEM(FSE_ERROR_maxCode)

/* Expands one list entry into an enumerator followed by a comma. */
#define FSE_GENERATE_ENUM(ENUM) ENUM,
typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes;  /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
|
|
|
|
|
|
/******************************************
|
|
* FSE advanced API
|
|
******************************************/
|
|
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
/* Same as FSE_count(), but blindly trusts that all values within src are <= maxSymbolValuePtr[0].
   NOTE(review): the caller is presumably responsible for guaranteeing that bound -- confirm against the implementation. */
|
|
|
|
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
/* Builds a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits.
   NOTE(review): the size_t result is presumably an error code testable with FSE_isError() -- confirm in fse.h. */
|
|
|
|
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
/* Builds a fake FSE_CTable, designed to always compress the same symbolValue.
   NOTE(review): the size_t result is presumably an error code testable with FSE_isError() -- confirm in fse.h. */
|
|
|
|
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
/* Builds a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits.
   NOTE(review): the size_t result is presumably an error code testable with FSE_isError() -- confirm in fse.h. */
|
|
|
|
size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
/* Builds a fake FSE_DTable, designed to always generate the same symbolValue.
   NOTE(review): the size_t result is presumably an error code testable with FSE_isError() -- confirm in fse.h. */
|
|
|
|
|
|
/******************************************
|
|
* FSE symbol compression API
|
|
******************************************/
|
|
/*
|
|
This API consists of small unitary functions, which highly benefit from being inlined.
|
|
You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
|
|
Visual Studio seems to do it automatically.
|
|
For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
|
|
*/
|
|
|
|
/* Bitstream context used while compressing (see FSE_initCStream() ... FSE_closeCStream()). */
typedef struct
{
    size_t bitContainer;   /* local bit accumulator; size_t wide, so 64 bits on 64-bit systems and 32 bits on 32-bit systems */
    int    bitPos;         /* presumably the number of bits currently held in bitContainer -- TODO confirm */
    char*  startPtr;       /* presumably the start of the destination buffer -- TODO confirm */
    char*  ptr;            /* presumably the current write position in the destination buffer -- TODO confirm */
} FSE_CStream_t;
|
|
|
|
/* Compression state context; several of them can be used with a single FSE_CStream_t. */
typedef struct
{
    ptrdiff_t   value;        /* presumably the current state value -- TODO confirm */
    const void* stateTable;   /* opaque table pointer; presumably set up by FSE_initCState() -- TODO confirm */
    const void* symbolTT;     /* opaque table pointer; presumably set up by FSE_initCState() -- TODO confirm */
    unsigned    stateLog;     /* presumably the table log of the associated FSE_CTable -- TODO confirm */
} FSE_CState_t;
|
|
|
|
void FSE_initCStream(FSE_CStream_t* bitC, void* dstBuffer);
|
|
void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
|
|
|
|
void   FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned char symbol);
/* Encodes one symbol; outputs a maximum of 'tableLog' bits at a time.
   Decoding happens in reverse direction, so the first symbol encoded is the last one decoded. */
void   FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits);
/* Adds an arbitrary bit sequence to the stream.
   Maximum allowed nbBits is 25, for compatibility with 32-bits decoders. */
void   FSE_flushBits(FSE_CStream_t* bitC);
/* Writes the bits accumulated in the local register to memory.
   FSE_encodeSymbol() and FSE_addBits() only store into the local register; flushing is a manual operation. */
|
|
|
|
void   FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* CStatePtr);
/* Flushes the last state value; this shall be the last FSE encoding operation for each state. */
size_t FSE_closeCStream(FSE_CStream_t* bitC);
/* Closes the bitstream.
   Returns the size in bytes of the CStream, or an errorCode (which can be tested using FSE_isError()). */
|
|
|
|
/*
|
|
These functions are inner components of FSE_compress_usingCTable().
|
|
They allow the creation of custom streams, mixing multiple tables and bit sources.
|
|
|
|
A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
|
|
So the first symbol you will encode is the last you will decode, like a LIFO stack.
|
|
|
|
You will need a few variables to track your CStream. They are :
|
|
|
|
FSE_CTable ct; // Provided by FSE_buildCTable()
|
|
FSE_CStream_t bitC; // bitStream tracking structure
|
|
FSE_CState_t state; // State tracking structure (can have several)
|
|
|
|
|
|
The first thing to do is to init bitStream and state.
|
|
FSE_initCStream(&bitC, dstBuffer);
|
|
FSE_initCState(&state, ct);
|
|
|
|
You can then encode your input data, byte after byte.
|
|
FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
Remember decoding will be done in reverse direction.
FSE_encodeSymbol(&bitC, &state, symbol);
|
|
|
|
At any time, you can also add any bit sequence.
|
|
Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
|
|
FSE_addBits(&bitC, bitField, nbBits);
|
|
|
|
The above methods don't commit data to memory, they just store it into local register, for speed.
|
|
Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
|
|
Writing data to memory is a manual operation, performed by the flushBits function.
|
|
FSE_flushBits(&bitC);
|
|
|
|
Your last FSE encoding operation shall be to flush your last state value(s).
FSE_flushCState(&bitC, &state);
|
|
|
|
Finally, you must then close the bitStream.
|
|
The function returns the size in bytes of CStream.
|
|
If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
|
|
size_t size = FSE_closeCStream(&bitC);
|
|
*/
|
|
|
|
|
|
/******************************************
|
|
* FSE symbol decompression API
|
|
******************************************/
|
|
/* Bitstream context used while decompressing (see FSE_initDStream(), FSE_reloadDStream()). */
typedef struct
{
    size_t      bitContainer;   /* local bit register, size_t wide */
    unsigned    bitsConsumed;   /* presumably the number of bits already read from bitContainer -- TODO confirm */
    const char* ptr;            /* presumably the current read position in the source buffer -- TODO confirm */
    const char* start;          /* presumably the start of the source buffer -- TODO confirm */
} FSE_DStream_t;
|
|
|
|
/* Decompression state context; multiple ones are possible for a single FSE_DStream_t. */
typedef struct
{
    size_t      state;   /* presumably the current decoding state value -- TODO confirm */
    const void* table;   /* precise table may vary, depending on U16 */
} FSE_DState_t;
|
|
|
|
|
|
size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
|
|
void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt);
|
|
|
|
unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
/* Decodes one symbol; reads a maximum of 'tableLog' bits.
   Symbols are decoded in reverse order of encoding, like a LIFO stack. */
size_t        FSE_readBits(FSE_DStream_t* bitD, unsigned nbBits);
/* Reads back a bitfield previously stored into the bitstream (in reverse order).
   Maximum allowed nbBits is 25. */
unsigned int  FSE_reloadDStream(FSE_DStream_t* bitD);
/* Refills the local register from memory. Result tells if there is still some more data to read from DStream :
   0 : there is still some data left into the DStream.
   1 : DStream reached end of buffer, but is not yet fully extracted. It will not load data from memory any more.
   2 : DStream reached its exact end, corresponding in general to decompression completed.
   3 : DStream went too far. Decompression result is corrupted. */
|
|
|
|
unsigned FSE_endOfDStream(const FSE_DStream_t* bitD);
|
|
unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
|
|
|
|
/*
|
|
Let's now decompose FSE_decompress_usingDTable() into its unitary components.
|
|
You will decode FSE-encoded symbols from the bitStream,
|
|
and also any other bitFields you put in, **in reverse order**.
|
|
|
|
You will need a few variables to track your bitStream. They are :
|
|
|
|
FSE_DStream_t DStream; // Stream context
|
|
FSE_DState_t DState; // State context. Multiple ones are possible
|
|
FSE_DTable dt; // Decoding table, provided by FSE_buildDTable()
|
|
U32 tableLog; // Provided by FSE_readHeader()
|
|
|
|
The first thing to do is to init the bitStream.
|
|
errorCode = FSE_initDStream(&DStream, srcBuffer, srcSize);
|
|
|
|
You should then retrieve your initial state(s) :
FSE_initDState(&DState, &DStream, dt);
|
|
|
|
You can then decode your data, symbol after symbol.
|
|
For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
|
|
Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
|
|
unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
|
|
|
|
You can retrieve any bitfield you previously stored into the bitStream (in reverse order).
Note : maximum allowed nbBits is 25
size_t bitField = FSE_readBits(&DStream, nbBits);
|
|
|
|
All above operations only read from the local register (bitContainer, whose size is that of size_t).
|
|
Refueling the register from memory is manually performed by the reload method.
|
|
endSignal = FSE_reloadDStream(&DStream);
|
|
|
|
FSE_reloadDStream() result tells if there is still some more data to read from DStream.
|
|
0 : there is still some data left into the DStream.
|
|
1 : Dstream reached end of buffer, but is not yet fully extracted. It will not load data from memory any more.
|
|
2 : Dstream reached its exact end, corresponding in general to decompression completed.
|
|
3 : Dstream went too far. Decompression result is corrupted.
|
|
|
|
When reaching end of buffer(1), progress slowly, notably if you decode multiple symbols per loop,
|
|
to properly detect the exact end of stream.
|
|
After each decoded symbol, check if DStream is fully consumed using this simple test :
|
|
FSE_reloadDStream(&DStream) >= 2
|
|
|
|
When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
|
|
Checking if DStream has reached its end is performed by :
|
|
FSE_endOfDStream(&DStream);
|
|
Check also the states. There might be some entropy left there, able to decode some high probability (>50%) symbol.
|
|
FSE_endOfDState(&DState);
|
|
*/
|
|
|
|
|
|
/******************************************
|
|
* FSE unsafe symbol API
|
|
******************************************/
|
|
size_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
/* Faster variant of FSE_readBits(), but works only if nbBits >= 1 (otherwise, result will be corrupted). */
|
|
|
|
unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
/* Faster variant of FSE_decodeSymbol(), but works only if nbBits >= 1 (otherwise, result will be corrupted).
   NOTE(review): this function takes no nbBits parameter; the condition presumably refers to the number of
   bits read for each decoded symbol -- confirm against the implementation. */
|
|
|
|
|
|
#if defined (__cplusplus)
|
|
}
|
|
#endif
|