2015-03-09 01:26:33 +00:00
/* ******************************************************************
2016-05-19 18:03:58 +00:00
FSE : Finite State Entropy encoder
2015-03-09 01:26:33 +00:00
Copyright ( C ) 2013 - 2015 , Yann Collet .
BSD 2 - Clause License ( http : //www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms , with or without
modification , are permitted provided that the following conditions are
met :
* Redistributions of source code must retain the above copyright
notice , this list of conditions and the following disclaimer .
* Redistributions in binary form must reproduce the above
copyright notice , this list of conditions and the following disclaimer
in the documentation and / or other materials provided with the
distribution .
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
" AS IS " AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT
LIMITED TO , THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL ,
SPECIAL , EXEMPLARY , OR CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT
LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE ,
DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY , OR TORT
( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
You can contact the author at :
- FSE source repository : https : //github.com/Cyan4973/FiniteStateEntropy
- Public forum : https : //groups.google.com/forum/#!forum/lz4c
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2016-03-07 09:52:53 +00:00
/* **************************************************************
2015-03-09 01:26:33 +00:00
* Compiler specifics
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* FORCE_INLINE : qualifier for static functions that should always be inlined.
 * It uses the Visual Studio or GCC specific spelling when one of these compilers is detected,
 * and falls back to plain `static inline` otherwise.
 * GCC_VERSION is only defined when __GNUC__ is defined. */
# ifdef _MSC_VER /* Visual Studio */
# define FORCE_INLINE static __forceinline
# include <intrin.h> /* For Visual 2005 */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
# else
# ifdef __GNUC__
2016-03-07 09:52:53 +00:00
# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
2015-03-09 01:26:33 +00:00
# define FORCE_INLINE static inline __attribute__((always_inline))
# else
# define FORCE_INLINE static inline
# endif
# endif
2016-03-07 09:52:53 +00:00
/* **************************************************************
2015-03-09 01:26:33 +00:00
* Includes
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# include <stdlib.h> /* malloc, free, qsort */
# include <string.h> /* memcpy, memset */
# include <stdio.h> /* printf (debug) */
2016-03-07 09:52:53 +00:00
# include "bitstream.h"
2015-03-09 01:26:33 +00:00
# include "fse_static.h"
2016-03-07 09:52:53 +00:00
/* **************************************************************
2015-03-09 01:26:33 +00:00
* Error Management
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* FSE_STATIC_ASSERT(c) : compile-time check.
 * Expands to a block declaring an enum constant equal to 1/(c != 0) :
 * when `c` is false, the constant expression divides by zero and the compiler rejects it.
 * Since it expands to a `{ }` block, it is a statement, not a declaration. */
# define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1 / (int)(!!(c)) }; } /* use only *after* variable declarations */
2016-03-07 09:52:53 +00:00
/* **************************************************************
2015-03-09 01:26:33 +00:00
* Complex types
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2015-07-09 18:40:37 +00:00
/* CTable_max_t : array of U32 sized by FSE_CTABLE_SIZE_U32() for FSE_MAX_TABLELOG and FSE_MAX_SYMBOL_VALUE.
 * FSE_compress2() declares one as a local variable and passes it to FSE_buildCTable(). */
typedef U32 CTable_max_t [ FSE_CTABLE_SIZE_U32 ( FSE_MAX_TABLELOG , FSE_MAX_SYMBOL_VALUE ) ] ;
2015-03-09 01:26:33 +00:00
2016-03-07 09:52:53 +00:00
/* **************************************************************
2015-09-09 23:51:17 +00:00
* Templates
2015-03-09 01:26:33 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2015-09-09 23:51:17 +00:00
/*
designed to be included
for type - specific functions ( template emulation in C )
Objective is to write these functions only once , for improved maintenance
*/
/* safety checks */
/* Both template parameters must be defined before this point : compilation stops otherwise. */
# ifndef FSE_FUNCTION_EXTENSION
# error "FSE_FUNCTION_EXTENSION must be defined"
# endif
# ifndef FSE_FUNCTION_TYPE
# error "FSE_FUNCTION_TYPE must be defined"
# endif
/* Function names */
/* FSE_CAT pastes its two arguments into a single token.
 * FSE_FUNCTION_NAME and FSE_TYPE_NAME add one level of macro call,
 * so that arguments which are themselves macros get expanded before being pasted. */
# define FSE_CAT(X,Y) X##Y
# define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
# define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
2015-03-09 01:26:33 +00:00
2015-07-09 18:40:37 +00:00
2015-09-09 23:51:17 +00:00
/* Function templates */
2016-03-07 09:52:53 +00:00
/* FSE_buildCTable() :
 * Fills the compression table `ct` from the normalized distribution `normalizedCounter`,
 * covering symbols 0..maxSymbolValue over a table of (1<<tableLog) cells.
 * A counter of -1 denotes a low-probability symbol : it receives a single cell, taken from the top of the table.
 * For explanations on how symbol values are distributed over the table :
 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html
 * @return : 0 on success,
 *           ERROR(GENERIC) when the spreading pass does not end back on position 0. */
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
{
    U32 const tableSize = 1 << tableLog;
    U32 const tableMask = tableSize - 1;
    U32 const step = FSE_TABLESTEP(tableSize);
    void* const base = ct;
    U16* const stateTable = ((U16*)base) + 2;   /* starts right after the two U16 header fields */
    void* const ttArea = ((U32*)base) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*)ttArea;
    U32 cumul[FSE_MAX_SYMBOL_VALUE + 2];
    FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* no memset() needed, even if a static analyzer complains about it */
    U32 highThreshold = tableSize - 1;

    /* CTable header */
    stateTable[-2] = (U16)tableLog;
    stateTable[-1] = (U16)maxSymbolValue;

    /* Start position of each symbol; low-probability symbols are placed immediately, from the top */
    {   U32 s;
        cumul[0] = 0;
        for (s = 0; s <= maxSymbolValue; s++) {
            short const freq = normalizedCounter[s];
            if (freq == -1) {
                cumul[s + 1] = cumul[s] + 1;
                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)s;
            } else {
                cumul[s + 1] = cumul[s] + freq;
            }
        }
        cumul[maxSymbolValue + 1] = tableSize + 1;
    }

    /* Spread the remaining symbols, walking the table by `step` */
    {   U32 pos = 0;
        U32 s;
        for (s = 0; s <= maxSymbolValue; s++) {
            int const freq = normalizedCounter[s];
            int n;
            for (n = 0; n < freq; n++) {
                tableSymbol[pos] = (FSE_FUNCTION_TYPE)s;
                do {
                    pos = (pos + step) & tableMask;
                } while (pos > highThreshold);   /* cells above highThreshold belong to the low proba area */
            }
        }
        if (pos != 0) return ERROR(GENERIC);   /* must have gone through all positions */
    }

    /* State table : sorted by symbol order; gives next state value */
    {   U32 cell;
        for (cell = 0; cell < tableSize; cell++) {
            FSE_FUNCTION_TYPE const sym = tableSymbol[cell];   /* note : static analyzer may not understand tableSymbol is properly initialized */
            stateTable[cumul[sym]++] = (U16)(tableSize + cell);
        }
    }

    /* Symbol transformation table */
    {   unsigned total = 0;
        unsigned s;
        for (s = 0; s <= maxSymbolValue; s++) {
            int const freq = normalizedCounter[s];
            if (freq == 0) continue;
            if ((freq == 1) || (freq == -1)) {
                symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
                symbolTT[s].deltaFindState = total - 1;
                total++;
            } else {
                U32 const maxBitsOut = tableLog - BIT_highbit32(freq - 1);
                U32 const minStatePlus = freq << maxBitsOut;
                symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
                symbolTT[s].deltaFindState = total - freq;
                total += freq;
            }
        }
    }

    return 0;
}
# ifndef FSE_COMMONDEFS_ONLY
2016-03-07 09:52:53 +00:00
/*-**************************************************************
* FSE NCount encoding - decoding
2015-09-09 23:51:17 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* FSE_NCountWriteBound() :
 * @return : FSE_NCOUNTBOUND when maxSymbolValue==0 (default),
 *           otherwise ((maxSymbolValue+1) * tableLog) / 8 + 3 */
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
{
    if (maxSymbolValue == 0) return FSE_NCOUNTBOUND;   /* maxSymbolValue==0 ? use default */
    return (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
}
2016-03-07 09:52:53 +00:00
/* FSE_abs() : absolute value of `a`, narrowed back to short */
static short FSE_abs(short a)
{
    if (a < 0) return (short)(-a);
    return a;
}
2015-09-09 23:51:17 +00:00
static size_t FSE_writeNCount_generic ( void * header , size_t headerBufferSize ,
const short * normalizedCounter , unsigned maxSymbolValue , unsigned tableLog ,
unsigned writeIsSafe )
{
BYTE * const ostart = ( BYTE * ) header ;
BYTE * out = ostart ;
BYTE * const oend = ostart + headerBufferSize ;
int nbBits ;
const int tableSize = 1 < < tableLog ;
int remaining ;
int threshold ;
U32 bitStream ;
int bitCount ;
unsigned charnum = 0 ;
int previous0 = 0 ;
bitStream = 0 ;
bitCount = 0 ;
/* Table Size */
bitStream + = ( tableLog - FSE_MIN_TABLELOG ) < < bitCount ;
bitCount + = 4 ;
/* Init */
remaining = tableSize + 1 ; /* +1 for extra accuracy */
threshold = tableSize ;
nbBits = tableLog + 1 ;
2016-03-07 09:52:53 +00:00
while ( remaining > 1 ) { /* stops at 1 */
if ( previous0 ) {
2015-09-09 23:51:17 +00:00
unsigned start = charnum ;
while ( ! normalizedCounter [ charnum ] ) charnum + + ;
2016-03-07 09:52:53 +00:00
while ( charnum > = start + 24 ) {
2015-09-09 23:51:17 +00:00
start + = 24 ;
bitStream + = 0xFFFFU < < bitCount ;
2016-03-07 09:52:53 +00:00
if ( ( ! writeIsSafe ) & & ( out > oend - 2 ) ) return ERROR ( dstSize_tooSmall ) ; /* Buffer overflow */
2015-09-09 23:51:17 +00:00
out [ 0 ] = ( BYTE ) bitStream ;
out [ 1 ] = ( BYTE ) ( bitStream > > 8 ) ;
out + = 2 ;
bitStream > > = 16 ;
}
2016-03-07 09:52:53 +00:00
while ( charnum > = start + 3 ) {
2015-09-09 23:51:17 +00:00
start + = 3 ;
bitStream + = 3 < < bitCount ;
bitCount + = 2 ;
}
bitStream + = ( charnum - start ) < < bitCount ;
bitCount + = 2 ;
2016-03-07 09:52:53 +00:00
if ( bitCount > 16 ) {
if ( ( ! writeIsSafe ) & & ( out > oend - 2 ) ) return ERROR ( dstSize_tooSmall ) ; /* Buffer overflow */
2015-09-09 23:51:17 +00:00
out [ 0 ] = ( BYTE ) bitStream ;
out [ 1 ] = ( BYTE ) ( bitStream > > 8 ) ;
out + = 2 ;
bitStream > > = 16 ;
bitCount - = 16 ;
2016-03-07 09:52:53 +00:00
} }
2016-04-26 21:18:12 +00:00
{ short count = normalizedCounter [ charnum + + ] ;
2015-09-09 23:51:17 +00:00
const short max = ( short ) ( ( 2 * threshold - 1 ) - remaining ) ;
remaining - = FSE_abs ( count ) ;
2016-03-07 09:52:53 +00:00
if ( remaining < 1 ) return ERROR ( GENERIC ) ;
2015-09-09 23:51:17 +00:00
count + + ; /* +1 for extra accuracy */
if ( count > = threshold ) count + = max ; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
bitStream + = count < < bitCount ;
bitCount + = nbBits ;
bitCount - = ( count < max ) ;
previous0 = ( count = = 1 ) ;
while ( remaining < threshold ) nbBits - - , threshold > > = 1 ;
}
2016-03-07 09:52:53 +00:00
if ( bitCount > 16 ) {
if ( ( ! writeIsSafe ) & & ( out > oend - 2 ) ) return ERROR ( dstSize_tooSmall ) ; /* Buffer overflow */
2015-09-09 23:51:17 +00:00
out [ 0 ] = ( BYTE ) bitStream ;
out [ 1 ] = ( BYTE ) ( bitStream > > 8 ) ;
out + = 2 ;
bitStream > > = 16 ;
bitCount - = 16 ;
2016-03-07 09:52:53 +00:00
} }
2015-09-09 23:51:17 +00:00
/* flush remaining bitStream */
2016-03-07 09:52:53 +00:00
if ( ( ! writeIsSafe ) & & ( out > oend - 2 ) ) return ERROR ( dstSize_tooSmall ) ; /* Buffer overflow */
2015-09-09 23:51:17 +00:00
out [ 0 ] = ( BYTE ) bitStream ;
out [ 1 ] = ( BYTE ) ( bitStream > > 8 ) ;
out + = ( bitCount + 7 ) / 8 ;
2016-03-07 09:52:53 +00:00
if ( charnum > maxSymbolValue + 1 ) return ERROR ( GENERIC ) ;
2015-09-09 23:51:17 +00:00
return ( out - ostart ) ;
}
size_t FSE_writeNCount ( void * buffer , size_t bufferSize , const short * normalizedCounter , unsigned maxSymbolValue , unsigned tableLog )
{
2016-03-07 09:52:53 +00:00
if ( tableLog > FSE_MAX_TABLELOG ) return ERROR ( GENERIC ) ; /* Unsupported */
if ( tableLog < FSE_MIN_TABLELOG ) return ERROR ( GENERIC ) ; /* Unsupported */
2015-09-09 23:51:17 +00:00
if ( bufferSize < FSE_NCountWriteBound ( maxSymbolValue , tableLog ) )
return FSE_writeNCount_generic ( buffer , bufferSize , normalizedCounter , maxSymbolValue , tableLog , 0 ) ;
return FSE_writeNCount_generic ( buffer , bufferSize , normalizedCounter , maxSymbolValue , tableLog , 1 ) ;
}
2015-03-09 01:26:33 +00:00
2016-03-07 09:52:53 +00:00
/*-**************************************************************
* Counting histogram
2015-03-09 01:26:33 +00:00
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2016-03-07 09:52:53 +00:00
/*! FSE_count_simple
2016-04-26 21:18:12 +00:00
This function just counts byte values within ` src ` ,
and store the histogram into table ` count ` .
This function is unsafe : it doesn ' t check that all values within ` src ` can fit into ` count ` .
For this reason , prefer using a table ` count ` with 256 elements .
@ return : count of most numerous element
2015-03-09 01:26:33 +00:00
*/
2016-03-07 09:52:53 +00:00
static size_t FSE_count_simple ( unsigned * count , unsigned * maxSymbolValuePtr ,
const void * src , size_t srcSize )
2015-03-09 01:26:33 +00:00
{
2016-03-07 09:52:53 +00:00
const BYTE * ip = ( const BYTE * ) src ;
const BYTE * const end = ip + srcSize ;
unsigned maxSymbolValue = * maxSymbolValuePtr ;
unsigned max = 0 ;
2015-03-09 01:26:33 +00:00
2016-05-19 18:03:58 +00:00
2016-03-07 09:52:53 +00:00
memset ( count , 0 , ( maxSymbolValue + 1 ) * sizeof ( * count ) ) ;
if ( srcSize = = 0 ) { * maxSymbolValuePtr = 0 ; return 0 ; }
while ( ip < end ) count [ * ip + + ] + + ;
while ( ! count [ maxSymbolValue ] ) maxSymbolValue - - ;
* maxSymbolValuePtr = maxSymbolValue ;
2016-04-26 21:18:12 +00:00
{ U32 s ; for ( s = 0 ; s < = maxSymbolValue ; s + + ) if ( count [ s ] > max ) max = count [ s ] ; }
2016-03-07 09:52:53 +00:00
return ( size_t ) max ;
}
static size_t FSE_count_parallel ( unsigned * count , unsigned * maxSymbolValuePtr ,
const void * source , size_t sourceSize ,
unsigned checkMax )
{
const BYTE * ip = ( const BYTE * ) source ;
const BYTE * const iend = ip + sourceSize ;
unsigned maxSymbolValue = * maxSymbolValuePtr ;
unsigned max = 0 ;
2016-05-19 18:03:58 +00:00
2016-03-07 09:52:53 +00:00
U32 Counting1 [ 256 ] = { 0 } ;
U32 Counting2 [ 256 ] = { 0 } ;
U32 Counting3 [ 256 ] = { 0 } ;
U32 Counting4 [ 256 ] = { 0 } ;
/* safety checks */
if ( ! sourceSize ) {
memset ( count , 0 , maxSymbolValue + 1 ) ;
* maxSymbolValuePtr = 0 ;
return 0 ;
}
if ( ! maxSymbolValue ) maxSymbolValue = 255 ; /* 0 == default */
2016-04-26 21:18:12 +00:00
/* by stripes of 16 bytes */
{ U32 cached = MEM_read32 ( ip ) ; ip + = 4 ;
2016-03-07 09:52:53 +00:00
while ( ip < iend - 15 ) {
U32 c = cached ; cached = MEM_read32 ( ip ) ; ip + = 4 ;
Counting1 [ ( BYTE ) c ] + + ;
Counting2 [ ( BYTE ) ( c > > 8 ) ] + + ;
Counting3 [ ( BYTE ) ( c > > 16 ) ] + + ;
Counting4 [ c > > 24 ] + + ;
c = cached ; cached = MEM_read32 ( ip ) ; ip + = 4 ;
Counting1 [ ( BYTE ) c ] + + ;
Counting2 [ ( BYTE ) ( c > > 8 ) ] + + ;
Counting3 [ ( BYTE ) ( c > > 16 ) ] + + ;
Counting4 [ c > > 24 ] + + ;
c = cached ; cached = MEM_read32 ( ip ) ; ip + = 4 ;
Counting1 [ ( BYTE ) c ] + + ;
Counting2 [ ( BYTE ) ( c > > 8 ) ] + + ;
Counting3 [ ( BYTE ) ( c > > 16 ) ] + + ;
Counting4 [ c > > 24 ] + + ;
c = cached ; cached = MEM_read32 ( ip ) ; ip + = 4 ;
Counting1 [ ( BYTE ) c ] + + ;
Counting2 [ ( BYTE ) ( c > > 8 ) ] + + ;
Counting3 [ ( BYTE ) ( c > > 16 ) ] + + ;
Counting4 [ c > > 24 ] + + ;
}
ip - = 4 ;
}
/* finish last symbols */
while ( ip < iend ) Counting1 [ * ip + + ] + + ;
if ( checkMax ) { /* verify stats will fit into destination table */
2016-04-26 21:18:12 +00:00
U32 s ; for ( s = 255 ; s > maxSymbolValue ; s - - ) {
2016-03-07 09:52:53 +00:00
Counting1 [ s ] + = Counting2 [ s ] + Counting3 [ s ] + Counting4 [ s ] ;
if ( Counting1 [ s ] ) return ERROR ( maxSymbolValue_tooSmall ) ;
} }
2016-04-26 21:18:12 +00:00
{ U32 s ; for ( s = 0 ; s < = maxSymbolValue ; s + + ) {
2016-03-07 09:52:53 +00:00
count [ s ] = Counting1 [ s ] + Counting2 [ s ] + Counting3 [ s ] + Counting4 [ s ] ;
if ( count [ s ] > max ) max = count [ s ] ;
2016-04-26 21:18:12 +00:00
} }
2016-03-07 09:52:53 +00:00
while ( ! count [ maxSymbolValue ] ) maxSymbolValue - - ;
* maxSymbolValuePtr = maxSymbolValue ;
return ( size_t ) max ;
}
/* FSE_countFast() :
 * fast variant (unsafe : won't check if src contains values beyond count[] limit).
 * Inputs shorter than 1500 bytes go through FSE_count_simple(),
 * larger ones through FSE_count_parallel() with its limit check disabled. */
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                     const void* source, size_t sourceSize)
{
    if (sourceSize >= 1500)
        return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0);

    return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
}
/* FSE_count() :
 * Histogram of `source` into `count`.
 * When the requested limit `*maxSymbolValuePtr` is below 255, symbols are checked against it
 * (FSE_count_parallel() with checkMax enabled).
 * Otherwise the limit is set to 255 and the unchecked FSE_countFast() is used.
 * @return : whatever the selected counting function returns. */
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
                 const void* source, size_t sourceSize)
{
    if (*maxSymbolValuePtr >= 255) {
        *maxSymbolValuePtr = 255;
        return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
    }

    return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);
}
2016-05-19 18:03:58 +00:00
2016-03-07 09:52:53 +00:00
/*-**************************************************************
* FSE Compression Code
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2016-04-26 21:18:12 +00:00
/*! FSE_sizeof_CTable() :
FSE_CTable is a variable size structure which contains :
` U16 tableLog ; `
` U16 maxSymbolValue ; `
` U16 nextStateNumber [ 1 < < tableLog ] ; ` // This size is variable
` FSE_symbolCompressionTransform symbolTT [ maxSymbolValue + 1 ] ; ` // This size is variable
Allocation is manual ( C standard does not support variable - size structures ) .
2016-03-07 09:52:53 +00:00
*/
size_t FSE_sizeof_CTable ( unsigned maxSymbolValue , unsigned tableLog )
{
size_t size ;
FSE_STATIC_ASSERT ( ( size_t ) FSE_CTABLE_SIZE_U32 ( FSE_MAX_TABLELOG , FSE_MAX_SYMBOL_VALUE ) * 4 > = sizeof ( CTable_max_t ) ) ; /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
if ( tableLog > FSE_MAX_TABLELOG ) return ERROR ( GENERIC ) ;
size = FSE_CTABLE_SIZE_U32 ( tableLog , maxSymbolValue ) * sizeof ( U32 ) ;
return size ;
}
FSE_CTable * FSE_createCTable ( unsigned maxSymbolValue , unsigned tableLog )
2015-03-09 01:26:33 +00:00
{
size_t size ;
if ( tableLog > FSE_TABLELOG_ABSOLUTE_MAX ) tableLog = FSE_TABLELOG_ABSOLUTE_MAX ;
size = FSE_CTABLE_SIZE_U32 ( tableLog , maxSymbolValue ) * sizeof ( U32 ) ;
2015-07-09 18:40:37 +00:00
return ( FSE_CTable * ) malloc ( size ) ;
2015-03-09 01:26:33 +00:00
}
2016-04-26 21:18:12 +00:00
/* FSE_freeCTable() : releases `ct` with free() */
void FSE_freeCTable(FSE_CTable* ct)
{
    free(ct);
}
2015-03-09 01:26:33 +00:00
2015-09-09 23:51:17 +00:00
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog ( size_t srcSize , unsigned maxSymbolValue )
{
2016-03-07 09:52:53 +00:00
U32 minBitsSrc = BIT_highbit32 ( ( U32 ) ( srcSize - 1 ) ) + 1 ;
U32 minBitsSymbols = BIT_highbit32 ( maxSymbolValue ) + 2 ;
2015-09-09 23:51:17 +00:00
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols ;
return minBits ;
}
2015-03-09 01:26:33 +00:00
unsigned FSE_optimalTableLog ( unsigned maxTableLog , size_t srcSize , unsigned maxSymbolValue )
{
2016-03-07 09:52:53 +00:00
U32 maxBitsSrc = BIT_highbit32 ( ( U32 ) ( srcSize - 1 ) ) - 2 ;
2015-03-09 01:26:33 +00:00
U32 tableLog = maxTableLog ;
2015-09-09 23:51:17 +00:00
U32 minBits = FSE_minTableLog ( srcSize , maxSymbolValue ) ;
2015-03-09 01:26:33 +00:00
if ( tableLog = = 0 ) tableLog = FSE_DEFAULT_TABLELOG ;
2015-09-09 23:51:17 +00:00
if ( maxBitsSrc < tableLog ) tableLog = maxBitsSrc ; /* Accuracy can be reduced */
if ( minBits > tableLog ) tableLog = minBits ; /* Need a minimum to safely represent all symbol values */
2015-03-09 01:26:33 +00:00
if ( tableLog < FSE_MIN_TABLELOG ) tableLog = FSE_MIN_TABLELOG ;
if ( tableLog > FSE_MAX_TABLELOG ) tableLog = FSE_MAX_TABLELOG ;
return tableLog ;
}
/* FSE_normalizeM2() :
 * Secondary normalization method.
 * To be used when primary method fails.
 * Writes into `norm` a weight for each symbol 0..maxSymbolValue, from `count` and its sum `total` :
 *   0 for absent symbols, -1 for counts <= total>>tableLog, 1 for counts <= lowOne,
 *   and a share of the remaining slots, proportional to its count, for every other symbol.
 * @return : 0 on success,
 *           ERROR(GENERIC) when the distribution cannot be represented
 *           (more symbols than slots, or a symbol ending up with a weight of 0). */
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
{
    U32 const tableSize = 1 << tableLog;
    U32 s;
    U32 distributed = 0;
    U32 ToDistribute;

    /* Init */
    U32 lowThreshold = (U32)(total >> tableLog);
    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));

    for (s = 0; s <= maxSymbolValue; s++) {
        if (count[s] == 0) {
            norm[s] = 0;
            continue;
        }
        if (count[s] <= lowThreshold) {
            norm[s] = -1;
            distributed++;
            total -= count[s];
            continue;
        }
        if (count[s] <= lowOne) {
            norm[s] = 1;
            distributed++;
            total -= count[s];
            continue;
        }
        norm[s] = -2;   /* marker : weight still to be determined */
    }

    /* more symbols than slots : the unsigned subtraction below would wrap around */
    if (distributed > tableSize) return ERROR(GENERIC);
    ToDistribute = tableSize - distributed;

    /* every slot already assigned : nothing left to share (and dividing by ToDistribute would fault) */
    if (ToDistribute == 0) return 0;

    if ((total / ToDistribute) > lowOne) {
        /* risk of rounding to zero */
        lowOne = (U32)((total * 3) / (ToDistribute * 2));
        for (s = 0; s <= maxSymbolValue; s++) {
            if ((norm[s] == -2) && (count[s] <= lowOne)) {
                norm[s] = 1;
                distributed++;
                total -= count[s];
                continue;
            }
        }
        if (distributed > tableSize) return ERROR(GENERIC);
        ToDistribute = tableSize - distributed;
    }

    if (distributed == maxSymbolValue + 1) {
        /* all values are pretty poor;
           probably incompressible data (should have already been detected);
           find max, then give all remaining points to max */
        U32 maxV = 0, maxC = 0;
        for (s = 0; s <= maxSymbolValue; s++)
            if (count[s] > maxC) maxV = s, maxC = count[s];
        norm[maxV] += (short)ToDistribute;
        return 0;
    }

    if (total == 0) {
        /* every present symbol already received -1 or 1, yet some slots remain :
         * hand them out round-robin to symbols with a positive weight
         * (the proportional sharing below would divide by total == 0) */
        U32 nbPositive = 0;
        for (s = 0; s <= maxSymbolValue; s++) nbPositive += (norm[s] > 0);
        if ((ToDistribute > 0) && (nbPositive == 0)) return ERROR(GENERIC);   /* nobody can take the slots */
        for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
        return 0;
    }

    {   /* share remaining slots proportionally, using fixed-point arithmetic with vStepLog fractional bits */
        U64 const vStepLog = 62 - tableLog;
        U64 const mid = (1ULL << (vStepLog - 1)) - 1;
        U64 const rStep = ((((U64)1 << vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
        U64 tmpTotal = mid;
        for (s = 0; s <= maxSymbolValue; s++) {
            if (norm[s] == -2) {
                U64 end = tmpTotal + (count[s] * rStep);
                U32 sStart = (U32)(tmpTotal >> vStepLog);
                U32 sEnd = (U32)(end >> vStepLog);
                U32 weight = sEnd - sStart;
                if (weight < 1)
                    return ERROR(GENERIC);
                norm[s] = (short)weight;
                tmpTotal = end;
            }
        }
    }

    return 0;
}
size_t FSE_normalizeCount ( short * normalizedCounter , unsigned tableLog ,
const unsigned * count , size_t total ,
unsigned maxSymbolValue )
{
/* Sanity checks */
if ( tableLog = = 0 ) tableLog = FSE_DEFAULT_TABLELOG ;
2016-03-07 09:52:53 +00:00
if ( tableLog < FSE_MIN_TABLELOG ) return ERROR ( GENERIC ) ; /* Unsupported size */
if ( tableLog > FSE_MAX_TABLELOG ) return ERROR ( tableLog_tooLarge ) ; /* Unsupported size */
if ( tableLog < FSE_minTableLog ( total , maxSymbolValue ) ) return ERROR ( GENERIC ) ; /* Too small tableLog, compression potentially impossible */
2015-03-09 01:26:33 +00:00
2016-04-26 21:18:12 +00:00
{ U32 const rtbTable [ ] = { 0 , 473195 , 504333 , 520860 , 550000 , 700000 , 750000 , 830000 } ;
2016-05-19 18:03:58 +00:00
2015-03-09 01:26:33 +00:00
U64 const scale = 62 - tableLog ;
U64 const step = ( ( U64 ) 1 < < 62 ) / total ; /* <== here, one division ! */
U64 const vStep = 1ULL < < ( scale - 20 ) ;
int stillToDistribute = 1 < < tableLog ;
unsigned s ;
unsigned largest = 0 ;
short largestP = 0 ;
U32 lowThreshold = ( U32 ) ( total > > tableLog ) ;
2016-03-07 09:52:53 +00:00
for ( s = 0 ; s < = maxSymbolValue ; s + + ) {
if ( count [ s ] = = total ) return 0 ; /* rle special case */
if ( count [ s ] = = 0 ) { normalizedCounter [ s ] = 0 ; continue ; }
if ( count [ s ] < = lowThreshold ) {
2015-03-09 01:26:33 +00:00
normalizedCounter [ s ] = - 1 ;
stillToDistribute - - ;
2016-03-07 09:52:53 +00:00
} else {
2015-03-09 01:26:33 +00:00
short proba = ( short ) ( ( count [ s ] * step ) > > scale ) ;
2016-03-07 09:52:53 +00:00
if ( proba < 8 ) {
2015-03-09 01:26:33 +00:00
U64 restToBeat = vStep * rtbTable [ proba ] ;
proba + = ( count [ s ] * step ) - ( ( U64 ) proba < < scale ) > restToBeat ;
}
2016-03-07 09:52:53 +00:00
if ( proba > largestP ) largestP = proba , largest = s ;
2015-03-09 01:26:33 +00:00
normalizedCounter [ s ] = proba ;
stillToDistribute - = proba ;
2016-03-07 09:52:53 +00:00
} }
if ( - stillToDistribute > = ( normalizedCounter [ largest ] > > 1 ) ) {
2015-03-09 01:26:33 +00:00
/* corner case, need another normalization method */
size_t errorCode = FSE_normalizeM2 ( normalizedCounter , tableLog , count , total , maxSymbolValue ) ;
if ( FSE_isError ( errorCode ) ) return errorCode ;
}
else normalizedCounter [ largest ] + = ( short ) stillToDistribute ;
}
#if 0
{ /* Print Table (debug) */
U32 s ;
U32 nTotal = 0 ;
for ( s = 0 ; s < = maxSymbolValue ; s + + )
printf ( " %3i: %4i \n " , s , normalizedCounter [ s ] ) ;
for ( s = 0 ; s < = maxSymbolValue ; s + + )
nTotal + = abs ( normalizedCounter [ s ] ) ;
if ( nTotal ! = ( 1U < < tableLog ) )
printf ( " Warning !!! Total == %u != %u !!! " , nTotal , 1U < < tableLog ) ;
getchar ( ) ;
}
# endif
return tableLog ;
}
2015-07-09 18:40:37 +00:00
/* FSE_buildCTable_raw() :
 * fake FSE_CTable, for raw (uncompressed) input.
 * Builds a table of 1<<nbBits cells over symbols 0..(1<<nbBits)-1, every symbol receiving the same deltaNbBits.
 * @return : 0 on success, ERROR(GENERIC) when nbBits < 1 */
size_t FSE_buildCTable_raw(FSE_CTable* ct, unsigned nbBits)
{
    const unsigned tableSize = 1 << nbBits;
    const unsigned maxSymbolValue = tableSize - 1;
    void* const base = ct;
    U16* const stateTable = ((U16*)base) + 2;
    void* const ttArea = ((U32*)base) + 1 /* header */ + (tableSize >> 1);   /* assumption : tableLog >= 1 */
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*)ttArea;
    unsigned s;

    /* Sanity checks */
    if (nbBits < 1) return ERROR(GENERIC);   /* min size */

    /* header */
    stateTable[-2] = (U16)nbBits;
    stateTable[-1] = (U16)maxSymbolValue;

    /* State table : cell s maps to state tableSize+s */
    for (s = 0; s < tableSize; s++) {
        stateTable[s] = (U16)(tableSize + s);
    }

    /* Symbol transformation table */
    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
        for (s = 0; s <= maxSymbolValue; s++) {
            symbolTT[s].deltaNbBits = deltaNbBits;
            symbolTT[s].deltaFindState = s - 1;
        }
    }

    return 0;
}
2015-07-09 18:40:37 +00:00
/* FSE_buildCTable_rle() :
 * fake FSE_CTable, for rle (100% always same symbol) input.
 * Header is written with tableLog 0 and maxSymbolValue == symbolValue;
 * only the transform entry of `symbolValue` is filled, with zeroes.
 * @return : 0 (always) */
size_t FSE_buildCTable_rle(FSE_CTable* ct, BYTE symbolValue)
{
    void* const base = ct;
    U16* const stateTable = ((U16*)base) + 2;
    void* const ttArea = (U32*)base + 2;
    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*)ttArea;

    /* header */
    stateTable[-2] = (U16)0;
    stateTable[-1] = (U16)symbolValue;

    /* State table */
    stateTable[0] = 0;
    stateTable[1] = 0;   /* just in case */

    /* Symbol transformation table */
    symbolTT[symbolValue].deltaNbBits = 0;
    symbolTT[symbolValue].deltaFindState = 0;

    return 0;
}
2015-09-09 23:51:17 +00:00
static size_t FSE_compress_usingCTable_generic ( void * dst , size_t dstSize ,
2015-03-09 01:26:33 +00:00
const void * src , size_t srcSize ,
2015-09-09 23:51:17 +00:00
const FSE_CTable * ct , const unsigned fast )
2015-03-09 01:26:33 +00:00
{
const BYTE * const istart = ( const BYTE * ) src ;
const BYTE * const iend = istart + srcSize ;
2016-04-26 21:18:12 +00:00
const BYTE * ip = iend ;
2016-05-19 18:03:58 +00:00
2016-03-07 09:52:53 +00:00
BIT_CStream_t bitC ;
2015-03-09 01:26:33 +00:00
FSE_CState_t CState1 , CState2 ;
/* init */
2016-04-26 21:18:12 +00:00
if ( srcSize < = 2 ) return 0 ;
{ size_t const errorCode = BIT_initCStream ( & bitC , dst , dstSize ) ;
if ( FSE_isError ( errorCode ) ) return 0 ; }
2015-03-09 01:26:33 +00:00
2016-03-07 09:52:53 +00:00
# define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
2015-09-09 23:51:17 +00:00
2016-03-07 09:52:53 +00:00
if ( srcSize & 1 ) {
2016-04-26 21:18:12 +00:00
FSE_initCState2 ( & CState1 , ct , * - - ip ) ;
FSE_initCState2 ( & CState2 , ct , * - - ip ) ;
2015-07-09 18:40:37 +00:00
FSE_encodeSymbol ( & bitC , & CState1 , * - - ip ) ;
2015-09-09 23:51:17 +00:00
FSE_FLUSHBITS ( & bitC ) ;
2016-04-26 21:18:12 +00:00
} else {
FSE_initCState2 ( & CState2 , ct , * - - ip ) ;
FSE_initCState2 ( & CState1 , ct , * - - ip ) ;
2015-03-09 01:26:33 +00:00
}
/* join to mod 4 */
2016-04-26 21:18:12 +00:00
srcSize - = 2 ;
2016-03-07 09:52:53 +00:00
if ( ( sizeof ( bitC . bitContainer ) * 8 > FSE_MAX_TABLELOG * 4 + 7 ) & & ( srcSize & 2 ) ) { /* test bit 2 */
2015-07-09 18:40:37 +00:00
FSE_encodeSymbol ( & bitC , & CState2 , * - - ip ) ;
FSE_encodeSymbol ( & bitC , & CState1 , * - - ip ) ;
2015-09-09 23:51:17 +00:00
FSE_FLUSHBITS ( & bitC ) ;
2015-03-09 01:26:33 +00:00
}
/* 2 or 4 encoding per loop */
2016-04-26 21:18:12 +00:00
for ( ; ip > istart ; ) {
2016-05-19 18:03:58 +00:00
2015-07-09 18:40:37 +00:00
FSE_encodeSymbol ( & bitC , & CState2 , * - - ip ) ;
2015-03-09 01:26:33 +00:00
2015-09-09 23:51:17 +00:00
if ( sizeof ( bitC . bitContainer ) * 8 < FSE_MAX_TABLELOG * 2 + 7 ) /* this test must be static */
FSE_FLUSHBITS ( & bitC ) ;
2015-03-09 01:26:33 +00:00
2015-07-09 18:40:37 +00:00
FSE_encodeSymbol ( & bitC , & CState1 , * - - ip ) ;
2015-03-09 01:26:33 +00:00
2016-03-07 09:52:53 +00:00
if ( sizeof ( bitC . bitContainer ) * 8 > FSE_MAX_TABLELOG * 4 + 7 ) { /* this test must be static */
2015-07-09 18:40:37 +00:00
FSE_encodeSymbol ( & bitC , & CState2 , * - - ip ) ;
FSE_encodeSymbol ( & bitC , & CState1 , * - - ip ) ;
2015-03-09 01:26:33 +00:00
}
2015-09-09 23:51:17 +00:00
FSE_FLUSHBITS ( & bitC ) ;
2015-03-09 01:26:33 +00:00
}
FSE_flushCState ( & bitC , & CState2 ) ;
FSE_flushCState ( & bitC , & CState1 ) ;
2016-03-07 09:52:53 +00:00
return BIT_closeCStream ( & bitC ) ;
2015-03-09 01:26:33 +00:00
}
2015-09-09 23:51:17 +00:00
/* FSE_compress_usingCTable() :
 * Compresses `src` into `dst` with table `ct`.
 * The fast flush variant is selected when `dstSize` reaches FSE_BLOCKBOUND(srcSize).
 * @return : result of FSE_compress_usingCTable_generic() */
size_t FSE_compress_usingCTable(void* dst, size_t dstSize,
                                const void* src, size_t srcSize,
                                const FSE_CTable* ct)
{
    if (dstSize >= FSE_BLOCKBOUND(srcSize))
        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);

    return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
}
2015-03-09 01:26:33 +00:00
2015-09-09 23:51:17 +00:00
/* FSE_compressBound() : function form of the FSE_COMPRESSBOUND() macro */
size_t FSE_compressBound(size_t size)
{
    size_t const bound = FSE_COMPRESSBOUND(size);
    return bound;
}
2015-03-09 01:26:33 +00:00
size_t FSE_compress2 ( void * dst , size_t dstSize , const void * src , size_t srcSize , unsigned maxSymbolValue , unsigned tableLog )
{
const BYTE * const istart = ( const BYTE * ) src ;
const BYTE * ip = istart ;
BYTE * const ostart = ( BYTE * ) dst ;
BYTE * op = ostart ;
BYTE * const oend = ostart + dstSize ;
U32 count [ FSE_MAX_SYMBOL_VALUE + 1 ] ;
S16 norm [ FSE_MAX_SYMBOL_VALUE + 1 ] ;
2015-07-09 18:40:37 +00:00
CTable_max_t ct ;
2015-03-09 01:26:33 +00:00
size_t errorCode ;
2015-09-09 23:51:17 +00:00
/* init conditions */
if ( srcSize < = 1 ) return 0 ; /* Uncompressible */
2015-03-09 01:26:33 +00:00
if ( ! maxSymbolValue ) maxSymbolValue = FSE_MAX_SYMBOL_VALUE ;
if ( ! tableLog ) tableLog = FSE_DEFAULT_TABLELOG ;
/* Scan input and build symbol stats */
2015-07-09 18:40:37 +00:00
errorCode = FSE_count ( count , & maxSymbolValue , ip , srcSize ) ;
2015-03-09 01:26:33 +00:00
if ( FSE_isError ( errorCode ) ) return errorCode ;
if ( errorCode = = srcSize ) return 1 ;
2015-09-09 23:51:17 +00:00
if ( errorCode = = 1 ) return 0 ; /* each symbol only present once */
2015-03-09 01:26:33 +00:00
if ( errorCode < ( srcSize > > 7 ) ) return 0 ; /* Heuristic : not compressible enough */
tableLog = FSE_optimalTableLog ( tableLog , srcSize , maxSymbolValue ) ;
errorCode = FSE_normalizeCount ( norm , tableLog , count , srcSize , maxSymbolValue ) ;
if ( FSE_isError ( errorCode ) ) return errorCode ;
/* Write table description header */
2015-09-09 23:51:17 +00:00
errorCode = FSE_writeNCount ( op , oend - op , norm , maxSymbolValue , tableLog ) ;
2015-03-09 01:26:33 +00:00
if ( FSE_isError ( errorCode ) ) return errorCode ;
op + = errorCode ;
/* Compress */
2015-07-09 18:40:37 +00:00
errorCode = FSE_buildCTable ( ct , norm , maxSymbolValue , tableLog ) ;
2015-03-09 01:26:33 +00:00
if ( FSE_isError ( errorCode ) ) return errorCode ;
2015-09-09 23:51:17 +00:00
errorCode = FSE_compress_usingCTable ( op , oend - op , ip , srcSize , ct ) ;
if ( errorCode = = 0 ) return 0 ; /* not enough space for compressed data */
op + = errorCode ;
2015-03-09 01:26:33 +00:00
/* check compressibility */
if ( ( size_t ) ( op - ostart ) > = srcSize - 1 )
return 0 ;
return op - ostart ;
}
size_t FSE_compress ( void * dst , size_t dstSize , const void * src , size_t srcSize )
{
return FSE_compress2 ( dst , dstSize , src , ( U32 ) srcSize , FSE_MAX_SYMBOL_VALUE , FSE_DEFAULT_TABLELOG ) ;
}
2015-09-09 23:51:17 +00:00
# endif /* FSE_COMMONDEFS_ONLY */