1*3117ece4Schristos /* ****************************************************************** 2*3117ece4Schristos * FSE : Finite State Entropy encoder 3*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 4*3117ece4Schristos * 5*3117ece4Schristos * You can contact the author at : 6*3117ece4Schristos * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 7*3117ece4Schristos * - Public forum : https://groups.google.com/forum/#!forum/lz4c 8*3117ece4Schristos * 9*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 10*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 12*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 13*3117ece4Schristos ****************************************************************** */ 14*3117ece4Schristos 15*3117ece4Schristos /* ************************************************************** 16*3117ece4Schristos * Includes 17*3117ece4Schristos ****************************************************************/ 18*3117ece4Schristos #include "../common/compiler.h" 19*3117ece4Schristos #include "../common/mem.h" /* U32, U16, etc. */ 20*3117ece4Schristos #include "../common/debug.h" /* assert, DEBUGLOG */ 21*3117ece4Schristos #include "hist.h" /* HIST_count_wksp */ 22*3117ece4Schristos #include "../common/bitstream.h" 23*3117ece4Schristos #define FSE_STATIC_LINKING_ONLY 24*3117ece4Schristos #include "../common/fse.h" 25*3117ece4Schristos #include "../common/error_private.h" 26*3117ece4Schristos #define ZSTD_DEPS_NEED_MALLOC 27*3117ece4Schristos #define ZSTD_DEPS_NEED_MATH64 28*3117ece4Schristos #include "../common/zstd_deps.h" /* ZSTD_memset */ 29*3117ece4Schristos #include "../common/bits.h" /* ZSTD_highbit32 */ 30*3117ece4Schristos 31*3117ece4Schristos 32*3117ece4Schristos /* ************************************************************** 33*3117ece4Schristos * Error Management 34*3117ece4Schristos ****************************************************************/ 35*3117ece4Schristos #define FSE_isError ERR_isError 36*3117ece4Schristos 37*3117ece4Schristos 38*3117ece4Schristos /* ************************************************************** 39*3117ece4Schristos * Templates 40*3117ece4Schristos ****************************************************************/ 41*3117ece4Schristos /* 42*3117ece4Schristos designed to be included 43*3117ece4Schristos for type-specific functions (template emulation in C) 44*3117ece4Schristos Objective is to write these functions only once, for improved maintenance 45*3117ece4Schristos */ 46*3117ece4Schristos 47*3117ece4Schristos /* safety checks */ 48*3117ece4Schristos #ifndef FSE_FUNCTION_EXTENSION 49*3117ece4Schristos # error "FSE_FUNCTION_EXTENSION must be defined" 50*3117ece4Schristos #endif 51*3117ece4Schristos #ifndef FSE_FUNCTION_TYPE 52*3117ece4Schristos # error "FSE_FUNCTION_TYPE must be defined" 53*3117ece4Schristos #endif 54*3117ece4Schristos 55*3117ece4Schristos /* Function names */ 56*3117ece4Schristos #define FSE_CAT(X,Y) X##Y 57*3117ece4Schristos #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) 58*3117ece4Schristos #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) 59*3117ece4Schristos 60*3117ece4Schristos 61*3117ece4Schristos /* Function templates */ 62*3117ece4Schristos 63*3117ece4Schristos /* FSE_buildCTable_wksp() : 64*3117ece4Schristos * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 65*3117ece4Schristos * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` 66*3117ece4Schristos * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements 67*3117ece4Schristos */ 68*3117ece4Schristos size_t FSE_buildCTable_wksp(FSE_CTable* ct, 69*3117ece4Schristos const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, 70*3117ece4Schristos void* workSpace, size_t wkspSize) 71*3117ece4Schristos { 72*3117ece4Schristos U32 const tableSize = 1 << tableLog; 73*3117ece4Schristos U32 const tableMask = tableSize - 1; 74*3117ece4Schristos void* const ptr = ct; 75*3117ece4Schristos U16* const tableU16 = ( (U16*) ptr) + 2; 76*3117ece4Schristos void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; 77*3117ece4Schristos FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); 78*3117ece4Schristos U32 const step = FSE_TABLESTEP(tableSize); 79*3117ece4Schristos U32 const maxSV1 = maxSymbolValue+1; 80*3117ece4Schristos 81*3117ece4Schristos U16* cumul = (U16*)workSpace; /* size = maxSV1 */ 82*3117ece4Schristos FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */ 83*3117ece4Schristos 84*3117ece4Schristos U32 highThreshold = tableSize-1; 85*3117ece4Schristos 86*3117ece4Schristos assert(((size_t)workSpace & 1) == 0); /* Must be 2 bytes-aligned */ 87*3117ece4Schristos if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); 88*3117ece4Schristos /* CTable header */ 89*3117ece4Schristos tableU16[-2] = (U16) tableLog; 90*3117ece4Schristos tableU16[-1] = (U16) maxSymbolValue; 91*3117ece4Schristos assert(tableLog < 16); /* required for threshold strategy to work */ 92*3117ece4Schristos 93*3117ece4Schristos /* For explanations on how to distribute symbol values over the table : 94*3117ece4Schristos * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ 95*3117ece4Schristos 96*3117ece4Schristos #ifdef __clang_analyzer__ 97*3117ece4Schristos ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ 98*3117ece4Schristos #endif 99*3117ece4Schristos 100*3117ece4Schristos /* symbol start positions */ 101*3117ece4Schristos { U32 u; 102*3117ece4Schristos cumul[0] = 0; 103*3117ece4Schristos for (u=1; u <= maxSV1; u++) { 104*3117ece4Schristos if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ 105*3117ece4Schristos cumul[u] = cumul[u-1] + 1; 106*3117ece4Schristos tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); 107*3117ece4Schristos } else { 108*3117ece4Schristos assert(normalizedCounter[u-1] >= 0); 109*3117ece4Schristos cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1]; 110*3117ece4Schristos assert(cumul[u] >= cumul[u-1]); /* no overflow */ 111*3117ece4Schristos } } 112*3117ece4Schristos cumul[maxSV1] = (U16)(tableSize+1); 113*3117ece4Schristos } 114*3117ece4Schristos 115*3117ece4Schristos /* Spread symbols */ 116*3117ece4Schristos if (highThreshold == tableSize - 1) { 117*3117ece4Schristos /* Case for no low prob count symbols. Lay down 8 bytes at a time 118*3117ece4Schristos * to reduce branch misses since we are operating on a small block 119*3117ece4Schristos */ 120*3117ece4Schristos BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */ 121*3117ece4Schristos { U64 const add = 0x0101010101010101ull; 122*3117ece4Schristos size_t pos = 0; 123*3117ece4Schristos U64 sv = 0; 124*3117ece4Schristos U32 s; 125*3117ece4Schristos for (s=0; s<maxSV1; ++s, sv += add) { 126*3117ece4Schristos int i; 127*3117ece4Schristos int const n = normalizedCounter[s]; 128*3117ece4Schristos MEM_write64(spread + pos, sv); 129*3117ece4Schristos for (i = 8; i < n; i += 8) { 130*3117ece4Schristos MEM_write64(spread + pos + i, sv); 131*3117ece4Schristos } 132*3117ece4Schristos assert(n>=0); 133*3117ece4Schristos pos += (size_t)n; 134*3117ece4Schristos } 135*3117ece4Schristos } 136*3117ece4Schristos /* Spread symbols across the table. Lack of lowprob symbols means that 137*3117ece4Schristos * we don't need variable sized inner loop, so we can unroll the loop and 138*3117ece4Schristos * reduce branch misses. 139*3117ece4Schristos */ 140*3117ece4Schristos { size_t position = 0; 141*3117ece4Schristos size_t s; 142*3117ece4Schristos size_t const unroll = 2; /* Experimentally determined optimal unroll */ 143*3117ece4Schristos assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ 144*3117ece4Schristos for (s = 0; s < (size_t)tableSize; s += unroll) { 145*3117ece4Schristos size_t u; 146*3117ece4Schristos for (u = 0; u < unroll; ++u) { 147*3117ece4Schristos size_t const uPosition = (position + (u * step)) & tableMask; 148*3117ece4Schristos tableSymbol[uPosition] = spread[s + u]; 149*3117ece4Schristos } 150*3117ece4Schristos position = (position + (unroll * step)) & tableMask; 151*3117ece4Schristos } 152*3117ece4Schristos assert(position == 0); /* Must have initialized all positions */ 153*3117ece4Schristos } 154*3117ece4Schristos } else { 155*3117ece4Schristos U32 position = 0; 156*3117ece4Schristos U32 symbol; 157*3117ece4Schristos for (symbol=0; symbol<maxSV1; symbol++) { 158*3117ece4Schristos int nbOccurrences; 159*3117ece4Schristos int const freq = normalizedCounter[symbol]; 160*3117ece4Schristos for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) { 161*3117ece4Schristos tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; 162*3117ece4Schristos position = (position + step) & tableMask; 163*3117ece4Schristos while (position > highThreshold) 164*3117ece4Schristos position = (position + step) & tableMask; /* Low proba area */ 165*3117ece4Schristos } } 166*3117ece4Schristos assert(position==0); /* Must have initialized all positions */ 167*3117ece4Schristos } 168*3117ece4Schristos 169*3117ece4Schristos /* Build table */ 170*3117ece4Schristos { U32 u; for (u=0; u<tableSize; u++) { 171*3117ece4Schristos FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ 172*3117ece4Schristos tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */ 173*3117ece4Schristos } } 174*3117ece4Schristos 175*3117ece4Schristos /* Build Symbol Transformation Table */ 176*3117ece4Schristos { unsigned total = 0; 177*3117ece4Schristos unsigned s; 178*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) { 179*3117ece4Schristos switch (normalizedCounter[s]) 180*3117ece4Schristos { 181*3117ece4Schristos case 0: 182*3117ece4Schristos /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */ 183*3117ece4Schristos symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog); 184*3117ece4Schristos break; 185*3117ece4Schristos 186*3117ece4Schristos case -1: 187*3117ece4Schristos case 1: 188*3117ece4Schristos symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog); 189*3117ece4Schristos assert(total <= INT_MAX); 190*3117ece4Schristos symbolTT[s].deltaFindState = (int)(total - 1); 191*3117ece4Schristos total ++; 192*3117ece4Schristos break; 193*3117ece4Schristos default : 194*3117ece4Schristos assert(normalizedCounter[s] > 1); 195*3117ece4Schristos { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1); 196*3117ece4Schristos U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut; 197*3117ece4Schristos symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; 198*3117ece4Schristos symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]); 199*3117ece4Schristos total += (unsigned)normalizedCounter[s]; 200*3117ece4Schristos } } } } 201*3117ece4Schristos 202*3117ece4Schristos #if 0 /* debug : symbol costs */ 203*3117ece4Schristos DEBUGLOG(5, "\n --- table statistics : "); 204*3117ece4Schristos { U32 symbol; 205*3117ece4Schristos for (symbol=0; symbol<=maxSymbolValue; symbol++) { 206*3117ece4Schristos DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f", 207*3117ece4Schristos symbol, normalizedCounter[symbol], 208*3117ece4Schristos FSE_getMaxNbBits(symbolTT, symbol), 209*3117ece4Schristos (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256); 210*3117ece4Schristos } } 211*3117ece4Schristos #endif 212*3117ece4Schristos 213*3117ece4Schristos return 0; 214*3117ece4Schristos } 215*3117ece4Schristos 216*3117ece4Schristos 217*3117ece4Schristos 218*3117ece4Schristos #ifndef FSE_COMMONDEFS_ONLY 219*3117ece4Schristos 220*3117ece4Schristos /*-************************************************************** 221*3117ece4Schristos * FSE NCount encoding 222*3117ece4Schristos ****************************************************************/ 223*3117ece4Schristos size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) 224*3117ece4Schristos { 225*3117ece4Schristos size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog 226*3117ece4Schristos + 4 /* bitCount initialized at 4 */ 227*3117ece4Schristos + 2 /* first two symbols may use one additional bit each */) / 8) 228*3117ece4Schristos + 1 /* round up to whole nb bytes */ 229*3117ece4Schristos + 2 /* additional two bytes for bitstream flush */; 230*3117ece4Schristos return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ 231*3117ece4Schristos } 232*3117ece4Schristos 233*3117ece4Schristos static size_t 234*3117ece4Schristos FSE_writeNCount_generic (void* header, size_t headerBufferSize, 235*3117ece4Schristos const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, 236*3117ece4Schristos unsigned writeIsSafe) 237*3117ece4Schristos { 238*3117ece4Schristos BYTE* const ostart = (BYTE*) header; 239*3117ece4Schristos BYTE* out = ostart; 240*3117ece4Schristos BYTE* const oend = ostart + headerBufferSize; 241*3117ece4Schristos int nbBits; 242*3117ece4Schristos const int tableSize = 1 << tableLog; 243*3117ece4Schristos int remaining; 244*3117ece4Schristos int threshold; 245*3117ece4Schristos U32 bitStream = 0; 246*3117ece4Schristos int bitCount = 0; 247*3117ece4Schristos unsigned symbol = 0; 248*3117ece4Schristos unsigned const alphabetSize = maxSymbolValue + 1; 249*3117ece4Schristos int previousIs0 = 0; 250*3117ece4Schristos 251*3117ece4Schristos /* Table Size */ 252*3117ece4Schristos bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; 253*3117ece4Schristos bitCount += 4; 254*3117ece4Schristos 255*3117ece4Schristos /* Init */ 256*3117ece4Schristos remaining = tableSize+1; /* +1 for extra accuracy */ 257*3117ece4Schristos threshold = tableSize; 258*3117ece4Schristos nbBits = (int)tableLog+1; 259*3117ece4Schristos 260*3117ece4Schristos while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ 261*3117ece4Schristos if (previousIs0) { 262*3117ece4Schristos unsigned start = symbol; 263*3117ece4Schristos while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; 264*3117ece4Schristos if (symbol == alphabetSize) break; /* incorrect distribution */ 265*3117ece4Schristos while (symbol >= start+24) { 266*3117ece4Schristos start+=24; 267*3117ece4Schristos bitStream += 0xFFFFU << bitCount; 268*3117ece4Schristos if ((!writeIsSafe) && (out > oend-2)) 269*3117ece4Schristos return ERROR(dstSize_tooSmall); /* Buffer overflow */ 270*3117ece4Schristos out[0] = (BYTE) bitStream; 271*3117ece4Schristos out[1] = (BYTE)(bitStream>>8); 272*3117ece4Schristos out+=2; 273*3117ece4Schristos bitStream>>=16; 274*3117ece4Schristos } 275*3117ece4Schristos while (symbol >= start+3) { 276*3117ece4Schristos start+=3; 277*3117ece4Schristos bitStream += 3U << bitCount; 278*3117ece4Schristos bitCount += 2; 279*3117ece4Schristos } 280*3117ece4Schristos bitStream += (symbol-start) << bitCount; 281*3117ece4Schristos bitCount += 2; 282*3117ece4Schristos if (bitCount>16) { 283*3117ece4Schristos if ((!writeIsSafe) && (out > oend - 2)) 284*3117ece4Schristos return ERROR(dstSize_tooSmall); /* Buffer overflow */ 285*3117ece4Schristos out[0] = (BYTE)bitStream; 286*3117ece4Schristos out[1] = (BYTE)(bitStream>>8); 287*3117ece4Schristos out += 2; 288*3117ece4Schristos bitStream >>= 16; 289*3117ece4Schristos bitCount -= 16; 290*3117ece4Schristos } } 291*3117ece4Schristos { int count = normalizedCounter[symbol++]; 292*3117ece4Schristos int const max = (2*threshold-1) - remaining; 293*3117ece4Schristos remaining -= count < 0 ? -count : count; 294*3117ece4Schristos count++; /* +1 for extra accuracy */ 295*3117ece4Schristos if (count>=threshold) 296*3117ece4Schristos count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ 297*3117ece4Schristos bitStream += (U32)count << bitCount; 298*3117ece4Schristos bitCount += nbBits; 299*3117ece4Schristos bitCount -= (count<max); 300*3117ece4Schristos previousIs0 = (count==1); 301*3117ece4Schristos if (remaining<1) return ERROR(GENERIC); 302*3117ece4Schristos while (remaining<threshold) { nbBits--; threshold>>=1; } 303*3117ece4Schristos } 304*3117ece4Schristos if (bitCount>16) { 305*3117ece4Schristos if ((!writeIsSafe) && (out > oend - 2)) 306*3117ece4Schristos return ERROR(dstSize_tooSmall); /* Buffer overflow */ 307*3117ece4Schristos out[0] = (BYTE)bitStream; 308*3117ece4Schristos out[1] = (BYTE)(bitStream>>8); 309*3117ece4Schristos out += 2; 310*3117ece4Schristos bitStream >>= 16; 311*3117ece4Schristos bitCount -= 16; 312*3117ece4Schristos } } 313*3117ece4Schristos 314*3117ece4Schristos if (remaining != 1) 315*3117ece4Schristos return ERROR(GENERIC); /* incorrect normalized distribution */ 316*3117ece4Schristos assert(symbol <= alphabetSize); 317*3117ece4Schristos 318*3117ece4Schristos /* flush remaining bitStream */ 319*3117ece4Schristos if ((!writeIsSafe) && (out > oend - 2)) 320*3117ece4Schristos return ERROR(dstSize_tooSmall); /* Buffer overflow */ 321*3117ece4Schristos out[0] = (BYTE)bitStream; 322*3117ece4Schristos out[1] = (BYTE)(bitStream>>8); 323*3117ece4Schristos out+= (bitCount+7) /8; 324*3117ece4Schristos 325*3117ece4Schristos assert(out >= ostart); 326*3117ece4Schristos return (size_t)(out-ostart); 327*3117ece4Schristos } 328*3117ece4Schristos 329*3117ece4Schristos 330*3117ece4Schristos size_t FSE_writeNCount (void* buffer, size_t bufferSize, 331*3117ece4Schristos const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 332*3117ece4Schristos { 333*3117ece4Schristos if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ 334*3117ece4Schristos if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ 335*3117ece4Schristos 336*3117ece4Schristos if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) 337*3117ece4Schristos return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); 338*3117ece4Schristos 339*3117ece4Schristos return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); 340*3117ece4Schristos } 341*3117ece4Schristos 342*3117ece4Schristos 343*3117ece4Schristos /*-************************************************************** 344*3117ece4Schristos * FSE Compression Code 345*3117ece4Schristos ****************************************************************/ 346*3117ece4Schristos 347*3117ece4Schristos /* provides the minimum logSize to safely represent a distribution */ 348*3117ece4Schristos static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) 349*3117ece4Schristos { 350*3117ece4Schristos U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1; 351*3117ece4Schristos U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2; 352*3117ece4Schristos U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; 353*3117ece4Schristos assert(srcSize > 1); /* Not supported, RLE should be used instead */ 354*3117ece4Schristos return minBits; 355*3117ece4Schristos } 356*3117ece4Schristos 357*3117ece4Schristos unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) 358*3117ece4Schristos { 359*3117ece4Schristos U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus; 360*3117ece4Schristos U32 tableLog = maxTableLog; 361*3117ece4Schristos U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); 362*3117ece4Schristos assert(srcSize > 1); /* Not supported, RLE should be used instead */ 363*3117ece4Schristos if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; 364*3117ece4Schristos if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ 365*3117ece4Schristos if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ 366*3117ece4Schristos if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; 367*3117ece4Schristos if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; 368*3117ece4Schristos return tableLog; 369*3117ece4Schristos } 370*3117ece4Schristos 371*3117ece4Schristos unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 372*3117ece4Schristos { 373*3117ece4Schristos return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); 374*3117ece4Schristos } 375*3117ece4Schristos 376*3117ece4Schristos /* Secondary normalization method. 377*3117ece4Schristos To be used when primary method fails. */ 378*3117ece4Schristos 379*3117ece4Schristos static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) 380*3117ece4Schristos { 381*3117ece4Schristos short const NOT_YET_ASSIGNED = -2; 382*3117ece4Schristos U32 s; 383*3117ece4Schristos U32 distributed = 0; 384*3117ece4Schristos U32 ToDistribute; 385*3117ece4Schristos 386*3117ece4Schristos /* Init */ 387*3117ece4Schristos U32 const lowThreshold = (U32)(total >> tableLog); 388*3117ece4Schristos U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); 389*3117ece4Schristos 390*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) { 391*3117ece4Schristos if (count[s] == 0) { 392*3117ece4Schristos norm[s]=0; 393*3117ece4Schristos continue; 394*3117ece4Schristos } 395*3117ece4Schristos if (count[s] <= lowThreshold) { 396*3117ece4Schristos norm[s] = lowProbCount; 397*3117ece4Schristos distributed++; 398*3117ece4Schristos total -= count[s]; 399*3117ece4Schristos continue; 400*3117ece4Schristos } 401*3117ece4Schristos if (count[s] <= lowOne) { 402*3117ece4Schristos norm[s] = 1; 403*3117ece4Schristos distributed++; 404*3117ece4Schristos total -= count[s]; 405*3117ece4Schristos continue; 406*3117ece4Schristos } 407*3117ece4Schristos 408*3117ece4Schristos norm[s]=NOT_YET_ASSIGNED; 409*3117ece4Schristos } 410*3117ece4Schristos ToDistribute = (1 << tableLog) - distributed; 411*3117ece4Schristos 412*3117ece4Schristos if (ToDistribute == 0) 413*3117ece4Schristos return 0; 414*3117ece4Schristos 415*3117ece4Schristos if ((total / ToDistribute) > lowOne) { 416*3117ece4Schristos /* risk of rounding to zero */ 417*3117ece4Schristos lowOne = (U32)((total * 3) / (ToDistribute * 2)); 418*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) { 419*3117ece4Schristos if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { 420*3117ece4Schristos norm[s] = 1; 421*3117ece4Schristos distributed++; 422*3117ece4Schristos total -= count[s]; 423*3117ece4Schristos continue; 424*3117ece4Schristos } } 425*3117ece4Schristos ToDistribute = (1 << tableLog) - distributed; 426*3117ece4Schristos } 427*3117ece4Schristos 428*3117ece4Schristos if (distributed == maxSymbolValue+1) { 429*3117ece4Schristos /* all values are pretty poor; 430*3117ece4Schristos probably incompressible data (should have already been detected); 431*3117ece4Schristos find max, then give all remaining points to max */ 432*3117ece4Schristos U32 maxV = 0, maxC = 0; 433*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) 434*3117ece4Schristos if (count[s] > maxC) { maxV=s; maxC=count[s]; } 435*3117ece4Schristos norm[maxV] += (short)ToDistribute; 436*3117ece4Schristos return 0; 437*3117ece4Schristos } 438*3117ece4Schristos 439*3117ece4Schristos if (total == 0) { 440*3117ece4Schristos /* all of the symbols were low enough for the lowOne or lowThreshold */ 441*3117ece4Schristos for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) 442*3117ece4Schristos if (norm[s] > 0) { ToDistribute--; norm[s]++; } 443*3117ece4Schristos return 0; 444*3117ece4Schristos } 445*3117ece4Schristos 446*3117ece4Schristos { U64 const vStepLog = 62 - tableLog; 447*3117ece4Schristos U64 const mid = (1ULL << (vStepLog-1)) - 1; 448*3117ece4Schristos U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */ 449*3117ece4Schristos U64 tmpTotal = mid; 450*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) { 451*3117ece4Schristos if (norm[s]==NOT_YET_ASSIGNED) { 452*3117ece4Schristos U64 const end = tmpTotal + (count[s] * rStep); 453*3117ece4Schristos U32 const sStart = (U32)(tmpTotal >> vStepLog); 454*3117ece4Schristos U32 const sEnd = (U32)(end >> vStepLog); 455*3117ece4Schristos U32 const weight = sEnd - sStart; 456*3117ece4Schristos if (weight < 1) 457*3117ece4Schristos return ERROR(GENERIC); 458*3117ece4Schristos norm[s] = (short)weight; 459*3117ece4Schristos tmpTotal = end; 460*3117ece4Schristos } } } 461*3117ece4Schristos 462*3117ece4Schristos return 0; 463*3117ece4Schristos } 464*3117ece4Schristos 465*3117ece4Schristos size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, 466*3117ece4Schristos const unsigned* count, size_t total, 467*3117ece4Schristos unsigned maxSymbolValue, unsigned useLowProbCount) 468*3117ece4Schristos { 469*3117ece4Schristos /* Sanity checks */ 470*3117ece4Schristos if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; 471*3117ece4Schristos if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ 472*3117ece4Schristos if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ 473*3117ece4Schristos if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ 474*3117ece4Schristos 475*3117ece4Schristos { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; 476*3117ece4Schristos short const lowProbCount = useLowProbCount ? -1 : 1; 477*3117ece4Schristos U64 const scale = 62 - tableLog; 478*3117ece4Schristos U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */ 479*3117ece4Schristos U64 const vStep = 1ULL<<(scale-20); 480*3117ece4Schristos int stillToDistribute = 1<<tableLog; 481*3117ece4Schristos unsigned s; 482*3117ece4Schristos unsigned largest=0; 483*3117ece4Schristos short largestP=0; 484*3117ece4Schristos U32 lowThreshold = (U32)(total >> tableLog); 485*3117ece4Schristos 486*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) { 487*3117ece4Schristos if (count[s] == total) return 0; /* rle special case */ 488*3117ece4Schristos if (count[s] == 0) { normalizedCounter[s]=0; continue; } 489*3117ece4Schristos if (count[s] <= lowThreshold) { 490*3117ece4Schristos normalizedCounter[s] = lowProbCount; 491*3117ece4Schristos stillToDistribute--; 492*3117ece4Schristos } else { 493*3117ece4Schristos short proba = (short)((count[s]*step) >> scale); 494*3117ece4Schristos if (proba<8) { 495*3117ece4Schristos U64 restToBeat = vStep * rtbTable[proba]; 496*3117ece4Schristos proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat; 497*3117ece4Schristos } 498*3117ece4Schristos if (proba > largestP) { largestP=proba; largest=s; } 499*3117ece4Schristos normalizedCounter[s] = proba; 500*3117ece4Schristos stillToDistribute -= proba; 501*3117ece4Schristos } } 502*3117ece4Schristos if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { 503*3117ece4Schristos /* corner case, need another normalization method */ 504*3117ece4Schristos size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); 505*3117ece4Schristos if (FSE_isError(errorCode)) return errorCode; 506*3117ece4Schristos } 507*3117ece4Schristos else normalizedCounter[largest] += (short)stillToDistribute; 508*3117ece4Schristos } 509*3117ece4Schristos 510*3117ece4Schristos #if 0 511*3117ece4Schristos { /* Print Table (debug) */ 512*3117ece4Schristos U32 s; 513*3117ece4Schristos U32 nTotal = 0; 514*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) 515*3117ece4Schristos RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); 516*3117ece4Schristos for (s=0; s<=maxSymbolValue; s++) 517*3117ece4Schristos nTotal += abs(normalizedCounter[s]); 518*3117ece4Schristos if (nTotal != (1U<<tableLog)) 519*3117ece4Schristos RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog); 520*3117ece4Schristos getchar(); 521*3117ece4Schristos } 522*3117ece4Schristos #endif 523*3117ece4Schristos 524*3117ece4Schristos return tableLog; 525*3117ece4Schristos } 526*3117ece4Schristos 527*3117ece4Schristos /* fake FSE_CTable, for rle input (always same symbol) */ 528*3117ece4Schristos size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) 529*3117ece4Schristos { 530*3117ece4Schristos void* ptr = ct; 531*3117ece4Schristos U16* tableU16 = ( (U16*) ptr) + 2; 532*3117ece4Schristos void* FSCTptr = (U32*)ptr + 2; 533*3117ece4Schristos FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr; 534*3117ece4Schristos 535*3117ece4Schristos /* header */ 536*3117ece4Schristos tableU16[-2] = (U16) 0; 537*3117ece4Schristos tableU16[-1] = (U16) symbolValue; 538*3117ece4Schristos 539*3117ece4Schristos /* Build table */ 540*3117ece4Schristos tableU16[0] = 0; 541*3117ece4Schristos tableU16[1] = 0; /* just in case */ 542*3117ece4Schristos 543*3117ece4Schristos /* Build Symbol Transformation Table */ 544*3117ece4Schristos symbolTT[symbolValue].deltaNbBits = 0; 545*3117ece4Schristos symbolTT[symbolValue].deltaFindState = 0; 546*3117ece4Schristos 547*3117ece4Schristos return 0; 548*3117ece4Schristos } 549*3117ece4Schristos 550*3117ece4Schristos 551*3117ece4Schristos static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, 552*3117ece4Schristos const void* src, size_t srcSize, 553*3117ece4Schristos const FSE_CTable* ct, const unsigned fast) 554*3117ece4Schristos { 555*3117ece4Schristos const BYTE* const istart = (const BYTE*) src; 556*3117ece4Schristos const BYTE* const iend = istart + srcSize; 557*3117ece4Schristos const BYTE* ip=iend; 558*3117ece4Schristos 559*3117ece4Schristos BIT_CStream_t bitC; 560*3117ece4Schristos FSE_CState_t CState1, CState2; 561*3117ece4Schristos 562*3117ece4Schristos /* init */ 563*3117ece4Schristos if (srcSize <= 2) return 0; 564*3117ece4Schristos { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); 565*3117ece4Schristos if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } 566*3117ece4Schristos 567*3117ece4Schristos #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) 568*3117ece4Schristos 569*3117ece4Schristos if (srcSize & 1) { 570*3117ece4Schristos FSE_initCState2(&CState1, ct, *--ip); 571*3117ece4Schristos FSE_initCState2(&CState2, ct, *--ip); 572*3117ece4Schristos FSE_encodeSymbol(&bitC, &CState1, *--ip); 573*3117ece4Schristos FSE_FLUSHBITS(&bitC); 574*3117ece4Schristos } else { 575*3117ece4Schristos FSE_initCState2(&CState2, ct, *--ip); 576*3117ece4Schristos FSE_initCState2(&CState1, ct, *--ip); 577*3117ece4Schristos } 578*3117ece4Schristos 579*3117ece4Schristos /* join to mod 4 */ 580*3117ece4Schristos srcSize -= 2; 581*3117ece4Schristos if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ 582*3117ece4Schristos FSE_encodeSymbol(&bitC, &CState2, *--ip); 583*3117ece4Schristos FSE_encodeSymbol(&bitC, &CState1, *--ip); 584*3117ece4Schristos FSE_FLUSHBITS(&bitC); 585*3117ece4Schristos } 586*3117ece4Schristos 587*3117ece4Schristos /* 2 or 4 encoding per loop */ 588*3117ece4Schristos while ( ip>istart ) { 589*3117ece4Schristos 590*3117ece4Schristos FSE_encodeSymbol(&bitC, &CState2, *--ip); 591*3117ece4Schristos 592*3117ece4Schristos if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ 593*3117ece4Schristos FSE_FLUSHBITS(&bitC); 594*3117ece4Schristos 595*3117ece4Schristos FSE_encodeSymbol(&bitC, &CState1, *--ip); 596*3117ece4Schristos 597*3117ece4Schristos if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ 598*3117ece4Schristos FSE_encodeSymbol(&bitC, &CState2, *--ip); 599*3117ece4Schristos FSE_encodeSymbol(&bitC, &CState1, *--ip); 600*3117ece4Schristos } 601*3117ece4Schristos 602*3117ece4Schristos FSE_FLUSHBITS(&bitC); 603*3117ece4Schristos } 604*3117ece4Schristos 605*3117ece4Schristos FSE_flushCState(&bitC, &CState2); 606*3117ece4Schristos FSE_flushCState(&bitC, &CState1); 607*3117ece4Schristos return BIT_closeCStream(&bitC); 608*3117ece4Schristos } 609*3117ece4Schristos 610*3117ece4Schristos size_t FSE_compress_usingCTable (void* dst, size_t dstSize, 611*3117ece4Schristos const void* src, size_t srcSize, 612*3117ece4Schristos const FSE_CTable* ct) 613*3117ece4Schristos { 614*3117ece4Schristos unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); 615*3117ece4Schristos 616*3117ece4Schristos if (fast) 617*3117ece4Schristos return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); 618*3117ece4Schristos else 619*3117ece4Schristos return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); 620*3117ece4Schristos } 621*3117ece4Schristos 622*3117ece4Schristos 623*3117ece4Schristos size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } 624*3117ece4Schristos 625*3117ece4Schristos #endif /* FSE_COMMONDEFS_ONLY */ 626