/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * You can contact the author at :
 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 ****************************************************************** */

/* **************************************************************
 * Compiler specifics
 ****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)  /* disable: C4127: conditional expression is constant */
#endif


/* **************************************************************
 * Includes
 ****************************************************************/
#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"
#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
#include "../common/fse.h"        /* header compression */
#include "../common/huf.h"
#include "../common/error_private.h"
#include "../common/bits.h"       /* ZSTD_highbit32 */


/* **************************************************************
 * Error Management
 ****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */


/* **************************************************************
 * Required declarations
 ****************************************************************/
typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;


/* **************************************************************
 * Debug Traces
 ****************************************************************/

#if DEBUGLEVEL >= 2

static size_t showU32(const U32* arr, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", arr[u]); (void)arr;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t HUF_getNbBits(HUF_CElt elt);

static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

static size_t showHNodeBits(const nodeElt* hnode, size_t size)
{
    size_t u;
    for (u=0; u<size; u++) {
        RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
    }
    RAWLOG(6, " \n");
    return size;
}

#endif


/* *******************************************************
 * HUF : Huffman block compression
 *********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const mask = align - 1;
    size_t const rem = (size_t)workspace & mask;
    size_t const add = (align - rem) & mask;
    BYTE* const aligned = (BYTE*)workspace + add;
    assert((align & (align - 1)) == 0); /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr >= add) {
        assert(add < align);
        assert(((size_t)aligned & mask) == 0);
        *workspaceSizePtr -= add;
        return aligned;
    } else {
        *workspaceSizePtr = 0;
        return NULL;
    }
}

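/* Editorial illustration (added note, not part of the original source) :
 * with align == 8 and a workspace starting at an address whose low bits are
 * 0x5, mask == 7, rem == 5 and add == 3, so 3 bytes are skipped, the returned
 * pointer ends in 0x8, and *workspaceSizePtr shrinks by 3. If the workspace
 * cannot pay for the alignment shift, NULL is returned and the size is
 * zeroed. */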
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6

typedef struct {
    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;

static size_t
HUF_compressWeights(void* dst, size_t dstSize,
                    const void* weightTable, size_t wtSize,
                    void* workspace, size_t workspaceSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

    /* init conditions */
    if (wtSize <= 1) return 0;  /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}

static size_t HUF_getNbBits(HUF_CElt elt)
{
    return elt & 0xFF;
}

static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return elt;
}

static size_t HUF_getValue(HUF_CElt elt)
{
    return elt & ~(size_t)0xFF;
}

static size_t HUF_getValueFast(HUF_CElt elt)
{
    return elt;
}

static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    *elt = nbBits;
}

static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);
        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
    }
}

HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
{
    HUF_CTableHeader header;
    ZSTD_memcpy(&header, ctable, sizeof(header));
    return header;
}

static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
{
    HUF_CTableHeader header;
    HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
    ZSTD_memset(&header, 0, sizeof(header));
    assert(tableLog < 256);
    header.tableLog = (BYTE)tableLog;
    assert(maxSymbolValue < 256);
    header.maxSymbolValue = (BYTE)maxSymbolValue;
    ZSTD_memcpy(ctable, &header, sizeof(header));
}

typedef struct {
    HUF_CompressWeightsWksp wksp;
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;

size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32 n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));

    assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
    assert(HUF_readCTableHeader(CTable).tableLog == huffLog);

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    wksp->bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
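    /* Editorial illustration (added note) : weights are just inverted bit
     * lengths. With huffLog == 11, a symbol coded in 11 bits gets weight
     * 11 + 1 - 11 == 1, a symbol coded in 1 bit gets weight 11, and weight 0
     * is reserved for symbols absent from the table (nbBits == 0). */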
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}


size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    *maxSymbolValuePtr = nbSymbols - 1;

    HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }

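    /* Editorial example (added) of the canonical assignment below : with
     * tableLog == 3 and code lengths A:1, B:2, C:3, D:3, nbPerRank[1..3] is
     * {1, 1, 2}. Walking n = 3..1 : valPerRank[3] = 0 (codes 000 and 001 for
     * C and D), then min = 2 >> 1 = 1, so valPerRank[2] = 1 (code 01 for B)
     * and valPerRank[1] = 1 (code 1 for A), which is exactly a prefix-free
     * canonical code. */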
    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0};  /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {   /* start at n=tableLog <-> w=1 */
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    return readSize;
}

U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* const ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
        return 0;
    return (U32)HUF_getNbBits(ct[symbolValue]);
}


/**
 * HUF_setMaxHeight():
 * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
 *
 * It attempts to convert all nodes with nbBits > @targetNbBits
 * to employ @targetNbBits instead. Then it adjusts the tree
 * so that it remains a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value (expected <= targetNbBits).
 *
 * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
 *                    It's presumed sorted, from most frequent to rarest symbol.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param targetNbBits  The allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect targetNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > targetNbBits, so the tree is already valid. */
    if (largestBits <= targetNbBits) return largestBits;

    DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - targetNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > targetNbBits to targetNbBits.
         * Compute totalCost, which is how far over 2^largestBits the sum of
         * the ranks lands after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > targetNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)targetNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= targetNbBits */
        assert(huffNode[n].nbBits <= targetNbBits);
        /* n ends at the index of the smallest symbol using < targetNbBits */
        while (huffNode[n].nbBits == targetNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^targetNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert(((U32)totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - targetNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = targetNbBits;
                int pos;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < targetNbBits */
                    rankLast[targetNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if no symbols of lowPos or if it is
                     * not cheaper to remove 2 lowPos than highPos. */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)   /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from the lowest rank 0 and move them
             * to rank 1. There's guaranteed to be enough rank 0 symbols because
             * TODO.
             */
            while (totalCost < 0) {   /* Sometimes, cost correction overshoots */
                /* special case : no rank 1 symbol (using targetNbBits-1);
                 * let's create one from largest rank 0 (using targetNbBits). */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == targetNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */

    return targetNbBits;
}

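/* Editorial illustration (added) of the cost arithmetic above : with
 * largestBits == 13 and targetNbBits == 11, baseCost == 1 << 2 == 4.
 * Truncating a 13-bit symbol to 11 bits adds 4 - (1 << 0) == 3 to totalCost,
 * and truncating a 12-bit symbol adds 4 - (1 << 1) == 2 ; summed over all
 * truncated symbols the total is a multiple of baseCost (as asserted). After
 * the >> (largestBits - targetNbBits) renormalization, totalCost counts units
 * of 2^-targetNbBits of code space, and demoting a symbol from
 * (targetNbBits - k) to (targetNbBits - k + 1) bits repays exactly
 * 1 << (k - 1) such units, which is what the rankLast[] repayment loop does. */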
typedef struct {
    U16 base;
    U16 curr;
} rankPos;

typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
    huffNodeTable huffNodeTbl;
    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
        ? count
        : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}

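/* Editorial example (added) : counts below RANK_POSITION_DISTINCT_COUNT_CUTOFF
 * each get their own bucket, so HUF_getIndex(42) == 42. Larger counts share a
 * bucket per power of two : HUF_getIndex(1000) == ZSTD_highbit32(1000) +
 * RANK_POSITION_LOG_BUCKETS_BEGIN == 9 + 158 == 167, the same bucket as any
 * other count in [512, 1023]. */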
/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
    U32 i;
    for (i = 1; i < maxSymbolValue1; ++i) {
        if (huffNode[i].count > huffNode[i-1].count) {
            return 0;
        }
    }
    return 1;
}

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    int i;
    int const size = high-low+1;
    huffNode += low;
    for (i = 1; i < size; ++i) {
        nodeElt const key = huffNode[i];
        int j = i - 1;
        while (j >= 0 && huffNode[j].count < key.count) {
            huffNode[j + 1] = huffNode[j];
            j--;
        }
        huffNode[j + 1] = key;
    }
}

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    /* Simply select rightmost element as pivot. "Better" selectors like
     * median-of-three don't experimentally appear to have any benefit.
     */
    U32 const pivot = arr[high].count;
    int i = low - 1;
    int j = low;
    for ( ; j < high; j++) {
        if (arr[j].count > pivot) {
            i++;
            HUF_swapNodes(&arr[i], &arr[j]);
        }
    }
    HUF_swapNodes(&arr[i + 1], &arr[high]);
    return i + 1;
}

/* Classic quicksort in descending order, with partly iterative calls
 * to reduce worst-case call-stack depth.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low < kInsertionSortThreshold) {
        HUF_insertionSort(arr, low, high);
        return;
    }
    while (low < high) {
        int const idx = HUF_quickSortPartition(arr, low, high);
        if (idx - low < high - idx) {
            HUF_simpleQuickSort(arr, low, idx - 1);
            low = idx + 1;
        } else {
            HUF_simpleQuickSort(arr, idx + 1, high);
            high = idx - 1;
        }
    }
}

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[s]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

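    /* Editorial example (added) : with three symbols and counts {5, 1, 1},
     * the first pass gives rankPosition[5].base == 1 and
     * rankPosition[1].base == 2. After the suffix sum, rankPosition[6].curr
     * == 0 and rankPosition[2].curr == 1, so the insertion loop below places
     * the count-5 symbol at position 0 and the two count-1 symbols at
     * positions 1 and 2 : already ordered by decreasing count. */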
    /* Insert each symbol into its appropriate bucket, advancing that bucket's curr cursor. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte = (BYTE)n;
    }

    /* Sort each bucket. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}


/** HUF_buildCTable_wksp() :
 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode       The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue The maximum symbol value.
 * @return               The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);   /* fake entry, strong barrier */

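    /* Editorial note (added) : the loop below is the classic O(n) Huffman
     * construction over sorted input. lowS walks down the leaves (huffNode is
     * sorted by decreasing count, so huffNode[lowS] is the smallest unmerged
     * leaf) while lowN walks the internal nodes, which are created in
     * increasing count order ; each step merges the two smallest available
     * nodes. The (1U<<30) counts mark not-yet-created internal nodes, and the
     * (1U<<31) entry below index 0 is a sentinel, so neither scan needs an
     * explicit bounds check. */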
    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));

    return nonNullRank;
}

/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;      /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */

    HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
}

size_t
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
                     void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables =
        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0 + 1;
    int nonNullRank;

    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));

    DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
    DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* determine and enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;

    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);

    if (header.maxSymbolValue < maxSymbolValue)
        return 0;

    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

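/* Editorial example (added) : HUF_estimateCompressedSize() above is plain
 * arithmetic. With two symbols, one seen 100 times at 1 bit and one seen 50
 * times at 2 bits, the estimate is (100*1 + 50*2) >> 3 == 25 bytes. */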
/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *        - Bits [0, 4)            = nbBits
 *        - Bits [4, 64 - nbBits)  = 0
 *        - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */
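/* Editorial illustration (added), assuming a 64-bit size_t : for nbBits == 3
 * and value == 0b101, HUF_setNbBits()/HUF_setValue() produce
 * elt == ((size_t)0b101 << 61) | 3. HUF_getNbBits(elt) reads back 3 from the
 * low byte, while HUF_getValue(elt) keeps only the value held in the top
 * bits, ready to be OR-ed into the just-shifted bit container. */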
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;

/**! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                              void* startPtr, size_t dstCapacity)
{
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
    assert(idx <= 1);
    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
    /* This is efficient on x86-64 with BMI2 because shrx
     * only reads the low 6 bits of the register. The compiler
     * knows this and elides the mask. When fast is set,
     * every operation can use the same value loaded from elt.
     */
    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
    /* We only read the low 8 bits of bitC->bitPos[idx] so it
     * doesn't matter that the high bits have noise from the value.
     */
    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    /* The last 4-bits of elt are dirty if fast is set,
     * so we must not be overwriting bits that have already been
     * inserted into the bit container.
     */
#if DEBUGLEVEL >= 1
    {
        size_t const nbBits = HUF_getNbBits(elt);
        size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
        (void)dirtyBits;
        /* Middle bits are 0. */
        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
        /* We didn't overwrite any bits in the bit container. */
        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    }
#endif
}

FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitContainer[1] = 0;
    bitC->bitPos[1] = 0;
}

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0.
 * The container @ index 1 is reset separately, by HUF_zeroIndex1().
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}

/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
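    /* Editorial illustration (added) : if bitPos[0] == 25, then nbBytes == 3
     * and the top 25 bits of the container are shifted down ; the store below
     * writes the whole word little-endian, but ptr only advances by 3 bytes,
     * so exactly 24 bits are consumed, bitPos[0] becomes 25 & 7 == 1, and the
     * single pending bit remains at the top of bitContainer[0]. */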
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*! HUF_closeCStream() :
 * @return Size of CStream, in bytes,
 *         or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {
        size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0;   /* overflow detected */
        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
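    /* Editorial example (added) : for srcSize == 23 and kUnroll == 4, the
     * first block encodes 3 symbols (23 % 4), bringing n to 20 ; since
     * 20 % 8 != 0 the second block encodes one more group of 4, bringing n to
     * 16, so the main loop below always consumes exact multiples of
     * 2 * kUnroll. Symbols are encoded back to front. */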
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n-= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n == 0);
}

/**
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}

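/* Editorial example (added) : for srcSize == 1000 and tableLog == 11 the
 * bound is ((1000 * 11) >> 3) + 8 == 1375 + 8 == 1383 bytes ; a destination
 * at least this large lets the encoder use the fast paths with no bounds
 * checks inside the hot loop. */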
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                         const void* src, size_t srcSize,
                                         const HUF_CElt* CTable)
{
    U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    {   BYTE* op = ostart;
        size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
        if (HUF_isError(initErr)) return 0; }

    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFastFlush */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                         const void* src, size_t srcSize,
                                         const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                            const void* src, size_t srcSize,
                                            const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}
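/* Runtime dispatch: when DYNAMIC_BMI2 is set, the inlined body above is
 * instantiated twice, once compiled with the BMI2 target attribute and
 * once generic, and the wrapper below picks one per call based on
 * HUF_flags_bmi2. This keeps a single binary portable while still using
 * BMI2 instructions where the CPU supports them. */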
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable, const int flags)
{
    if (flags & HUF_flags_bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable, const int flags)
{
    (void)flags;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}
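/* 4-stream framing, as implemented below: the input is split into four
 * segments of segmentSize = (srcSize+3)/4 bytes (the last one takes the
 * remainder), each compressed independently into its own bitstream. The
 * output begins with a 6-byte jump table holding three little-endian
 * 16-bit sizes for streams 1-3; the decoder infers the fourth stream's
 * size from the total. Illustrative numbers: srcSize = 1000 gives
 * segmentSize = 250, so the streams cover bytes 0-249, 250-499, 500-749,
 * and 750-999. */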
static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable, int flags)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}

size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
}

typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* incompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */

unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
{
    unsigned cardinality = 0;
    unsigned i;

    for (i = 0; i < maxSymbolValue + 1; i++) {
        if (count[i] != 0) cardinality += 1;
    }

    return cardinality;
}
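/* Worked example for HUF_minTableLog() below (illustrative): with 12
 * distinct symbols, ZSTD_highbit32(12) = 3, so the minimum table log is 4.
 * A prefix tree of depth 3 has at most 2^3 = 8 leaves, too few for 12
 * symbols, while depth 4 offers up to 16. */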
unsigned HUF_minTableLog(unsigned symbolCardinality)
{
    U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
    return minBitsSymbols;
}

unsigned HUF_optimalTableLog(
            unsigned maxTableLog,
            size_t srcSize,
            unsigned maxSymbolValue,
            void* workSpace, size_t wkspSize,
            HUF_CElt* table,
            const unsigned* count,
            int flags)
{
    assert(srcSize > 1);   /* Not supported, RLE should be used instead */
    assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));

    if (!(flags & HUF_flags_optimalDepth)) {
        /* cheap evaluation, based on FSE */
        return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
    }

    {   BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
        size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
        size_t hSize, newSize;
        const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
        const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
        size_t optSize = ((size_t) ~0) - 1;
        unsigned optLog = maxTableLog, optLogGuess;

        DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);

        /* Search until size increases */
        for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
            DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);

            {   size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
                if (ERR_isError(maxBits)) continue;

                if (maxBits < optLogGuess && optLogGuess > minTableLog) break;

                hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
            }

            if (ERR_isError(hSize)) continue;

            newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;

            if (newSize > optSize + 1) {
                break;
            }

            if (newSize < optSize) {
                optSize = newSize;
                optLog = optLogGuess;
            }
        }
        assert(optLog <= HUF_TABLELOG_MAX);
        return optLog;
    }
}
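/* A made-up trace of the search above (illustrative sizes): suppose
 * minTableLog = 6 and the estimated header+payload totals for log guesses
 * 6, 7, 8 come out to 2100, 2050, and 2060 bytes. optLog settles on 7,
 * and the loop breaks at guess 8 because 2060 > optSize + 1 = 2051,
 * relying on the assumption that total size keeps growing once past the
 * optimum. */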
/* HUF_compress_internal() :
 * `workSpace` must be aligned on 4-byte boundaries,
 * and occupy the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       HUF_nbStreams_e nbStreams,
                       void* workSpace, size_t wkspSize,
                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, flags);
    }

    /* If incompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        DEBUGLOG(5, "input suspected incompressible : sampling to check");
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }
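    /* Numeric check of the heuristic above (derived from the constants,
     * not new behavior): the two samples total 2 * 4096 = 8192 bytes, so
     * the threshold is (8192 >> 7) + 4 = 68. If the most frequent symbol
     * of each 4096-byte sample appears at most ~34 times (~0.8%), the
     * distribution is close to uniform and a Huffman table would not pay
     * for itself, so the function returns 0 (not compressible). */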
    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }
    DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, flags);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                                    maxSymbolValue, huffLog,
                                                    &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
        DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, flags);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, flags);
}
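/* A sketch of the repeat-table trade-off above (illustrative numbers):
 * if the retained table would code the block in oldSize = 1500 bytes,
 * the fresh table in newSize = 1450 bytes, and serializing the fresh
 * table costs hSize = 60 bytes, then 1500 <= 60 + 1450 = 1510, so the old
 * table wins: its 50-byte coding penalty is cheaper than a new header. */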
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
    DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, flags);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * may quickly check for incompressible input and skip it
 * (see HUF_flags_suspectUncompressible).
 * reuses an existing huffman compression table when `repeat` allows it */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
{
    DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, flags);
}
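/* Usage sketch (illustrative, not part of the library): callers typically
 * keep the CTable and HUF_repeat state alive across blocks so consecutive
 * blocks can share one table:
 *
 *     U64 wksp[HUF_WORKSPACE_SIZE_U64];
 *     HUF_CElt ctable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
 *     HUF_repeat rep = HUF_repeat_none;
 *     size_t const cSize = HUF_compress4X_repeat(dst, dstCapacity,
 *                               src, srcSize,
 *                               HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
 *                               wksp, sizeof(wksp), ctable, &rep, 0);
 *
 * cSize == 0 means the block was judged not compressible,
 * HUF_isError(cSize) signals an error, and otherwise cSize is the number
 * of bytes written, starting with the serialized table header. */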