1*c03c5b1cSMartin Matuska /* ******************************************************************
2*c03c5b1cSMartin Matuska * Huffman encoder, part of New Generation Entropy library
3*c03c5b1cSMartin Matuska * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4*c03c5b1cSMartin Matuska *
5*c03c5b1cSMartin Matuska * You can contact the author at :
6*c03c5b1cSMartin Matuska * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
7*c03c5b1cSMartin Matuska * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8*c03c5b1cSMartin Matuska *
9*c03c5b1cSMartin Matuska * This source code is licensed under both the BSD-style license (found in the
10*c03c5b1cSMartin Matuska * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11*c03c5b1cSMartin Matuska * in the COPYING file in the root directory of this source tree).
12*c03c5b1cSMartin Matuska * You may select, at your option, one of the above-listed licenses.
13*c03c5b1cSMartin Matuska ****************************************************************** */
14*c03c5b1cSMartin Matuska
15*c03c5b1cSMartin Matuska /* **************************************************************
16*c03c5b1cSMartin Matuska * Compiler specifics
17*c03c5b1cSMartin Matuska ****************************************************************/
18*c03c5b1cSMartin Matuska #ifdef _MSC_VER /* Visual Studio */
19*c03c5b1cSMartin Matuska # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
20*c03c5b1cSMartin Matuska #endif
21*c03c5b1cSMartin Matuska
22*c03c5b1cSMartin Matuska
23*c03c5b1cSMartin Matuska /* **************************************************************
24*c03c5b1cSMartin Matuska * Includes
25*c03c5b1cSMartin Matuska ****************************************************************/
26*c03c5b1cSMartin Matuska #include <string.h> /* memcpy, memset */
27*c03c5b1cSMartin Matuska #include <stdio.h> /* printf (debug) */
28*c03c5b1cSMartin Matuska #include "../common/compiler.h"
29*c03c5b1cSMartin Matuska #include "../common/bitstream.h"
30*c03c5b1cSMartin Matuska #include "hist.h"
31*c03c5b1cSMartin Matuska #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
32*c03c5b1cSMartin Matuska #include "../common/fse.h" /* header compression */
33*c03c5b1cSMartin Matuska #define HUF_STATIC_LINKING_ONLY
34*c03c5b1cSMartin Matuska #include "../common/huf.h"
35*c03c5b1cSMartin Matuska #include "../common/error_private.h"
36*c03c5b1cSMartin Matuska
37*c03c5b1cSMartin Matuska
38*c03c5b1cSMartin Matuska /* **************************************************************
39*c03c5b1cSMartin Matuska * Error Management
40*c03c5b1cSMartin Matuska ****************************************************************/
41*c03c5b1cSMartin Matuska #define HUF_isError ERR_isError
42*c03c5b1cSMartin Matuska #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
43*c03c5b1cSMartin Matuska
44*c03c5b1cSMartin Matuska
45*c03c5b1cSMartin Matuska /* **************************************************************
46*c03c5b1cSMartin Matuska * Utils
47*c03c5b1cSMartin Matuska ****************************************************************/
/* HUF_optimalTableLog() :
 * Selects the table log for a Huffman table.
 * Forwards its three arguments to FSE_optimalTableLog_internal(), with `1` as the last argument.
 * @return : the value returned by FSE_optimalTableLog_internal() */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    unsigned const tableLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
    return tableLog;
}
52*c03c5b1cSMartin Matuska
53*c03c5b1cSMartin Matuska
54*c03c5b1cSMartin Matuska /* *******************************************************
55*c03c5b1cSMartin Matuska * HUF : Huffman block compression
56*c03c5b1cSMartin Matuska *********************************************************/
57*c03c5b1cSMartin Matuska /* HUF_compressWeights() :
58*c03c5b1cSMartin Matuska * Same as FSE_compress(), but dedicated to huff0's weights compression.
59*c03c5b1cSMartin Matuska * The use case needs much less stack memory.
60*c03c5b1cSMartin Matuska * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
61*c03c5b1cSMartin Matuska */
62*c03c5b1cSMartin Matuska #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
HUF_compressWeights(void * dst,size_t dstSize,const void * weightTable,size_t wtSize)63*c03c5b1cSMartin Matuska static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
64*c03c5b1cSMartin Matuska {
65*c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE*) dst;
66*c03c5b1cSMartin Matuska BYTE* op = ostart;
67*c03c5b1cSMartin Matuska BYTE* const oend = ostart + dstSize;
68*c03c5b1cSMartin Matuska
69*c03c5b1cSMartin Matuska unsigned maxSymbolValue = HUF_TABLELOG_MAX;
70*c03c5b1cSMartin Matuska U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
71*c03c5b1cSMartin Matuska
72*c03c5b1cSMartin Matuska FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
73*c03c5b1cSMartin Matuska BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
74*c03c5b1cSMartin Matuska
75*c03c5b1cSMartin Matuska unsigned count[HUF_TABLELOG_MAX+1];
76*c03c5b1cSMartin Matuska S16 norm[HUF_TABLELOG_MAX+1];
77*c03c5b1cSMartin Matuska
78*c03c5b1cSMartin Matuska /* init conditions */
79*c03c5b1cSMartin Matuska if (wtSize <= 1) return 0; /* Not compressible */
80*c03c5b1cSMartin Matuska
81*c03c5b1cSMartin Matuska /* Scan input and build symbol stats */
82*c03c5b1cSMartin Matuska { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
83*c03c5b1cSMartin Matuska if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
84*c03c5b1cSMartin Matuska if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
85*c03c5b1cSMartin Matuska }
86*c03c5b1cSMartin Matuska
87*c03c5b1cSMartin Matuska tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
88*c03c5b1cSMartin Matuska CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
89*c03c5b1cSMartin Matuska
90*c03c5b1cSMartin Matuska /* Write table description header */
91*c03c5b1cSMartin Matuska { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
92*c03c5b1cSMartin Matuska op += hSize;
93*c03c5b1cSMartin Matuska }
94*c03c5b1cSMartin Matuska
95*c03c5b1cSMartin Matuska /* Compress */
96*c03c5b1cSMartin Matuska CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
97*c03c5b1cSMartin Matuska { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
98*c03c5b1cSMartin Matuska if (cSize == 0) return 0; /* not enough space for compressed data */
99*c03c5b1cSMartin Matuska op += cSize;
100*c03c5b1cSMartin Matuska }
101*c03c5b1cSMartin Matuska
102*c03c5b1cSMartin Matuska return (size_t)(op-ostart);
103*c03c5b1cSMartin Matuska }
104*c03c5b1cSMartin Matuska
105*c03c5b1cSMartin Matuska
/* One entry of a Huffman compression table; tables are indexed by symbol value. */
struct HUF_CElt_s {
    U16  val;      /* code value, handed to BIT_addBitsFast() when the symbol is encoded */
    BYTE nbBits;   /* code length in bits; 0 for a symbol of weight 0 */
};   /* typedef'd to HUF_CElt within "huf.h" */
110*c03c5b1cSMartin Matuska
111*c03c5b1cSMartin Matuska /*! HUF_writeCTable() :
112*c03c5b1cSMartin Matuska `CTable` : Huffman tree to save, using huf representation.
113*c03c5b1cSMartin Matuska @return : size of saved CTable */
HUF_writeCTable(void * dst,size_t maxDstSize,const HUF_CElt * CTable,unsigned maxSymbolValue,unsigned huffLog)114*c03c5b1cSMartin Matuska size_t HUF_writeCTable (void* dst, size_t maxDstSize,
115*c03c5b1cSMartin Matuska const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
116*c03c5b1cSMartin Matuska {
117*c03c5b1cSMartin Matuska BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
118*c03c5b1cSMartin Matuska BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
119*c03c5b1cSMartin Matuska BYTE* op = (BYTE*)dst;
120*c03c5b1cSMartin Matuska U32 n;
121*c03c5b1cSMartin Matuska
122*c03c5b1cSMartin Matuska /* check conditions */
123*c03c5b1cSMartin Matuska if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
124*c03c5b1cSMartin Matuska
125*c03c5b1cSMartin Matuska /* convert to weight */
126*c03c5b1cSMartin Matuska bitsToWeight[0] = 0;
127*c03c5b1cSMartin Matuska for (n=1; n<huffLog+1; n++)
128*c03c5b1cSMartin Matuska bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
129*c03c5b1cSMartin Matuska for (n=0; n<maxSymbolValue; n++)
130*c03c5b1cSMartin Matuska huffWeight[n] = bitsToWeight[CTable[n].nbBits];
131*c03c5b1cSMartin Matuska
132*c03c5b1cSMartin Matuska /* attempt weights compression by FSE */
133*c03c5b1cSMartin Matuska { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
134*c03c5b1cSMartin Matuska if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
135*c03c5b1cSMartin Matuska op[0] = (BYTE)hSize;
136*c03c5b1cSMartin Matuska return hSize+1;
137*c03c5b1cSMartin Matuska } }
138*c03c5b1cSMartin Matuska
139*c03c5b1cSMartin Matuska /* write raw values as 4-bits (max : 15) */
140*c03c5b1cSMartin Matuska if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
141*c03c5b1cSMartin Matuska if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
142*c03c5b1cSMartin Matuska op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
143*c03c5b1cSMartin Matuska huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
144*c03c5b1cSMartin Matuska for (n=0; n<maxSymbolValue; n+=2)
145*c03c5b1cSMartin Matuska op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
146*c03c5b1cSMartin Matuska return ((maxSymbolValue+1)/2) + 1;
147*c03c5b1cSMartin Matuska }
148*c03c5b1cSMartin Matuska
149*c03c5b1cSMartin Matuska
/* HUF_readCTable() :
 * Rebuilds a compression table from a serialized Huffman header.
 * `CTable` : receives nbBits and val for symbols [0, nbSymbols-1].
 * `maxSymbolValuePtr` : in : largest symbol value `CTable` can hold; out : nbSymbols-1.
 * `hasZeroWeights` : set to 1 when at least one decoded weight is 0, to 0 otherwise.
 * @return : value returned by HUF_readStats() on `src`, or an error code */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    U32 sym;

    /* decode the per-symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));

    /* validate what was decoded */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    /* turn per-rank counts into per-rank base values */
    {   U32 rank;
        U32 runningStart = 0;
        for (rank = 1; rank <= tableLog; rank++) {
            U32 const rankStart = runningStart;
            runningStart += (rankVal[rank] << (rank-1));
            rankVal[rank] = rankStart;
    }   }

    /* code length of each symbol : tableLog+1-weight, or 0 when weight is 0 */
    *hasZeroWeights = 0;
    for (sym = 0; sym < nbSymbols; sym++) {
        const U32 weight = huffWeight[sym];
        *hasZeroWeights |= (weight == 0);
        CTable[sym].nbBits = (BYTE)(tableLog + 1 - weight) & -(weight != 0);
    }

    /* code value of each symbol */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0};   /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        U32 len;
        U16 firstVal = 0;

        for (sym = 0; sym < nbSymbols; sym++) {
            nbPerRank[CTable[sym].nbBits]++;
        }

        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        for (len = tableLog; len > 0; len--) {   /* start at len=tableLog <-> w=1 */
            valPerRank[len] = firstVal;   /* starting value within this rank */
            firstVal += nbPerRank[len];
            firstVal >>= 1;
        }

        /* assign value within rank, symbol order */
        for (sym = 0; sym < nbSymbols; sym++) {
            CTable[sym].val = valPerRank[CTable[sym].nbBits]++;
        }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}
199*c03c5b1cSMartin Matuska
/* HUF_getNbBits() :
 * `symbolTable` is interpreted as an array of HUF_CElt.
 * @return : the nbBits field of entry `symbolValue` (must be <= HUF_SYMBOLVALUE_MAX) */
U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
{
    const HUF_CElt* const elts = (const HUF_CElt*)symbolTable;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return (elts + symbolValue)->nbBits;
}
206*c03c5b1cSMartin Matuska
207*c03c5b1cSMartin Matuska
/* Node of the Huffman tree while it is being built (leaf or internal node). */
typedef struct nodeElt_s {
    U32 count;     /* occurrence count : symbol frequency for a leaf, sum of both children for a parent */
    U16 parent;    /* index, within the node table, of this node's parent */
    BYTE byte;     /* symbol value carried by a leaf (filled by HUF_sort()) */
    BYTE nbBits;   /* depth in the tree, i.e. code length in bits */
} nodeElt;
214*c03c5b1cSMartin Matuska
/* HUF_setMaxHeight() :
 * Caps code lengths at `maxNbBits`.
 * `huffNode` : nodes sorted by decreasing count; entries [0, lastNonNull] are the used symbols.
 * Codes longer than `maxNbBits` are shortened to `maxNbBits`; the resulting excess ("cost")
 * is then repaid by lengthening some shorter codes, and any overshoot is given back.
 * @return : the largest code length in use after the adjustment
 *           (unchanged largest length when it was already <= maxNbBits). */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */

    /* there are several too large elements (at least >= 2) */
    {   int debt = 0;
        const U32 unitCost = 1 << (largestBits - maxNbBits);
        int idx = (int)lastNonNull;

        /* shorten every over-long code, accumulating what it costs */
        while (huffNode[idx].nbBits > maxNbBits) {
            debt += unitCost - (1 << (largestBits - huffNode[idx].nbBits));
            huffNode[idx].nbBits = (BYTE)maxNbBits;
            idx--;
        }
        /* idx stops at huffNode[idx].nbBits <= maxNbBits; now skip codes already at maxNbBits */
        while (huffNode[idx].nbBits == maxNbBits) {
            idx--;   /* idx ends at index of smallest symbol using < maxNbBits */
        }

        /* renorm debt */
        debt >>= (largestBits - maxNbBits);   /* note : debt is necessarily a multiple of unitCost */

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest) symbol per rank; every byte 0xF0 => every slot == noSymbol */
            memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 curBits = maxNbBits;
                int pos;
                for (pos = idx; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits < curBits) {
                        curBits = huffNode[pos].nbBits;   /* < maxNbBits */
                        rankLast[maxNbBits-curBits] = (U32)pos;
                    }
            }   }

            while (debt > 0) {
                U32 nBits = BIT_highbit32((U32)debt) + 1;
                for ( ; nBits > 1; nBits--) {
                    U32 const highPos = rankLast[nBits];
                    U32 const lowPos = rankLast[nBits-1];
                    if (highPos == noSymbol) continue;
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                /* the HUF_TABLELOG_MAX bound test is just to please gcc 5+; it should not be necessary */
                while ((nBits <= HUF_TABLELOG_MAX) && (rankLast[nBits] == noSymbol)) {
                    nBits++;
                }
                debt -= 1 << (nBits-1);
                if (rankLast[nBits-1] == noSymbol) {
                    rankLast[nBits-1] = rankLast[nBits];   /* this rank is no longer empty */
                }
                huffNode[rankLast[nBits]].nbBits++;
                if (rankLast[nBits] == 0) {   /* special case, reached largest symbol */
                    rankLast[nBits] = noSymbol;
                } else {
                    rankLast[nBits]--;
                    if (huffNode[rankLast[nBits]].nbBits != maxNbBits-nBits) {
                        rankLast[nBits] = noSymbol;   /* this rank is now empty */
                    }
                }
            }   /* while (debt > 0) */

            while (debt < 0) {   /* Sometimes, cost correction overshoot */
                if (rankLast[1] == noSymbol) {
                    /* special case : no rank 1 symbol (using maxNbBits-1);
                     * let's create one from largest rank 0 (using maxNbBits) */
                    while (huffNode[idx].nbBits == maxNbBits) {
                        idx--;
                    }
                    huffNode[idx+1].nbBits--;
                    assert(idx >= 0);
                    rankLast[1] = (U32)(idx+1);
                } else {
                    huffNode[ rankLast[1] + 1 ].nbBits--;
                    rankLast[1]++;
                }
                debt++;
            }
        }
    }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}
292*c03c5b1cSMartin Matuska
/* Per-rank bookkeeping used by HUF_sort() to place symbols into buckets. */
typedef struct {
    U32 base;      /* first slot of the bucket in the sorted node array */
    U32 current;   /* next free slot of the bucket */
} rankPos;

/* Node storage used while building a Huffman tree. */
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

/* Number of rankPos entries; HUF_sort() clears and initializes all of them. */
#define RANK_POSITION_TABLE_SIZE 32

/* Layout of the workspace expected by HUF_buildCTable_wksp(). */
typedef struct {
    huffNodeTable huffNodeTbl;                        /* tree nodes */
    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];   /* scratch table for HUF_sort() */
} HUF_buildCTable_wksp_tables;
306*c03c5b1cSMartin Matuska
/* HUF_sort() :
 * Fills `huffNode[0..maxSymbolValue]` with one entry (count, byte) per symbol, in decreasing order of count.
 * Symbols are first bucketed by BIT_highbit32(count+1), buckets of larger counts coming first;
 * each symbol is then insertion-sorted within its own bucket.
 * `rankPosition` : scratch table of RANK_POSITION_TABLE_SIZE entries, fully overwritten. */
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
{
    U32 sym;
    U32 rank;

    /* count symbols per rank */
    memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (sym = 0; sym <= maxSymbolValue; sym++) {
        U32 const lowerRank = BIT_highbit32(count[sym] + 1);
        rankPosition[lowerRank].base++;
    }

    /* base[r] becomes the number of symbols whose rank is >= r, i.e. where bucket r starts */
    for (rank = 30; rank > 0; rank--) {
        rankPosition[rank-1].base += rankPosition[rank].base;
    }
    for (rank = 0; rank < RANK_POSITION_TABLE_SIZE; rank++) {
        rankPosition[rank].current = rankPosition[rank].base;
    }

    /* insert each symbol into its bucket, keeping the bucket sorted by decreasing count */
    for (sym = 0; sym <= maxSymbolValue; sym++) {
        U32 const freq = count[sym];
        U32 const bucket = BIT_highbit32(freq+1) + 1;
        U32 slot = rankPosition[bucket].current++;
        for ( ; (slot > rankPosition[bucket].base) && (freq > huffNode[slot-1].count); slot--) {
            huffNode[slot] = huffNode[slot-1];
        }
        huffNode[slot].count = freq;
        huffNode[slot].byte = (BYTE)sym;
    }
}
330*c03c5b1cSMartin Matuska
331*c03c5b1cSMartin Matuska
332*c03c5b1cSMartin Matuska /** HUF_buildCTable_wksp() :
333*c03c5b1cSMartin Matuska * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
334*c03c5b1cSMartin Matuska * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
335*c03c5b1cSMartin Matuska */
336*c03c5b1cSMartin Matuska #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
337*c03c5b1cSMartin Matuska
HUF_buildCTable_wksp(HUF_CElt * tree,const unsigned * count,U32 maxSymbolValue,U32 maxNbBits,void * workSpace,size_t wkspSize)338*c03c5b1cSMartin Matuska size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
339*c03c5b1cSMartin Matuska {
340*c03c5b1cSMartin Matuska HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
341*c03c5b1cSMartin Matuska nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
342*c03c5b1cSMartin Matuska nodeElt* const huffNode = huffNode0+1;
343*c03c5b1cSMartin Matuska int nonNullRank;
344*c03c5b1cSMartin Matuska int lowS, lowN;
345*c03c5b1cSMartin Matuska int nodeNb = STARTNODE;
346*c03c5b1cSMartin Matuska int n, nodeRoot;
347*c03c5b1cSMartin Matuska
348*c03c5b1cSMartin Matuska /* safety checks */
349*c03c5b1cSMartin Matuska if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
350*c03c5b1cSMartin Matuska if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
351*c03c5b1cSMartin Matuska return ERROR(workSpace_tooSmall);
352*c03c5b1cSMartin Matuska if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
353*c03c5b1cSMartin Matuska if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
354*c03c5b1cSMartin Matuska return ERROR(maxSymbolValue_tooLarge);
355*c03c5b1cSMartin Matuska memset(huffNode0, 0, sizeof(huffNodeTable));
356*c03c5b1cSMartin Matuska
357*c03c5b1cSMartin Matuska /* sort, decreasing order */
358*c03c5b1cSMartin Matuska HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
359*c03c5b1cSMartin Matuska
360*c03c5b1cSMartin Matuska /* init for parents */
361*c03c5b1cSMartin Matuska nonNullRank = (int)maxSymbolValue;
362*c03c5b1cSMartin Matuska while(huffNode[nonNullRank].count == 0) nonNullRank--;
363*c03c5b1cSMartin Matuska lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
364*c03c5b1cSMartin Matuska huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
365*c03c5b1cSMartin Matuska huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
366*c03c5b1cSMartin Matuska nodeNb++; lowS-=2;
367*c03c5b1cSMartin Matuska for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
368*c03c5b1cSMartin Matuska huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
369*c03c5b1cSMartin Matuska
370*c03c5b1cSMartin Matuska /* create parents */
371*c03c5b1cSMartin Matuska while (nodeNb <= nodeRoot) {
372*c03c5b1cSMartin Matuska int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
373*c03c5b1cSMartin Matuska int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
374*c03c5b1cSMartin Matuska huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
375*c03c5b1cSMartin Matuska huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
376*c03c5b1cSMartin Matuska nodeNb++;
377*c03c5b1cSMartin Matuska }
378*c03c5b1cSMartin Matuska
379*c03c5b1cSMartin Matuska /* distribute weights (unlimited tree height) */
380*c03c5b1cSMartin Matuska huffNode[nodeRoot].nbBits = 0;
381*c03c5b1cSMartin Matuska for (n=nodeRoot-1; n>=STARTNODE; n--)
382*c03c5b1cSMartin Matuska huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
383*c03c5b1cSMartin Matuska for (n=0; n<=nonNullRank; n++)
384*c03c5b1cSMartin Matuska huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
385*c03c5b1cSMartin Matuska
386*c03c5b1cSMartin Matuska /* enforce maxTableLog */
387*c03c5b1cSMartin Matuska maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
388*c03c5b1cSMartin Matuska
389*c03c5b1cSMartin Matuska /* fill result into tree (val, nbBits) */
390*c03c5b1cSMartin Matuska { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
391*c03c5b1cSMartin Matuska U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
392*c03c5b1cSMartin Matuska int const alphabetSize = (int)(maxSymbolValue + 1);
393*c03c5b1cSMartin Matuska if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
394*c03c5b1cSMartin Matuska for (n=0; n<=nonNullRank; n++)
395*c03c5b1cSMartin Matuska nbPerRank[huffNode[n].nbBits]++;
396*c03c5b1cSMartin Matuska /* determine stating value per rank */
397*c03c5b1cSMartin Matuska { U16 min = 0;
398*c03c5b1cSMartin Matuska for (n=(int)maxNbBits; n>0; n--) {
399*c03c5b1cSMartin Matuska valPerRank[n] = min; /* get starting value within each rank */
400*c03c5b1cSMartin Matuska min += nbPerRank[n];
401*c03c5b1cSMartin Matuska min >>= 1;
402*c03c5b1cSMartin Matuska } }
403*c03c5b1cSMartin Matuska for (n=0; n<alphabetSize; n++)
404*c03c5b1cSMartin Matuska tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
405*c03c5b1cSMartin Matuska for (n=0; n<alphabetSize; n++)
406*c03c5b1cSMartin Matuska tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
407*c03c5b1cSMartin Matuska }
408*c03c5b1cSMartin Matuska
409*c03c5b1cSMartin Matuska return maxNbBits;
410*c03c5b1cSMartin Matuska }
411*c03c5b1cSMartin Matuska
412*c03c5b1cSMartin Matuska /** HUF_buildCTable() :
413*c03c5b1cSMartin Matuska * @return : maxNbBits
414*c03c5b1cSMartin Matuska * Note : count is used before tree is written, so they can safely overlap
415*c03c5b1cSMartin Matuska */
HUF_buildCTable(HUF_CElt * tree,const unsigned * count,unsigned maxSymbolValue,unsigned maxNbBits)416*c03c5b1cSMartin Matuska size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
417*c03c5b1cSMartin Matuska {
418*c03c5b1cSMartin Matuska HUF_buildCTable_wksp_tables workspace;
419*c03c5b1cSMartin Matuska return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
420*c03c5b1cSMartin Matuska }
421*c03c5b1cSMartin Matuska
HUF_estimateCompressedSize(const HUF_CElt * CTable,const unsigned * count,unsigned maxSymbolValue)422*c03c5b1cSMartin Matuska size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
423*c03c5b1cSMartin Matuska {
424*c03c5b1cSMartin Matuska size_t nbBits = 0;
425*c03c5b1cSMartin Matuska int s;
426*c03c5b1cSMartin Matuska for (s = 0; s <= (int)maxSymbolValue; ++s) {
427*c03c5b1cSMartin Matuska nbBits += CTable[s].nbBits * count[s];
428*c03c5b1cSMartin Matuska }
429*c03c5b1cSMartin Matuska return nbBits >> 3;
430*c03c5b1cSMartin Matuska }
431*c03c5b1cSMartin Matuska
HUF_validateCTable(const HUF_CElt * CTable,const unsigned * count,unsigned maxSymbolValue)432*c03c5b1cSMartin Matuska int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
433*c03c5b1cSMartin Matuska int bad = 0;
434*c03c5b1cSMartin Matuska int s;
435*c03c5b1cSMartin Matuska for (s = 0; s <= (int)maxSymbolValue; ++s) {
436*c03c5b1cSMartin Matuska bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
437*c03c5b1cSMartin Matuska }
438*c03c5b1cSMartin Matuska return !bad;
439*c03c5b1cSMartin Matuska }
440*c03c5b1cSMartin Matuska
/* HUF_compressBound() :
 * Function form of the HUF_COMPRESSBOUND() macro.
 * @return : HUF_COMPRESSBOUND(size) */
size_t HUF_compressBound(size_t size)
{
    size_t const bound = HUF_COMPRESSBOUND(size);
    return bound;
}
442*c03c5b1cSMartin Matuska
443*c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t * bitCPtr,U32 symbol,const HUF_CElt * CTable)444*c03c5b1cSMartin Matuska HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
445*c03c5b1cSMartin Matuska {
446*c03c5b1cSMartin Matuska BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
447*c03c5b1cSMartin Matuska }
448*c03c5b1cSMartin Matuska
/* Unconditional flush of the bit container. */
#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* Conditional flushes : the test is a compile-time constant that depends only on the
 * width of the bit container, so the flush is kept only when the container is too
 * narrow to hold that many maximum-length codes.
 * Wrapped in do { } while (0) so that each macro expands to exactly one statement
 * and cannot capture a following `else`. Invoke with a trailing semicolon. */
#define HUF_FLUSHBITS_1(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream); } while (0)

#define HUF_FLUSHBITS_2(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream); } while (0)
456*c03c5b1cSMartin Matuska
457*c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void * dst,size_t dstSize,const void * src,size_t srcSize,const HUF_CElt * CTable)458*c03c5b1cSMartin Matuska HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
459*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
460*c03c5b1cSMartin Matuska const HUF_CElt* CTable)
461*c03c5b1cSMartin Matuska {
462*c03c5b1cSMartin Matuska const BYTE* ip = (const BYTE*) src;
463*c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE*)dst;
464*c03c5b1cSMartin Matuska BYTE* const oend = ostart + dstSize;
465*c03c5b1cSMartin Matuska BYTE* op = ostart;
466*c03c5b1cSMartin Matuska size_t n;
467*c03c5b1cSMartin Matuska BIT_CStream_t bitC;
468*c03c5b1cSMartin Matuska
469*c03c5b1cSMartin Matuska /* init */
470*c03c5b1cSMartin Matuska if (dstSize < 8) return 0; /* not enough space to compress */
471*c03c5b1cSMartin Matuska { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
472*c03c5b1cSMartin Matuska if (HUF_isError(initErr)) return 0; }
473*c03c5b1cSMartin Matuska
474*c03c5b1cSMartin Matuska n = srcSize & ~3; /* join to mod 4 */
475*c03c5b1cSMartin Matuska switch (srcSize & 3)
476*c03c5b1cSMartin Matuska {
477*c03c5b1cSMartin Matuska case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
478*c03c5b1cSMartin Matuska HUF_FLUSHBITS_2(&bitC);
479*c03c5b1cSMartin Matuska /* fall-through */
480*c03c5b1cSMartin Matuska case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
481*c03c5b1cSMartin Matuska HUF_FLUSHBITS_1(&bitC);
482*c03c5b1cSMartin Matuska /* fall-through */
483*c03c5b1cSMartin Matuska case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
484*c03c5b1cSMartin Matuska HUF_FLUSHBITS(&bitC);
485*c03c5b1cSMartin Matuska /* fall-through */
486*c03c5b1cSMartin Matuska case 0 : /* fall-through */
487*c03c5b1cSMartin Matuska default: break;
488*c03c5b1cSMartin Matuska }
489*c03c5b1cSMartin Matuska
490*c03c5b1cSMartin Matuska for (; n>0; n-=4) { /* note : n&3==0 at this stage */
491*c03c5b1cSMartin Matuska HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
492*c03c5b1cSMartin Matuska HUF_FLUSHBITS_1(&bitC);
493*c03c5b1cSMartin Matuska HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
494*c03c5b1cSMartin Matuska HUF_FLUSHBITS_2(&bitC);
495*c03c5b1cSMartin Matuska HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
496*c03c5b1cSMartin Matuska HUF_FLUSHBITS_1(&bitC);
497*c03c5b1cSMartin Matuska HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
498*c03c5b1cSMartin Matuska HUF_FLUSHBITS(&bitC);
499*c03c5b1cSMartin Matuska }
500*c03c5b1cSMartin Matuska
501*c03c5b1cSMartin Matuska return BIT_closeCStream(&bitC);
502*c03c5b1cSMartin Matuska }
503*c03c5b1cSMartin Matuska
504*c03c5b1cSMartin Matuska #if DYNAMIC_BMI2
505*c03c5b1cSMartin Matuska
506*c03c5b1cSMartin Matuska static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void * dst,size_t dstSize,const void * src,size_t srcSize,const HUF_CElt * CTable)507*c03c5b1cSMartin Matuska HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
508*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
509*c03c5b1cSMartin Matuska const HUF_CElt* CTable)
510*c03c5b1cSMartin Matuska {
511*c03c5b1cSMartin Matuska return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
512*c03c5b1cSMartin Matuska }
513*c03c5b1cSMartin Matuska
514*c03c5b1cSMartin Matuska static size_t
HUF_compress1X_usingCTable_internal_default(void * dst,size_t dstSize,const void * src,size_t srcSize,const HUF_CElt * CTable)515*c03c5b1cSMartin Matuska HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
516*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
517*c03c5b1cSMartin Matuska const HUF_CElt* CTable)
518*c03c5b1cSMartin Matuska {
519*c03c5b1cSMartin Matuska return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
520*c03c5b1cSMartin Matuska }
521*c03c5b1cSMartin Matuska
522*c03c5b1cSMartin Matuska static size_t
HUF_compress1X_usingCTable_internal(void * dst,size_t dstSize,const void * src,size_t srcSize,const HUF_CElt * CTable,const int bmi2)523*c03c5b1cSMartin Matuska HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
524*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
525*c03c5b1cSMartin Matuska const HUF_CElt* CTable, const int bmi2)
526*c03c5b1cSMartin Matuska {
527*c03c5b1cSMartin Matuska if (bmi2) {
528*c03c5b1cSMartin Matuska return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
529*c03c5b1cSMartin Matuska }
530*c03c5b1cSMartin Matuska return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
531*c03c5b1cSMartin Matuska }
532*c03c5b1cSMartin Matuska
533*c03c5b1cSMartin Matuska #else
534*c03c5b1cSMartin Matuska
535*c03c5b1cSMartin Matuska static size_t
HUF_compress1X_usingCTable_internal(void * dst,size_t dstSize,const void * src,size_t srcSize,const HUF_CElt * CTable,const int bmi2)536*c03c5b1cSMartin Matuska HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
537*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
538*c03c5b1cSMartin Matuska const HUF_CElt* CTable, const int bmi2)
539*c03c5b1cSMartin Matuska {
540*c03c5b1cSMartin Matuska (void)bmi2;
541*c03c5b1cSMartin Matuska return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
542*c03c5b1cSMartin Matuska }
543*c03c5b1cSMartin Matuska
544*c03c5b1cSMartin Matuska #endif
545*c03c5b1cSMartin Matuska
/* HUF_compress1X_usingCTable() :
 * Encode `src` as one Huffman stream using an already built `CTable`.
 * Always requests the non-bmi2 code path.
 * @return : result of HUF_compress1X_usingCTable_internal() */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    int const noBmi2 = 0;
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, noBmi2);
}
550*c03c5b1cSMartin Matuska
551*c03c5b1cSMartin Matuska
552*c03c5b1cSMartin Matuska static size_t
HUF_compress4X_usingCTable_internal(void * dst,size_t dstSize,const void * src,size_t srcSize,const HUF_CElt * CTable,int bmi2)553*c03c5b1cSMartin Matuska HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
554*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
555*c03c5b1cSMartin Matuska const HUF_CElt* CTable, int bmi2)
556*c03c5b1cSMartin Matuska {
557*c03c5b1cSMartin Matuska size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
558*c03c5b1cSMartin Matuska const BYTE* ip = (const BYTE*) src;
559*c03c5b1cSMartin Matuska const BYTE* const iend = ip + srcSize;
560*c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE*) dst;
561*c03c5b1cSMartin Matuska BYTE* const oend = ostart + dstSize;
562*c03c5b1cSMartin Matuska BYTE* op = ostart;
563*c03c5b1cSMartin Matuska
564*c03c5b1cSMartin Matuska if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */
565*c03c5b1cSMartin Matuska if (srcSize < 12) return 0; /* no saving possible : too small input */
566*c03c5b1cSMartin Matuska op += 6; /* jumpTable */
567*c03c5b1cSMartin Matuska
568*c03c5b1cSMartin Matuska assert(op <= oend);
569*c03c5b1cSMartin Matuska { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
570*c03c5b1cSMartin Matuska if (cSize==0) return 0;
571*c03c5b1cSMartin Matuska assert(cSize <= 65535);
572*c03c5b1cSMartin Matuska MEM_writeLE16(ostart, (U16)cSize);
573*c03c5b1cSMartin Matuska op += cSize;
574*c03c5b1cSMartin Matuska }
575*c03c5b1cSMartin Matuska
576*c03c5b1cSMartin Matuska ip += segmentSize;
577*c03c5b1cSMartin Matuska assert(op <= oend);
578*c03c5b1cSMartin Matuska { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
579*c03c5b1cSMartin Matuska if (cSize==0) return 0;
580*c03c5b1cSMartin Matuska assert(cSize <= 65535);
581*c03c5b1cSMartin Matuska MEM_writeLE16(ostart+2, (U16)cSize);
582*c03c5b1cSMartin Matuska op += cSize;
583*c03c5b1cSMartin Matuska }
584*c03c5b1cSMartin Matuska
585*c03c5b1cSMartin Matuska ip += segmentSize;
586*c03c5b1cSMartin Matuska assert(op <= oend);
587*c03c5b1cSMartin Matuska { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
588*c03c5b1cSMartin Matuska if (cSize==0) return 0;
589*c03c5b1cSMartin Matuska assert(cSize <= 65535);
590*c03c5b1cSMartin Matuska MEM_writeLE16(ostart+4, (U16)cSize);
591*c03c5b1cSMartin Matuska op += cSize;
592*c03c5b1cSMartin Matuska }
593*c03c5b1cSMartin Matuska
594*c03c5b1cSMartin Matuska ip += segmentSize;
595*c03c5b1cSMartin Matuska assert(op <= oend);
596*c03c5b1cSMartin Matuska assert(ip <= iend);
597*c03c5b1cSMartin Matuska { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
598*c03c5b1cSMartin Matuska if (cSize==0) return 0;
599*c03c5b1cSMartin Matuska op += cSize;
600*c03c5b1cSMartin Matuska }
601*c03c5b1cSMartin Matuska
602*c03c5b1cSMartin Matuska return (size_t)(op-ostart);
603*c03c5b1cSMartin Matuska }
604*c03c5b1cSMartin Matuska
/* HUF_compress4X_usingCTable() :
 * Encode `src` as 4 Huffman streams using an already built `CTable`.
 * Always requests the non-bmi2 code path.
 * @return : result of HUF_compress4X_usingCTable_internal() */
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    int const noBmi2 = 0;
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, noBmi2);
}
609*c03c5b1cSMartin Matuska
/* HUF_nbStreams_e :
 * selects between the single-stream and the 4-streams encoder */
typedef enum {
    HUF_singleStream,   /* HUF_compress1X_usingCTable_internal() */
    HUF_fourStreams     /* HUF_compress4X_usingCTable_internal() */
} HUF_nbStreams_e;
611*c03c5b1cSMartin Matuska
/* HUF_compressCTable_internal() :
 * Encode `src` with `CTable` at position `op`, using 1 or 4 streams as
 * requested by `nbStreams`, then check that the whole output
 * (everything from `ostart`, i.e. including whatever precedes `op`)
 * is smaller than the input.
 * @return : nb of bytes between `ostart` and the end of the encoded data,
 *           or 0 if the encoder gave up or the result is not small enough,
 *           or an error code forwarded from the encoder. */
static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
    size_t const dstCapacity = (size_t)(oend - op);
    size_t cSize;

    if (nbStreams == HUF_singleStream) {
        cSize = HUF_compress1X_usingCTable_internal(op, dstCapacity, src, srcSize, CTable, bmi2);
    } else {
        cSize = HUF_compress4X_usingCTable_internal(op, dstCapacity, src, srcSize, CTable, bmi2);
    }

    if (HUF_isError(cSize)) return cSize;
    if (cSize == 0) return 0;   /* uncompressible */
    op += cSize;

    /* check compressibility */
    assert(op >= ostart);
    {   size_t const totalSize = (size_t)(op - ostart);
        if (totalSize >= srcSize-1) return 0;
        return totalSize;
    }
}
628*c03c5b1cSMartin Matuska
629*c03c5b1cSMartin Matuska typedef struct {
630*c03c5b1cSMartin Matuska unsigned count[HUF_SYMBOLVALUE_MAX + 1];
631*c03c5b1cSMartin Matuska HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
632*c03c5b1cSMartin Matuska HUF_buildCTable_wksp_tables buildCTable_wksp;
633*c03c5b1cSMartin Matuska } HUF_compress_tables_t;
634*c03c5b1cSMartin Matuska
635*c03c5b1cSMartin Matuska /* HUF_compress_internal() :
636*c03c5b1cSMartin Matuska * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
637*c03c5b1cSMartin Matuska static size_t
HUF_compress_internal(void * dst,size_t dstSize,const void * src,size_t srcSize,unsigned maxSymbolValue,unsigned huffLog,HUF_nbStreams_e nbStreams,void * workSpace,size_t wkspSize,HUF_CElt * oldHufTable,HUF_repeat * repeat,int preferRepeat,const int bmi2)638*c03c5b1cSMartin Matuska HUF_compress_internal (void* dst, size_t dstSize,
639*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
640*c03c5b1cSMartin Matuska unsigned maxSymbolValue, unsigned huffLog,
641*c03c5b1cSMartin Matuska HUF_nbStreams_e nbStreams,
642*c03c5b1cSMartin Matuska void* workSpace, size_t wkspSize,
643*c03c5b1cSMartin Matuska HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
644*c03c5b1cSMartin Matuska const int bmi2)
645*c03c5b1cSMartin Matuska {
646*c03c5b1cSMartin Matuska HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
647*c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE*)dst;
648*c03c5b1cSMartin Matuska BYTE* const oend = ostart + dstSize;
649*c03c5b1cSMartin Matuska BYTE* op = ostart;
650*c03c5b1cSMartin Matuska
651*c03c5b1cSMartin Matuska HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
652*c03c5b1cSMartin Matuska
653*c03c5b1cSMartin Matuska /* checks & inits */
654*c03c5b1cSMartin Matuska if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
655*c03c5b1cSMartin Matuska if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
656*c03c5b1cSMartin Matuska if (!srcSize) return 0; /* Uncompressed */
657*c03c5b1cSMartin Matuska if (!dstSize) return 0; /* cannot fit anything within dst budget */
658*c03c5b1cSMartin Matuska if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
659*c03c5b1cSMartin Matuska if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
660*c03c5b1cSMartin Matuska if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
661*c03c5b1cSMartin Matuska if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
662*c03c5b1cSMartin Matuska if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
663*c03c5b1cSMartin Matuska
664*c03c5b1cSMartin Matuska /* Heuristic : If old table is valid, use it for small inputs */
665*c03c5b1cSMartin Matuska if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
666*c03c5b1cSMartin Matuska return HUF_compressCTable_internal(ostart, op, oend,
667*c03c5b1cSMartin Matuska src, srcSize,
668*c03c5b1cSMartin Matuska nbStreams, oldHufTable, bmi2);
669*c03c5b1cSMartin Matuska }
670*c03c5b1cSMartin Matuska
671*c03c5b1cSMartin Matuska /* Scan input and build symbol stats */
672*c03c5b1cSMartin Matuska { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
673*c03c5b1cSMartin Matuska if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
674*c03c5b1cSMartin Matuska if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
675*c03c5b1cSMartin Matuska }
676*c03c5b1cSMartin Matuska
677*c03c5b1cSMartin Matuska /* Check validity of previous table */
678*c03c5b1cSMartin Matuska if ( repeat
679*c03c5b1cSMartin Matuska && *repeat == HUF_repeat_check
680*c03c5b1cSMartin Matuska && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
681*c03c5b1cSMartin Matuska *repeat = HUF_repeat_none;
682*c03c5b1cSMartin Matuska }
683*c03c5b1cSMartin Matuska /* Heuristic : use existing table for small inputs */
684*c03c5b1cSMartin Matuska if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
685*c03c5b1cSMartin Matuska return HUF_compressCTable_internal(ostart, op, oend,
686*c03c5b1cSMartin Matuska src, srcSize,
687*c03c5b1cSMartin Matuska nbStreams, oldHufTable, bmi2);
688*c03c5b1cSMartin Matuska }
689*c03c5b1cSMartin Matuska
690*c03c5b1cSMartin Matuska /* Build Huffman Tree */
691*c03c5b1cSMartin Matuska huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
692*c03c5b1cSMartin Matuska { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
693*c03c5b1cSMartin Matuska maxSymbolValue, huffLog,
694*c03c5b1cSMartin Matuska &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
695*c03c5b1cSMartin Matuska CHECK_F(maxBits);
696*c03c5b1cSMartin Matuska huffLog = (U32)maxBits;
697*c03c5b1cSMartin Matuska /* Zero unused symbols in CTable, so we can check it for validity */
698*c03c5b1cSMartin Matuska memset(table->CTable + (maxSymbolValue + 1), 0,
699*c03c5b1cSMartin Matuska sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
700*c03c5b1cSMartin Matuska }
701*c03c5b1cSMartin Matuska
702*c03c5b1cSMartin Matuska /* Write table description header */
703*c03c5b1cSMartin Matuska { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
704*c03c5b1cSMartin Matuska /* Check if using previous huffman table is beneficial */
705*c03c5b1cSMartin Matuska if (repeat && *repeat != HUF_repeat_none) {
706*c03c5b1cSMartin Matuska size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
707*c03c5b1cSMartin Matuska size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
708*c03c5b1cSMartin Matuska if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
709*c03c5b1cSMartin Matuska return HUF_compressCTable_internal(ostart, op, oend,
710*c03c5b1cSMartin Matuska src, srcSize,
711*c03c5b1cSMartin Matuska nbStreams, oldHufTable, bmi2);
712*c03c5b1cSMartin Matuska } }
713*c03c5b1cSMartin Matuska
714*c03c5b1cSMartin Matuska /* Use the new huffman table */
715*c03c5b1cSMartin Matuska if (hSize + 12ul >= srcSize) { return 0; }
716*c03c5b1cSMartin Matuska op += hSize;
717*c03c5b1cSMartin Matuska if (repeat) { *repeat = HUF_repeat_none; }
718*c03c5b1cSMartin Matuska if (oldHufTable)
719*c03c5b1cSMartin Matuska memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
720*c03c5b1cSMartin Matuska }
721*c03c5b1cSMartin Matuska return HUF_compressCTable_internal(ostart, op, oend,
722*c03c5b1cSMartin Matuska src, srcSize,
723*c03c5b1cSMartin Matuska nbStreams, table->CTable, bmi2);
724*c03c5b1cSMartin Matuska }
725*c03c5b1cSMartin Matuska
726*c03c5b1cSMartin Matuska
HUF_compress1X_wksp(void * dst,size_t dstSize,const void * src,size_t srcSize,unsigned maxSymbolValue,unsigned huffLog,void * workSpace,size_t wkspSize)727*c03c5b1cSMartin Matuska size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
728*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
729*c03c5b1cSMartin Matuska unsigned maxSymbolValue, unsigned huffLog,
730*c03c5b1cSMartin Matuska void* workSpace, size_t wkspSize)
731*c03c5b1cSMartin Matuska {
732*c03c5b1cSMartin Matuska return HUF_compress_internal(dst, dstSize, src, srcSize,
733*c03c5b1cSMartin Matuska maxSymbolValue, huffLog, HUF_singleStream,
734*c03c5b1cSMartin Matuska workSpace, wkspSize,
735*c03c5b1cSMartin Matuska NULL, NULL, 0, 0 /*bmi2*/);
736*c03c5b1cSMartin Matuska }
737*c03c5b1cSMartin Matuska
/* HUF_compress1X_repeat() :
 * compress input using a single stream.
 * `hufTable`, `repeat` and `preferRepeat` are forwarded to
 * HUF_compress_internal(), which decides whether the existing table is re-used. */
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize,
                                 src, srcSize,
                                 maxSymbolValue, huffLog,
                                 HUF_singleStream,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat,
                                 bmi2);
}
749*c03c5b1cSMartin Matuska
HUF_compress1X(void * dst,size_t dstSize,const void * src,size_t srcSize,unsigned maxSymbolValue,unsigned huffLog)750*c03c5b1cSMartin Matuska size_t HUF_compress1X (void* dst, size_t dstSize,
751*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
752*c03c5b1cSMartin Matuska unsigned maxSymbolValue, unsigned huffLog)
753*c03c5b1cSMartin Matuska {
754*c03c5b1cSMartin Matuska unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
755*c03c5b1cSMartin Matuska return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
756*c03c5b1cSMartin Matuska }
757*c03c5b1cSMartin Matuska
758*c03c5b1cSMartin Matuska /* HUF_compress4X_repeat():
759*c03c5b1cSMartin Matuska * compress input using 4 streams.
760*c03c5b1cSMartin Matuska * provide workspace to generate compression tables */
HUF_compress4X_wksp(void * dst,size_t dstSize,const void * src,size_t srcSize,unsigned maxSymbolValue,unsigned huffLog,void * workSpace,size_t wkspSize)761*c03c5b1cSMartin Matuska size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
762*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
763*c03c5b1cSMartin Matuska unsigned maxSymbolValue, unsigned huffLog,
764*c03c5b1cSMartin Matuska void* workSpace, size_t wkspSize)
765*c03c5b1cSMartin Matuska {
766*c03c5b1cSMartin Matuska return HUF_compress_internal(dst, dstSize, src, srcSize,
767*c03c5b1cSMartin Matuska maxSymbolValue, huffLog, HUF_fourStreams,
768*c03c5b1cSMartin Matuska workSpace, wkspSize,
769*c03c5b1cSMartin Matuska NULL, NULL, 0, 0 /*bmi2*/);
770*c03c5b1cSMartin Matuska }
771*c03c5b1cSMartin Matuska
772*c03c5b1cSMartin Matuska /* HUF_compress4X_repeat():
773*c03c5b1cSMartin Matuska * compress input using 4 streams.
774*c03c5b1cSMartin Matuska * re-use an existing huffman compression table */
HUF_compress4X_repeat(void * dst,size_t dstSize,const void * src,size_t srcSize,unsigned maxSymbolValue,unsigned huffLog,void * workSpace,size_t wkspSize,HUF_CElt * hufTable,HUF_repeat * repeat,int preferRepeat,int bmi2)775*c03c5b1cSMartin Matuska size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
776*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
777*c03c5b1cSMartin Matuska unsigned maxSymbolValue, unsigned huffLog,
778*c03c5b1cSMartin Matuska void* workSpace, size_t wkspSize,
779*c03c5b1cSMartin Matuska HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
780*c03c5b1cSMartin Matuska {
781*c03c5b1cSMartin Matuska return HUF_compress_internal(dst, dstSize, src, srcSize,
782*c03c5b1cSMartin Matuska maxSymbolValue, huffLog, HUF_fourStreams,
783*c03c5b1cSMartin Matuska workSpace, wkspSize,
784*c03c5b1cSMartin Matuska hufTable, repeat, preferRepeat, bmi2);
785*c03c5b1cSMartin Matuska }
786*c03c5b1cSMartin Matuska
HUF_compress2(void * dst,size_t dstSize,const void * src,size_t srcSize,unsigned maxSymbolValue,unsigned huffLog)787*c03c5b1cSMartin Matuska size_t HUF_compress2 (void* dst, size_t dstSize,
788*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
789*c03c5b1cSMartin Matuska unsigned maxSymbolValue, unsigned huffLog)
790*c03c5b1cSMartin Matuska {
791*c03c5b1cSMartin Matuska unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
792*c03c5b1cSMartin Matuska return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
793*c03c5b1cSMartin Matuska }
794*c03c5b1cSMartin Matuska
HUF_compress(void * dst,size_t maxDstSize,const void * src,size_t srcSize)795*c03c5b1cSMartin Matuska size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
796*c03c5b1cSMartin Matuska {
797*c03c5b1cSMartin Matuska return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
798*c03c5b1cSMartin Matuska }
799