1*c03c5b1cSMartin Matuska /*
2*c03c5b1cSMartin Matuska * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3*c03c5b1cSMartin Matuska * All rights reserved.
4*c03c5b1cSMartin Matuska *
5*c03c5b1cSMartin Matuska * This source code is licensed under both the BSD-style license (found in the
6*c03c5b1cSMartin Matuska * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*c03c5b1cSMartin Matuska * in the COPYING file in the root directory of this source tree).
8*c03c5b1cSMartin Matuska * You may select, at your option, one of the above-listed licenses.
9*c03c5b1cSMartin Matuska */
10*c03c5b1cSMartin Matuska
11*c03c5b1cSMartin Matuska /*-*************************************
12*c03c5b1cSMartin Matuska * Dependencies
13*c03c5b1cSMartin Matuska ***************************************/
14*c03c5b1cSMartin Matuska #include "zstd_compress_sequences.h"
15*c03c5b1cSMartin Matuska
16*c03c5b1cSMartin Matuska /**
17*c03c5b1cSMartin Matuska * -log2(x / 256) lookup table for x in [0, 256).
18*c03c5b1cSMartin Matuska * If x == 0: Return 0
19*c03c5b1cSMartin Matuska * Else: Return floor(-log2(x / 256) * 256)
20*c03c5b1cSMartin Matuska */
21*c03c5b1cSMartin Matuska static unsigned const kInverseProbabilityLog256[256] = {
22*c03c5b1cSMartin Matuska 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
23*c03c5b1cSMartin Matuska 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
24*c03c5b1cSMartin Matuska 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
25*c03c5b1cSMartin Matuska 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
26*c03c5b1cSMartin Matuska 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
27*c03c5b1cSMartin Matuska 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
28*c03c5b1cSMartin Matuska 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
29*c03c5b1cSMartin Matuska 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
30*c03c5b1cSMartin Matuska 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
31*c03c5b1cSMartin Matuska 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
32*c03c5b1cSMartin Matuska 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
33*c03c5b1cSMartin Matuska 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
34*c03c5b1cSMartin Matuska 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
35*c03c5b1cSMartin Matuska 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
36*c03c5b1cSMartin Matuska 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
37*c03c5b1cSMartin Matuska 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
38*c03c5b1cSMartin Matuska 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
39*c03c5b1cSMartin Matuska 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
40*c03c5b1cSMartin Matuska 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
41*c03c5b1cSMartin Matuska 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
42*c03c5b1cSMartin Matuska 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
43*c03c5b1cSMartin Matuska 5, 4, 2, 1,
44*c03c5b1cSMartin Matuska };
45*c03c5b1cSMartin Matuska
ZSTD_getFSEMaxSymbolValue(FSE_CTable const * ctable)46*c03c5b1cSMartin Matuska static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
47*c03c5b1cSMartin Matuska void const* ptr = ctable;
48*c03c5b1cSMartin Matuska U16 const* u16ptr = (U16 const*)ptr;
49*c03c5b1cSMartin Matuska U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
50*c03c5b1cSMartin Matuska return maxSymbolValue;
51*c03c5b1cSMartin Matuska }
52*c03c5b1cSMartin Matuska
53*c03c5b1cSMartin Matuska /**
54*c03c5b1cSMartin Matuska * Returns the cost in bytes of encoding the normalized count header.
55*c03c5b1cSMartin Matuska * Returns an error if any of the helper functions return an error.
56*c03c5b1cSMartin Matuska */
ZSTD_NCountCost(unsigned const * count,unsigned const max,size_t const nbSeq,unsigned const FSELog)57*c03c5b1cSMartin Matuska static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
58*c03c5b1cSMartin Matuska size_t const nbSeq, unsigned const FSELog)
59*c03c5b1cSMartin Matuska {
60*c03c5b1cSMartin Matuska BYTE wksp[FSE_NCOUNTBOUND];
61*c03c5b1cSMartin Matuska S16 norm[MaxSeq + 1];
62*c03c5b1cSMartin Matuska const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
63*c03c5b1cSMartin Matuska FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
64*c03c5b1cSMartin Matuska return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
65*c03c5b1cSMartin Matuska }
66*c03c5b1cSMartin Matuska
67*c03c5b1cSMartin Matuska /**
68*c03c5b1cSMartin Matuska * Returns the cost in bits of encoding the distribution described by count
69*c03c5b1cSMartin Matuska * using the entropy bound.
70*c03c5b1cSMartin Matuska */
ZSTD_entropyCost(unsigned const * count,unsigned const max,size_t const total)71*c03c5b1cSMartin Matuska static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
72*c03c5b1cSMartin Matuska {
73*c03c5b1cSMartin Matuska unsigned cost = 0;
74*c03c5b1cSMartin Matuska unsigned s;
75*c03c5b1cSMartin Matuska for (s = 0; s <= max; ++s) {
76*c03c5b1cSMartin Matuska unsigned norm = (unsigned)((256 * count[s]) / total);
77*c03c5b1cSMartin Matuska if (count[s] != 0 && norm == 0)
78*c03c5b1cSMartin Matuska norm = 1;
79*c03c5b1cSMartin Matuska assert(count[s] < total);
80*c03c5b1cSMartin Matuska cost += count[s] * kInverseProbabilityLog256[norm];
81*c03c5b1cSMartin Matuska }
82*c03c5b1cSMartin Matuska return cost >> 8;
83*c03c5b1cSMartin Matuska }
84*c03c5b1cSMartin Matuska
85*c03c5b1cSMartin Matuska /**
86*c03c5b1cSMartin Matuska * Returns the cost in bits of encoding the distribution in count using ctable.
87*c03c5b1cSMartin Matuska * Returns an error if ctable cannot represent all the symbols in count.
88*c03c5b1cSMartin Matuska */
ZSTD_fseBitCost(FSE_CTable const * ctable,unsigned const * count,unsigned const max)89*c03c5b1cSMartin Matuska size_t ZSTD_fseBitCost(
90*c03c5b1cSMartin Matuska FSE_CTable const* ctable,
91*c03c5b1cSMartin Matuska unsigned const* count,
92*c03c5b1cSMartin Matuska unsigned const max)
93*c03c5b1cSMartin Matuska {
94*c03c5b1cSMartin Matuska unsigned const kAccuracyLog = 8;
95*c03c5b1cSMartin Matuska size_t cost = 0;
96*c03c5b1cSMartin Matuska unsigned s;
97*c03c5b1cSMartin Matuska FSE_CState_t cstate;
98*c03c5b1cSMartin Matuska FSE_initCState(&cstate, ctable);
99*c03c5b1cSMartin Matuska if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
100*c03c5b1cSMartin Matuska DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
101*c03c5b1cSMartin Matuska ZSTD_getFSEMaxSymbolValue(ctable), max);
102*c03c5b1cSMartin Matuska return ERROR(GENERIC);
103*c03c5b1cSMartin Matuska }
104*c03c5b1cSMartin Matuska for (s = 0; s <= max; ++s) {
105*c03c5b1cSMartin Matuska unsigned const tableLog = cstate.stateLog;
106*c03c5b1cSMartin Matuska unsigned const badCost = (tableLog + 1) << kAccuracyLog;
107*c03c5b1cSMartin Matuska unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
108*c03c5b1cSMartin Matuska if (count[s] == 0)
109*c03c5b1cSMartin Matuska continue;
110*c03c5b1cSMartin Matuska if (bitCost >= badCost) {
111*c03c5b1cSMartin Matuska DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
112*c03c5b1cSMartin Matuska return ERROR(GENERIC);
113*c03c5b1cSMartin Matuska }
114*c03c5b1cSMartin Matuska cost += (size_t)count[s] * bitCost;
115*c03c5b1cSMartin Matuska }
116*c03c5b1cSMartin Matuska return cost >> kAccuracyLog;
117*c03c5b1cSMartin Matuska }
118*c03c5b1cSMartin Matuska
119*c03c5b1cSMartin Matuska /**
120*c03c5b1cSMartin Matuska * Returns the cost in bits of encoding the distribution in count using the
121*c03c5b1cSMartin Matuska * table described by norm. The max symbol support by norm is assumed >= max.
122*c03c5b1cSMartin Matuska * norm must be valid for every symbol with non-zero probability in count.
123*c03c5b1cSMartin Matuska */
ZSTD_crossEntropyCost(short const * norm,unsigned accuracyLog,unsigned const * count,unsigned const max)124*c03c5b1cSMartin Matuska size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
125*c03c5b1cSMartin Matuska unsigned const* count, unsigned const max)
126*c03c5b1cSMartin Matuska {
127*c03c5b1cSMartin Matuska unsigned const shift = 8 - accuracyLog;
128*c03c5b1cSMartin Matuska size_t cost = 0;
129*c03c5b1cSMartin Matuska unsigned s;
130*c03c5b1cSMartin Matuska assert(accuracyLog <= 8);
131*c03c5b1cSMartin Matuska for (s = 0; s <= max; ++s) {
132*c03c5b1cSMartin Matuska unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
133*c03c5b1cSMartin Matuska unsigned const norm256 = normAcc << shift;
134*c03c5b1cSMartin Matuska assert(norm256 > 0);
135*c03c5b1cSMartin Matuska assert(norm256 < 256);
136*c03c5b1cSMartin Matuska cost += count[s] * kInverseProbabilityLog256[norm256];
137*c03c5b1cSMartin Matuska }
138*c03c5b1cSMartin Matuska return cost >> 8;
139*c03c5b1cSMartin Matuska }
140*c03c5b1cSMartin Matuska
141*c03c5b1cSMartin Matuska symbolEncodingType_e
ZSTD_selectEncodingType(FSE_repeat * repeatMode,unsigned const * count,unsigned const max,size_t const mostFrequent,size_t nbSeq,unsigned const FSELog,FSE_CTable const * prevCTable,short const * defaultNorm,U32 defaultNormLog,ZSTD_defaultPolicy_e const isDefaultAllowed,ZSTD_strategy const strategy)142*c03c5b1cSMartin Matuska ZSTD_selectEncodingType(
143*c03c5b1cSMartin Matuska FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
144*c03c5b1cSMartin Matuska size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
145*c03c5b1cSMartin Matuska FSE_CTable const* prevCTable,
146*c03c5b1cSMartin Matuska short const* defaultNorm, U32 defaultNormLog,
147*c03c5b1cSMartin Matuska ZSTD_defaultPolicy_e const isDefaultAllowed,
148*c03c5b1cSMartin Matuska ZSTD_strategy const strategy)
149*c03c5b1cSMartin Matuska {
150*c03c5b1cSMartin Matuska ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
151*c03c5b1cSMartin Matuska if (mostFrequent == nbSeq) {
152*c03c5b1cSMartin Matuska *repeatMode = FSE_repeat_none;
153*c03c5b1cSMartin Matuska if (isDefaultAllowed && nbSeq <= 2) {
154*c03c5b1cSMartin Matuska /* Prefer set_basic over set_rle when there are 2 or less symbols,
155*c03c5b1cSMartin Matuska * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
156*c03c5b1cSMartin Matuska * If basic encoding isn't possible, always choose RLE.
157*c03c5b1cSMartin Matuska */
158*c03c5b1cSMartin Matuska DEBUGLOG(5, "Selected set_basic");
159*c03c5b1cSMartin Matuska return set_basic;
160*c03c5b1cSMartin Matuska }
161*c03c5b1cSMartin Matuska DEBUGLOG(5, "Selected set_rle");
162*c03c5b1cSMartin Matuska return set_rle;
163*c03c5b1cSMartin Matuska }
164*c03c5b1cSMartin Matuska if (strategy < ZSTD_lazy) {
165*c03c5b1cSMartin Matuska if (isDefaultAllowed) {
166*c03c5b1cSMartin Matuska size_t const staticFse_nbSeq_max = 1000;
167*c03c5b1cSMartin Matuska size_t const mult = 10 - strategy;
168*c03c5b1cSMartin Matuska size_t const baseLog = 3;
169*c03c5b1cSMartin Matuska size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
170*c03c5b1cSMartin Matuska assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
171*c03c5b1cSMartin Matuska assert(mult <= 9 && mult >= 7);
172*c03c5b1cSMartin Matuska if ( (*repeatMode == FSE_repeat_valid)
173*c03c5b1cSMartin Matuska && (nbSeq < staticFse_nbSeq_max) ) {
174*c03c5b1cSMartin Matuska DEBUGLOG(5, "Selected set_repeat");
175*c03c5b1cSMartin Matuska return set_repeat;
176*c03c5b1cSMartin Matuska }
177*c03c5b1cSMartin Matuska if ( (nbSeq < dynamicFse_nbSeq_min)
178*c03c5b1cSMartin Matuska || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
179*c03c5b1cSMartin Matuska DEBUGLOG(5, "Selected set_basic");
180*c03c5b1cSMartin Matuska /* The format allows default tables to be repeated, but it isn't useful.
181*c03c5b1cSMartin Matuska * When using simple heuristics to select encoding type, we don't want
182*c03c5b1cSMartin Matuska * to confuse these tables with dictionaries. When running more careful
183*c03c5b1cSMartin Matuska * analysis, we don't need to waste time checking both repeating tables
184*c03c5b1cSMartin Matuska * and default tables.
185*c03c5b1cSMartin Matuska */
186*c03c5b1cSMartin Matuska *repeatMode = FSE_repeat_none;
187*c03c5b1cSMartin Matuska return set_basic;
188*c03c5b1cSMartin Matuska }
189*c03c5b1cSMartin Matuska }
190*c03c5b1cSMartin Matuska } else {
191*c03c5b1cSMartin Matuska size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
192*c03c5b1cSMartin Matuska size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
193*c03c5b1cSMartin Matuska size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
194*c03c5b1cSMartin Matuska size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
195*c03c5b1cSMartin Matuska
196*c03c5b1cSMartin Matuska if (isDefaultAllowed) {
197*c03c5b1cSMartin Matuska assert(!ZSTD_isError(basicCost));
198*c03c5b1cSMartin Matuska assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
199*c03c5b1cSMartin Matuska }
200*c03c5b1cSMartin Matuska assert(!ZSTD_isError(NCountCost));
201*c03c5b1cSMartin Matuska assert(compressedCost < ERROR(maxCode));
202*c03c5b1cSMartin Matuska DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
203*c03c5b1cSMartin Matuska (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
204*c03c5b1cSMartin Matuska if (basicCost <= repeatCost && basicCost <= compressedCost) {
205*c03c5b1cSMartin Matuska DEBUGLOG(5, "Selected set_basic");
206*c03c5b1cSMartin Matuska assert(isDefaultAllowed);
207*c03c5b1cSMartin Matuska *repeatMode = FSE_repeat_none;
208*c03c5b1cSMartin Matuska return set_basic;
209*c03c5b1cSMartin Matuska }
210*c03c5b1cSMartin Matuska if (repeatCost <= compressedCost) {
211*c03c5b1cSMartin Matuska DEBUGLOG(5, "Selected set_repeat");
212*c03c5b1cSMartin Matuska assert(!ZSTD_isError(repeatCost));
213*c03c5b1cSMartin Matuska return set_repeat;
214*c03c5b1cSMartin Matuska }
215*c03c5b1cSMartin Matuska assert(compressedCost < basicCost && compressedCost < repeatCost);
216*c03c5b1cSMartin Matuska }
217*c03c5b1cSMartin Matuska DEBUGLOG(5, "Selected set_compressed");
218*c03c5b1cSMartin Matuska *repeatMode = FSE_repeat_check;
219*c03c5b1cSMartin Matuska return set_compressed;
220*c03c5b1cSMartin Matuska }
221*c03c5b1cSMartin Matuska
222*c03c5b1cSMartin Matuska size_t
ZSTD_buildCTable(void * dst,size_t dstCapacity,FSE_CTable * nextCTable,U32 FSELog,symbolEncodingType_e type,unsigned * count,U32 max,const BYTE * codeTable,size_t nbSeq,const S16 * defaultNorm,U32 defaultNormLog,U32 defaultMax,const FSE_CTable * prevCTable,size_t prevCTableSize,void * entropyWorkspace,size_t entropyWorkspaceSize)223*c03c5b1cSMartin Matuska ZSTD_buildCTable(void* dst, size_t dstCapacity,
224*c03c5b1cSMartin Matuska FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
225*c03c5b1cSMartin Matuska unsigned* count, U32 max,
226*c03c5b1cSMartin Matuska const BYTE* codeTable, size_t nbSeq,
227*c03c5b1cSMartin Matuska const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
228*c03c5b1cSMartin Matuska const FSE_CTable* prevCTable, size_t prevCTableSize,
229*c03c5b1cSMartin Matuska void* entropyWorkspace, size_t entropyWorkspaceSize)
230*c03c5b1cSMartin Matuska {
231*c03c5b1cSMartin Matuska BYTE* op = (BYTE*)dst;
232*c03c5b1cSMartin Matuska const BYTE* const oend = op + dstCapacity;
233*c03c5b1cSMartin Matuska DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
234*c03c5b1cSMartin Matuska
235*c03c5b1cSMartin Matuska switch (type) {
236*c03c5b1cSMartin Matuska case set_rle:
237*c03c5b1cSMartin Matuska FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
238*c03c5b1cSMartin Matuska RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
239*c03c5b1cSMartin Matuska *op = codeTable[0];
240*c03c5b1cSMartin Matuska return 1;
241*c03c5b1cSMartin Matuska case set_repeat:
242*c03c5b1cSMartin Matuska memcpy(nextCTable, prevCTable, prevCTableSize);
243*c03c5b1cSMartin Matuska return 0;
244*c03c5b1cSMartin Matuska case set_basic:
245*c03c5b1cSMartin Matuska FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
246*c03c5b1cSMartin Matuska return 0;
247*c03c5b1cSMartin Matuska case set_compressed: {
248*c03c5b1cSMartin Matuska S16 norm[MaxSeq + 1];
249*c03c5b1cSMartin Matuska size_t nbSeq_1 = nbSeq;
250*c03c5b1cSMartin Matuska const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
251*c03c5b1cSMartin Matuska if (count[codeTable[nbSeq-1]] > 1) {
252*c03c5b1cSMartin Matuska count[codeTable[nbSeq-1]]--;
253*c03c5b1cSMartin Matuska nbSeq_1--;
254*c03c5b1cSMartin Matuska }
255*c03c5b1cSMartin Matuska assert(nbSeq_1 > 1);
256*c03c5b1cSMartin Matuska FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
257*c03c5b1cSMartin Matuska { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
258*c03c5b1cSMartin Matuska FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
259*c03c5b1cSMartin Matuska FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
260*c03c5b1cSMartin Matuska return NCountSize;
261*c03c5b1cSMartin Matuska }
262*c03c5b1cSMartin Matuska }
263*c03c5b1cSMartin Matuska default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
264*c03c5b1cSMartin Matuska }
265*c03c5b1cSMartin Matuska }
266*c03c5b1cSMartin Matuska
267*c03c5b1cSMartin Matuska FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)268*c03c5b1cSMartin Matuska ZSTD_encodeSequences_body(
269*c03c5b1cSMartin Matuska void* dst, size_t dstCapacity,
270*c03c5b1cSMartin Matuska FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
271*c03c5b1cSMartin Matuska FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
272*c03c5b1cSMartin Matuska FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
273*c03c5b1cSMartin Matuska seqDef const* sequences, size_t nbSeq, int longOffsets)
274*c03c5b1cSMartin Matuska {
275*c03c5b1cSMartin Matuska BIT_CStream_t blockStream;
276*c03c5b1cSMartin Matuska FSE_CState_t stateMatchLength;
277*c03c5b1cSMartin Matuska FSE_CState_t stateOffsetBits;
278*c03c5b1cSMartin Matuska FSE_CState_t stateLitLength;
279*c03c5b1cSMartin Matuska
280*c03c5b1cSMartin Matuska RETURN_ERROR_IF(
281*c03c5b1cSMartin Matuska ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
282*c03c5b1cSMartin Matuska dstSize_tooSmall, "not enough space remaining");
283*c03c5b1cSMartin Matuska DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
284*c03c5b1cSMartin Matuska (int)(blockStream.endPtr - blockStream.startPtr),
285*c03c5b1cSMartin Matuska (unsigned)dstCapacity);
286*c03c5b1cSMartin Matuska
287*c03c5b1cSMartin Matuska /* first symbols */
288*c03c5b1cSMartin Matuska FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
289*c03c5b1cSMartin Matuska FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
290*c03c5b1cSMartin Matuska FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
291*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
292*c03c5b1cSMartin Matuska if (MEM_32bits()) BIT_flushBits(&blockStream);
293*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
294*c03c5b1cSMartin Matuska if (MEM_32bits()) BIT_flushBits(&blockStream);
295*c03c5b1cSMartin Matuska if (longOffsets) {
296*c03c5b1cSMartin Matuska U32 const ofBits = ofCodeTable[nbSeq-1];
297*c03c5b1cSMartin Matuska unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
298*c03c5b1cSMartin Matuska if (extraBits) {
299*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
300*c03c5b1cSMartin Matuska BIT_flushBits(&blockStream);
301*c03c5b1cSMartin Matuska }
302*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
303*c03c5b1cSMartin Matuska ofBits - extraBits);
304*c03c5b1cSMartin Matuska } else {
305*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
306*c03c5b1cSMartin Matuska }
307*c03c5b1cSMartin Matuska BIT_flushBits(&blockStream);
308*c03c5b1cSMartin Matuska
309*c03c5b1cSMartin Matuska { size_t n;
310*c03c5b1cSMartin Matuska for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
311*c03c5b1cSMartin Matuska BYTE const llCode = llCodeTable[n];
312*c03c5b1cSMartin Matuska BYTE const ofCode = ofCodeTable[n];
313*c03c5b1cSMartin Matuska BYTE const mlCode = mlCodeTable[n];
314*c03c5b1cSMartin Matuska U32 const llBits = LL_bits[llCode];
315*c03c5b1cSMartin Matuska U32 const ofBits = ofCode;
316*c03c5b1cSMartin Matuska U32 const mlBits = ML_bits[mlCode];
317*c03c5b1cSMartin Matuska DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
318*c03c5b1cSMartin Matuska (unsigned)sequences[n].litLength,
319*c03c5b1cSMartin Matuska (unsigned)sequences[n].matchLength + MINMATCH,
320*c03c5b1cSMartin Matuska (unsigned)sequences[n].offset);
321*c03c5b1cSMartin Matuska /* 32b*/ /* 64b*/
322*c03c5b1cSMartin Matuska /* (7)*/ /* (7)*/
323*c03c5b1cSMartin Matuska FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
324*c03c5b1cSMartin Matuska FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
325*c03c5b1cSMartin Matuska if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
326*c03c5b1cSMartin Matuska FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
327*c03c5b1cSMartin Matuska if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
328*c03c5b1cSMartin Matuska BIT_flushBits(&blockStream); /* (7)*/
329*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[n].litLength, llBits);
330*c03c5b1cSMartin Matuska if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
331*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
332*c03c5b1cSMartin Matuska if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
333*c03c5b1cSMartin Matuska if (longOffsets) {
334*c03c5b1cSMartin Matuska unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
335*c03c5b1cSMartin Matuska if (extraBits) {
336*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[n].offset, extraBits);
337*c03c5b1cSMartin Matuska BIT_flushBits(&blockStream); /* (7)*/
338*c03c5b1cSMartin Matuska }
339*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
340*c03c5b1cSMartin Matuska ofBits - extraBits); /* 31 */
341*c03c5b1cSMartin Matuska } else {
342*c03c5b1cSMartin Matuska BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
343*c03c5b1cSMartin Matuska }
344*c03c5b1cSMartin Matuska BIT_flushBits(&blockStream); /* (7)*/
345*c03c5b1cSMartin Matuska DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
346*c03c5b1cSMartin Matuska } }
347*c03c5b1cSMartin Matuska
348*c03c5b1cSMartin Matuska DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
349*c03c5b1cSMartin Matuska FSE_flushCState(&blockStream, &stateMatchLength);
350*c03c5b1cSMartin Matuska DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
351*c03c5b1cSMartin Matuska FSE_flushCState(&blockStream, &stateOffsetBits);
352*c03c5b1cSMartin Matuska DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
353*c03c5b1cSMartin Matuska FSE_flushCState(&blockStream, &stateLitLength);
354*c03c5b1cSMartin Matuska
355*c03c5b1cSMartin Matuska { size_t const streamSize = BIT_closeCStream(&blockStream);
356*c03c5b1cSMartin Matuska RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
357*c03c5b1cSMartin Matuska return streamSize;
358*c03c5b1cSMartin Matuska }
359*c03c5b1cSMartin Matuska }
360*c03c5b1cSMartin Matuska
361*c03c5b1cSMartin Matuska static size_t
ZSTD_encodeSequences_default(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)362*c03c5b1cSMartin Matuska ZSTD_encodeSequences_default(
363*c03c5b1cSMartin Matuska void* dst, size_t dstCapacity,
364*c03c5b1cSMartin Matuska FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
365*c03c5b1cSMartin Matuska FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
366*c03c5b1cSMartin Matuska FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
367*c03c5b1cSMartin Matuska seqDef const* sequences, size_t nbSeq, int longOffsets)
368*c03c5b1cSMartin Matuska {
369*c03c5b1cSMartin Matuska return ZSTD_encodeSequences_body(dst, dstCapacity,
370*c03c5b1cSMartin Matuska CTable_MatchLength, mlCodeTable,
371*c03c5b1cSMartin Matuska CTable_OffsetBits, ofCodeTable,
372*c03c5b1cSMartin Matuska CTable_LitLength, llCodeTable,
373*c03c5b1cSMartin Matuska sequences, nbSeq, longOffsets);
374*c03c5b1cSMartin Matuska }
375*c03c5b1cSMartin Matuska
376*c03c5b1cSMartin Matuska
377*c03c5b1cSMartin Matuska #if DYNAMIC_BMI2
378*c03c5b1cSMartin Matuska
379*c03c5b1cSMartin Matuska static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)380*c03c5b1cSMartin Matuska ZSTD_encodeSequences_bmi2(
381*c03c5b1cSMartin Matuska void* dst, size_t dstCapacity,
382*c03c5b1cSMartin Matuska FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
383*c03c5b1cSMartin Matuska FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
384*c03c5b1cSMartin Matuska FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
385*c03c5b1cSMartin Matuska seqDef const* sequences, size_t nbSeq, int longOffsets)
386*c03c5b1cSMartin Matuska {
387*c03c5b1cSMartin Matuska return ZSTD_encodeSequences_body(dst, dstCapacity,
388*c03c5b1cSMartin Matuska CTable_MatchLength, mlCodeTable,
389*c03c5b1cSMartin Matuska CTable_OffsetBits, ofCodeTable,
390*c03c5b1cSMartin Matuska CTable_LitLength, llCodeTable,
391*c03c5b1cSMartin Matuska sequences, nbSeq, longOffsets);
392*c03c5b1cSMartin Matuska }
393*c03c5b1cSMartin Matuska
394*c03c5b1cSMartin Matuska #endif
395*c03c5b1cSMartin Matuska
ZSTD_encodeSequences(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets,int bmi2)396*c03c5b1cSMartin Matuska size_t ZSTD_encodeSequences(
397*c03c5b1cSMartin Matuska void* dst, size_t dstCapacity,
398*c03c5b1cSMartin Matuska FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
399*c03c5b1cSMartin Matuska FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
400*c03c5b1cSMartin Matuska FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
401*c03c5b1cSMartin Matuska seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
402*c03c5b1cSMartin Matuska {
403*c03c5b1cSMartin Matuska DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
404*c03c5b1cSMartin Matuska #if DYNAMIC_BMI2
405*c03c5b1cSMartin Matuska if (bmi2) {
406*c03c5b1cSMartin Matuska return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
407*c03c5b1cSMartin Matuska CTable_MatchLength, mlCodeTable,
408*c03c5b1cSMartin Matuska CTable_OffsetBits, ofCodeTable,
409*c03c5b1cSMartin Matuska CTable_LitLength, llCodeTable,
410*c03c5b1cSMartin Matuska sequences, nbSeq, longOffsets);
411*c03c5b1cSMartin Matuska }
412*c03c5b1cSMartin Matuska #endif
413*c03c5b1cSMartin Matuska (void)bmi2;
414*c03c5b1cSMartin Matuska return ZSTD_encodeSequences_default(dst, dstCapacity,
415*c03c5b1cSMartin Matuska CTable_MatchLength, mlCodeTable,
416*c03c5b1cSMartin Matuska CTable_OffsetBits, ofCodeTable,
417*c03c5b1cSMartin Matuska CTable_LitLength, llCodeTable,
418*c03c5b1cSMartin Matuska sequences, nbSeq, longOffsets);
419*c03c5b1cSMartin Matuska }
420