10c16b537SWarner Losh /* ******************************************************************
237f1f268SConrad Meyer * huff0 huffman decoder,
337f1f268SConrad Meyer * part of Finite State Entropy library
4*5ff13fbcSAllan Jude * Copyright (c) Yann Collet, Facebook, Inc.
537f1f268SConrad Meyer *
637f1f268SConrad Meyer * You can contact the author at :
737f1f268SConrad Meyer * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
837f1f268SConrad Meyer *
937f1f268SConrad Meyer * This source code is licensed under both the BSD-style license (found in the
1037f1f268SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1137f1f268SConrad Meyer * in the COPYING file in the root directory of this source tree).
1237f1f268SConrad Meyer * You may select, at your option, one of the above-listed licenses.
130c16b537SWarner Losh ****************************************************************** */
140c16b537SWarner Losh
150c16b537SWarner Losh /* **************************************************************
160c16b537SWarner Losh * Dependencies
170c16b537SWarner Losh ****************************************************************/
18f7cd7fe5SConrad Meyer #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
1937f1f268SConrad Meyer #include "../common/compiler.h"
2037f1f268SConrad Meyer #include "../common/bitstream.h" /* BIT_* */
2137f1f268SConrad Meyer #include "../common/fse.h" /* to compress headers */
220c16b537SWarner Losh #define HUF_STATIC_LINKING_ONLY
2337f1f268SConrad Meyer #include "../common/huf.h"
2437f1f268SConrad Meyer #include "../common/error_private.h"
25*5ff13fbcSAllan Jude #include "../common/zstd_internal.h"
26*5ff13fbcSAllan Jude
/* **************************************************************
*  Constants
****************************************************************/

/* Table log targeted when building a single-symbol decoding table:
 * HUF_readDTableX1_wksp_bmi2() rescales up to MIN(maxTableLog, this value),
 * and HUF_DecompressAsmArgs_init() requests the fallback decoder whenever the
 * table log differs from this value. */
#define HUF_DECODER_FAST_TABLELOG 11

/* **************************************************************
*  Macros
****************************************************************/

/* These two optional macros force the use of one or the other of the two
 * Huffman decompression implementations. You can't force both directions
 * at the same time.
 */
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif

/* Expands to BMI2_TARGET_ATTRIBUTE only when both the x86-64 BMI2 assembly
 * and dynamic BMI2 dispatch are enabled; expands to nothing otherwise. */
#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
#else
# define HUF_ASM_X86_64_BMI2_ATTRS
#endif

/* Linkage specifier: `extern "C"` when compiled as C++, empty in C. */
#ifdef __cplusplus
# define HUF_EXTERN_C extern "C"
#else
# define HUF_EXTERN_C
#endif
#define HUF_ASM_DECL HUF_EXTERN_C

/* 1 when dynamic BMI2 dispatch is enabled, or when the BMI2 assembly is
 * enabled and the compiler already targets BMI2 (__BMI2__); 0 otherwise. */
#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
# define HUF_NEED_BMI2_FUNCTION 1
#else
# define HUF_NEED_BMI2_FUNCTION 0
#endif

/* 0 only when the BMI2 assembly is enabled and the compiler already targets
 * BMI2 (__BMI2__); 1 in every other configuration. */
#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
# define HUF_NEED_DEFAULT_FUNCTION 1
#else
# define HUF_NEED_DEFAULT_FUNCTION 0
#endif

/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError


/* **************************************************************
*  Byte alignment for workSpace management
****************************************************************/
/* HUF_ALIGN(x, a) rounds x up to a multiple of a.
 * The mask arithmetic is only correct when a is a power of 2. */
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
8319fcbaf1SConrad Meyer
/* **************************************************************
*  BMI2 Variant Wrappers
****************************************************************/
/* HUF_DGEN(fn) :
 * Given an existing `fn##_body(dst, dstSize, cSrc, cSrcSize, DTable)`,
 * generates `static size_t fn(dst, dstSize, cSrc, cSrcSize, DTable, bmi2)`.
 *
 * With DYNAMIC_BMI2, two extra wrappers around the same body are generated:
 * `fn##_default` and `fn##_bmi2` (the latter carries BMI2_TARGET_ATTRIBUTE),
 * and `fn` dispatches to `fn##_bmi2` when `bmi2` is non-zero.
 * Without DYNAMIC_BMI2, `fn` ignores `bmi2` and calls the body directly.
 */
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2(                          \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif
127a0483764SConrad Meyer
128a0483764SConrad Meyer
1290c16b537SWarner Losh /*-***************************/
1300c16b537SWarner Losh /* generic DTableDesc */
1310c16b537SWarner Losh /*-***************************/
1320c16b537SWarner Losh typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
1330c16b537SWarner Losh
HUF_getDTableDesc(const HUF_DTable * table)1340c16b537SWarner Losh static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
1350c16b537SWarner Losh {
1360c16b537SWarner Losh DTableDesc dtd;
137f7cd7fe5SConrad Meyer ZSTD_memcpy(&dtd, table, sizeof(dtd));
1380c16b537SWarner Losh return dtd;
1390c16b537SWarner Losh }
1400c16b537SWarner Losh
141*5ff13fbcSAllan Jude #if ZSTD_ENABLE_ASM_X86_64_BMI2
142*5ff13fbcSAllan Jude
/* HUF_initDStream() :
 * Builds the initial bit container of one stream from the bytes at `ip`.
 * `ip[7]` is treated as the last byte of the stream, so at least 8 bytes must
 * be readable at `ip` (HUF_DecompressAsmArgs_init() falls back to the regular
 * decoder for shorter streams).
 *
 * The lowest bit of the loaded word is forced to 1, then the word is shifted
 * left by (8 - highbit(lastByte)) bits, or by 0 when the last byte is zero.
 * @return the shifted bit container.
 */
static size_t HUF_initDStream(BYTE const* ip) {
    BYTE const lastByte = ip[7];
    size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
    /* The low 1 bit is the marker that lets trailing zeros count consumed bits. */
    size_t const value = MEM_readLEST(ip) | 1;
    assert(bitsConsumed <= 8);
    return value << bitsConsumed;
}
150*5ff13fbcSAllan Jude typedef struct {
151*5ff13fbcSAllan Jude BYTE const* ip[4];
152*5ff13fbcSAllan Jude BYTE* op[4];
153*5ff13fbcSAllan Jude U64 bits[4];
154*5ff13fbcSAllan Jude void const* dt;
155*5ff13fbcSAllan Jude BYTE const* ilimit;
156*5ff13fbcSAllan Jude BYTE* oend;
157*5ff13fbcSAllan Jude BYTE const* iend[4];
158*5ff13fbcSAllan Jude } HUF_DecompressAsmArgs;
159*5ff13fbcSAllan Jude
160*5ff13fbcSAllan Jude /**
161*5ff13fbcSAllan Jude * Initializes args for the asm decoding loop.
162*5ff13fbcSAllan Jude * @returns 0 on success
163*5ff13fbcSAllan Jude * 1 if the fallback implementation should be used.
164*5ff13fbcSAllan Jude * Or an error code on failure.
165*5ff13fbcSAllan Jude */
HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs * args,void * dst,size_t dstSize,void const * src,size_t srcSize,const HUF_DTable * DTable)166*5ff13fbcSAllan Jude static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
167*5ff13fbcSAllan Jude {
168*5ff13fbcSAllan Jude void const* dt = DTable + 1;
169*5ff13fbcSAllan Jude U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
170*5ff13fbcSAllan Jude
171*5ff13fbcSAllan Jude const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
172*5ff13fbcSAllan Jude
173*5ff13fbcSAllan Jude BYTE* const oend = (BYTE*)dst + dstSize;
174*5ff13fbcSAllan Jude
175*5ff13fbcSAllan Jude /* The following condition is false on x32 platform,
176*5ff13fbcSAllan Jude * but HUF_asm is not compatible with this ABI */
177*5ff13fbcSAllan Jude if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
178*5ff13fbcSAllan Jude
179*5ff13fbcSAllan Jude /* strict minimum : jump table + 1 byte per stream */
180*5ff13fbcSAllan Jude if (srcSize < 10)
181*5ff13fbcSAllan Jude return ERROR(corruption_detected);
182*5ff13fbcSAllan Jude
183*5ff13fbcSAllan Jude /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
184*5ff13fbcSAllan Jude * If table log is not correct at this point, fallback to the old decoder.
185*5ff13fbcSAllan Jude * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
186*5ff13fbcSAllan Jude */
187*5ff13fbcSAllan Jude if (dtLog != HUF_DECODER_FAST_TABLELOG)
188*5ff13fbcSAllan Jude return 1;
189*5ff13fbcSAllan Jude
190*5ff13fbcSAllan Jude /* Read the jump table. */
191*5ff13fbcSAllan Jude {
192*5ff13fbcSAllan Jude const BYTE* const istart = (const BYTE*)src;
193*5ff13fbcSAllan Jude size_t const length1 = MEM_readLE16(istart);
194*5ff13fbcSAllan Jude size_t const length2 = MEM_readLE16(istart+2);
195*5ff13fbcSAllan Jude size_t const length3 = MEM_readLE16(istart+4);
196*5ff13fbcSAllan Jude size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
197*5ff13fbcSAllan Jude args->iend[0] = istart + 6; /* jumpTable */
198*5ff13fbcSAllan Jude args->iend[1] = args->iend[0] + length1;
199*5ff13fbcSAllan Jude args->iend[2] = args->iend[1] + length2;
200*5ff13fbcSAllan Jude args->iend[3] = args->iend[2] + length3;
201*5ff13fbcSAllan Jude
202*5ff13fbcSAllan Jude /* HUF_initDStream() requires this, and this small of an input
203*5ff13fbcSAllan Jude * won't benefit from the ASM loop anyways.
204*5ff13fbcSAllan Jude * length1 must be >= 16 so that ip[0] >= ilimit before the loop
205*5ff13fbcSAllan Jude * starts.
206*5ff13fbcSAllan Jude */
207*5ff13fbcSAllan Jude if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
208*5ff13fbcSAllan Jude return 1;
209*5ff13fbcSAllan Jude if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
210*5ff13fbcSAllan Jude }
211*5ff13fbcSAllan Jude /* ip[] contains the position that is currently loaded into bits[]. */
212*5ff13fbcSAllan Jude args->ip[0] = args->iend[1] - sizeof(U64);
213*5ff13fbcSAllan Jude args->ip[1] = args->iend[2] - sizeof(U64);
214*5ff13fbcSAllan Jude args->ip[2] = args->iend[3] - sizeof(U64);
215*5ff13fbcSAllan Jude args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
216*5ff13fbcSAllan Jude
217*5ff13fbcSAllan Jude /* op[] contains the output pointers. */
218*5ff13fbcSAllan Jude args->op[0] = (BYTE*)dst;
219*5ff13fbcSAllan Jude args->op[1] = args->op[0] + (dstSize+3)/4;
220*5ff13fbcSAllan Jude args->op[2] = args->op[1] + (dstSize+3)/4;
221*5ff13fbcSAllan Jude args->op[3] = args->op[2] + (dstSize+3)/4;
222*5ff13fbcSAllan Jude
223*5ff13fbcSAllan Jude /* No point to call the ASM loop for tiny outputs. */
224*5ff13fbcSAllan Jude if (args->op[3] >= oend)
225*5ff13fbcSAllan Jude return 1;
226*5ff13fbcSAllan Jude
227*5ff13fbcSAllan Jude /* bits[] is the bit container.
228*5ff13fbcSAllan Jude * It is read from the MSB down to the LSB.
229*5ff13fbcSAllan Jude * It is shifted left as it is read, and zeros are
230*5ff13fbcSAllan Jude * shifted in. After the lowest valid bit a 1 is
231*5ff13fbcSAllan Jude * set, so that CountTrailingZeros(bits[]) can be used
232*5ff13fbcSAllan Jude * to count how many bits we've consumed.
233*5ff13fbcSAllan Jude */
234*5ff13fbcSAllan Jude args->bits[0] = HUF_initDStream(args->ip[0]);
235*5ff13fbcSAllan Jude args->bits[1] = HUF_initDStream(args->ip[1]);
236*5ff13fbcSAllan Jude args->bits[2] = HUF_initDStream(args->ip[2]);
237*5ff13fbcSAllan Jude args->bits[3] = HUF_initDStream(args->ip[3]);
238*5ff13fbcSAllan Jude
239*5ff13fbcSAllan Jude /* If ip[] >= ilimit, it is guaranteed to be safe to
240*5ff13fbcSAllan Jude * reload bits[]. It may be beyond its section, but is
241*5ff13fbcSAllan Jude * guaranteed to be valid (>= istart).
242*5ff13fbcSAllan Jude */
243*5ff13fbcSAllan Jude args->ilimit = ilimit;
244*5ff13fbcSAllan Jude
245*5ff13fbcSAllan Jude args->oend = oend;
246*5ff13fbcSAllan Jude args->dt = dt;
247*5ff13fbcSAllan Jude
248*5ff13fbcSAllan Jude return 0;
249*5ff13fbcSAllan Jude }
250*5ff13fbcSAllan Jude
/* HUF_initRemainingDStream() :
 * Converts the state of one stream of `args` into a BIT_DStream_t, after
 * validating that the stream stayed within its bounds.
 *
 * @param bit         output bitstream state.
 * @param args        decoding-loop state to read from (not modified).
 * @param stream      index of the stream to convert; indexes ip[], op[],
 *                    bits[] and iend[].
 * @param segmentEnd  end of the output segment assigned to this stream.
 * @return 0 on success, or ERROR(corruption_detected) when the output pointer
 *         is past segmentEnd or the input pointer is below iend[stream] - 8.
 */
static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
{
    /* Validate that we haven't overwritten. */
    if (args->op[stream] > segmentEnd)
        return ERROR(corruption_detected);
    /* Validate that we haven't read beyond iend[].
     * Note that ip[] may be < iend[] because the MSB is
     * the next bit to read, and we may have consumed 100%
     * of the stream, so down to iend[i] - 8 is valid.
     */
    if (args->ip[stream] < args->iend[stream] - 8)
        return ERROR(corruption_detected);

    /* Construct the BIT_DStream_t. */
    bit->bitContainer = MEM_readLE64(args->ip[stream]);
    /* The marker bit set by HUF_initDStream() makes the number of trailing
     * zeros equal to the number of bits already consumed. */
    bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
    /* start is the beginning of the first stream, whichever stream is converted. */
    bit->start = (const char*)args->iend[0];
    bit->limitPtr = bit->start + sizeof(size_t);
    bit->ptr = (const char*)args->ip[stream];

    return 0;
}
273*5ff13fbcSAllan Jude #endif
274*5ff13fbcSAllan Jude
2750c16b537SWarner Losh
276a0483764SConrad Meyer #ifndef HUF_FORCE_DECOMPRESS_X2
277a0483764SConrad Meyer
2780c16b537SWarner Losh /*-***************************/
2790c16b537SWarner Losh /* single-symbol decoding */
2800c16b537SWarner Losh /*-***************************/
281*5ff13fbcSAllan Jude typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
2820c16b537SWarner Losh
283f7cd7fe5SConrad Meyer /**
284f7cd7fe5SConrad Meyer * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
285f7cd7fe5SConrad Meyer * a time.
286f7cd7fe5SConrad Meyer */
HUF_DEltX1_set4(BYTE symbol,BYTE nbBits)287f7cd7fe5SConrad Meyer static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
288f7cd7fe5SConrad Meyer U64 D4;
289f7cd7fe5SConrad Meyer if (MEM_isLittleEndian()) {
290f7cd7fe5SConrad Meyer D4 = (symbol << 8) + nbBits;
291*5ff13fbcSAllan Jude } else {
292*5ff13fbcSAllan Jude D4 = symbol + (nbBits << 8);
293f7cd7fe5SConrad Meyer }
294f7cd7fe5SConrad Meyer D4 *= 0x0001000100010001ULL;
295f7cd7fe5SConrad Meyer return D4;
296f7cd7fe5SConrad Meyer }
297f7cd7fe5SConrad Meyer
298*5ff13fbcSAllan Jude /**
299*5ff13fbcSAllan Jude * Increase the tableLog to targetTableLog and rescales the stats.
300*5ff13fbcSAllan Jude * If tableLog > targetTableLog this is a no-op.
301*5ff13fbcSAllan Jude * @returns New tableLog
302*5ff13fbcSAllan Jude */
HUF_rescaleStats(BYTE * huffWeight,U32 * rankVal,U32 nbSymbols,U32 tableLog,U32 targetTableLog)303*5ff13fbcSAllan Jude static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
304*5ff13fbcSAllan Jude {
305*5ff13fbcSAllan Jude if (tableLog > targetTableLog)
306*5ff13fbcSAllan Jude return tableLog;
307*5ff13fbcSAllan Jude if (tableLog < targetTableLog) {
308*5ff13fbcSAllan Jude U32 const scale = targetTableLog - tableLog;
309*5ff13fbcSAllan Jude U32 s;
310*5ff13fbcSAllan Jude /* Increase the weight for all non-zero probability symbols by scale. */
311*5ff13fbcSAllan Jude for (s = 0; s < nbSymbols; ++s) {
312*5ff13fbcSAllan Jude huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
313*5ff13fbcSAllan Jude }
314*5ff13fbcSAllan Jude /* Update rankVal to reflect the new weights.
315*5ff13fbcSAllan Jude * All weights except 0 get moved to weight + scale.
316*5ff13fbcSAllan Jude * Weights [1, scale] are empty.
317*5ff13fbcSAllan Jude */
318*5ff13fbcSAllan Jude for (s = targetTableLog; s > scale; --s) {
319*5ff13fbcSAllan Jude rankVal[s] = rankVal[s - scale];
320*5ff13fbcSAllan Jude }
321*5ff13fbcSAllan Jude for (s = scale; s > 0; --s) {
322*5ff13fbcSAllan Jude rankVal[s] = 0;
323*5ff13fbcSAllan Jude }
324*5ff13fbcSAllan Jude }
325*5ff13fbcSAllan Jude return targetTableLog;
326*5ff13fbcSAllan Jude }
327*5ff13fbcSAllan Jude
328f7cd7fe5SConrad Meyer typedef struct {
329f7cd7fe5SConrad Meyer U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
330f7cd7fe5SConrad Meyer U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
331f7cd7fe5SConrad Meyer U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
332f7cd7fe5SConrad Meyer BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
333f7cd7fe5SConrad Meyer BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
334f7cd7fe5SConrad Meyer } HUF_ReadDTableX1_Workspace;
335f7cd7fe5SConrad Meyer
336f7cd7fe5SConrad Meyer
/* HUF_readDTableX1_wksp() :
 * Same as HUF_readDTableX1_wksp_bmi2(), with the bmi2 flag set to 0.
 * @return whatever HUF_readDTableX1_wksp_bmi2() returns.
 */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
{
    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
341f7cd7fe5SConrad Meyer
/* HUF_readDTableX1_wksp_bmi2() :
 * Reads the Huffman statistics stored at `src` and builds the single-symbol
 * decoding table into `DTable`.
 *
 * @param DTable     table to fill; its descriptor's maxTableLog must already
 *                   be set, since it bounds the table log that can be built.
 * @param src        serialized statistics, parsed by HUF_readStats_wksp().
 * @param srcSize    size of src in bytes.
 * @param workSpace  scratch memory, used as a HUF_ReadDTableX1_Workspace.
 * @param wkspSize   size of workSpace in bytes.
 * @param bmi2       forwarded to HUF_readStats_wksp().
 * @return the value returned by HUF_readStats_wksp() on success,
 *         or an error code, testable with HUF_isError().
 */
size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    size_t iSize;
    void* const dtPtr = DTable + 1;
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;

    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
    /* A too-small workspace is reported with the tableLog_tooLarge code. */
    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
    if (HUF_isError(iSize)) return iSize;


    /* Table header */
    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
        U32 const maxTableLog = dtd.maxTableLog + 1;
        U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
        /* Grow the table log up to the target when the stats describe a smaller one. */
        tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
        if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableType = 0;
        dtd.tableLog = (BYTE)tableLog;
        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
    }

    /* Compute symbols and rankStart given rankVal:
     *
     * rankVal already contains the number of values of each weight.
     *
     * symbols contains the symbols ordered by weight. First are the rankVal[0]
     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
     * symbols[0] is filled (but unused) to avoid a branch.
     *
     * rankStart contains the offset where each rank belongs in the DTable.
     * rankStart[0] is not filled because there are no entries in the table for
     * weight 0.
     */
    {
        int n;
        int nextRankStart = 0;
        int const unroll = 4;
        int const nLimit = (int)nbSymbols - unroll + 1;
        for (n=0; n<(int)tableLog+1; n++) {
            U32 const curr = nextRankStart;
            nextRankStart += wksp->rankVal[n];
            wksp->rankStart[n] = curr;
        }
        /* Main pass, 4 symbols per iteration. */
        for (n=0; n < nLimit; n += unroll) {
            int u;
            for (u=0; u < unroll; ++u) {
                size_t const w = wksp->huffWeight[n+u];
                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
            }
        }
        /* Remaining symbols, one at a time. */
        for (; n < (int)nbSymbols; ++n) {
            size_t const w = wksp->huffWeight[n];
            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
        }
    }

    /* fill DTable
     * We fill all entries of each weight in order.
     * That way length is a constant for each iteration of the outer loop.
     * We can switch based on the length to a different inner loop which is
     * optimized for that particular case.
     */
    {
        U32 w;
        int symbol=wksp->rankVal[0];   /* skip the weight 0 symbols */
        int rankStart=0;
        for (w=1; w<tableLog+1; ++w) {
            int const symbolCount = wksp->rankVal[w];
            int const length = (1 << w) >> 1;   /* number of cells per symbol of weight w */
            int uStart = rankStart;
            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
            int s;
            int u;
            switch (length) {
            case 1:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart] = D;
                    uStart += 1;
                }
                break;
            case 2:
                for (s=0; s<symbolCount; ++s) {
                    HUF_DEltX1 D;
                    D.byte = wksp->symbols[symbol + s];
                    D.nbBits = nbBits;
                    dt[uStart+0] = D;
                    dt[uStart+1] = D;
                    uStart += 2;
                }
                break;
            case 4:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    uStart += 4;
                }
                break;
            case 8:
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    MEM_write64(dt + uStart, D4);
                    MEM_write64(dt + uStart + 4, D4);
                    uStart += 8;
                }
                break;
            default:
                /* length is a power of 2 >= 16 here : write 16 cells per step. */
                for (s=0; s<symbolCount; ++s) {
                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
                    for (u=0; u < length; u += 16) {
                        MEM_write64(dt + uStart + u + 0, D4);
                        MEM_write64(dt + uStart + u + 4, D4);
                        MEM_write64(dt + uStart + u + 8, D4);
                        MEM_write64(dt + uStart + u + 12, D4);
                    }
                    assert(u == length);
                    uStart += length;
                }
                break;
            }
            symbol += symbolCount;
            rankStart += symbolCount * length;
        }
    }
    return iSize;
}
4790c16b537SWarner Losh
48019fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE BYTE
HUF_decodeSymbolX1(BIT_DStream_t * Dstream,const HUF_DEltX1 * dt,const U32 dtLog)4810f743729SConrad Meyer HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
4820c16b537SWarner Losh {
4830c16b537SWarner Losh size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
4840c16b537SWarner Losh BYTE const c = dt[val].byte;
4850c16b537SWarner Losh BIT_skipBits(Dstream, dt[val].nbBits);
4860c16b537SWarner Losh return c;
4870c16b537SWarner Losh }
4880c16b537SWarner Losh
/* Symbol decoding macros for the single-symbol decoders.
 * All of them use `dt` and `dtLog` from the scope where they are expanded.
 *  _0 : always decodes one symbol into *ptr, then advances ptr.
 *  _1 : decodes only if MEM_64bits() or HUF_TABLELOG_MAX <= 12.
 *  _2 : decodes only if MEM_64bits().
 * NOTE : _1 and _2 expand to an unbraced `if`. Use them only as complete
 * statements, never as the body of an `if` that is followed by an `else`.
 */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
    if (MEM_64bits()) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
4990c16b537SWarner Losh
50019fcbaf1SConrad Meyer HINT_INLINE size_t
HUF_decodeStreamX1(BYTE * p,BIT_DStream_t * const bitDPtr,BYTE * const pEnd,const HUF_DEltX1 * const dt,const U32 dtLog)5010f743729SConrad Meyer HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
5020c16b537SWarner Losh {
5030c16b537SWarner Losh BYTE* const pStart = p;
5040c16b537SWarner Losh
5050c16b537SWarner Losh /* up to 4 symbols at a time */
506*5ff13fbcSAllan Jude if ((pEnd - p) > 3) {
50719fcbaf1SConrad Meyer while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
5080f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
5090f743729SConrad Meyer HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
5100f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
5110f743729SConrad Meyer HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
5120c16b537SWarner Losh }
513*5ff13fbcSAllan Jude } else {
514*5ff13fbcSAllan Jude BIT_reloadDStream(bitDPtr);
515*5ff13fbcSAllan Jude }
5160c16b537SWarner Losh
51719fcbaf1SConrad Meyer /* [0-3] symbols remaining */
51819fcbaf1SConrad Meyer if (MEM_32bits())
51919fcbaf1SConrad Meyer while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
5200f743729SConrad Meyer HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
5210c16b537SWarner Losh
52219fcbaf1SConrad Meyer /* no more data to retrieve from bitstream, no need to reload */
5230c16b537SWarner Losh while (p < pEnd)
5240f743729SConrad Meyer HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
5250c16b537SWarner Losh
5260c16b537SWarner Losh return pEnd-pStart;
5270c16b537SWarner Losh }
5280c16b537SWarner Losh
52919fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)5300f743729SConrad Meyer HUF_decompress1X1_usingDTable_internal_body(
5310c16b537SWarner Losh void* dst, size_t dstSize,
5320c16b537SWarner Losh const void* cSrc, size_t cSrcSize,
5330c16b537SWarner Losh const HUF_DTable* DTable)
5340c16b537SWarner Losh {
5350c16b537SWarner Losh BYTE* op = (BYTE*)dst;
5360c16b537SWarner Losh BYTE* const oend = op + dstSize;
5370c16b537SWarner Losh const void* dtPtr = DTable + 1;
5380f743729SConrad Meyer const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
5390c16b537SWarner Losh BIT_DStream_t bitD;
5400c16b537SWarner Losh DTableDesc const dtd = HUF_getDTableDesc(DTable);
5410c16b537SWarner Losh U32 const dtLog = dtd.tableLog;
5420c16b537SWarner Losh
54319fcbaf1SConrad Meyer CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
5440c16b537SWarner Losh
5450f743729SConrad Meyer HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
5460c16b537SWarner Losh
5470c16b537SWarner Losh if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
5480c16b537SWarner Losh
5490c16b537SWarner Losh return dstSize;
5500c16b537SWarner Losh }
5510c16b537SWarner Losh
55219fcbaf1SConrad Meyer FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)5530f743729SConrad Meyer HUF_decompress4X1_usingDTable_internal_body(
5540c16b537SWarner Losh void* dst, size_t dstSize,
5550c16b537SWarner Losh const void* cSrc, size_t cSrcSize,
5560c16b537SWarner Losh const HUF_DTable* DTable)
5570c16b537SWarner Losh {
5580c16b537SWarner Losh /* Check */
5590c16b537SWarner Losh if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
5600c16b537SWarner Losh
5610c16b537SWarner Losh { const BYTE* const istart = (const BYTE*) cSrc;
5620c16b537SWarner Losh BYTE* const ostart = (BYTE*) dst;
5630c16b537SWarner Losh BYTE* const oend = ostart + dstSize;
56437f1f268SConrad Meyer BYTE* const olimit = oend - 3;
5650c16b537SWarner Losh const void* const dtPtr = DTable + 1;
5660f743729SConrad Meyer const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
5670c16b537SWarner Losh
5680c16b537SWarner Losh /* Init */
5690c16b537SWarner Losh BIT_DStream_t bitD1;
5700c16b537SWarner Losh BIT_DStream_t bitD2;
5710c16b537SWarner Losh BIT_DStream_t bitD3;
5720c16b537SWarner Losh BIT_DStream_t bitD4;
5730c16b537SWarner Losh size_t const length1 = MEM_readLE16(istart);
5740c16b537SWarner Losh size_t const length2 = MEM_readLE16(istart+2);
5750c16b537SWarner Losh size_t const length3 = MEM_readLE16(istart+4);
5760c16b537SWarner Losh size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
5770c16b537SWarner Losh const BYTE* const istart1 = istart + 6; /* jumpTable */
5780c16b537SWarner Losh const BYTE* const istart2 = istart1 + length1;
5790c16b537SWarner Losh const BYTE* const istart3 = istart2 + length2;
5800c16b537SWarner Losh const BYTE* const istart4 = istart3 + length3;
5810c16b537SWarner Losh const size_t segmentSize = (dstSize+3) / 4;
5820c16b537SWarner Losh BYTE* const opStart2 = ostart + segmentSize;
5830c16b537SWarner Losh BYTE* const opStart3 = opStart2 + segmentSize;
5840c16b537SWarner Losh BYTE* const opStart4 = opStart3 + segmentSize;
5850c16b537SWarner Losh BYTE* op1 = ostart;
5860c16b537SWarner Losh BYTE* op2 = opStart2;
5870c16b537SWarner Losh BYTE* op3 = opStart3;
5880c16b537SWarner Losh BYTE* op4 = opStart4;
5890c16b537SWarner Losh DTableDesc const dtd = HUF_getDTableDesc(DTable);
5900c16b537SWarner Losh U32 const dtLog = dtd.tableLog;
59137f1f268SConrad Meyer U32 endSignal = 1;
5920c16b537SWarner Losh
5930c16b537SWarner Losh if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
594*5ff13fbcSAllan Jude if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
59519fcbaf1SConrad Meyer CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
59619fcbaf1SConrad Meyer CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
59719fcbaf1SConrad Meyer CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
59819fcbaf1SConrad Meyer CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
5990c16b537SWarner Losh
60019fcbaf1SConrad Meyer /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
601*5ff13fbcSAllan Jude if ((size_t)(oend - op4) >= sizeof(size_t)) {
60237f1f268SConrad Meyer for ( ; (endSignal) & (op4 < olimit) ; ) {
6030f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
6040f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
6050f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
6060f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
6070f743729SConrad Meyer HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
6080f743729SConrad Meyer HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
6090f743729SConrad Meyer HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
6100f743729SConrad Meyer HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
6110f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
6120f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
6130f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
6140f743729SConrad Meyer HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
6150f743729SConrad Meyer HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
6160f743729SConrad Meyer HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
6170f743729SConrad Meyer HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
6180f743729SConrad Meyer HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
61937f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
62037f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
62137f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
62237f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
6230c16b537SWarner Losh }
624*5ff13fbcSAllan Jude }
6250c16b537SWarner Losh
6260c16b537SWarner Losh /* check corruption */
62719fcbaf1SConrad Meyer /* note : should not be necessary : op# advance in lock step, and we control op4.
62819fcbaf1SConrad Meyer * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
6290c16b537SWarner Losh if (op1 > opStart2) return ERROR(corruption_detected);
6300c16b537SWarner Losh if (op2 > opStart3) return ERROR(corruption_detected);
6310c16b537SWarner Losh if (op3 > opStart4) return ERROR(corruption_detected);
6320c16b537SWarner Losh /* note : op4 supposed already verified within main loop */
6330c16b537SWarner Losh
6340c16b537SWarner Losh /* finish bitStreams one by one */
6350f743729SConrad Meyer HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
6360f743729SConrad Meyer HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
6370f743729SConrad Meyer HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
6380f743729SConrad Meyer HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
63919fcbaf1SConrad Meyer
64019fcbaf1SConrad Meyer /* check */
64119fcbaf1SConrad Meyer { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
64219fcbaf1SConrad Meyer if (!endCheck) return ERROR(corruption_detected); }
64319fcbaf1SConrad Meyer
64419fcbaf1SConrad Meyer /* decoded size */
64519fcbaf1SConrad Meyer return dstSize;
64619fcbaf1SConrad Meyer }
64719fcbaf1SConrad Meyer }
64819fcbaf1SConrad Meyer
#if HUF_NEED_BMI2_FUNCTION
/* Instantiation of the 4-streams X1 decoding body carrying BMI2_TARGET_ATTRIBUTE.
 * Arguments and return value are forwarded untouched. */
static BMI2_TARGET_ATTRIBUTE size_t
HUF_decompress4X1_usingDTable_internal_bmi2(
          void* dst,  size_t dstSize,
    void const* cSrc, size_t cSrcSize,
    HUF_DTable const* DTable)
{
    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif
656*5ff13fbcSAllan Jude
#if HUF_NEED_DEFAULT_FUNCTION
/* Plain instantiation (no target attribute) of the 4-streams X1 decoding body.
 * Arguments and return value are forwarded untouched. */
static size_t
HUF_decompress4X1_usingDTable_internal_default(
          void* dst,  size_t dstSize,
    void const* cSrc, size_t cSrcSize,
    HUF_DTable const* DTable)
{
    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif
664*5ff13fbcSAllan Jude
#if ZSTD_ENABLE_ASM_X86_64_BMI2

/* Decoding loop implemented outside this file; it works on `args` in place. */
HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;

/* HUF_decompress4X1_usingDTable_internal_bmi2_asm() :
 * 4-streams X1 decoding driven by the external asm loop.
 * Steps :
 *  1. HUF_DecompressAsmArgs_init() : an error is forwarded to the caller;
 *     any other non-zero result hands the whole job over to the C `_bmi2`
 *     variant.
 *  2. the asm loop runs over the 4 streams.
 *  3. whatever is left of each stream is decoded in C, and each segment
 *     must end exactly at its expected boundary.
 * @return : dstSize on success, or an error code. */
static HUF_ASM_X86_64_BMI2_ATTRS
size_t
HUF_decompress4X1_usingDTable_internal_bmi2_asm(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    void const* const tablePtr = DTable + 1;   /* entries start one cell after the header */
    HUF_DEltX1 const* const table = (HUF_DEltX1 const*)tablePtr;
    const BYTE* const iend = (const BYTE*)cSrc + 6;
    BYTE* const oend = (BYTE*)dst + dstSize;
    HUF_DecompressAsmArgs args;

    {   size_t const initStatus = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
        FORWARD_IF_ERROR(initStatus, "Failed to init asm args");
        if (initStatus != 0) {
            /* not an error, but the asm loop must not be used : fall back to C */
            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
        }
    }

    assert(args.ip[0] >= args.ilimit);
    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);

    /* Our loop guarantees that ip[] >= ilimit and that we haven't
     * overwritten any op[].
     */
    assert(args.ip[0] >= iend);
    assert(args.ip[1] >= iend);
    assert(args.ip[2] >= iend);
    assert(args.ip[3] >= iend);
    assert(args.op[3] <= oend);
    (void)iend;   /* only referenced by the asserts above */

    /* finish bit streams one by one. */
    {   size_t const segmentSize = (dstSize+3) / 4;
        BYTE* segEnd = (BYTE*)dst;
        int n;
        for (n = 0; n < 4; ++n) {
            BIT_DStream_t tail;
            /* advance by one segment, clamped to the end of the output buffer */
            segEnd = (segmentSize <= (size_t)(oend - segEnd)) ? segEnd + segmentSize : oend;
            FORWARD_IF_ERROR(HUF_initRemainingDStream(&tail, &args, n, segEnd), "corruption");
            /* Decompress and validate that we've produced exactly the expected length. */
            args.op[n] += HUF_decodeStreamX1(args.op[n], &tail, segEnd, table, HUF_DECODER_FAST_TABLELOG);
            if (args.op[n] != segEnd) return ERROR(corruption_detected);
        }
    }

    /* decoded size */
    return dstSize;
}
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
72219fcbaf1SConrad Meyer
72319fcbaf1SConrad Meyer typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
72419fcbaf1SConrad Meyer const void *cSrc,
72519fcbaf1SConrad Meyer size_t cSrcSize,
72619fcbaf1SConrad Meyer const HUF_DTable *DTable);
72719fcbaf1SConrad Meyer
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)7280f743729SConrad Meyer HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
72919fcbaf1SConrad Meyer
730*5ff13fbcSAllan Jude static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
731*5ff13fbcSAllan Jude size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
732*5ff13fbcSAllan Jude {
733*5ff13fbcSAllan Jude #if DYNAMIC_BMI2
734*5ff13fbcSAllan Jude if (bmi2) {
735*5ff13fbcSAllan Jude # if ZSTD_ENABLE_ASM_X86_64_BMI2
736*5ff13fbcSAllan Jude return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
737*5ff13fbcSAllan Jude # else
738*5ff13fbcSAllan Jude return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
739*5ff13fbcSAllan Jude # endif
740*5ff13fbcSAllan Jude }
741*5ff13fbcSAllan Jude #else
742*5ff13fbcSAllan Jude (void)bmi2;
743*5ff13fbcSAllan Jude #endif
744*5ff13fbcSAllan Jude
745*5ff13fbcSAllan Jude #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
746*5ff13fbcSAllan Jude return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
747*5ff13fbcSAllan Jude #else
748*5ff13fbcSAllan Jude return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
749*5ff13fbcSAllan Jude #endif
750*5ff13fbcSAllan Jude }
75119fcbaf1SConrad Meyer
7520f743729SConrad Meyer
HUF_decompress1X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)7530f743729SConrad Meyer size_t HUF_decompress1X1_usingDTable(
75419fcbaf1SConrad Meyer void* dst, size_t dstSize,
75519fcbaf1SConrad Meyer const void* cSrc, size_t cSrcSize,
75619fcbaf1SConrad Meyer const HUF_DTable* DTable)
75719fcbaf1SConrad Meyer {
75819fcbaf1SConrad Meyer DTableDesc dtd = HUF_getDTableDesc(DTable);
75919fcbaf1SConrad Meyer if (dtd.tableType != 0) return ERROR(GENERIC);
7600f743729SConrad Meyer return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
76119fcbaf1SConrad Meyer }
76219fcbaf1SConrad Meyer
/* HUF_decompress1X1_DCtx_wksp() :
 * Reads the table description at the start of `cSrc` into `DCtx`
 * (using `workSpace`), then decodes the remaining bytes as a single stream.
 * @return : result of the internal decoder (called with bmi2 == 0),
 *           the error reported by HUF_readDTableX1_wksp(),
 *           or srcSize_wrong when nothing is left after the table description. */
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);

    {   /* the payload starts right after the table description */
        const BYTE* const payload = (const BYTE*)cSrc + hSize;
        size_t const payloadSize = cSrcSize - hSize;
        return HUF_decompress1X1_usingDTable_internal(dst, dstSize, payload, payloadSize, DCtx, /* bmi2 */ 0);
    }
}
77619fcbaf1SConrad Meyer
77719fcbaf1SConrad Meyer
HUF_decompress4X1_usingDTable(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)7780f743729SConrad Meyer size_t HUF_decompress4X1_usingDTable(
7790c16b537SWarner Losh void* dst, size_t dstSize,
7800c16b537SWarner Losh const void* cSrc, size_t cSrcSize,
7810c16b537SWarner Losh const HUF_DTable* DTable)
7820c16b537SWarner Losh {
7830c16b537SWarner Losh DTableDesc dtd = HUF_getDTableDesc(DTable);
7840c16b537SWarner Losh if (dtd.tableType != 0) return ERROR(GENERIC);
7850f743729SConrad Meyer return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
7860c16b537SWarner Losh }
7870c16b537SWarner Losh
/* HUF_decompress4X1_DCtx_wksp_bmi2() :
 * Reads the table description at the start of `cSrc` into `dctx`
 * (using `workSpace`), then decodes the remaining bytes as 4 streams.
 * `bmi2` is forwarded to both the table reader and the decoder.
 * @return : result of the internal decoder,
 *           the error reported by HUF_readDTableX1_wksp_bmi2(),
 *           or srcSize_wrong when nothing is left after the table description. */
static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize, int bmi2)
{
    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
    if (HUF_isError(hSize)) return hSize;
    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);

    {   /* the payload starts right after the table description */
        const BYTE* const payload = (const BYTE*)cSrc + hSize;
        size_t const payloadSize = cSrcSize - hSize;
        return HUF_decompress4X1_usingDTable_internal(dst, dstSize, payload, payloadSize, dctx, bmi2);
    }
}
80119fcbaf1SConrad Meyer
/* HUF_decompress4X1_DCtx_wksp() :
 * Same as HUF_decompress4X1_DCtx_wksp_bmi2(), with the bmi2 flag fixed to 0. */
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize,
                                            cSrc, cSrcSize,
                                            workSpace, wkspSize, /* bmi2 */ 0);
}
8080c16b537SWarner Losh
8090c16b537SWarner Losh
810a0483764SConrad Meyer #endif /* HUF_FORCE_DECOMPRESS_X2 */
811a0483764SConrad Meyer
812a0483764SConrad Meyer
813a0483764SConrad Meyer #ifndef HUF_FORCE_DECOMPRESS_X1
8140c16b537SWarner Losh
/* *************************/
/* double-symbols decoding */
/* *************************/
8180c16b537SWarner Losh
8190f743729SConrad Meyer typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
820*5ff13fbcSAllan Jude typedef struct { BYTE symbol; } sortedSymbol_t;
8210f743729SConrad Meyer typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
8220f743729SConrad Meyer typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
8230f743729SConrad Meyer
824*5ff13fbcSAllan Jude /**
825*5ff13fbcSAllan Jude * Constructs a HUF_DEltX2 in a U32.
826*5ff13fbcSAllan Jude */
HUF_buildDEltX2U32(U32 symbol,U32 nbBits,U32 baseSeq,int level)827*5ff13fbcSAllan Jude static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
828*5ff13fbcSAllan Jude {
829*5ff13fbcSAllan Jude U32 seq;
830*5ff13fbcSAllan Jude DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
831*5ff13fbcSAllan Jude DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
832*5ff13fbcSAllan Jude DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
833*5ff13fbcSAllan Jude DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
834*5ff13fbcSAllan Jude if (MEM_isLittleEndian()) {
835*5ff13fbcSAllan Jude seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
836*5ff13fbcSAllan Jude return seq + (nbBits << 16) + ((U32)level << 24);
837*5ff13fbcSAllan Jude } else {
838*5ff13fbcSAllan Jude seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
839*5ff13fbcSAllan Jude return (seq << 16) + (nbBits << 8) + (U32)level;
840*5ff13fbcSAllan Jude }
841*5ff13fbcSAllan Jude }
842*5ff13fbcSAllan Jude
843*5ff13fbcSAllan Jude /**
844*5ff13fbcSAllan Jude * Constructs a HUF_DEltX2.
845*5ff13fbcSAllan Jude */
HUF_buildDEltX2(U32 symbol,U32 nbBits,U32 baseSeq,int level)846*5ff13fbcSAllan Jude static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
847*5ff13fbcSAllan Jude {
848*5ff13fbcSAllan Jude HUF_DEltX2 DElt;
849*5ff13fbcSAllan Jude U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
850*5ff13fbcSAllan Jude DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
851*5ff13fbcSAllan Jude ZSTD_memcpy(&DElt, &val, sizeof(val));
852*5ff13fbcSAllan Jude return DElt;
853*5ff13fbcSAllan Jude }
854*5ff13fbcSAllan Jude
855*5ff13fbcSAllan Jude /**
856*5ff13fbcSAllan Jude * Constructs 2 HUF_DEltX2s and packs them into a U64.
857*5ff13fbcSAllan Jude */
HUF_buildDEltX2U64(U32 symbol,U32 nbBits,U16 baseSeq,int level)858*5ff13fbcSAllan Jude static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
859*5ff13fbcSAllan Jude {
860*5ff13fbcSAllan Jude U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
861*5ff13fbcSAllan Jude return (U64)DElt + ((U64)DElt << 32);
862*5ff13fbcSAllan Jude }
863*5ff13fbcSAllan Jude
864*5ff13fbcSAllan Jude /**
865*5ff13fbcSAllan Jude * Fills the DTable rank with all the symbols from [begin, end) that are each
866*5ff13fbcSAllan Jude * nbBits long.
867*5ff13fbcSAllan Jude *
868*5ff13fbcSAllan Jude * @param DTableRank The start of the rank in the DTable.
869*5ff13fbcSAllan Jude * @param begin The first symbol to fill (inclusive).
870*5ff13fbcSAllan Jude * @param end The last symbol to fill (exclusive).
871*5ff13fbcSAllan Jude * @param nbBits Each symbol is nbBits long.
872*5ff13fbcSAllan Jude * @param tableLog The table log.
873*5ff13fbcSAllan Jude * @param baseSeq If level == 1 { 0 } else { the first level symbol }
874*5ff13fbcSAllan Jude * @param level The level in the table. Must be 1 or 2.
875*5ff13fbcSAllan Jude */
HUF_fillDTableX2ForWeight(HUF_DEltX2 * DTableRank,sortedSymbol_t const * begin,sortedSymbol_t const * end,U32 nbBits,U32 tableLog,U16 baseSeq,int const level)876*5ff13fbcSAllan Jude static void HUF_fillDTableX2ForWeight(
877*5ff13fbcSAllan Jude HUF_DEltX2* DTableRank,
878*5ff13fbcSAllan Jude sortedSymbol_t const* begin, sortedSymbol_t const* end,
879*5ff13fbcSAllan Jude U32 nbBits, U32 tableLog,
880*5ff13fbcSAllan Jude U16 baseSeq, int const level)
881*5ff13fbcSAllan Jude {
882*5ff13fbcSAllan Jude U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
883*5ff13fbcSAllan Jude const sortedSymbol_t* ptr;
884*5ff13fbcSAllan Jude assert(level >= 1 && level <= 2);
885*5ff13fbcSAllan Jude switch (length) {
886*5ff13fbcSAllan Jude case 1:
887*5ff13fbcSAllan Jude for (ptr = begin; ptr != end; ++ptr) {
888*5ff13fbcSAllan Jude HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
889*5ff13fbcSAllan Jude *DTableRank++ = DElt;
890*5ff13fbcSAllan Jude }
891*5ff13fbcSAllan Jude break;
892*5ff13fbcSAllan Jude case 2:
893*5ff13fbcSAllan Jude for (ptr = begin; ptr != end; ++ptr) {
894*5ff13fbcSAllan Jude HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
895*5ff13fbcSAllan Jude DTableRank[0] = DElt;
896*5ff13fbcSAllan Jude DTableRank[1] = DElt;
897*5ff13fbcSAllan Jude DTableRank += 2;
898*5ff13fbcSAllan Jude }
899*5ff13fbcSAllan Jude break;
900*5ff13fbcSAllan Jude case 4:
901*5ff13fbcSAllan Jude for (ptr = begin; ptr != end; ++ptr) {
902*5ff13fbcSAllan Jude U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
903*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
904*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
905*5ff13fbcSAllan Jude DTableRank += 4;
906*5ff13fbcSAllan Jude }
907*5ff13fbcSAllan Jude break;
908*5ff13fbcSAllan Jude case 8:
909*5ff13fbcSAllan Jude for (ptr = begin; ptr != end; ++ptr) {
910*5ff13fbcSAllan Jude U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
911*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
912*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
913*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
914*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
915*5ff13fbcSAllan Jude DTableRank += 8;
916*5ff13fbcSAllan Jude }
917*5ff13fbcSAllan Jude break;
918*5ff13fbcSAllan Jude default:
919*5ff13fbcSAllan Jude for (ptr = begin; ptr != end; ++ptr) {
920*5ff13fbcSAllan Jude U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
921*5ff13fbcSAllan Jude HUF_DEltX2* const DTableRankEnd = DTableRank + length;
922*5ff13fbcSAllan Jude for (; DTableRank != DTableRankEnd; DTableRank += 8) {
923*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
924*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
925*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
926*5ff13fbcSAllan Jude ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
927*5ff13fbcSAllan Jude }
928*5ff13fbcSAllan Jude }
929*5ff13fbcSAllan Jude break;
930*5ff13fbcSAllan Jude }
931*5ff13fbcSAllan Jude }
9320f743729SConrad Meyer
9330f743729SConrad Meyer /* HUF_fillDTableX2Level2() :
9340c16b537SWarner Losh * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
HUF_fillDTableX2Level2(HUF_DEltX2 * DTable,U32 targetLog,const U32 consumedBits,const U32 * rankVal,const int minWeight,const int maxWeight1,const sortedSymbol_t * sortedSymbols,U32 const * rankStart,U32 nbBitsBaseline,U16 baseSeq)935*5ff13fbcSAllan Jude static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
936*5ff13fbcSAllan Jude const U32* rankVal, const int minWeight, const int maxWeight1,
937*5ff13fbcSAllan Jude const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
9380c16b537SWarner Losh U32 nbBitsBaseline, U16 baseSeq)
9390c16b537SWarner Losh {
940*5ff13fbcSAllan Jude /* Fill skipped values (all positions up to rankVal[minWeight]).
941*5ff13fbcSAllan Jude * These are positions only get a single symbol because the combined weight
942*5ff13fbcSAllan Jude * is too large.
943*5ff13fbcSAllan Jude */
9440c16b537SWarner Losh if (minWeight>1) {
945*5ff13fbcSAllan Jude U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
946*5ff13fbcSAllan Jude U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
947*5ff13fbcSAllan Jude int const skipSize = rankVal[minWeight];
948*5ff13fbcSAllan Jude assert(length > 1);
949*5ff13fbcSAllan Jude assert((U32)skipSize < length);
950*5ff13fbcSAllan Jude switch (length) {
951*5ff13fbcSAllan Jude case 2:
952*5ff13fbcSAllan Jude assert(skipSize == 1);
953*5ff13fbcSAllan Jude ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
954*5ff13fbcSAllan Jude break;
955*5ff13fbcSAllan Jude case 4:
956*5ff13fbcSAllan Jude assert(skipSize <= 4);
957*5ff13fbcSAllan Jude ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
958*5ff13fbcSAllan Jude ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
959*5ff13fbcSAllan Jude break;
960*5ff13fbcSAllan Jude default:
961*5ff13fbcSAllan Jude {
962*5ff13fbcSAllan Jude int i;
963*5ff13fbcSAllan Jude for (i = 0; i < skipSize; i += 8) {
964*5ff13fbcSAllan Jude ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
965*5ff13fbcSAllan Jude ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
966*5ff13fbcSAllan Jude ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
967*5ff13fbcSAllan Jude ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
968*5ff13fbcSAllan Jude }
969*5ff13fbcSAllan Jude }
970*5ff13fbcSAllan Jude }
9710c16b537SWarner Losh }
9720c16b537SWarner Losh
973*5ff13fbcSAllan Jude /* Fill each of the second level symbols by weight. */
974*5ff13fbcSAllan Jude {
975*5ff13fbcSAllan Jude int w;
976*5ff13fbcSAllan Jude for (w = minWeight; w < maxWeight1; ++w) {
977*5ff13fbcSAllan Jude int const begin = rankStart[w];
978*5ff13fbcSAllan Jude int const end = rankStart[w+1];
979*5ff13fbcSAllan Jude U32 const nbBits = nbBitsBaseline - w;
980*5ff13fbcSAllan Jude U32 const totalBits = nbBits + consumedBits;
981*5ff13fbcSAllan Jude HUF_fillDTableX2ForWeight(
982*5ff13fbcSAllan Jude DTable + rankVal[w],
983*5ff13fbcSAllan Jude sortedSymbols + begin, sortedSymbols + end,
984*5ff13fbcSAllan Jude totalBits, targetLog,
985*5ff13fbcSAllan Jude baseSeq, /* level */ 2);
9860c16b537SWarner Losh }
987*5ff13fbcSAllan Jude }
988*5ff13fbcSAllan Jude }
9890c16b537SWarner Losh
/* HUF_fillDTableX2() :
 * Fills the whole double-symbols table (1 << targetLog cells), weight by weight.
 * For a given weight, symbols are nbBits = nbBitsBaseline - weight long :
 *  - when the bits left after such a symbol can still hold the shortest
 *    code (minBits), every symbol of that weight gets its own second-level
 *    sub-table, built by HUF_fillDTableX2Level2();
 *  - otherwise the cells only carry that single symbol. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
{
    U32* const rankVal = rankValOrigin[0];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits = nbBitsBaseline - maxWeight;
    int const wEnd = (int)maxWeight + 1;
    int weight;

    /* Fill DTable in order of weight. */
    for (weight = 1; weight < wEnd; ++weight) {
        int const first = (int)rankStart[weight];
        int const last = (int)rankStart[weight+1];
        U32 const nbBits = nbBitsBaseline - weight;

        if (targetLog-nbBits < minBits) {
            /* Only a single symbol. */
            HUF_fillDTableX2ForWeight(
                DTable + rankVal[weight],
                sortedList + first, sortedList + last,
                nbBits, targetLog,
                /* baseSeq */ 0, /* level */ 1);
            continue;
        }

        /* Enough room for a second symbol. */
        {   U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
            int const rawMinWeight = (int)(nbBits + scaleLog);
            int const minWeight = (rawMinWeight < 1) ? 1 : rawMinWeight;
            int pos = rankVal[weight];   /* offset of the current sub-table */
            int idx;
            /* Fill the DTable for every symbol of this weight.
             * These symbols get at least 1 second symbol.
             */
            for (idx = first; idx != last; ++idx) {
                HUF_fillDTableX2Level2(
                    DTable + pos, targetLog, nbBits,
                    rankValOrigin[nbBits], minWeight, wEnd,
                    sortedList, rankStart,
                    nbBitsBaseline, sortedList[idx].symbol);
                pos += length;
            }
        }
    }
}
1035*5ff13fbcSAllan Jude
1036*5ff13fbcSAllan Jude typedef struct {
1037*5ff13fbcSAllan Jude rankValCol_t rankVal[HUF_TABLELOG_MAX];
1038*5ff13fbcSAllan Jude U32 rankStats[HUF_TABLELOG_MAX + 1];
1039*5ff13fbcSAllan Jude U32 rankStart0[HUF_TABLELOG_MAX + 3];
1040*5ff13fbcSAllan Jude sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
1041*5ff13fbcSAllan Jude BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
1042*5ff13fbcSAllan Jude U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
1043*5ff13fbcSAllan Jude } HUF_ReadDTableX2_Workspace;
10440c16b537SWarner Losh
/* HUF_readDTableX2_wksp() :
 * Same as HUF_readDTableX2_wksp_bmi2(), with the bmi2 flag fixed to 0. */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
{
    return HUF_readDTableX2_wksp_bmi2(DTable,
                                      src, srcSize,
                                      workSpace, wkspSize,
                                      /* bmi2 */ 0);
}
1051*5ff13fbcSAllan Jude
HUF_readDTableX2_wksp_bmi2(HUF_DTable * DTable,const void * src,size_t srcSize,void * workSpace,size_t wkspSize,int bmi2)1052*5ff13fbcSAllan Jude size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
1053*5ff13fbcSAllan Jude const void* src, size_t srcSize,
1054*5ff13fbcSAllan Jude void* workSpace, size_t wkspSize, int bmi2)
1055*5ff13fbcSAllan Jude {
1056*5ff13fbcSAllan Jude U32 tableLog, maxW, nbSymbols;
10570c16b537SWarner Losh DTableDesc dtd = HUF_getDTableDesc(DTable);
1058*5ff13fbcSAllan Jude U32 maxTableLog = dtd.maxTableLog;
10590c16b537SWarner Losh size_t iSize;
10600c16b537SWarner Losh void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
10610f743729SConrad Meyer HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
10620c16b537SWarner Losh U32 *rankStart;
10630c16b537SWarner Losh
1064*5ff13fbcSAllan Jude HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
10650c16b537SWarner Losh
1066*5ff13fbcSAllan Jude if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
10670c16b537SWarner Losh
1068*5ff13fbcSAllan Jude rankStart = wksp->rankStart0 + 1;
1069*5ff13fbcSAllan Jude ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
1070*5ff13fbcSAllan Jude ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
10710c16b537SWarner Losh
10720f743729SConrad Meyer DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
10730c16b537SWarner Losh if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
1074f7cd7fe5SConrad Meyer /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
10750c16b537SWarner Losh
1076*5ff13fbcSAllan Jude iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
10770c16b537SWarner Losh if (HUF_isError(iSize)) return iSize;
10780c16b537SWarner Losh
10790c16b537SWarner Losh /* check result */
10800c16b537SWarner Losh if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
1081*5ff13fbcSAllan Jude if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
10820c16b537SWarner Losh
10830c16b537SWarner Losh /* find maxWeight */
1084*5ff13fbcSAllan Jude for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
10850c16b537SWarner Losh
10860c16b537SWarner Losh /* Get start index of each weight */
10870c16b537SWarner Losh { U32 w, nextRankStart = 0;
10880c16b537SWarner Losh for (w=1; w<maxW+1; w++) {
1089f7cd7fe5SConrad Meyer U32 curr = nextRankStart;
1090*5ff13fbcSAllan Jude nextRankStart += wksp->rankStats[w];
1091f7cd7fe5SConrad Meyer rankStart[w] = curr;
10920c16b537SWarner Losh }
10930c16b537SWarner Losh rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
1094*5ff13fbcSAllan Jude rankStart[maxW+1] = nextRankStart;
10950c16b537SWarner Losh }
10960c16b537SWarner Losh
10970c16b537SWarner Losh /* sort symbols by weight */
10980c16b537SWarner Losh { U32 s;
10990c16b537SWarner Losh for (s=0; s<nbSymbols; s++) {
1100*5ff13fbcSAllan Jude U32 const w = wksp->weightList[s];
11010c16b537SWarner Losh U32 const r = rankStart[w]++;
1102*5ff13fbcSAllan Jude wksp->sortedSymbol[r].symbol = (BYTE)s;
11030c16b537SWarner Losh }
11040c16b537SWarner Losh rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
11050c16b537SWarner Losh }
11060c16b537SWarner Losh
11070c16b537SWarner Losh /* Build rankVal */
1108*5ff13fbcSAllan Jude { U32* const rankVal0 = wksp->rankVal[0];
11090c16b537SWarner Losh { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
11100c16b537SWarner Losh U32 nextRankVal = 0;
11110c16b537SWarner Losh U32 w;
11120c16b537SWarner Losh for (w=1; w<maxW+1; w++) {
1113f7cd7fe5SConrad Meyer U32 curr = nextRankVal;
1114*5ff13fbcSAllan Jude nextRankVal += wksp->rankStats[w] << (w+rescale);
1115f7cd7fe5SConrad Meyer rankVal0[w] = curr;
11160c16b537SWarner Losh } }
11170c16b537SWarner Losh { U32 const minBits = tableLog+1 - maxW;
11180c16b537SWarner Losh U32 consumed;
11190c16b537SWarner Losh for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
1120*5ff13fbcSAllan Jude U32* const rankValPtr = wksp->rankVal[consumed];
11210c16b537SWarner Losh U32 w;
11220c16b537SWarner Losh for (w = 1; w < maxW+1; w++) {
11230c16b537SWarner Losh rankValPtr[w] = rankVal0[w] >> consumed;
11240c16b537SWarner Losh } } } }
11250c16b537SWarner Losh
11260f743729SConrad Meyer HUF_fillDTableX2(dt, maxTableLog,
1127*5ff13fbcSAllan Jude wksp->sortedSymbol,
1128*5ff13fbcSAllan Jude wksp->rankStart0, wksp->rankVal, maxW,
11290c16b537SWarner Losh tableLog+1);
11300c16b537SWarner Losh
11310c16b537SWarner Losh dtd.tableLog = (BYTE)maxTableLog;
11320c16b537SWarner Losh dtd.tableType = 1;
1133f7cd7fe5SConrad Meyer ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
11340c16b537SWarner Losh return iSize;
11350c16b537SWarner Losh }
11360c16b537SWarner Losh
11370f743729SConrad Meyer
11380f743729SConrad Meyer FORCE_INLINE_TEMPLATE U32
HUF_decodeSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)11390f743729SConrad Meyer HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
11400f743729SConrad Meyer {
11410f743729SConrad Meyer size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
1142*5ff13fbcSAllan Jude ZSTD_memcpy(op, &dt[val].sequence, 2);
11430f743729SConrad Meyer BIT_skipBits(DStream, dt[val].nbBits);
11440f743729SConrad Meyer return dt[val].length;
11450f743729SConrad Meyer }
11460f743729SConrad Meyer
11470f743729SConrad Meyer FORCE_INLINE_TEMPLATE U32
HUF_decodeLastSymbolX2(void * op,BIT_DStream_t * DStream,const HUF_DEltX2 * dt,const U32 dtLog)11480f743729SConrad Meyer HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
11490f743729SConrad Meyer {
11500f743729SConrad Meyer size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
1151*5ff13fbcSAllan Jude ZSTD_memcpy(op, &dt[val].sequence, 1);
1152*5ff13fbcSAllan Jude if (dt[val].length==1) {
1153*5ff13fbcSAllan Jude BIT_skipBits(DStream, dt[val].nbBits);
1154*5ff13fbcSAllan Jude } else {
11550f743729SConrad Meyer if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
11560f743729SConrad Meyer BIT_skipBits(DStream, dt[val].nbBits);
11570f743729SConrad Meyer if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
11580f743729SConrad Meyer /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
11590f743729SConrad Meyer DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
1160*5ff13fbcSAllan Jude }
1161*5ff13fbcSAllan Jude }
11620f743729SConrad Meyer return 1;
11630f743729SConrad Meyer }
11640f743729SConrad Meyer
/* HUF_DECODE_SYMBOLX2_*() :
 * Decode one table entry from DStreamPtr and advance ptr by the value
 * returned by HUF_decodeSymbolX2(). The expansion site must provide
 * `dt` and `dtLog`.
 *   _0 : always decodes.
 *   _1 : decodes only if MEM_64bits() or HUF_TABLELOG_MAX <= 12.
 *   _2 : decodes only if MEM_64bits().
 * Each macro is wrapped in do { } while (0) so that it is a single statement :
 * the conditional variants can no longer capture an `else` that follows them
 * when they are used as the body of an unbraced `if`. */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)

#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)                     \
    do {                                                           \
        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))                \
            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
    } while (0)

#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)                     \
    do {                                                           \
        if (MEM_64bits())                                          \
            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
    } while (0)
11750f743729SConrad Meyer
11760f743729SConrad Meyer HINT_INLINE size_t
HUF_decodeStreamX2(BYTE * p,BIT_DStream_t * bitDPtr,BYTE * const pEnd,const HUF_DEltX2 * const dt,const U32 dtLog)11770f743729SConrad Meyer HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
11780f743729SConrad Meyer const HUF_DEltX2* const dt, const U32 dtLog)
11790f743729SConrad Meyer {
11800f743729SConrad Meyer BYTE* const pStart = p;
11810f743729SConrad Meyer
11820f743729SConrad Meyer /* up to 8 symbols at a time */
1183*5ff13fbcSAllan Jude if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
1184*5ff13fbcSAllan Jude if (dtLog <= 11 && MEM_64bits()) {
1185*5ff13fbcSAllan Jude /* up to 10 symbols at a time */
1186*5ff13fbcSAllan Jude while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
1187*5ff13fbcSAllan Jude HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1188*5ff13fbcSAllan Jude HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1189*5ff13fbcSAllan Jude HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1190*5ff13fbcSAllan Jude HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1191*5ff13fbcSAllan Jude HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1192*5ff13fbcSAllan Jude }
1193*5ff13fbcSAllan Jude } else {
1194*5ff13fbcSAllan Jude /* up to 8 symbols at a time */
11950f743729SConrad Meyer while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
11960f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
11970f743729SConrad Meyer HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
11980f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
11990f743729SConrad Meyer HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
12000f743729SConrad Meyer }
1201*5ff13fbcSAllan Jude }
1202*5ff13fbcSAllan Jude } else {
1203*5ff13fbcSAllan Jude BIT_reloadDStream(bitDPtr);
1204*5ff13fbcSAllan Jude }
12050f743729SConrad Meyer
12060f743729SConrad Meyer /* closer to end : up to 2 symbols at a time */
1207*5ff13fbcSAllan Jude if ((size_t)(pEnd - p) >= 2) {
12080f743729SConrad Meyer while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
12090f743729SConrad Meyer HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
12100f743729SConrad Meyer
12110f743729SConrad Meyer while (p <= pEnd-2)
12120f743729SConrad Meyer HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
1213*5ff13fbcSAllan Jude }
12140f743729SConrad Meyer
12150f743729SConrad Meyer if (p < pEnd)
12160f743729SConrad Meyer p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
12170f743729SConrad Meyer
12180f743729SConrad Meyer return p-pStart;
12190f743729SConrad Meyer }
12200f743729SConrad Meyer
12210f743729SConrad Meyer FORCE_INLINE_TEMPLATE size_t
HUF_decompress1X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)12220f743729SConrad Meyer HUF_decompress1X2_usingDTable_internal_body(
12230f743729SConrad Meyer void* dst, size_t dstSize,
12240f743729SConrad Meyer const void* cSrc, size_t cSrcSize,
12250f743729SConrad Meyer const HUF_DTable* DTable)
12260f743729SConrad Meyer {
12270f743729SConrad Meyer BIT_DStream_t bitD;
12280f743729SConrad Meyer
12290f743729SConrad Meyer /* Init */
12300f743729SConrad Meyer CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
12310f743729SConrad Meyer
12320f743729SConrad Meyer /* decode */
12330f743729SConrad Meyer { BYTE* const ostart = (BYTE*) dst;
12340f743729SConrad Meyer BYTE* const oend = ostart + dstSize;
12350f743729SConrad Meyer const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
12360f743729SConrad Meyer const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
12370f743729SConrad Meyer DTableDesc const dtd = HUF_getDTableDesc(DTable);
12380f743729SConrad Meyer HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
12390f743729SConrad Meyer }
12400f743729SConrad Meyer
12410f743729SConrad Meyer /* check */
12420f743729SConrad Meyer if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
12430f743729SConrad Meyer
12440f743729SConrad Meyer /* decoded size */
12450f743729SConrad Meyer return dstSize;
12460f743729SConrad Meyer }
12470f743729SConrad Meyer FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable)12480f743729SConrad Meyer HUF_decompress4X2_usingDTable_internal_body(
12490f743729SConrad Meyer void* dst, size_t dstSize,
12500f743729SConrad Meyer const void* cSrc, size_t cSrcSize,
12510f743729SConrad Meyer const HUF_DTable* DTable)
12520f743729SConrad Meyer {
12530f743729SConrad Meyer if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
12540f743729SConrad Meyer
12550f743729SConrad Meyer { const BYTE* const istart = (const BYTE*) cSrc;
12560f743729SConrad Meyer BYTE* const ostart = (BYTE*) dst;
12570f743729SConrad Meyer BYTE* const oend = ostart + dstSize;
125837f1f268SConrad Meyer BYTE* const olimit = oend - (sizeof(size_t)-1);
12590f743729SConrad Meyer const void* const dtPtr = DTable+1;
12600f743729SConrad Meyer const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
12610f743729SConrad Meyer
12620f743729SConrad Meyer /* Init */
12630f743729SConrad Meyer BIT_DStream_t bitD1;
12640f743729SConrad Meyer BIT_DStream_t bitD2;
12650f743729SConrad Meyer BIT_DStream_t bitD3;
12660f743729SConrad Meyer BIT_DStream_t bitD4;
12670f743729SConrad Meyer size_t const length1 = MEM_readLE16(istart);
12680f743729SConrad Meyer size_t const length2 = MEM_readLE16(istart+2);
12690f743729SConrad Meyer size_t const length3 = MEM_readLE16(istart+4);
12700f743729SConrad Meyer size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
12710f743729SConrad Meyer const BYTE* const istart1 = istart + 6; /* jumpTable */
12720f743729SConrad Meyer const BYTE* const istart2 = istart1 + length1;
12730f743729SConrad Meyer const BYTE* const istart3 = istart2 + length2;
12740f743729SConrad Meyer const BYTE* const istart4 = istart3 + length3;
12750f743729SConrad Meyer size_t const segmentSize = (dstSize+3) / 4;
12760f743729SConrad Meyer BYTE* const opStart2 = ostart + segmentSize;
12770f743729SConrad Meyer BYTE* const opStart3 = opStart2 + segmentSize;
12780f743729SConrad Meyer BYTE* const opStart4 = opStart3 + segmentSize;
12790f743729SConrad Meyer BYTE* op1 = ostart;
12800f743729SConrad Meyer BYTE* op2 = opStart2;
12810f743729SConrad Meyer BYTE* op3 = opStart3;
12820f743729SConrad Meyer BYTE* op4 = opStart4;
128337f1f268SConrad Meyer U32 endSignal = 1;
12840f743729SConrad Meyer DTableDesc const dtd = HUF_getDTableDesc(DTable);
12850f743729SConrad Meyer U32 const dtLog = dtd.tableLog;
12860f743729SConrad Meyer
12870f743729SConrad Meyer if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
1288*5ff13fbcSAllan Jude if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
12890f743729SConrad Meyer CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
12900f743729SConrad Meyer CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
12910f743729SConrad Meyer CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
12920f743729SConrad Meyer CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
12930f743729SConrad Meyer
12940f743729SConrad Meyer /* 16-32 symbols per loop (4-8 symbols per stream) */
1295*5ff13fbcSAllan Jude if ((size_t)(oend - op4) >= sizeof(size_t)) {
129637f1f268SConrad Meyer for ( ; (endSignal) & (op4 < olimit); ) {
129737f1f268SConrad Meyer #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
129837f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
129937f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
130037f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
130137f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
130237f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
130337f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
130437f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
130537f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
130637f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
130737f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
130837f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
130937f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
131037f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
131137f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
131237f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
131337f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
131437f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
131537f1f268SConrad Meyer HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
131637f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
131737f1f268SConrad Meyer endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
131837f1f268SConrad Meyer #else
13190f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
13200f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
13210f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
13220f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
13230f743729SConrad Meyer HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
13240f743729SConrad Meyer HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
13250f743729SConrad Meyer HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
13260f743729SConrad Meyer HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
13270f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
13280f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
13290f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
13300f743729SConrad Meyer HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
13310f743729SConrad Meyer HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
13320f743729SConrad Meyer HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
13330f743729SConrad Meyer HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
13340f743729SConrad Meyer HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
1335*5ff13fbcSAllan Jude endSignal = (U32)LIKELY((U32)
133637f1f268SConrad Meyer (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
133737f1f268SConrad Meyer & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
133837f1f268SConrad Meyer & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
133937f1f268SConrad Meyer & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
134037f1f268SConrad Meyer #endif
13410f743729SConrad Meyer }
1342*5ff13fbcSAllan Jude }
13430f743729SConrad Meyer
13440f743729SConrad Meyer /* check corruption */
13450f743729SConrad Meyer if (op1 > opStart2) return ERROR(corruption_detected);
13460f743729SConrad Meyer if (op2 > opStart3) return ERROR(corruption_detected);
13470f743729SConrad Meyer if (op3 > opStart4) return ERROR(corruption_detected);
13480f743729SConrad Meyer /* note : op4 already verified within main loop */
13490f743729SConrad Meyer
13500f743729SConrad Meyer /* finish bitStreams one by one */
13510f743729SConrad Meyer HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
13520f743729SConrad Meyer HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
13530f743729SConrad Meyer HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
13540f743729SConrad Meyer HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
13550f743729SConrad Meyer
13560f743729SConrad Meyer /* check */
13570f743729SConrad Meyer { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
13580f743729SConrad Meyer if (!endCheck) return ERROR(corruption_detected); }
13590f743729SConrad Meyer
13600f743729SConrad Meyer /* decoded size */
13610f743729SConrad Meyer return dstSize;
13620f743729SConrad Meyer }
13630f743729SConrad Meyer }
13640f743729SConrad Meyer
#if HUF_NEED_BMI2_FUNCTION
/* Instantiation of the 4-stream X2 decoder body, compiled with BMI2_TARGET_ATTRIBUTE. */
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable)
{
    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif
1372*5ff13fbcSAllan Jude
#if HUF_NEED_DEFAULT_FUNCTION
/* Instantiation of the 4-stream X2 decoder body without any target attribute. */
static
size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable)
{
    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
}
#endif
1380*5ff13fbcSAllan Jude
#if ZSTD_ENABLE_ASM_X86_64_BMI2

/* Main decoding loop, implemented outside of this translation unit. */
HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;

/* HUF_decompress4X2_usingDTable_internal_bmi2_asm() :
 * 4-stream X2 decoder built around the external asm loop.
 * - HUF_DecompressAsmArgs_init() prepares the loop arguments; once
 *   FORWARD_IF_ERROR has dealt with error codes, any other non-zero result
 *   makes this function defer to HUF_decompress4X2_usingDTable_internal_bmi2().
 * - after the asm loop, the remainder of each stream is decoded in C with
 *   HUF_decodeStreamX2(), using a table log of HUF_DECODER_FAST_TABLELOG.
 * @return : dstSize, or corruption_detected if a stream does not end exactly
 *           at the end of its output segment. */
static HUF_ASM_X86_64_BMI2_ATTRS size_t
HUF_decompress4X2_usingDTable_internal_bmi2_asm(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    const void* const dtPtr = DTable + 1;
    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
    const BYTE* const iend = (const BYTE*)cSrc + 6;
    BYTE* const oend = (BYTE*)dst + dstSize;
    HUF_DecompressAsmArgs args;

    {   size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
        FORWARD_IF_ERROR(ret, "Failed to init asm args");
        if (ret != 0) {
            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
        }
    }

    assert(args.ip[0] >= args.ilimit);
    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);

    /* note : op4 already verified within main loop */
    assert(args.ip[0] >= iend);
    assert(args.ip[1] >= iend);
    assert(args.ip[2] >= iend);
    assert(args.ip[3] >= iend);
    assert(args.op[3] <= oend);
    (void)iend;   /* only referenced by the asserts above */

    /* finish bitStreams one by one */
    {   size_t const segmentSize = (dstSize+3) / 4;
        BYTE* segmentEnd = (BYTE*)dst;
        int stream;
        for (stream = 0; stream < 4; ++stream) {
            BIT_DStream_t bit;
            /* advance by one full segment, except that the end is clamped to oend */
            segmentEnd = (segmentSize <= (size_t)(oend - segmentEnd))
                       ? segmentEnd + segmentSize
                       : oend;
            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, stream, segmentEnd), "corruption");
            args.op[stream] += HUF_decodeStreamX2(args.op[stream], &bit, segmentEnd, dt, HUF_DECODER_FAST_TABLELOG);
            if (args.op[stream] != segmentEnd) {
                return ERROR(corruption_detected);
            }
        }
    }

    /* decoded size */
    return dstSize;
}
#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
1434*5ff13fbcSAllan Jude
/* HUF_decompress4X2_usingDTable_internal() :
 * Selects the 4-stream X2 implementation.
 * - DYNAMIC_BMI2 builds with bmi2 != 0 : the asm variant when
 *   ZSTD_ENABLE_ASM_X86_64_BMI2 is set, otherwise the BMI2-targeted C variant.
 * - otherwise : the asm variant when both ZSTD_ENABLE_ASM_X86_64_BMI2 and
 *   __BMI2__ are set, else the default C variant.
 * `bmi2` is ignored when DYNAMIC_BMI2 is off. */
static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
# if ZSTD_ENABLE_ASM_X86_64_BMI2
        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
# else
        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
# endif
    }
#else
    (void)bmi2;
#endif

#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
    return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
#else
    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
#endif
}
1456*5ff13fbcSAllan Jude
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)14570f743729SConrad Meyer HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
14580f743729SConrad Meyer
14590f743729SConrad Meyer size_t HUF_decompress1X2_usingDTable(
14600c16b537SWarner Losh void* dst, size_t dstSize,
14610c16b537SWarner Losh const void* cSrc, size_t cSrcSize,
14620c16b537SWarner Losh const HUF_DTable* DTable)
14630c16b537SWarner Losh {
14640c16b537SWarner Losh DTableDesc dtd = HUF_getDTableDesc(DTable);
14650c16b537SWarner Losh if (dtd.tableType != 1) return ERROR(GENERIC);
14660f743729SConrad Meyer return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
14670c16b537SWarner Losh }
14680c16b537SWarner Losh
/* HUF_decompress1X2_DCtx_wksp() :
 * Reads the table description at the start of cSrc into DCtx (using
 * workSpace), then decodes the rest of cSrc as a single stream.
 * @return : an error from HUF_readDTableX2_wksp(), srcSize_wrong if the
 *           header takes up all of cSrcSize, otherwise the decoder's result. */
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    size_t const headerSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
                                                    workSpace, wkspSize);
    if (HUF_isError(headerSize)) return headerSize;
    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);

    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
        size_t const payloadSize = cSrcSize - headerSize;
        return HUF_decompress1X2_usingDTable_internal(dst, dstSize, payload, payloadSize, DCtx, /* bmi2 */ 0);
    }
}
14830c16b537SWarner Losh
14840c16b537SWarner Losh
/* HUF_decompress4X2_usingDTable() :
 * 4-stream decoding with an already-built table.
 * The table descriptor must have tableType == 1, otherwise GENERIC is returned.
 * @return : result of the internal 4X2 decoder, called with bmi2 == 0. */
size_t HUF_decompress4X2_usingDTable(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
{
    DTableDesc const desc = HUF_getDTableDesc(DTable);

    if (desc.tableType != 1) {
        return ERROR(GENERIC);
    }
    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
14940c16b537SWarner Losh
/* HUF_decompress4X2_DCtx_wksp_bmi2() :
 * Reads the table description at the start of cSrc into dctx (using
 * workSpace), then decodes the rest of cSrc as 4 streams, forwarding `bmi2`
 * to the decoder.
 * @return : an error from HUF_readDTableX2_wksp(), srcSize_wrong if the
 *           header takes up all of cSrcSize, otherwise the decoder's result. */
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                       const void* cSrc, size_t cSrcSize,
                                       void* workSpace, size_t wkspSize, int bmi2)
{
    size_t const headerSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
                                                    workSpace, wkspSize);
    if (HUF_isError(headerSize)) return headerSize;
    if (headerSize >= cSrcSize) return ERROR(srcSize_wrong);

    {   const BYTE* const payload = (const BYTE*) cSrc + headerSize;
        size_t const payloadSize = cSrcSize - headerSize;
        return HUF_decompress4X2_usingDTable_internal(dst, dstSize, payload, payloadSize, dctx, bmi2);
    }
}
150919fcbaf1SConrad Meyer
/* HUF_decompress4X2_DCtx_wksp() :
 * Same as HUF_decompress4X2_DCtx_wksp_bmi2(), with bmi2 fixed to 0. */
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                   const void* cSrc, size_t cSrcSize,
                                   void* workSpace, size_t wkspSize)
{
    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize,
                                            cSrc, cSrcSize,
                                            workSpace, wkspSize,
                                            /* bmi2 */ 0);
}
15160c16b537SWarner Losh
15170c16b537SWarner Losh
1518a0483764SConrad Meyer #endif /* HUF_FORCE_DECOMPRESS_X1 */
1519a0483764SConrad Meyer
15200c16b537SWarner Losh
15210f743729SConrad Meyer /* ***********************************/
15220f743729SConrad Meyer /* Universal decompression selectors */
15230f743729SConrad Meyer /* ***********************************/
15240c16b537SWarner Losh
/* HUF_decompress1X_usingDTable() :
 * Single-stream decoding with an already-built table of either kind.
 * The decoder is chosen from the table descriptor : tableType != 0 selects
 * X2, tableType == 0 selects X1. When HUF_FORCE_DECOMPRESS_X1 or
 * HUF_FORCE_DECOMPRESS_X2 is defined, only that decoder is called and the
 * table type is merely asserted. */
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    if (dtd.tableType) {
        return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
    }
    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}
15430c16b537SWarner Losh
/* HUF_decompress4X_usingDTable() :
 * 4-stream decoding with an already-built table of either kind.
 * The decoder is chosen from the table descriptor : tableType != 0 selects
 * X2, tableType == 0 selects X1. When HUF_FORCE_DECOMPRESS_X1 or
 * HUF_FORCE_DECOMPRESS_X2 is defined, only that decoder is called and the
 * table type is merely asserted. */
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    const HUF_DTable* DTable)
{
    DTableDesc const dtd = HUF_getDTableDesc(DTable);
#if defined(HUF_FORCE_DECOMPRESS_X1)
    (void)dtd;
    assert(dtd.tableType == 0);
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
    (void)dtd;
    assert(dtd.tableType == 1);
    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#else
    if (dtd.tableType) {
        return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
    }
    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
#endif
}
15620c16b537SWarner Losh
15630c16b537SWarner Losh
1564a0483764SConrad Meyer #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
15650c16b537SWarner Losh typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1566*5ff13fbcSAllan Jude static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
15670c16b537SWarner Losh {
15680c16b537SWarner Losh /* single, double, quad */
1569*5ff13fbcSAllan Jude {{0,0}, {1,1}}, /* Q==0 : impossible */
1570*5ff13fbcSAllan Jude {{0,0}, {1,1}}, /* Q==1 : impossible */
1571*5ff13fbcSAllan Jude {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
1572*5ff13fbcSAllan Jude {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
1573*5ff13fbcSAllan Jude {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
1574*5ff13fbcSAllan Jude {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
1575*5ff13fbcSAllan Jude {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
1576*5ff13fbcSAllan Jude {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
1577*5ff13fbcSAllan Jude {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
1578*5ff13fbcSAllan Jude {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
1579*5ff13fbcSAllan Jude {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
1580*5ff13fbcSAllan Jude {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
1581*5ff13fbcSAllan Jude {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
1582*5ff13fbcSAllan Jude {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
1583*5ff13fbcSAllan Jude {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
1584*5ff13fbcSAllan Jude {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
15850c16b537SWarner Losh };
1586a0483764SConrad Meyer #endif
15870c16b537SWarner Losh
15880c16b537SWarner Losh /** HUF_selectDecoder() :
15890c16b537SWarner Losh * Tells which decoder is likely to decode faster,
159019fcbaf1SConrad Meyer * based on a set of pre-computed metrics.
15910f743729SConrad Meyer * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
159219fcbaf1SConrad Meyer * Assumption : 0 < dstSize <= 128 KB */
HUF_selectDecoder(size_t dstSize,size_t cSrcSize)15930c16b537SWarner Losh U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
15940c16b537SWarner Losh {
159519fcbaf1SConrad Meyer assert(dstSize > 0);
15960f743729SConrad Meyer assert(dstSize <= 128*1024);
1597a0483764SConrad Meyer #if defined(HUF_FORCE_DECOMPRESS_X1)
1598a0483764SConrad Meyer (void)dstSize;
1599a0483764SConrad Meyer (void)cSrcSize;
1600a0483764SConrad Meyer return 0;
1601a0483764SConrad Meyer #elif defined(HUF_FORCE_DECOMPRESS_X2)
1602a0483764SConrad Meyer (void)dstSize;
1603a0483764SConrad Meyer (void)cSrcSize;
1604a0483764SConrad Meyer return 1;
1605a0483764SConrad Meyer #else
16060c16b537SWarner Losh /* decoder timing evaluation */
160719fcbaf1SConrad Meyer { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
16080c16b537SWarner Losh U32 const D256 = (U32)(dstSize >> 8);
16090c16b537SWarner Losh U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
16100c16b537SWarner Losh U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1611*5ff13fbcSAllan Jude DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
16120c16b537SWarner Losh return DTime1 < DTime0;
1613a0483764SConrad Meyer }
1614a0483764SConrad Meyer #endif
1615a0483764SConrad Meyer }
16160c16b537SWarner Losh
16170c16b537SWarner Losh
/** HUF_decompress4X_hufOnly_wksp() :
 *  Picks a decoder with HUF_selectDecoder() and forwards all arguments to the
 *  matching HUF_decompress4X{1,2}_DCtx_wksp() function.
 *  `workSpace` / `wkspSize` are passed through unchanged.
 * @return : the selected decoder's result,
 *           or an error code when dstSize == 0 or cSrcSize == 0. */
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
                                     size_t dstSize, const void* cSrc,
                                     size_t cSrcSize, void* workSpace,
                                     size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize == 0) {
        return ERROR(corruption_detected);
    }

    {   U32 const useX2 = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)useX2;
        assert(useX2 == 0);
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)useX2;
        assert(useX2 == 1);
        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
#else
        if (useX2) {
            return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                               cSrcSize, workSpace, wkspSize);
        }
        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#endif
    }
}
16430c16b537SWarner Losh
/** HUF_decompress1X_DCtx_wksp() :
 *  Handles the trivial cases itself, otherwise picks a decoder with
 *  HUF_selectDecoder() and forwards to HUF_decompress1X{1,2}_DCtx_wksp().
 *  - cSrcSize == dstSize : input is copied verbatim into `dst`
 *  - cSrcSize == 1       : the single input byte is repeated dstSize times
 * @return : dstSize for the two trivial cases, the selected decoder's result
 *           otherwise, or an error code when dstSize == 0 or cSrcSize > dstSize. */
size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                  const void* cSrc, size_t cSrcSize,
                                  void* workSpace, size_t wkspSize)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize > dstSize) {   /* invalid */
        return ERROR(corruption_detected);
    }
    if (cSrcSize == dstSize) {  /* not compressed */
        ZSTD_memcpy(dst, cSrc, dstSize);
        return dstSize;
    }
    if (cSrcSize == 1) {        /* RLE */
        ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize);
        return dstSize;
    }

    {   U32 const useX2 = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)useX2;
        assert(useX2 == 0);
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)useX2;
        assert(useX2 == 1);
        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#else
        if (useX2) {
            return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                               cSrcSize, workSpace, wkspSize);
        }
        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                           cSrcSize, workSpace, wkspSize);
#endif
    }
}
16730c16b537SWarner Losh
167419fcbaf1SConrad Meyer
HUF_decompress1X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)167519fcbaf1SConrad Meyer size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
167619fcbaf1SConrad Meyer {
167719fcbaf1SConrad Meyer DTableDesc const dtd = HUF_getDTableDesc(DTable);
1678a0483764SConrad Meyer #if defined(HUF_FORCE_DECOMPRESS_X1)
1679a0483764SConrad Meyer (void)dtd;
1680a0483764SConrad Meyer assert(dtd.tableType == 0);
1681a0483764SConrad Meyer return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1682a0483764SConrad Meyer #elif defined(HUF_FORCE_DECOMPRESS_X2)
1683a0483764SConrad Meyer (void)dtd;
1684a0483764SConrad Meyer assert(dtd.tableType == 1);
1685a0483764SConrad Meyer return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1686a0483764SConrad Meyer #else
16870f743729SConrad Meyer return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
16880f743729SConrad Meyer HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1689a0483764SConrad Meyer #endif
169019fcbaf1SConrad Meyer }
169119fcbaf1SConrad Meyer
1692a0483764SConrad Meyer #ifndef HUF_FORCE_DECOMPRESS_X2
HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable * dctx,void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize,void * workSpace,size_t wkspSize,int bmi2)16930f743729SConrad Meyer size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
169419fcbaf1SConrad Meyer {
169519fcbaf1SConrad Meyer const BYTE* ip = (const BYTE*) cSrc;
169619fcbaf1SConrad Meyer
1697f7cd7fe5SConrad Meyer size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
169819fcbaf1SConrad Meyer if (HUF_isError(hSize)) return hSize;
169919fcbaf1SConrad Meyer if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
170019fcbaf1SConrad Meyer ip += hSize; cSrcSize -= hSize;
170119fcbaf1SConrad Meyer
17020f743729SConrad Meyer return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
170319fcbaf1SConrad Meyer }
1704a0483764SConrad Meyer #endif
170519fcbaf1SConrad Meyer
HUF_decompress4X_usingDTable_bmi2(void * dst,size_t maxDstSize,const void * cSrc,size_t cSrcSize,const HUF_DTable * DTable,int bmi2)170619fcbaf1SConrad Meyer size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
170719fcbaf1SConrad Meyer {
170819fcbaf1SConrad Meyer DTableDesc const dtd = HUF_getDTableDesc(DTable);
1709a0483764SConrad Meyer #if defined(HUF_FORCE_DECOMPRESS_X1)
1710a0483764SConrad Meyer (void)dtd;
1711a0483764SConrad Meyer assert(dtd.tableType == 0);
1712a0483764SConrad Meyer return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1713a0483764SConrad Meyer #elif defined(HUF_FORCE_DECOMPRESS_X2)
1714a0483764SConrad Meyer (void)dtd;
1715a0483764SConrad Meyer assert(dtd.tableType == 1);
1716a0483764SConrad Meyer return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1717a0483764SConrad Meyer #else
17180f743729SConrad Meyer return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
17190f743729SConrad Meyer HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1720a0483764SConrad Meyer #endif
172119fcbaf1SConrad Meyer }
172219fcbaf1SConrad Meyer
/** HUF_decompress4X_hufOnly_wksp_bmi2() :
 *  Picks a decoder with HUF_selectDecoder() and forwards all arguments,
 *  including `bmi2`, to the matching HUF_decompress4X{1,2}_DCtx_wksp_bmi2().
 * @return : the selected decoder's result,
 *           or an error code when dstSize == 0 or cSrcSize == 0. */
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize == 0) {
        return ERROR(corruption_detected);
    }

    {   U32 const useX2 = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)useX2;
        assert(useX2 == 0);
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)useX2;
        assert(useX2 == 1);
        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#else
        if (useX2) {
            return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
        }
        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
#endif
    }
}
1744f7cd7fe5SConrad Meyer
1745f7cd7fe5SConrad Meyer #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1746f7cd7fe5SConrad Meyer #ifndef HUF_FORCE_DECOMPRESS_X2
/** HUF_readDTableX1() :
 *  Convenience wrapper around HUF_readDTableX1_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_readDTableX1_wksp() returns. */
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_readDTableX1_wksp(DTable, src, srcSize, wksp, sizeof(wksp));
    return result;
}
1753f7cd7fe5SConrad Meyer
/** HUF_decompress1X1_DCtx() :
 *  Convenience wrapper around HUF_decompress1X1_DCtx_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_decompress1X1_DCtx_wksp() returns. */
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                                      wksp, sizeof(wksp));
    return result;
}
1761f7cd7fe5SConrad Meyer
HUF_decompress1X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1762f7cd7fe5SConrad Meyer size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1763f7cd7fe5SConrad Meyer {
1764f7cd7fe5SConrad Meyer HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1765f7cd7fe5SConrad Meyer return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
1766f7cd7fe5SConrad Meyer }
1767f7cd7fe5SConrad Meyer #endif
1768f7cd7fe5SConrad Meyer
1769f7cd7fe5SConrad Meyer #ifndef HUF_FORCE_DECOMPRESS_X1
/** HUF_readDTableX2() :
 *  Convenience wrapper around HUF_readDTableX2_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_readDTableX2_wksp() returns. */
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_readDTableX2_wksp(DTable, src, srcSize, wksp, sizeof(wksp));
    return result;
}
1776f7cd7fe5SConrad Meyer
/** HUF_decompress1X2_DCtx() :
 *  Convenience wrapper around HUF_decompress1X2_DCtx_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_decompress1X2_DCtx_wksp() returns. */
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                                      wksp, sizeof(wksp));
    return result;
}
1784f7cd7fe5SConrad Meyer
HUF_decompress1X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1785f7cd7fe5SConrad Meyer size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1786f7cd7fe5SConrad Meyer {
1787f7cd7fe5SConrad Meyer HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1788f7cd7fe5SConrad Meyer return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1789f7cd7fe5SConrad Meyer }
1790f7cd7fe5SConrad Meyer #endif
1791f7cd7fe5SConrad Meyer
1792f7cd7fe5SConrad Meyer #ifndef HUF_FORCE_DECOMPRESS_X2
/** HUF_decompress4X1_DCtx() :
 *  Convenience wrapper around HUF_decompress4X1_DCtx_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_decompress4X1_DCtx_wksp() returns. */
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                                      wksp, sizeof(wksp));
    return result;
}
HUF_decompress4X1(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1799f7cd7fe5SConrad Meyer size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1800f7cd7fe5SConrad Meyer {
1801f7cd7fe5SConrad Meyer HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1802f7cd7fe5SConrad Meyer return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1803f7cd7fe5SConrad Meyer }
1804f7cd7fe5SConrad Meyer #endif
1805f7cd7fe5SConrad Meyer
1806f7cd7fe5SConrad Meyer #ifndef HUF_FORCE_DECOMPRESS_X1
/** HUF_decompress4X2_DCtx() :
 *  Convenience wrapper around HUF_decompress4X2_DCtx_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_decompress4X2_DCtx_wksp() returns. */
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                              const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                                      wksp, sizeof(wksp));
    return result;
}
1814f7cd7fe5SConrad Meyer
HUF_decompress4X2(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1815f7cd7fe5SConrad Meyer size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1816f7cd7fe5SConrad Meyer {
1817f7cd7fe5SConrad Meyer HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1818f7cd7fe5SConrad Meyer return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1819f7cd7fe5SConrad Meyer }
1820f7cd7fe5SConrad Meyer #endif
1821f7cd7fe5SConrad Meyer
1822f7cd7fe5SConrad Meyer typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1823f7cd7fe5SConrad Meyer
HUF_decompress(void * dst,size_t dstSize,const void * cSrc,size_t cSrcSize)1824f7cd7fe5SConrad Meyer size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1825f7cd7fe5SConrad Meyer {
1826f7cd7fe5SConrad Meyer #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1827f7cd7fe5SConrad Meyer static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1828f7cd7fe5SConrad Meyer #endif
1829f7cd7fe5SConrad Meyer
1830f7cd7fe5SConrad Meyer /* validation checks */
1831f7cd7fe5SConrad Meyer if (dstSize == 0) return ERROR(dstSize_tooSmall);
1832f7cd7fe5SConrad Meyer if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1833f7cd7fe5SConrad Meyer if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1834f7cd7fe5SConrad Meyer if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1835f7cd7fe5SConrad Meyer
1836f7cd7fe5SConrad Meyer { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1837f7cd7fe5SConrad Meyer #if defined(HUF_FORCE_DECOMPRESS_X1)
1838f7cd7fe5SConrad Meyer (void)algoNb;
1839f7cd7fe5SConrad Meyer assert(algoNb == 0);
1840f7cd7fe5SConrad Meyer return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1841f7cd7fe5SConrad Meyer #elif defined(HUF_FORCE_DECOMPRESS_X2)
1842f7cd7fe5SConrad Meyer (void)algoNb;
1843f7cd7fe5SConrad Meyer assert(algoNb == 1);
1844f7cd7fe5SConrad Meyer return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1845f7cd7fe5SConrad Meyer #else
1846f7cd7fe5SConrad Meyer return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1847f7cd7fe5SConrad Meyer #endif
1848f7cd7fe5SConrad Meyer }
1849f7cd7fe5SConrad Meyer }
1850f7cd7fe5SConrad Meyer
/** HUF_decompress4X_DCtx() :
 *  Handles the trivial cases itself, otherwise picks a decoder with
 *  HUF_selectDecoder() and forwards to HUF_decompress4X{1,2}_DCtx().
 *  - cSrcSize == dstSize : input is copied verbatim into `dst`
 *  - cSrcSize == 1       : the single input byte is repeated dstSize times
 * @return : dstSize for the two trivial cases, the selected decoder's result
 *           otherwise, or an error code when dstSize == 0 or cSrcSize > dstSize. */
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    /* validation checks */
    if (dstSize == 0) {
        return ERROR(dstSize_tooSmall);
    }
    if (cSrcSize > dstSize) {   /* invalid */
        return ERROR(corruption_detected);
    }
    if (cSrcSize == dstSize) {  /* not compressed */
        ZSTD_memcpy(dst, cSrc, dstSize);
        return dstSize;
    }
    if (cSrcSize == 1) {        /* RLE */
        ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize);
        return dstSize;
    }

    {   U32 const useX2 = HUF_selectDecoder(dstSize, cSrcSize);
#if defined(HUF_FORCE_DECOMPRESS_X1)
        (void)useX2;
        assert(useX2 == 0);
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#elif defined(HUF_FORCE_DECOMPRESS_X2)
        (void)useX2;
        assert(useX2 == 1);
        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#else
        if (useX2) {
            return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
        }
        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
#endif
    }
}
1874f7cd7fe5SConrad Meyer
/** HUF_decompress4X_hufOnly() :
 *  Convenience wrapper around HUF_decompress4X_hufOnly_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_decompress4X_hufOnly_wksp() returns. */
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                                        wksp, sizeof(wksp));
    return result;
}
1881f7cd7fe5SConrad Meyer
/** HUF_decompress1X_DCtx() :
 *  Convenience wrapper around HUF_decompress1X_DCtx_wksp() that supplies a
 *  scratch buffer of HUF_DECOMPRESS_WORKSPACE_SIZE_U32 U32 words
 *  allocated on the stack.
 * @return : whatever HUF_decompress1X_DCtx_wksp() returns. */
size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
{
    U32 wksp[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    size_t const result = HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                                     wksp, sizeof(wksp));
    return result;
}
1889f7cd7fe5SConrad Meyer #endif
1890