xref: /freebsd-src/sys/contrib/zstd/lib/decompress/zstd_decompress_block.c (revision 37f1f2684f2670b204080ef2d6c303becd28545f)
1a0483764SConrad Meyer /*
2*37f1f268SConrad Meyer  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3a0483764SConrad Meyer  * All rights reserved.
4a0483764SConrad Meyer  *
5a0483764SConrad Meyer  * This source code is licensed under both the BSD-style license (found in the
6a0483764SConrad Meyer  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7a0483764SConrad Meyer  * in the COPYING file in the root directory of this source tree).
8a0483764SConrad Meyer  * You may select, at your option, one of the above-listed licenses.
9a0483764SConrad Meyer  */
10a0483764SConrad Meyer 
11a0483764SConrad Meyer /* zstd_decompress_block :
12a0483764SConrad Meyer  * this module takes care of decompressing _compressed_ block */
13a0483764SConrad Meyer 
14a0483764SConrad Meyer /*-*******************************************************
15a0483764SConrad Meyer *  Dependencies
16a0483764SConrad Meyer *********************************************************/
17a0483764SConrad Meyer #include <string.h>      /* memcpy, memmove, memset */
18*37f1f268SConrad Meyer #include "../common/compiler.h"    /* prefetch */
19*37f1f268SConrad Meyer #include "../common/cpu.h"         /* bmi2 */
20*37f1f268SConrad Meyer #include "../common/mem.h"         /* low level memory routines */
21a0483764SConrad Meyer #define FSE_STATIC_LINKING_ONLY
22*37f1f268SConrad Meyer #include "../common/fse.h"
23a0483764SConrad Meyer #define HUF_STATIC_LINKING_ONLY
24*37f1f268SConrad Meyer #include "../common/huf.h"
25*37f1f268SConrad Meyer #include "../common/zstd_internal.h"
26a0483764SConrad Meyer #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
27a0483764SConrad Meyer #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
28a0483764SConrad Meyer #include "zstd_decompress_block.h"
29a0483764SConrad Meyer 
30a0483764SConrad Meyer /*_*******************************************************
31a0483764SConrad Meyer *  Macros
32a0483764SConrad Meyer **********************************************************/
33a0483764SConrad Meyer 
34a0483764SConrad Meyer /* These two optional macros force the use one way or another of the two
35a0483764SConrad Meyer  * ZSTD_decompressSequences implementations. You can't force in both directions
36a0483764SConrad Meyer  * at the same time.
37a0483764SConrad Meyer  */
38a0483764SConrad Meyer #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39a0483764SConrad Meyer     defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40a0483764SConrad Meyer #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41a0483764SConrad Meyer #endif
42a0483764SConrad Meyer 
43a0483764SConrad Meyer 
44a0483764SConrad Meyer /*_*******************************************************
45a0483764SConrad Meyer *  Memory operations
46a0483764SConrad Meyer **********************************************************/
47a0483764SConrad Meyer static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
48a0483764SConrad Meyer 
49a0483764SConrad Meyer 
50a0483764SConrad Meyer /*-*************************************************************
51a0483764SConrad Meyer  *   Block decoding
52a0483764SConrad Meyer  ***************************************************************/
53a0483764SConrad Meyer 
54a0483764SConrad Meyer /*! ZSTD_getcBlockSize() :
55a0483764SConrad Meyer  *  Provides the size of compressed block from block header `src` */
56a0483764SConrad Meyer size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57a0483764SConrad Meyer                           blockProperties_t* bpPtr)
58a0483764SConrad Meyer {
59*37f1f268SConrad Meyer     RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
602b9c00cbSConrad Meyer 
61a0483764SConrad Meyer     {   U32 const cBlockHeader = MEM_readLE24(src);
62a0483764SConrad Meyer         U32 const cSize = cBlockHeader >> 3;
63a0483764SConrad Meyer         bpPtr->lastBlock = cBlockHeader & 1;
64a0483764SConrad Meyer         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65a0483764SConrad Meyer         bpPtr->origSize = cSize;   /* only useful for RLE */
66a0483764SConrad Meyer         if (bpPtr->blockType == bt_rle) return 1;
67*37f1f268SConrad Meyer         RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68a0483764SConrad Meyer         return cSize;
69a0483764SConrad Meyer     }
70a0483764SConrad Meyer }
71a0483764SConrad Meyer 
72a0483764SConrad Meyer 
73a0483764SConrad Meyer /* Hidden declaration for fullbench */
74a0483764SConrad Meyer size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
75a0483764SConrad Meyer                           const void* src, size_t srcSize);
76a0483764SConrad Meyer /*! ZSTD_decodeLiteralsBlock() :
77a0483764SConrad Meyer  * @return : nb of bytes read from src (< srcSize )
78a0483764SConrad Meyer  *  note : symbol not declared but exposed for fullbench */
79a0483764SConrad Meyer size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
80a0483764SConrad Meyer                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
81a0483764SConrad Meyer {
829cbefe25SConrad Meyer     DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
83*37f1f268SConrad Meyer     RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
84a0483764SConrad Meyer 
85a0483764SConrad Meyer     {   const BYTE* const istart = (const BYTE*) src;
86a0483764SConrad Meyer         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
87a0483764SConrad Meyer 
88a0483764SConrad Meyer         switch(litEncType)
89a0483764SConrad Meyer         {
90a0483764SConrad Meyer         case set_repeat:
919cbefe25SConrad Meyer             DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
92*37f1f268SConrad Meyer             RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
93a0483764SConrad Meyer             /* fall-through */
94a0483764SConrad Meyer 
95a0483764SConrad Meyer         case set_compressed:
962b9c00cbSConrad Meyer             RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
97a0483764SConrad Meyer             {   size_t lhSize, litSize, litCSize;
98a0483764SConrad Meyer                 U32 singleStream=0;
99a0483764SConrad Meyer                 U32 const lhlCode = (istart[0] >> 2) & 3;
100a0483764SConrad Meyer                 U32 const lhc = MEM_readLE32(istart);
101a0483764SConrad Meyer                 size_t hufSuccess;
102a0483764SConrad Meyer                 switch(lhlCode)
103a0483764SConrad Meyer                 {
104a0483764SConrad Meyer                 case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
105a0483764SConrad Meyer                     /* 2 - 2 - 10 - 10 */
106a0483764SConrad Meyer                     singleStream = !lhlCode;
107a0483764SConrad Meyer                     lhSize = 3;
108a0483764SConrad Meyer                     litSize  = (lhc >> 4) & 0x3FF;
109a0483764SConrad Meyer                     litCSize = (lhc >> 14) & 0x3FF;
110a0483764SConrad Meyer                     break;
111a0483764SConrad Meyer                 case 2:
112a0483764SConrad Meyer                     /* 2 - 2 - 14 - 14 */
113a0483764SConrad Meyer                     lhSize = 4;
114a0483764SConrad Meyer                     litSize  = (lhc >> 4) & 0x3FFF;
115a0483764SConrad Meyer                     litCSize = lhc >> 18;
116a0483764SConrad Meyer                     break;
117a0483764SConrad Meyer                 case 3:
118a0483764SConrad Meyer                     /* 2 - 2 - 18 - 18 */
119a0483764SConrad Meyer                     lhSize = 5;
120a0483764SConrad Meyer                     litSize  = (lhc >> 4) & 0x3FFFF;
1219cbefe25SConrad Meyer                     litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
122a0483764SConrad Meyer                     break;
123a0483764SConrad Meyer                 }
124*37f1f268SConrad Meyer                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
125*37f1f268SConrad Meyer                 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
126a0483764SConrad Meyer 
127a0483764SConrad Meyer                 /* prefetch huffman table if cold */
128a0483764SConrad Meyer                 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
129a0483764SConrad Meyer                     PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
130a0483764SConrad Meyer                 }
131a0483764SConrad Meyer 
132a0483764SConrad Meyer                 if (litEncType==set_repeat) {
133a0483764SConrad Meyer                     if (singleStream) {
134a0483764SConrad Meyer                         hufSuccess = HUF_decompress1X_usingDTable_bmi2(
135a0483764SConrad Meyer                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
136a0483764SConrad Meyer                             dctx->HUFptr, dctx->bmi2);
137a0483764SConrad Meyer                     } else {
138a0483764SConrad Meyer                         hufSuccess = HUF_decompress4X_usingDTable_bmi2(
139a0483764SConrad Meyer                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
140a0483764SConrad Meyer                             dctx->HUFptr, dctx->bmi2);
141a0483764SConrad Meyer                     }
142a0483764SConrad Meyer                 } else {
143a0483764SConrad Meyer                     if (singleStream) {
144a0483764SConrad Meyer #if defined(HUF_FORCE_DECOMPRESS_X2)
145a0483764SConrad Meyer                         hufSuccess = HUF_decompress1X_DCtx_wksp(
146a0483764SConrad Meyer                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
147a0483764SConrad Meyer                             istart+lhSize, litCSize, dctx->workspace,
148a0483764SConrad Meyer                             sizeof(dctx->workspace));
149a0483764SConrad Meyer #else
150a0483764SConrad Meyer                         hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
151a0483764SConrad Meyer                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
152a0483764SConrad Meyer                             istart+lhSize, litCSize, dctx->workspace,
153a0483764SConrad Meyer                             sizeof(dctx->workspace), dctx->bmi2);
154a0483764SConrad Meyer #endif
155a0483764SConrad Meyer                     } else {
156a0483764SConrad Meyer                         hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
157a0483764SConrad Meyer                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
158a0483764SConrad Meyer                             istart+lhSize, litCSize, dctx->workspace,
159a0483764SConrad Meyer                             sizeof(dctx->workspace), dctx->bmi2);
160a0483764SConrad Meyer                     }
161a0483764SConrad Meyer                 }
162a0483764SConrad Meyer 
163*37f1f268SConrad Meyer                 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
164a0483764SConrad Meyer 
165a0483764SConrad Meyer                 dctx->litPtr = dctx->litBuffer;
166a0483764SConrad Meyer                 dctx->litSize = litSize;
167a0483764SConrad Meyer                 dctx->litEntropy = 1;
168a0483764SConrad Meyer                 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
169a0483764SConrad Meyer                 memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
170a0483764SConrad Meyer                 return litCSize + lhSize;
171a0483764SConrad Meyer             }
172a0483764SConrad Meyer 
173a0483764SConrad Meyer         case set_basic:
174a0483764SConrad Meyer             {   size_t litSize, lhSize;
175a0483764SConrad Meyer                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
176a0483764SConrad Meyer                 switch(lhlCode)
177a0483764SConrad Meyer                 {
178a0483764SConrad Meyer                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
179a0483764SConrad Meyer                     lhSize = 1;
180a0483764SConrad Meyer                     litSize = istart[0] >> 3;
181a0483764SConrad Meyer                     break;
182a0483764SConrad Meyer                 case 1:
183a0483764SConrad Meyer                     lhSize = 2;
184a0483764SConrad Meyer                     litSize = MEM_readLE16(istart) >> 4;
185a0483764SConrad Meyer                     break;
186a0483764SConrad Meyer                 case 3:
187a0483764SConrad Meyer                     lhSize = 3;
188a0483764SConrad Meyer                     litSize = MEM_readLE24(istart) >> 4;
189a0483764SConrad Meyer                     break;
190a0483764SConrad Meyer                 }
191a0483764SConrad Meyer 
192a0483764SConrad Meyer                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
193*37f1f268SConrad Meyer                     RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
194a0483764SConrad Meyer                     memcpy(dctx->litBuffer, istart+lhSize, litSize);
195a0483764SConrad Meyer                     dctx->litPtr = dctx->litBuffer;
196a0483764SConrad Meyer                     dctx->litSize = litSize;
197a0483764SConrad Meyer                     memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
198a0483764SConrad Meyer                     return lhSize+litSize;
199a0483764SConrad Meyer                 }
200a0483764SConrad Meyer                 /* direct reference into compressed stream */
201a0483764SConrad Meyer                 dctx->litPtr = istart+lhSize;
202a0483764SConrad Meyer                 dctx->litSize = litSize;
203a0483764SConrad Meyer                 return lhSize+litSize;
204a0483764SConrad Meyer             }
205a0483764SConrad Meyer 
206a0483764SConrad Meyer         case set_rle:
207a0483764SConrad Meyer             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
208a0483764SConrad Meyer                 size_t litSize, lhSize;
209a0483764SConrad Meyer                 switch(lhlCode)
210a0483764SConrad Meyer                 {
211a0483764SConrad Meyer                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
212a0483764SConrad Meyer                     lhSize = 1;
213a0483764SConrad Meyer                     litSize = istart[0] >> 3;
214a0483764SConrad Meyer                     break;
215a0483764SConrad Meyer                 case 1:
216a0483764SConrad Meyer                     lhSize = 2;
217a0483764SConrad Meyer                     litSize = MEM_readLE16(istart) >> 4;
218a0483764SConrad Meyer                     break;
219a0483764SConrad Meyer                 case 3:
220a0483764SConrad Meyer                     lhSize = 3;
221a0483764SConrad Meyer                     litSize = MEM_readLE24(istart) >> 4;
2222b9c00cbSConrad Meyer                     RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
223a0483764SConrad Meyer                     break;
224a0483764SConrad Meyer                 }
225*37f1f268SConrad Meyer                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
226a0483764SConrad Meyer                 memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
227a0483764SConrad Meyer                 dctx->litPtr = dctx->litBuffer;
228a0483764SConrad Meyer                 dctx->litSize = litSize;
229a0483764SConrad Meyer                 return lhSize+1;
230a0483764SConrad Meyer             }
231a0483764SConrad Meyer         default:
2322b9c00cbSConrad Meyer             RETURN_ERROR(corruption_detected, "impossible");
233a0483764SConrad Meyer         }
234a0483764SConrad Meyer     }
235a0483764SConrad Meyer }
236a0483764SConrad Meyer 
237a0483764SConrad Meyer /* Default FSE distribution tables.
238a0483764SConrad Meyer  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
239a0483764SConrad Meyer  * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
240a0483764SConrad Meyer  * They were generated programmatically with following method :
241a0483764SConrad Meyer  * - start from default distributions, present in /lib/common/zstd_internal.h
242a0483764SConrad Meyer  * - generate tables normally, using ZSTD_buildFSETable()
243a0483764SConrad Meyer  * - printout the content of tables
244a0483764SConrad Meyer  * - pretify output, report below, test with fuzzer to ensure it's correct */
245a0483764SConrad Meyer 
246a0483764SConrad Meyer /* Default FSE distribution table for Literal Lengths */
247a0483764SConrad Meyer static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
248a0483764SConrad Meyer      {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
249a0483764SConrad Meyer      /* nextState, nbAddBits, nbBits, baseVal */
250a0483764SConrad Meyer      {  0,  0,  4,    0},  { 16,  0,  4,    0},
251a0483764SConrad Meyer      { 32,  0,  5,    1},  {  0,  0,  5,    3},
252a0483764SConrad Meyer      {  0,  0,  5,    4},  {  0,  0,  5,    6},
253a0483764SConrad Meyer      {  0,  0,  5,    7},  {  0,  0,  5,    9},
254a0483764SConrad Meyer      {  0,  0,  5,   10},  {  0,  0,  5,   12},
255a0483764SConrad Meyer      {  0,  0,  6,   14},  {  0,  1,  5,   16},
256a0483764SConrad Meyer      {  0,  1,  5,   20},  {  0,  1,  5,   22},
257a0483764SConrad Meyer      {  0,  2,  5,   28},  {  0,  3,  5,   32},
258a0483764SConrad Meyer      {  0,  4,  5,   48},  { 32,  6,  5,   64},
259a0483764SConrad Meyer      {  0,  7,  5,  128},  {  0,  8,  6,  256},
260a0483764SConrad Meyer      {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
261a0483764SConrad Meyer      { 32,  0,  4,    0},  {  0,  0,  4,    1},
262a0483764SConrad Meyer      {  0,  0,  5,    2},  { 32,  0,  5,    4},
263a0483764SConrad Meyer      {  0,  0,  5,    5},  { 32,  0,  5,    7},
264a0483764SConrad Meyer      {  0,  0,  5,    8},  { 32,  0,  5,   10},
265a0483764SConrad Meyer      {  0,  0,  5,   11},  {  0,  0,  6,   13},
266a0483764SConrad Meyer      { 32,  1,  5,   16},  {  0,  1,  5,   18},
267a0483764SConrad Meyer      { 32,  1,  5,   22},  {  0,  2,  5,   24},
268a0483764SConrad Meyer      { 32,  3,  5,   32},  {  0,  3,  5,   40},
269a0483764SConrad Meyer      {  0,  6,  4,   64},  { 16,  6,  4,   64},
270a0483764SConrad Meyer      { 32,  7,  5,  128},  {  0,  9,  6,  512},
271a0483764SConrad Meyer      {  0, 11,  6, 2048},  { 48,  0,  4,    0},
272a0483764SConrad Meyer      { 16,  0,  4,    1},  { 32,  0,  5,    2},
273a0483764SConrad Meyer      { 32,  0,  5,    3},  { 32,  0,  5,    5},
274a0483764SConrad Meyer      { 32,  0,  5,    6},  { 32,  0,  5,    8},
275a0483764SConrad Meyer      { 32,  0,  5,    9},  { 32,  0,  5,   11},
276a0483764SConrad Meyer      { 32,  0,  5,   12},  {  0,  0,  6,   15},
277a0483764SConrad Meyer      { 32,  1,  5,   18},  { 32,  1,  5,   20},
278a0483764SConrad Meyer      { 32,  2,  5,   24},  { 32,  2,  5,   28},
279a0483764SConrad Meyer      { 32,  3,  5,   40},  { 32,  4,  5,   48},
280a0483764SConrad Meyer      {  0, 16,  6,65536},  {  0, 15,  6,32768},
281a0483764SConrad Meyer      {  0, 14,  6,16384},  {  0, 13,  6, 8192},
282a0483764SConrad Meyer };   /* LL_defaultDTable */
283a0483764SConrad Meyer 
284a0483764SConrad Meyer /* Default FSE distribution table for Offset Codes */
285a0483764SConrad Meyer static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
286a0483764SConrad Meyer     {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
287a0483764SConrad Meyer     /* nextState, nbAddBits, nbBits, baseVal */
288a0483764SConrad Meyer     {  0,  0,  5,    0},     {  0,  6,  4,   61},
289a0483764SConrad Meyer     {  0,  9,  5,  509},     {  0, 15,  5,32765},
290a0483764SConrad Meyer     {  0, 21,  5,2097149},   {  0,  3,  5,    5},
291a0483764SConrad Meyer     {  0,  7,  4,  125},     {  0, 12,  5, 4093},
292a0483764SConrad Meyer     {  0, 18,  5,262141},    {  0, 23,  5,8388605},
293a0483764SConrad Meyer     {  0,  5,  5,   29},     {  0,  8,  4,  253},
294a0483764SConrad Meyer     {  0, 14,  5,16381},     {  0, 20,  5,1048573},
295a0483764SConrad Meyer     {  0,  2,  5,    1},     { 16,  7,  4,  125},
296a0483764SConrad Meyer     {  0, 11,  5, 2045},     {  0, 17,  5,131069},
297a0483764SConrad Meyer     {  0, 22,  5,4194301},   {  0,  4,  5,   13},
298a0483764SConrad Meyer     { 16,  8,  4,  253},     {  0, 13,  5, 8189},
299a0483764SConrad Meyer     {  0, 19,  5,524285},    {  0,  1,  5,    1},
300a0483764SConrad Meyer     { 16,  6,  4,   61},     {  0, 10,  5, 1021},
301a0483764SConrad Meyer     {  0, 16,  5,65533},     {  0, 28,  5,268435453},
302a0483764SConrad Meyer     {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
303a0483764SConrad Meyer     {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
304a0483764SConrad Meyer };   /* OF_defaultDTable */
305a0483764SConrad Meyer 
306a0483764SConrad Meyer 
307a0483764SConrad Meyer /* Default FSE distribution table for Match Lengths */
308a0483764SConrad Meyer static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
309a0483764SConrad Meyer     {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
310a0483764SConrad Meyer     /* nextState, nbAddBits, nbBits, baseVal */
311a0483764SConrad Meyer     {  0,  0,  6,    3},  {  0,  0,  4,    4},
312a0483764SConrad Meyer     { 32,  0,  5,    5},  {  0,  0,  5,    6},
313a0483764SConrad Meyer     {  0,  0,  5,    8},  {  0,  0,  5,    9},
314a0483764SConrad Meyer     {  0,  0,  5,   11},  {  0,  0,  6,   13},
315a0483764SConrad Meyer     {  0,  0,  6,   16},  {  0,  0,  6,   19},
316a0483764SConrad Meyer     {  0,  0,  6,   22},  {  0,  0,  6,   25},
317a0483764SConrad Meyer     {  0,  0,  6,   28},  {  0,  0,  6,   31},
318a0483764SConrad Meyer     {  0,  0,  6,   34},  {  0,  1,  6,   37},
319a0483764SConrad Meyer     {  0,  1,  6,   41},  {  0,  2,  6,   47},
320a0483764SConrad Meyer     {  0,  3,  6,   59},  {  0,  4,  6,   83},
321a0483764SConrad Meyer     {  0,  7,  6,  131},  {  0,  9,  6,  515},
322a0483764SConrad Meyer     { 16,  0,  4,    4},  {  0,  0,  4,    5},
323a0483764SConrad Meyer     { 32,  0,  5,    6},  {  0,  0,  5,    7},
324a0483764SConrad Meyer     { 32,  0,  5,    9},  {  0,  0,  5,   10},
325a0483764SConrad Meyer     {  0,  0,  6,   12},  {  0,  0,  6,   15},
326a0483764SConrad Meyer     {  0,  0,  6,   18},  {  0,  0,  6,   21},
327a0483764SConrad Meyer     {  0,  0,  6,   24},  {  0,  0,  6,   27},
328a0483764SConrad Meyer     {  0,  0,  6,   30},  {  0,  0,  6,   33},
329a0483764SConrad Meyer     {  0,  1,  6,   35},  {  0,  1,  6,   39},
330a0483764SConrad Meyer     {  0,  2,  6,   43},  {  0,  3,  6,   51},
331a0483764SConrad Meyer     {  0,  4,  6,   67},  {  0,  5,  6,   99},
332a0483764SConrad Meyer     {  0,  8,  6,  259},  { 32,  0,  4,    4},
333a0483764SConrad Meyer     { 48,  0,  4,    4},  { 16,  0,  4,    5},
334a0483764SConrad Meyer     { 32,  0,  5,    7},  { 32,  0,  5,    8},
335a0483764SConrad Meyer     { 32,  0,  5,   10},  { 32,  0,  5,   11},
336a0483764SConrad Meyer     {  0,  0,  6,   14},  {  0,  0,  6,   17},
337a0483764SConrad Meyer     {  0,  0,  6,   20},  {  0,  0,  6,   23},
338a0483764SConrad Meyer     {  0,  0,  6,   26},  {  0,  0,  6,   29},
339a0483764SConrad Meyer     {  0,  0,  6,   32},  {  0, 16,  6,65539},
340a0483764SConrad Meyer     {  0, 15,  6,32771},  {  0, 14,  6,16387},
341a0483764SConrad Meyer     {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
342a0483764SConrad Meyer     {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
343a0483764SConrad Meyer };   /* ML_defaultDTable */
344a0483764SConrad Meyer 
345a0483764SConrad Meyer 
346a0483764SConrad Meyer static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
347a0483764SConrad Meyer {
348a0483764SConrad Meyer     void* ptr = dt;
349a0483764SConrad Meyer     ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
350a0483764SConrad Meyer     ZSTD_seqSymbol* const cell = dt + 1;
351a0483764SConrad Meyer 
352a0483764SConrad Meyer     DTableH->tableLog = 0;
353a0483764SConrad Meyer     DTableH->fastMode = 0;
354a0483764SConrad Meyer 
355a0483764SConrad Meyer     cell->nbBits = 0;
356a0483764SConrad Meyer     cell->nextState = 0;
357a0483764SConrad Meyer     assert(nbAddBits < 255);
358a0483764SConrad Meyer     cell->nbAdditionalBits = (BYTE)nbAddBits;
359a0483764SConrad Meyer     cell->baseValue = baseValue;
360a0483764SConrad Meyer }
361a0483764SConrad Meyer 
362a0483764SConrad Meyer 
363a0483764SConrad Meyer /* ZSTD_buildFSETable() :
364a0483764SConrad Meyer  * generate FSE decoding table for one symbol (ll, ml or off)
365a0483764SConrad Meyer  * cannot fail if input is valid =>
366a0483764SConrad Meyer  * all inputs are presumed validated at this stage */
367a0483764SConrad Meyer void
368a0483764SConrad Meyer ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
369a0483764SConrad Meyer             const short* normalizedCounter, unsigned maxSymbolValue,
370a0483764SConrad Meyer             const U32* baseValue, const U32* nbAdditionalBits,
371a0483764SConrad Meyer             unsigned tableLog)
372a0483764SConrad Meyer {
373a0483764SConrad Meyer     ZSTD_seqSymbol* const tableDecode = dt+1;
374a0483764SConrad Meyer     U16 symbolNext[MaxSeq+1];
375a0483764SConrad Meyer 
376a0483764SConrad Meyer     U32 const maxSV1 = maxSymbolValue + 1;
377a0483764SConrad Meyer     U32 const tableSize = 1 << tableLog;
378a0483764SConrad Meyer     U32 highThreshold = tableSize-1;
379a0483764SConrad Meyer 
380a0483764SConrad Meyer     /* Sanity Checks */
381a0483764SConrad Meyer     assert(maxSymbolValue <= MaxSeq);
382a0483764SConrad Meyer     assert(tableLog <= MaxFSELog);
383a0483764SConrad Meyer 
384a0483764SConrad Meyer     /* Init, lay down lowprob symbols */
385a0483764SConrad Meyer     {   ZSTD_seqSymbol_header DTableH;
386a0483764SConrad Meyer         DTableH.tableLog = tableLog;
387a0483764SConrad Meyer         DTableH.fastMode = 1;
388a0483764SConrad Meyer         {   S16 const largeLimit= (S16)(1 << (tableLog-1));
389a0483764SConrad Meyer             U32 s;
390a0483764SConrad Meyer             for (s=0; s<maxSV1; s++) {
391a0483764SConrad Meyer                 if (normalizedCounter[s]==-1) {
392a0483764SConrad Meyer                     tableDecode[highThreshold--].baseValue = s;
393a0483764SConrad Meyer                     symbolNext[s] = 1;
394a0483764SConrad Meyer                 } else {
395a0483764SConrad Meyer                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
3969cbefe25SConrad Meyer                     assert(normalizedCounter[s]>=0);
3979cbefe25SConrad Meyer                     symbolNext[s] = (U16)normalizedCounter[s];
398a0483764SConrad Meyer         }   }   }
399a0483764SConrad Meyer         memcpy(dt, &DTableH, sizeof(DTableH));
400a0483764SConrad Meyer     }
401a0483764SConrad Meyer 
402a0483764SConrad Meyer     /* Spread symbols */
403a0483764SConrad Meyer     {   U32 const tableMask = tableSize-1;
404a0483764SConrad Meyer         U32 const step = FSE_TABLESTEP(tableSize);
405a0483764SConrad Meyer         U32 s, position = 0;
406a0483764SConrad Meyer         for (s=0; s<maxSV1; s++) {
407a0483764SConrad Meyer             int i;
408a0483764SConrad Meyer             for (i=0; i<normalizedCounter[s]; i++) {
409a0483764SConrad Meyer                 tableDecode[position].baseValue = s;
410a0483764SConrad Meyer                 position = (position + step) & tableMask;
411a0483764SConrad Meyer                 while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
412a0483764SConrad Meyer         }   }
413a0483764SConrad Meyer         assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
414a0483764SConrad Meyer     }
415a0483764SConrad Meyer 
416a0483764SConrad Meyer     /* Build Decoding table */
417a0483764SConrad Meyer     {   U32 u;
418a0483764SConrad Meyer         for (u=0; u<tableSize; u++) {
419a0483764SConrad Meyer             U32 const symbol = tableDecode[u].baseValue;
420a0483764SConrad Meyer             U32 const nextState = symbolNext[symbol]++;
421a0483764SConrad Meyer             tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
422a0483764SConrad Meyer             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
423a0483764SConrad Meyer             assert(nbAdditionalBits[symbol] < 255);
424a0483764SConrad Meyer             tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
425a0483764SConrad Meyer             tableDecode[u].baseValue = baseValue[symbol];
426a0483764SConrad Meyer     }   }
427a0483764SConrad Meyer }
428a0483764SConrad Meyer 
429a0483764SConrad Meyer 
430a0483764SConrad Meyer /*! ZSTD_buildSeqTable() :
431a0483764SConrad Meyer  * @return : nb bytes read from src,
432a0483764SConrad Meyer  *           or an error code if it fails */
433a0483764SConrad Meyer static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
434a0483764SConrad Meyer                                  symbolEncodingType_e type, unsigned max, U32 maxLog,
435a0483764SConrad Meyer                                  const void* src, size_t srcSize,
436a0483764SConrad Meyer                                  const U32* baseValue, const U32* nbAdditionalBits,
437a0483764SConrad Meyer                                  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
438a0483764SConrad Meyer                                  int ddictIsCold, int nbSeq)
439a0483764SConrad Meyer {
440a0483764SConrad Meyer     switch(type)
441a0483764SConrad Meyer     {
442a0483764SConrad Meyer     case set_rle :
443*37f1f268SConrad Meyer         RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
444*37f1f268SConrad Meyer         RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
445a0483764SConrad Meyer         {   U32 const symbol = *(const BYTE*)src;
446a0483764SConrad Meyer             U32 const baseline = baseValue[symbol];
447a0483764SConrad Meyer             U32 const nbBits = nbAdditionalBits[symbol];
448a0483764SConrad Meyer             ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
449a0483764SConrad Meyer         }
450a0483764SConrad Meyer         *DTablePtr = DTableSpace;
451a0483764SConrad Meyer         return 1;
452a0483764SConrad Meyer     case set_basic :
453a0483764SConrad Meyer         *DTablePtr = defaultTable;
454a0483764SConrad Meyer         return 0;
455a0483764SConrad Meyer     case set_repeat:
456*37f1f268SConrad Meyer         RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
457a0483764SConrad Meyer         /* prefetch FSE table if used */
458a0483764SConrad Meyer         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
459a0483764SConrad Meyer             const void* const pStart = *DTablePtr;
460a0483764SConrad Meyer             size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
461a0483764SConrad Meyer             PREFETCH_AREA(pStart, pSize);
462a0483764SConrad Meyer         }
463a0483764SConrad Meyer         return 0;
464a0483764SConrad Meyer     case set_compressed :
465a0483764SConrad Meyer         {   unsigned tableLog;
466a0483764SConrad Meyer             S16 norm[MaxSeq+1];
467a0483764SConrad Meyer             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
468*37f1f268SConrad Meyer             RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
469*37f1f268SConrad Meyer             RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
470a0483764SConrad Meyer             ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
471a0483764SConrad Meyer             *DTablePtr = DTableSpace;
472a0483764SConrad Meyer             return headerSize;
473a0483764SConrad Meyer         }
4742b9c00cbSConrad Meyer     default :
475a0483764SConrad Meyer         assert(0);
4762b9c00cbSConrad Meyer         RETURN_ERROR(GENERIC, "impossible");
477a0483764SConrad Meyer     }
478a0483764SConrad Meyer }
479a0483764SConrad Meyer 
480a0483764SConrad Meyer size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
481a0483764SConrad Meyer                              const void* src, size_t srcSize)
482a0483764SConrad Meyer {
483a0483764SConrad Meyer     const BYTE* const istart = (const BYTE* const)src;
484a0483764SConrad Meyer     const BYTE* const iend = istart + srcSize;
485a0483764SConrad Meyer     const BYTE* ip = istart;
486a0483764SConrad Meyer     int nbSeq;
487a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
488a0483764SConrad Meyer 
489a0483764SConrad Meyer     /* check */
490*37f1f268SConrad Meyer     RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
491a0483764SConrad Meyer 
492a0483764SConrad Meyer     /* SeqHead */
493a0483764SConrad Meyer     nbSeq = *ip++;
494a0483764SConrad Meyer     if (!nbSeq) {
495a0483764SConrad Meyer         *nbSeqPtr=0;
496*37f1f268SConrad Meyer         RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
497a0483764SConrad Meyer         return 1;
498a0483764SConrad Meyer     }
499a0483764SConrad Meyer     if (nbSeq > 0x7F) {
500a0483764SConrad Meyer         if (nbSeq == 0xFF) {
501*37f1f268SConrad Meyer             RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
502a0483764SConrad Meyer             nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
503a0483764SConrad Meyer         } else {
504*37f1f268SConrad Meyer             RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
505a0483764SConrad Meyer             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
506a0483764SConrad Meyer         }
507a0483764SConrad Meyer     }
508a0483764SConrad Meyer     *nbSeqPtr = nbSeq;
509a0483764SConrad Meyer 
510a0483764SConrad Meyer     /* FSE table descriptors */
511*37f1f268SConrad Meyer     RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
512a0483764SConrad Meyer     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
513a0483764SConrad Meyer         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
514a0483764SConrad Meyer         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
515a0483764SConrad Meyer         ip++;
516a0483764SConrad Meyer 
517a0483764SConrad Meyer         /* Build DTables */
518a0483764SConrad Meyer         {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
519a0483764SConrad Meyer                                                       LLtype, MaxLL, LLFSELog,
520a0483764SConrad Meyer                                                       ip, iend-ip,
521a0483764SConrad Meyer                                                       LL_base, LL_bits,
522a0483764SConrad Meyer                                                       LL_defaultDTable, dctx->fseEntropy,
523a0483764SConrad Meyer                                                       dctx->ddictIsCold, nbSeq);
524*37f1f268SConrad Meyer             RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
525a0483764SConrad Meyer             ip += llhSize;
526a0483764SConrad Meyer         }
527a0483764SConrad Meyer 
528a0483764SConrad Meyer         {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
529a0483764SConrad Meyer                                                       OFtype, MaxOff, OffFSELog,
530a0483764SConrad Meyer                                                       ip, iend-ip,
531a0483764SConrad Meyer                                                       OF_base, OF_bits,
532a0483764SConrad Meyer                                                       OF_defaultDTable, dctx->fseEntropy,
533a0483764SConrad Meyer                                                       dctx->ddictIsCold, nbSeq);
534*37f1f268SConrad Meyer             RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
535a0483764SConrad Meyer             ip += ofhSize;
536a0483764SConrad Meyer         }
537a0483764SConrad Meyer 
538a0483764SConrad Meyer         {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
539a0483764SConrad Meyer                                                       MLtype, MaxML, MLFSELog,
540a0483764SConrad Meyer                                                       ip, iend-ip,
541a0483764SConrad Meyer                                                       ML_base, ML_bits,
542a0483764SConrad Meyer                                                       ML_defaultDTable, dctx->fseEntropy,
543a0483764SConrad Meyer                                                       dctx->ddictIsCold, nbSeq);
544*37f1f268SConrad Meyer             RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
545a0483764SConrad Meyer             ip += mlhSize;
546a0483764SConrad Meyer         }
547a0483764SConrad Meyer     }
548a0483764SConrad Meyer 
549a0483764SConrad Meyer     return ip-istart;
550a0483764SConrad Meyer }
551a0483764SConrad Meyer 
552a0483764SConrad Meyer 
553a0483764SConrad Meyer typedef struct {
554a0483764SConrad Meyer     size_t litLength;
555a0483764SConrad Meyer     size_t matchLength;
556a0483764SConrad Meyer     size_t offset;
557a0483764SConrad Meyer     const BYTE* match;
558a0483764SConrad Meyer } seq_t;
559a0483764SConrad Meyer 
560a0483764SConrad Meyer typedef struct {
561a0483764SConrad Meyer     size_t state;
562a0483764SConrad Meyer     const ZSTD_seqSymbol* table;
563a0483764SConrad Meyer } ZSTD_fseState;
564a0483764SConrad Meyer 
565a0483764SConrad Meyer typedef struct {
566a0483764SConrad Meyer     BIT_DStream_t DStream;
567a0483764SConrad Meyer     ZSTD_fseState stateLL;
568a0483764SConrad Meyer     ZSTD_fseState stateOffb;
569a0483764SConrad Meyer     ZSTD_fseState stateML;
570a0483764SConrad Meyer     size_t prevOffset[ZSTD_REP_NUM];
571a0483764SConrad Meyer     const BYTE* prefixStart;
572a0483764SConrad Meyer     const BYTE* dictEnd;
573a0483764SConrad Meyer     size_t pos;
574a0483764SConrad Meyer } seqState_t;
575a0483764SConrad Meyer 
5769cbefe25SConrad Meyer /*! ZSTD_overlapCopy8() :
5779cbefe25SConrad Meyer  *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
5789cbefe25SConrad Meyer  *  If the offset is < 8 then the offset is spread to at least 8 bytes.
5799cbefe25SConrad Meyer  *
5809cbefe25SConrad Meyer  *  Precondition: *ip <= *op
5819cbefe25SConrad Meyer  *  Postcondition: *op - *op >= 8
5829cbefe25SConrad Meyer  */
583*37f1f268SConrad Meyer HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
5849cbefe25SConrad Meyer     assert(*ip <= *op);
5859cbefe25SConrad Meyer     if (offset < 8) {
5869cbefe25SConrad Meyer         /* close range match, overlap */
5879cbefe25SConrad Meyer         static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
5889cbefe25SConrad Meyer         static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
5899cbefe25SConrad Meyer         int const sub2 = dec64table[offset];
5909cbefe25SConrad Meyer         (*op)[0] = (*ip)[0];
5919cbefe25SConrad Meyer         (*op)[1] = (*ip)[1];
5929cbefe25SConrad Meyer         (*op)[2] = (*ip)[2];
5939cbefe25SConrad Meyer         (*op)[3] = (*ip)[3];
5949cbefe25SConrad Meyer         *ip += dec32table[offset];
5959cbefe25SConrad Meyer         ZSTD_copy4(*op+4, *ip);
5969cbefe25SConrad Meyer         *ip -= sub2;
5979cbefe25SConrad Meyer     } else {
5989cbefe25SConrad Meyer         ZSTD_copy8(*op, *ip);
5999cbefe25SConrad Meyer     }
6009cbefe25SConrad Meyer     *ip += 8;
6019cbefe25SConrad Meyer     *op += 8;
6029cbefe25SConrad Meyer     assert(*op - *ip >= 8);
6039cbefe25SConrad Meyer }
604a0483764SConrad Meyer 
6059cbefe25SConrad Meyer /*! ZSTD_safecopy() :
6069cbefe25SConrad Meyer  *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
6079cbefe25SConrad Meyer  *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
6089cbefe25SConrad Meyer  *  This function is only called in the uncommon case where the sequence is near the end of the block. It
6099cbefe25SConrad Meyer  *  should be fast for a single long sequence, but can be slow for several short sequences.
6109cbefe25SConrad Meyer  *
6119cbefe25SConrad Meyer  *  @param ovtype controls the overlap detection
6129cbefe25SConrad Meyer  *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
6139cbefe25SConrad Meyer  *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
6149cbefe25SConrad Meyer  *           The src buffer must be before the dst buffer.
6159cbefe25SConrad Meyer  */
6169cbefe25SConrad Meyer static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
6179cbefe25SConrad Meyer     ptrdiff_t const diff = op - ip;
6189cbefe25SConrad Meyer     BYTE* const oend = op + length;
6199cbefe25SConrad Meyer 
6209cbefe25SConrad Meyer     assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
6219cbefe25SConrad Meyer            (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
6229cbefe25SConrad Meyer 
6239cbefe25SConrad Meyer     if (length < 8) {
6249cbefe25SConrad Meyer         /* Handle short lengths. */
6259cbefe25SConrad Meyer         while (op < oend) *op++ = *ip++;
6269cbefe25SConrad Meyer         return;
6279cbefe25SConrad Meyer     }
6289cbefe25SConrad Meyer     if (ovtype == ZSTD_overlap_src_before_dst) {
6299cbefe25SConrad Meyer         /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
6309cbefe25SConrad Meyer         assert(length >= 8);
6319cbefe25SConrad Meyer         ZSTD_overlapCopy8(&op, &ip, diff);
6329cbefe25SConrad Meyer         assert(op - ip >= 8);
6339cbefe25SConrad Meyer         assert(op <= oend);
6349cbefe25SConrad Meyer     }
6359cbefe25SConrad Meyer 
6369cbefe25SConrad Meyer     if (oend <= oend_w) {
6379cbefe25SConrad Meyer         /* No risk of overwrite. */
6389cbefe25SConrad Meyer         ZSTD_wildcopy(op, ip, length, ovtype);
6399cbefe25SConrad Meyer         return;
6409cbefe25SConrad Meyer     }
6419cbefe25SConrad Meyer     if (op <= oend_w) {
6429cbefe25SConrad Meyer         /* Wildcopy until we get close to the end. */
6439cbefe25SConrad Meyer         assert(oend > oend_w);
6449cbefe25SConrad Meyer         ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
6459cbefe25SConrad Meyer         ip += oend_w - op;
6469cbefe25SConrad Meyer         op = oend_w;
6479cbefe25SConrad Meyer     }
6489cbefe25SConrad Meyer     /* Handle the leftovers. */
6499cbefe25SConrad Meyer     while (op < oend) *op++ = *ip++;
6509cbefe25SConrad Meyer }
6519cbefe25SConrad Meyer 
6529cbefe25SConrad Meyer /* ZSTD_execSequenceEnd():
6539cbefe25SConrad Meyer  * This version handles cases that are near the end of the output buffer. It requires
6549cbefe25SConrad Meyer  * more careful checks to make sure there is no overflow. By separating out these hard
6559cbefe25SConrad Meyer  * and unlikely cases, we can speed up the common cases.
6569cbefe25SConrad Meyer  *
6579cbefe25SConrad Meyer  * NOTE: This function needs to be fast for a single long sequence, but doesn't need
6589cbefe25SConrad Meyer  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
6599cbefe25SConrad Meyer  */
660a0483764SConrad Meyer FORCE_NOINLINE
6619cbefe25SConrad Meyer size_t ZSTD_execSequenceEnd(BYTE* op,
662a0483764SConrad Meyer                             BYTE* const oend, seq_t sequence,
663a0483764SConrad Meyer                             const BYTE** litPtr, const BYTE* const litLimit,
6649cbefe25SConrad Meyer                             const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
665a0483764SConrad Meyer {
666a0483764SConrad Meyer     BYTE* const oLitEnd = op + sequence.litLength;
667a0483764SConrad Meyer     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
668a0483764SConrad Meyer     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
669a0483764SConrad Meyer     const BYTE* match = oLitEnd - sequence.offset;
6709cbefe25SConrad Meyer     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
671a0483764SConrad Meyer 
672*37f1f268SConrad Meyer     /* bounds checks : careful of address space overflow in 32-bit mode */
673*37f1f268SConrad Meyer     RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
674*37f1f268SConrad Meyer     RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
675*37f1f268SConrad Meyer     assert(op < op + sequenceLength);
676*37f1f268SConrad Meyer     assert(oLitEnd < op + sequenceLength);
677a0483764SConrad Meyer 
678a0483764SConrad Meyer     /* copy literals */
6799cbefe25SConrad Meyer     ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
6809cbefe25SConrad Meyer     op = oLitEnd;
6819cbefe25SConrad Meyer     *litPtr = iLitEnd;
682a0483764SConrad Meyer 
683a0483764SConrad Meyer     /* copy Match */
6849cbefe25SConrad Meyer     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
685a0483764SConrad Meyer         /* offset beyond prefix */
686*37f1f268SConrad Meyer         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
6879cbefe25SConrad Meyer         match = dictEnd - (prefixStart-match);
688a0483764SConrad Meyer         if (match + sequence.matchLength <= dictEnd) {
689a0483764SConrad Meyer             memmove(oLitEnd, match, sequence.matchLength);
690a0483764SConrad Meyer             return sequenceLength;
691a0483764SConrad Meyer         }
692a0483764SConrad Meyer         /* span extDict & currentPrefixSegment */
693a0483764SConrad Meyer         {   size_t const length1 = dictEnd - match;
694a0483764SConrad Meyer             memmove(oLitEnd, match, length1);
695a0483764SConrad Meyer             op = oLitEnd + length1;
696a0483764SConrad Meyer             sequence.matchLength -= length1;
6979cbefe25SConrad Meyer             match = prefixStart;
698a0483764SConrad Meyer     }   }
6999cbefe25SConrad Meyer     ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
700a0483764SConrad Meyer     return sequenceLength;
701a0483764SConrad Meyer }
702a0483764SConrad Meyer 
703a0483764SConrad Meyer HINT_INLINE
704a0483764SConrad Meyer size_t ZSTD_execSequence(BYTE* op,
705a0483764SConrad Meyer                          BYTE* const oend, seq_t sequence,
706a0483764SConrad Meyer                          const BYTE** litPtr, const BYTE* const litLimit,
707a0483764SConrad Meyer                          const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
708a0483764SConrad Meyer {
709a0483764SConrad Meyer     BYTE* const oLitEnd = op + sequence.litLength;
710a0483764SConrad Meyer     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
711a0483764SConrad Meyer     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
712*37f1f268SConrad Meyer     BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
713a0483764SConrad Meyer     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
714a0483764SConrad Meyer     const BYTE* match = oLitEnd - sequence.offset;
715a0483764SConrad Meyer 
716*37f1f268SConrad Meyer     assert(op != NULL /* Precondition */);
717*37f1f268SConrad Meyer     assert(oend_w < oend /* No underflow */);
718*37f1f268SConrad Meyer     /* Handle edge cases in a slow path:
719*37f1f268SConrad Meyer      *   - Read beyond end of literals
720*37f1f268SConrad Meyer      *   - Match end is within WILDCOPY_OVERLIMIT of oend
721*37f1f268SConrad Meyer      *   - 32-bit mode and the match length overflows
722*37f1f268SConrad Meyer      */
723*37f1f268SConrad Meyer     if (UNLIKELY(
724*37f1f268SConrad Meyer             iLitEnd > litLimit ||
725*37f1f268SConrad Meyer             oMatchEnd > oend_w ||
726*37f1f268SConrad Meyer             (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
7279cbefe25SConrad Meyer         return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
728a0483764SConrad Meyer 
7299cbefe25SConrad Meyer     /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
730*37f1f268SConrad Meyer     assert(op <= oLitEnd /* No overflow */);
731*37f1f268SConrad Meyer     assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
732*37f1f268SConrad Meyer     assert(oMatchEnd <= oend /* No underflow */);
7339cbefe25SConrad Meyer     assert(iLitEnd <= litLimit /* Literal length is in bounds */);
7349cbefe25SConrad Meyer     assert(oLitEnd <= oend_w /* Can wildcopy literals */);
7359cbefe25SConrad Meyer     assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
7369cbefe25SConrad Meyer 
7379cbefe25SConrad Meyer     /* Copy Literals:
7389cbefe25SConrad Meyer      * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
7399cbefe25SConrad Meyer      * We likely don't need the full 32-byte wildcopy.
7409cbefe25SConrad Meyer      */
7419cbefe25SConrad Meyer     assert(WILDCOPY_OVERLENGTH >= 16);
7429cbefe25SConrad Meyer     ZSTD_copy16(op, (*litPtr));
743*37f1f268SConrad Meyer     if (UNLIKELY(sequence.litLength > 16)) {
7449cbefe25SConrad Meyer         ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
7459cbefe25SConrad Meyer     }
746a0483764SConrad Meyer     op = oLitEnd;
747a0483764SConrad Meyer     *litPtr = iLitEnd;   /* update for next sequence */
748a0483764SConrad Meyer 
7499cbefe25SConrad Meyer     /* Copy Match */
750a0483764SConrad Meyer     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
751a0483764SConrad Meyer         /* offset beyond prefix -> go into extDict */
752*37f1f268SConrad Meyer         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
753a0483764SConrad Meyer         match = dictEnd + (match - prefixStart);
754a0483764SConrad Meyer         if (match + sequence.matchLength <= dictEnd) {
755a0483764SConrad Meyer             memmove(oLitEnd, match, sequence.matchLength);
756a0483764SConrad Meyer             return sequenceLength;
757a0483764SConrad Meyer         }
758a0483764SConrad Meyer         /* span extDict & currentPrefixSegment */
759a0483764SConrad Meyer         {   size_t const length1 = dictEnd - match;
760a0483764SConrad Meyer             memmove(oLitEnd, match, length1);
761a0483764SConrad Meyer             op = oLitEnd + length1;
762a0483764SConrad Meyer             sequence.matchLength -= length1;
763a0483764SConrad Meyer             match = prefixStart;
764a0483764SConrad Meyer     }   }
7659cbefe25SConrad Meyer     /* Match within prefix of 1 or more bytes */
7669cbefe25SConrad Meyer     assert(op <= oMatchEnd);
7679cbefe25SConrad Meyer     assert(oMatchEnd <= oend_w);
7689cbefe25SConrad Meyer     assert(match >= prefixStart);
7699cbefe25SConrad Meyer     assert(sequence.matchLength >= 1);
770a0483764SConrad Meyer 
7719cbefe25SConrad Meyer     /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
7729cbefe25SConrad Meyer      * without overlap checking.
7739cbefe25SConrad Meyer      */
774*37f1f268SConrad Meyer     if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
7759cbefe25SConrad Meyer         /* We bet on a full wildcopy for matches, since we expect matches to be
7769cbefe25SConrad Meyer          * longer than literals (in general). In silesia, ~10% of matches are longer
7779cbefe25SConrad Meyer          * than 16 bytes.
7789cbefe25SConrad Meyer          */
7799cbefe25SConrad Meyer         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
780a0483764SConrad Meyer         return sequenceLength;
781a0483764SConrad Meyer     }
7829cbefe25SConrad Meyer     assert(sequence.offset < WILDCOPY_VECLEN);
783a0483764SConrad Meyer 
7849cbefe25SConrad Meyer     /* Copy 8 bytes and spread the offset to be >= 8. */
7859cbefe25SConrad Meyer     ZSTD_overlapCopy8(&op, &match, sequence.offset);
786a0483764SConrad Meyer 
7879cbefe25SConrad Meyer     /* If the match length is > 8 bytes, then continue with the wildcopy. */
7889cbefe25SConrad Meyer     if (sequence.matchLength > 8) {
7899cbefe25SConrad Meyer         assert(op < oMatchEnd);
7909cbefe25SConrad Meyer         ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
791a0483764SConrad Meyer     }
792a0483764SConrad Meyer     return sequenceLength;
793a0483764SConrad Meyer }
794a0483764SConrad Meyer 
795a0483764SConrad Meyer static void
796a0483764SConrad Meyer ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
797a0483764SConrad Meyer {
798a0483764SConrad Meyer     const void* ptr = dt;
799a0483764SConrad Meyer     const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
800a0483764SConrad Meyer     DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
801a0483764SConrad Meyer     DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
802a0483764SConrad Meyer                 (U32)DStatePtr->state, DTableH->tableLog);
803a0483764SConrad Meyer     BIT_reloadDStream(bitD);
804a0483764SConrad Meyer     DStatePtr->table = dt + 1;
805a0483764SConrad Meyer }
806a0483764SConrad Meyer 
807a0483764SConrad Meyer FORCE_INLINE_TEMPLATE void
808a0483764SConrad Meyer ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
809a0483764SConrad Meyer {
810a0483764SConrad Meyer     ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
811a0483764SConrad Meyer     U32 const nbBits = DInfo.nbBits;
812a0483764SConrad Meyer     size_t const lowBits = BIT_readBits(bitD, nbBits);
813a0483764SConrad Meyer     DStatePtr->state = DInfo.nextState + lowBits;
814a0483764SConrad Meyer }
815a0483764SConrad Meyer 
816*37f1f268SConrad Meyer FORCE_INLINE_TEMPLATE void
817*37f1f268SConrad Meyer ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
818*37f1f268SConrad Meyer {
819*37f1f268SConrad Meyer     U32 const nbBits = DInfo.nbBits;
820*37f1f268SConrad Meyer     size_t const lowBits = BIT_readBits(bitD, nbBits);
821*37f1f268SConrad Meyer     DStatePtr->state = DInfo.nextState + lowBits;
822*37f1f268SConrad Meyer }
823*37f1f268SConrad Meyer 
824a0483764SConrad Meyer /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
825a0483764SConrad Meyer  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
826a0483764SConrad Meyer  * bits before reloading. This value is the maximum number of bytes we read
8272b9c00cbSConrad Meyer  * after reloading when we are decoding long offsets.
828a0483764SConrad Meyer  */
829a0483764SConrad Meyer #define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
830a0483764SConrad Meyer     (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
831a0483764SConrad Meyer         ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
832a0483764SConrad Meyer         : 0)
833a0483764SConrad Meyer 
834a0483764SConrad Meyer typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
835*37f1f268SConrad Meyer typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
836a0483764SConrad Meyer 
837a0483764SConrad Meyer FORCE_INLINE_TEMPLATE seq_t
838*37f1f268SConrad Meyer ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
839a0483764SConrad Meyer {
840a0483764SConrad Meyer     seq_t seq;
841*37f1f268SConrad Meyer     ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
842*37f1f268SConrad Meyer     ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
843*37f1f268SConrad Meyer     ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
844*37f1f268SConrad Meyer     U32 const llBase = llDInfo.baseValue;
845*37f1f268SConrad Meyer     U32 const mlBase = mlDInfo.baseValue;
846*37f1f268SConrad Meyer     U32 const ofBase = ofDInfo.baseValue;
847*37f1f268SConrad Meyer     BYTE const llBits = llDInfo.nbAdditionalBits;
848*37f1f268SConrad Meyer     BYTE const mlBits = mlDInfo.nbAdditionalBits;
849*37f1f268SConrad Meyer     BYTE const ofBits = ofDInfo.nbAdditionalBits;
850*37f1f268SConrad Meyer     BYTE const totalBits = llBits+mlBits+ofBits;
851a0483764SConrad Meyer 
852a0483764SConrad Meyer     /* sequence */
853a0483764SConrad Meyer     {   size_t offset;
854*37f1f268SConrad Meyer         if (ofBits > 1) {
855a0483764SConrad Meyer             ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
856a0483764SConrad Meyer             ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
857a0483764SConrad Meyer             assert(ofBits <= MaxOff);
858a0483764SConrad Meyer             if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
859a0483764SConrad Meyer                 U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
860a0483764SConrad Meyer                 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
861a0483764SConrad Meyer                 BIT_reloadDStream(&seqState->DStream);
862a0483764SConrad Meyer                 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
863a0483764SConrad Meyer                 assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
864a0483764SConrad Meyer             } else {
865a0483764SConrad Meyer                 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
866a0483764SConrad Meyer                 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
867a0483764SConrad Meyer             }
868*37f1f268SConrad Meyer             seqState->prevOffset[2] = seqState->prevOffset[1];
869*37f1f268SConrad Meyer             seqState->prevOffset[1] = seqState->prevOffset[0];
870*37f1f268SConrad Meyer             seqState->prevOffset[0] = offset;
871*37f1f268SConrad Meyer         } else {
872*37f1f268SConrad Meyer             U32 const ll0 = (llBase == 0);
873*37f1f268SConrad Meyer             if (LIKELY((ofBits == 0))) {
874*37f1f268SConrad Meyer                 if (LIKELY(!ll0))
875*37f1f268SConrad Meyer                     offset = seqState->prevOffset[0];
876*37f1f268SConrad Meyer                 else {
877*37f1f268SConrad Meyer                     offset = seqState->prevOffset[1];
878*37f1f268SConrad Meyer                     seqState->prevOffset[1] = seqState->prevOffset[0];
879*37f1f268SConrad Meyer                     seqState->prevOffset[0] = offset;
880a0483764SConrad Meyer                 }
881*37f1f268SConrad Meyer             } else {
882*37f1f268SConrad Meyer                 offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
883*37f1f268SConrad Meyer                 {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
884a0483764SConrad Meyer                     temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
885a0483764SConrad Meyer                     if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
886a0483764SConrad Meyer                     seqState->prevOffset[1] = seqState->prevOffset[0];
887a0483764SConrad Meyer                     seqState->prevOffset[0] = offset = temp;
888*37f1f268SConrad Meyer         }   }   }
889a0483764SConrad Meyer         seq.offset = offset;
890a0483764SConrad Meyer     }
891a0483764SConrad Meyer 
892*37f1f268SConrad Meyer     seq.matchLength = mlBase;
893*37f1f268SConrad Meyer     if (mlBits > 0)
894*37f1f268SConrad Meyer         seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
895*37f1f268SConrad Meyer 
896a0483764SConrad Meyer     if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
897a0483764SConrad Meyer         BIT_reloadDStream(&seqState->DStream);
898*37f1f268SConrad Meyer     if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
899a0483764SConrad Meyer         BIT_reloadDStream(&seqState->DStream);
900a0483764SConrad Meyer     /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
901a0483764SConrad Meyer     ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
902a0483764SConrad Meyer 
903*37f1f268SConrad Meyer     seq.litLength = llBase;
904*37f1f268SConrad Meyer     if (llBits > 0)
905*37f1f268SConrad Meyer         seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
906*37f1f268SConrad Meyer 
907a0483764SConrad Meyer     if (MEM_32bits())
908a0483764SConrad Meyer         BIT_reloadDStream(&seqState->DStream);
909a0483764SConrad Meyer 
910a0483764SConrad Meyer     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
911a0483764SConrad Meyer                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
912a0483764SConrad Meyer 
913*37f1f268SConrad Meyer     if (prefetch == ZSTD_p_prefetch) {
914*37f1f268SConrad Meyer         size_t const pos = seqState->pos + seq.litLength;
915*37f1f268SConrad Meyer         const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
916*37f1f268SConrad Meyer         seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
917*37f1f268SConrad Meyer                                                     * No consequence though : no memory access will occur, offset is only used for prefetching */
918*37f1f268SConrad Meyer         seqState->pos = pos + seq.matchLength;
919*37f1f268SConrad Meyer     }
920*37f1f268SConrad Meyer 
921*37f1f268SConrad Meyer     /* ANS state update
922*37f1f268SConrad Meyer      * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
923*37f1f268SConrad Meyer      * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
924*37f1f268SConrad Meyer      * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
925*37f1f268SConrad Meyer      * better option, so it is the default for other compilers. But, if you
926*37f1f268SConrad Meyer      * measure that it is worse, please put up a pull request.
927*37f1f268SConrad Meyer      */
928*37f1f268SConrad Meyer     {
929*37f1f268SConrad Meyer #if defined(__GNUC__) && !defined(__clang__)
930*37f1f268SConrad Meyer         const int kUseUpdateFseState = 1;
931*37f1f268SConrad Meyer #else
932*37f1f268SConrad Meyer         const int kUseUpdateFseState = 0;
933*37f1f268SConrad Meyer #endif
934*37f1f268SConrad Meyer         if (kUseUpdateFseState) {
935a0483764SConrad Meyer             ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
936a0483764SConrad Meyer             ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
937a0483764SConrad Meyer             if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
938a0483764SConrad Meyer             ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
939*37f1f268SConrad Meyer         } else {
940*37f1f268SConrad Meyer             ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
941*37f1f268SConrad Meyer             ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
942*37f1f268SConrad Meyer             if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
943*37f1f268SConrad Meyer             ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
944*37f1f268SConrad Meyer         }
945*37f1f268SConrad Meyer     }
946a0483764SConrad Meyer 
947a0483764SConrad Meyer     return seq;
948a0483764SConrad Meyer }
949a0483764SConrad Meyer 
950*37f1f268SConrad Meyer #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
951*37f1f268SConrad Meyer static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
952*37f1f268SConrad Meyer {
953*37f1f268SConrad Meyer     size_t const windowSize = dctx->fParams.windowSize;
954*37f1f268SConrad Meyer     /* No dictionary used. */
955*37f1f268SConrad Meyer     if (dctx->dictContentEndForFuzzing == NULL) return 0;
956*37f1f268SConrad Meyer     /* Dictionary is our prefix. */
957*37f1f268SConrad Meyer     if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
958*37f1f268SConrad Meyer     /* Dictionary is not our ext-dict. */
959*37f1f268SConrad Meyer     if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
960*37f1f268SConrad Meyer     /* Dictionary is not within our window size. */
961*37f1f268SConrad Meyer     if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
962*37f1f268SConrad Meyer     /* Dictionary is active. */
963*37f1f268SConrad Meyer     return 1;
964*37f1f268SConrad Meyer }
965*37f1f268SConrad Meyer 
966*37f1f268SConrad Meyer MEM_STATIC void ZSTD_assertValidSequence(
967*37f1f268SConrad Meyer         ZSTD_DCtx const* dctx,
968*37f1f268SConrad Meyer         BYTE const* op, BYTE const* oend,
969*37f1f268SConrad Meyer         seq_t const seq,
970*37f1f268SConrad Meyer         BYTE const* prefixStart, BYTE const* virtualStart)
971*37f1f268SConrad Meyer {
972*37f1f268SConrad Meyer     size_t const windowSize = dctx->fParams.windowSize;
973*37f1f268SConrad Meyer     size_t const sequenceSize = seq.litLength + seq.matchLength;
974*37f1f268SConrad Meyer     BYTE const* const oLitEnd = op + seq.litLength;
975*37f1f268SConrad Meyer     DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
976*37f1f268SConrad Meyer             (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
977*37f1f268SConrad Meyer     assert(op <= oend);
978*37f1f268SConrad Meyer     assert((size_t)(oend - op) >= sequenceSize);
979*37f1f268SConrad Meyer     assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
980*37f1f268SConrad Meyer     if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
981*37f1f268SConrad Meyer         size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
982*37f1f268SConrad Meyer         /* Offset must be within the dictionary. */
983*37f1f268SConrad Meyer         assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
984*37f1f268SConrad Meyer         assert(seq.offset <= windowSize + dictSize);
985*37f1f268SConrad Meyer     } else {
986*37f1f268SConrad Meyer         /* Offset must be within our window. */
987*37f1f268SConrad Meyer         assert(seq.offset <= windowSize);
988*37f1f268SConrad Meyer     }
989*37f1f268SConrad Meyer }
990*37f1f268SConrad Meyer #endif
991*37f1f268SConrad Meyer 
992*37f1f268SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
993a0483764SConrad Meyer FORCE_INLINE_TEMPLATE size_t
9944d3f1eafSConrad Meyer DONT_VECTORIZE
995a0483764SConrad Meyer ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
996a0483764SConrad Meyer                                void* dst, size_t maxDstSize,
997a0483764SConrad Meyer                          const void* seqStart, size_t seqSize, int nbSeq,
998*37f1f268SConrad Meyer                          const ZSTD_longOffset_e isLongOffset,
999*37f1f268SConrad Meyer                          const int frame)
1000a0483764SConrad Meyer {
1001a0483764SConrad Meyer     const BYTE* ip = (const BYTE*)seqStart;
1002a0483764SConrad Meyer     const BYTE* const iend = ip + seqSize;
1003a0483764SConrad Meyer     BYTE* const ostart = (BYTE* const)dst;
1004a0483764SConrad Meyer     BYTE* const oend = ostart + maxDstSize;
1005a0483764SConrad Meyer     BYTE* op = ostart;
1006a0483764SConrad Meyer     const BYTE* litPtr = dctx->litPtr;
1007a0483764SConrad Meyer     const BYTE* const litEnd = litPtr + dctx->litSize;
1008a0483764SConrad Meyer     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1009a0483764SConrad Meyer     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1010a0483764SConrad Meyer     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1011a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decompressSequences_body");
1012*37f1f268SConrad Meyer     (void)frame;
1013a0483764SConrad Meyer 
1014a0483764SConrad Meyer     /* Regen sequences */
1015a0483764SConrad Meyer     if (nbSeq) {
1016a0483764SConrad Meyer         seqState_t seqState;
1017*37f1f268SConrad Meyer         size_t error = 0;
1018a0483764SConrad Meyer         dctx->fseEntropy = 1;
1019a0483764SConrad Meyer         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
10202b9c00cbSConrad Meyer         RETURN_ERROR_IF(
10212b9c00cbSConrad Meyer             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1022*37f1f268SConrad Meyer             corruption_detected, "");
1023a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1024a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1025a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1026*37f1f268SConrad Meyer         assert(dst != NULL);
1027a0483764SConrad Meyer 
10284d3f1eafSConrad Meyer         ZSTD_STATIC_ASSERT(
10294d3f1eafSConrad Meyer                 BIT_DStream_unfinished < BIT_DStream_completed &&
10304d3f1eafSConrad Meyer                 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
10314d3f1eafSConrad Meyer                 BIT_DStream_completed < BIT_DStream_overflow);
10324d3f1eafSConrad Meyer 
1033*37f1f268SConrad Meyer #if defined(__GNUC__) && defined(__x86_64__)
1034*37f1f268SConrad Meyer         /* Align the decompression loop to 32 + 16 bytes.
1035*37f1f268SConrad Meyer          *
1036*37f1f268SConrad Meyer          * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1037*37f1f268SConrad Meyer          * speed swings based on the alignment of the decompression loop. This
1038*37f1f268SConrad Meyer          * performance swing is caused by parts of the decompression loop falling
1039*37f1f268SConrad Meyer          * out of the DSB. The entire decompression loop should fit in the DSB,
1040*37f1f268SConrad Meyer          * when it can't we get much worse performance. You can measure if you've
1041*37f1f268SConrad Meyer          * hit the good case or the bad case with this perf command for some
1042*37f1f268SConrad Meyer          * compressed file test.zst:
1043*37f1f268SConrad Meyer          *
1044*37f1f268SConrad Meyer          *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1045*37f1f268SConrad Meyer          *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1046*37f1f268SConrad Meyer          *
1047*37f1f268SConrad Meyer          * If you see most cycles served out of the MITE you've hit the bad case.
1048*37f1f268SConrad Meyer          * If you see most cycles served out of the DSB you've hit the good case.
1049*37f1f268SConrad Meyer          * If it is pretty even then you may be in an okay case.
1050*37f1f268SConrad Meyer          *
1051*37f1f268SConrad Meyer          * I've been able to reproduce this issue on the following CPUs:
1052*37f1f268SConrad Meyer          *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1053*37f1f268SConrad Meyer          *               Use Instruments->Counters to get DSB/MITE cycles.
1054*37f1f268SConrad Meyer          *               I never got performance swings, but I was able to
1055*37f1f268SConrad Meyer          *               go from the good case of mostly DSB to half of the
1056*37f1f268SConrad Meyer          *               cycles served from MITE.
1057*37f1f268SConrad Meyer          *   - Coffeelake: Intel i9-9900k
1058*37f1f268SConrad Meyer          *
1059*37f1f268SConrad Meyer          * I haven't been able to reproduce the instability or DSB misses on any
1060*37f1f268SConrad Meyer          * of the following CPUS:
1061*37f1f268SConrad Meyer          *   - Haswell
1062*37f1f268SConrad Meyer          *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1063*37f1f268SConrad Meyer          *   - Skylake
1064*37f1f268SConrad Meyer          *
1065*37f1f268SConrad Meyer          * If you are seeing performance stability this script can help test.
1066*37f1f268SConrad Meyer          * It tests on 4 commits in zstd where I saw performance change.
1067*37f1f268SConrad Meyer          *
1068*37f1f268SConrad Meyer          *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1069*37f1f268SConrad Meyer          */
1070*37f1f268SConrad Meyer         __asm__(".p2align 5");
1071*37f1f268SConrad Meyer         __asm__("nop");
1072*37f1f268SConrad Meyer         __asm__(".p2align 4");
1073*37f1f268SConrad Meyer #endif
1074*37f1f268SConrad Meyer         for ( ; ; ) {
1075*37f1f268SConrad Meyer             seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
1076a0483764SConrad Meyer             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1077*37f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1078*37f1f268SConrad Meyer             assert(!ZSTD_isError(oneSeqSize));
1079*37f1f268SConrad Meyer             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1080*37f1f268SConrad Meyer #endif
1081a0483764SConrad Meyer             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1082*37f1f268SConrad Meyer             BIT_reloadDStream(&(seqState.DStream));
1083*37f1f268SConrad Meyer             /* gcc and clang both don't like early returns in this loop.
1084*37f1f268SConrad Meyer              * gcc doesn't like early breaks either.
1085*37f1f268SConrad Meyer              * Instead save an error and report it at the end.
1086*37f1f268SConrad Meyer              * When there is an error, don't increment op, so we don't
1087*37f1f268SConrad Meyer              * overwrite.
1088*37f1f268SConrad Meyer              */
1089*37f1f268SConrad Meyer             if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
1090*37f1f268SConrad Meyer             else op += oneSeqSize;
1091*37f1f268SConrad Meyer             if (UNLIKELY(!--nbSeq)) break;
1092*37f1f268SConrad Meyer         }
1093a0483764SConrad Meyer 
1094a0483764SConrad Meyer         /* check if reached exact end */
1095a0483764SConrad Meyer         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1096*37f1f268SConrad Meyer         if (ZSTD_isError(error)) return error;
1097*37f1f268SConrad Meyer         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1098*37f1f268SConrad Meyer         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1099a0483764SConrad Meyer         /* save reps for next block */
1100a0483764SConrad Meyer         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1101a0483764SConrad Meyer     }
1102a0483764SConrad Meyer 
1103a0483764SConrad Meyer     /* last literal segment */
1104a0483764SConrad Meyer     {   size_t const lastLLSize = litEnd - litPtr;
1105*37f1f268SConrad Meyer         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1106*37f1f268SConrad Meyer         if (op != NULL) {
1107a0483764SConrad Meyer             memcpy(op, litPtr, lastLLSize);
1108a0483764SConrad Meyer             op += lastLLSize;
1109a0483764SConrad Meyer         }
1110*37f1f268SConrad Meyer     }
1111a0483764SConrad Meyer 
1112a0483764SConrad Meyer     return op-ostart;
1113a0483764SConrad Meyer }
1114a0483764SConrad Meyer 
1115a0483764SConrad Meyer static size_t
1116a0483764SConrad Meyer ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1117a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1118a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
1119*37f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
1120*37f1f268SConrad Meyer                            const int frame)
1121a0483764SConrad Meyer {
1122*37f1f268SConrad Meyer     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1123a0483764SConrad Meyer }
1124a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1125a0483764SConrad Meyer 
1126a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1127a0483764SConrad Meyer FORCE_INLINE_TEMPLATE size_t
1128a0483764SConrad Meyer ZSTD_decompressSequencesLong_body(
1129a0483764SConrad Meyer                                ZSTD_DCtx* dctx,
1130a0483764SConrad Meyer                                void* dst, size_t maxDstSize,
1131a0483764SConrad Meyer                          const void* seqStart, size_t seqSize, int nbSeq,
1132*37f1f268SConrad Meyer                          const ZSTD_longOffset_e isLongOffset,
1133*37f1f268SConrad Meyer                          const int frame)
1134a0483764SConrad Meyer {
1135a0483764SConrad Meyer     const BYTE* ip = (const BYTE*)seqStart;
1136a0483764SConrad Meyer     const BYTE* const iend = ip + seqSize;
1137a0483764SConrad Meyer     BYTE* const ostart = (BYTE* const)dst;
1138a0483764SConrad Meyer     BYTE* const oend = ostart + maxDstSize;
1139a0483764SConrad Meyer     BYTE* op = ostart;
1140a0483764SConrad Meyer     const BYTE* litPtr = dctx->litPtr;
1141a0483764SConrad Meyer     const BYTE* const litEnd = litPtr + dctx->litSize;
1142a0483764SConrad Meyer     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1143a0483764SConrad Meyer     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1144a0483764SConrad Meyer     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1145*37f1f268SConrad Meyer     (void)frame;
1146a0483764SConrad Meyer 
1147a0483764SConrad Meyer     /* Regen sequences */
1148a0483764SConrad Meyer     if (nbSeq) {
1149a0483764SConrad Meyer #define STORED_SEQS 4
1150a0483764SConrad Meyer #define STORED_SEQS_MASK (STORED_SEQS-1)
1151a0483764SConrad Meyer #define ADVANCED_SEQS 4
1152a0483764SConrad Meyer         seq_t sequences[STORED_SEQS];
1153a0483764SConrad Meyer         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1154a0483764SConrad Meyer         seqState_t seqState;
1155a0483764SConrad Meyer         int seqNb;
1156a0483764SConrad Meyer         dctx->fseEntropy = 1;
1157a0483764SConrad Meyer         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1158a0483764SConrad Meyer         seqState.prefixStart = prefixStart;
1159a0483764SConrad Meyer         seqState.pos = (size_t)(op-prefixStart);
1160a0483764SConrad Meyer         seqState.dictEnd = dictEnd;
1161*37f1f268SConrad Meyer         assert(dst != NULL);
1162a0483764SConrad Meyer         assert(iend >= ip);
11632b9c00cbSConrad Meyer         RETURN_ERROR_IF(
11642b9c00cbSConrad Meyer             ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1165*37f1f268SConrad Meyer             corruption_detected, "");
1166a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1167a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1168a0483764SConrad Meyer         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1169a0483764SConrad Meyer 
1170a0483764SConrad Meyer         /* prepare in advance */
1171a0483764SConrad Meyer         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1172*37f1f268SConrad Meyer             sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
1173a0483764SConrad Meyer             PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1174a0483764SConrad Meyer         }
1175*37f1f268SConrad Meyer         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1176a0483764SConrad Meyer 
1177a0483764SConrad Meyer         /* decode and decompress */
1178a0483764SConrad Meyer         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1179*37f1f268SConrad Meyer             seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
11809cbefe25SConrad Meyer             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1181*37f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1182*37f1f268SConrad Meyer             assert(!ZSTD_isError(oneSeqSize));
1183*37f1f268SConrad Meyer             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1184*37f1f268SConrad Meyer #endif
1185a0483764SConrad Meyer             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1186a0483764SConrad Meyer             PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1187a0483764SConrad Meyer             sequences[seqNb & STORED_SEQS_MASK] = sequence;
1188a0483764SConrad Meyer             op += oneSeqSize;
1189a0483764SConrad Meyer         }
1190*37f1f268SConrad Meyer         RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1191a0483764SConrad Meyer 
1192a0483764SConrad Meyer         /* finish queue */
1193a0483764SConrad Meyer         seqNb -= seqAdvance;
1194a0483764SConrad Meyer         for ( ; seqNb<nbSeq ; seqNb++) {
11959cbefe25SConrad Meyer             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1196*37f1f268SConrad Meyer #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1197*37f1f268SConrad Meyer             assert(!ZSTD_isError(oneSeqSize));
1198*37f1f268SConrad Meyer             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1199*37f1f268SConrad Meyer #endif
1200a0483764SConrad Meyer             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1201a0483764SConrad Meyer             op += oneSeqSize;
1202a0483764SConrad Meyer         }
1203a0483764SConrad Meyer 
1204a0483764SConrad Meyer         /* save reps for next block */
1205a0483764SConrad Meyer         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1206a0483764SConrad Meyer     }
1207a0483764SConrad Meyer 
1208a0483764SConrad Meyer     /* last literal segment */
1209a0483764SConrad Meyer     {   size_t const lastLLSize = litEnd - litPtr;
1210*37f1f268SConrad Meyer         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1211*37f1f268SConrad Meyer         if (op != NULL) {
1212a0483764SConrad Meyer             memcpy(op, litPtr, lastLLSize);
1213a0483764SConrad Meyer             op += lastLLSize;
1214a0483764SConrad Meyer         }
1215*37f1f268SConrad Meyer     }
1216a0483764SConrad Meyer 
1217a0483764SConrad Meyer     return op-ostart;
1218a0483764SConrad Meyer }
1219a0483764SConrad Meyer 
1220a0483764SConrad Meyer static size_t
1221a0483764SConrad Meyer ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1222a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1223a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
1224*37f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
1225*37f1f268SConrad Meyer                            const int frame)
1226a0483764SConrad Meyer {
1227*37f1f268SConrad Meyer     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1228a0483764SConrad Meyer }
1229a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1230a0483764SConrad Meyer 
1231a0483764SConrad Meyer 
1232a0483764SConrad Meyer 
1233a0483764SConrad Meyer #if DYNAMIC_BMI2
1234a0483764SConrad Meyer 
1235a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1236a0483764SConrad Meyer static TARGET_ATTRIBUTE("bmi2") size_t
12374d3f1eafSConrad Meyer DONT_VECTORIZE
1238a0483764SConrad Meyer ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1239a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1240a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
1241*37f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
1242*37f1f268SConrad Meyer                            const int frame)
1243a0483764SConrad Meyer {
1244*37f1f268SConrad Meyer     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1245a0483764SConrad Meyer }
1246a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1247a0483764SConrad Meyer 
1248a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1249a0483764SConrad Meyer static TARGET_ATTRIBUTE("bmi2") size_t
1250a0483764SConrad Meyer ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1251a0483764SConrad Meyer                                  void* dst, size_t maxDstSize,
1252a0483764SConrad Meyer                            const void* seqStart, size_t seqSize, int nbSeq,
1253*37f1f268SConrad Meyer                            const ZSTD_longOffset_e isLongOffset,
1254*37f1f268SConrad Meyer                            const int frame)
1255a0483764SConrad Meyer {
1256*37f1f268SConrad Meyer     return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1257a0483764SConrad Meyer }
1258a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1259a0483764SConrad Meyer 
1260a0483764SConrad Meyer #endif /* DYNAMIC_BMI2 */
1261a0483764SConrad Meyer 
1262a0483764SConrad Meyer typedef size_t (*ZSTD_decompressSequences_t)(
1263a0483764SConrad Meyer                             ZSTD_DCtx* dctx,
1264a0483764SConrad Meyer                             void* dst, size_t maxDstSize,
1265a0483764SConrad Meyer                             const void* seqStart, size_t seqSize, int nbSeq,
1266*37f1f268SConrad Meyer                             const ZSTD_longOffset_e isLongOffset,
1267*37f1f268SConrad Meyer                             const int frame);
1268a0483764SConrad Meyer 
1269a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1270a0483764SConrad Meyer static size_t
1271a0483764SConrad Meyer ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1272a0483764SConrad Meyer                    const void* seqStart, size_t seqSize, int nbSeq,
1273*37f1f268SConrad Meyer                    const ZSTD_longOffset_e isLongOffset,
1274*37f1f268SConrad Meyer                    const int frame)
1275a0483764SConrad Meyer {
1276a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decompressSequences");
1277a0483764SConrad Meyer #if DYNAMIC_BMI2
1278a0483764SConrad Meyer     if (dctx->bmi2) {
1279*37f1f268SConrad Meyer         return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1280a0483764SConrad Meyer     }
1281a0483764SConrad Meyer #endif
1282*37f1f268SConrad Meyer   return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1283a0483764SConrad Meyer }
1284a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1285a0483764SConrad Meyer 
1286a0483764SConrad Meyer 
1287a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1288a0483764SConrad Meyer /* ZSTD_decompressSequencesLong() :
1289a0483764SConrad Meyer  * decompression function triggered when a minimum share of offsets is considered "long",
1290a0483764SConrad Meyer  * aka out of cache.
12912b9c00cbSConrad Meyer  * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1292a0483764SConrad Meyer  * This function will try to mitigate main memory latency through the use of prefetching */
1293a0483764SConrad Meyer static size_t
1294a0483764SConrad Meyer ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1295a0483764SConrad Meyer                              void* dst, size_t maxDstSize,
1296a0483764SConrad Meyer                              const void* seqStart, size_t seqSize, int nbSeq,
1297*37f1f268SConrad Meyer                              const ZSTD_longOffset_e isLongOffset,
1298*37f1f268SConrad Meyer                              const int frame)
1299a0483764SConrad Meyer {
1300a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1301a0483764SConrad Meyer #if DYNAMIC_BMI2
1302a0483764SConrad Meyer     if (dctx->bmi2) {
1303*37f1f268SConrad Meyer         return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1304a0483764SConrad Meyer     }
1305a0483764SConrad Meyer #endif
1306*37f1f268SConrad Meyer   return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1307a0483764SConrad Meyer }
1308a0483764SConrad Meyer #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1309a0483764SConrad Meyer 
1310a0483764SConrad Meyer 
1311a0483764SConrad Meyer 
1312a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1313a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1314a0483764SConrad Meyer /* ZSTD_getLongOffsetsShare() :
1315a0483764SConrad Meyer  * condition : offTable must be valid
1316a0483764SConrad Meyer  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1317a0483764SConrad Meyer  *           compared to maximum possible of (1<<OffFSELog) */
1318a0483764SConrad Meyer static unsigned
1319a0483764SConrad Meyer ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
1320a0483764SConrad Meyer {
1321a0483764SConrad Meyer     const void* ptr = offTable;
1322a0483764SConrad Meyer     U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1323a0483764SConrad Meyer     const ZSTD_seqSymbol* table = offTable + 1;
1324a0483764SConrad Meyer     U32 const max = 1 << tableLog;
1325a0483764SConrad Meyer     U32 u, total = 0;
1326a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1327a0483764SConrad Meyer 
1328a0483764SConrad Meyer     assert(max <= (1 << OffFSELog));  /* max not too large */
1329a0483764SConrad Meyer     for (u=0; u<max; u++) {
1330a0483764SConrad Meyer         if (table[u].nbAdditionalBits > 22) total += 1;
1331a0483764SConrad Meyer     }
1332a0483764SConrad Meyer 
1333a0483764SConrad Meyer     assert(tableLog <= OffFSELog);
1334a0483764SConrad Meyer     total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
1335a0483764SConrad Meyer 
1336a0483764SConrad Meyer     return total;
1337a0483764SConrad Meyer }
1338a0483764SConrad Meyer #endif
1339a0483764SConrad Meyer 
1340a0483764SConrad Meyer size_t
1341a0483764SConrad Meyer ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1342a0483764SConrad Meyer                               void* dst, size_t dstCapacity,
1343a0483764SConrad Meyer                         const void* src, size_t srcSize, const int frame)
1344a0483764SConrad Meyer {   /* blockType == blockCompressed */
1345a0483764SConrad Meyer     const BYTE* ip = (const BYTE*)src;
1346a0483764SConrad Meyer     /* isLongOffset must be true if there are long offsets.
1347a0483764SConrad Meyer      * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1348a0483764SConrad Meyer      * We don't expect that to be the case in 64-bit mode.
1349a0483764SConrad Meyer      * In block mode, window size is not known, so we have to be conservative.
1350a0483764SConrad Meyer      * (note: but it could be evaluated from current-lowLimit)
1351a0483764SConrad Meyer      */
1352a0483764SConrad Meyer     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1353a0483764SConrad Meyer     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1354a0483764SConrad Meyer 
1355*37f1f268SConrad Meyer     RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
1356a0483764SConrad Meyer 
1357a0483764SConrad Meyer     /* Decode literals section */
1358a0483764SConrad Meyer     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
1359a0483764SConrad Meyer         DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
1360a0483764SConrad Meyer         if (ZSTD_isError(litCSize)) return litCSize;
1361a0483764SConrad Meyer         ip += litCSize;
1362a0483764SConrad Meyer         srcSize -= litCSize;
1363a0483764SConrad Meyer     }
1364a0483764SConrad Meyer 
1365a0483764SConrad Meyer     /* Build Decoding Tables */
1366a0483764SConrad Meyer     {
1367a0483764SConrad Meyer         /* These macros control at build-time which decompressor implementation
1368a0483764SConrad Meyer          * we use. If neither is defined, we do some inspection and dispatch at
1369a0483764SConrad Meyer          * runtime.
1370a0483764SConrad Meyer          */
1371a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1372a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1373a0483764SConrad Meyer         int usePrefetchDecoder = dctx->ddictIsCold;
1374a0483764SConrad Meyer #endif
1375a0483764SConrad Meyer         int nbSeq;
1376a0483764SConrad Meyer         size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
1377a0483764SConrad Meyer         if (ZSTD_isError(seqHSize)) return seqHSize;
1378a0483764SConrad Meyer         ip += seqHSize;
1379a0483764SConrad Meyer         srcSize -= seqHSize;
1380a0483764SConrad Meyer 
1381*37f1f268SConrad Meyer         RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
1382*37f1f268SConrad Meyer 
1383a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1384a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1385a0483764SConrad Meyer         if ( !usePrefetchDecoder
1386a0483764SConrad Meyer           && (!frame || (dctx->fParams.windowSize > (1<<24)))
1387a0483764SConrad Meyer           && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
1388a0483764SConrad Meyer             U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
1389a0483764SConrad Meyer             U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1390a0483764SConrad Meyer             usePrefetchDecoder = (shareLongOffsets >= minShare);
1391a0483764SConrad Meyer         }
1392a0483764SConrad Meyer #endif
1393a0483764SConrad Meyer 
1394a0483764SConrad Meyer         dctx->ddictIsCold = 0;
1395a0483764SConrad Meyer 
1396a0483764SConrad Meyer #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1397a0483764SConrad Meyer     !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1398a0483764SConrad Meyer         if (usePrefetchDecoder)
1399a0483764SConrad Meyer #endif
1400a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1401*37f1f268SConrad Meyer             return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
1402a0483764SConrad Meyer #endif
1403a0483764SConrad Meyer 
1404a0483764SConrad Meyer #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1405a0483764SConrad Meyer         /* else */
1406*37f1f268SConrad Meyer         return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
1407a0483764SConrad Meyer #endif
1408a0483764SConrad Meyer     }
1409a0483764SConrad Meyer }
1410a0483764SConrad Meyer 
1411a0483764SConrad Meyer 
1412*37f1f268SConrad Meyer void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
1413*37f1f268SConrad Meyer {
1414*37f1f268SConrad Meyer     if (dst != dctx->previousDstEnd) {   /* not contiguous */
1415*37f1f268SConrad Meyer         dctx->dictEnd = dctx->previousDstEnd;
1416*37f1f268SConrad Meyer         dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
1417*37f1f268SConrad Meyer         dctx->prefixStart = dst;
1418*37f1f268SConrad Meyer         dctx->previousDstEnd = dst;
1419*37f1f268SConrad Meyer     }
1420*37f1f268SConrad Meyer }
1421*37f1f268SConrad Meyer 
1422*37f1f268SConrad Meyer 
1423a0483764SConrad Meyer size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
1424a0483764SConrad Meyer                             void* dst, size_t dstCapacity,
1425a0483764SConrad Meyer                       const void* src, size_t srcSize)
1426a0483764SConrad Meyer {
1427a0483764SConrad Meyer     size_t dSize;
1428a0483764SConrad Meyer     ZSTD_checkContinuity(dctx, dst);
1429a0483764SConrad Meyer     dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
1430a0483764SConrad Meyer     dctx->previousDstEnd = (char*)dst + dSize;
1431a0483764SConrad Meyer     return dSize;
1432a0483764SConrad Meyer }
1433