xref: /netbsd-src/external/bsd/zstd/dist/lib/decompress/zstd_decompress_internal.h (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10*3117ece4Schristos 
11*3117ece4Schristos 
/* zstd_decompress_internal:
 * objects and definitions shared within lib/decompress modules */
14*3117ece4Schristos 
15*3117ece4Schristos  #ifndef ZSTD_DECOMPRESS_INTERNAL_H
16*3117ece4Schristos  #define ZSTD_DECOMPRESS_INTERNAL_H
17*3117ece4Schristos 
18*3117ece4Schristos 
/*-*******************************************************
 *  Dependencies
 *********************************************************/
22*3117ece4Schristos #include "../common/mem.h"             /* BYTE, U16, U32 */
23*3117ece4Schristos #include "../common/zstd_internal.h"   /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24*3117ece4Schristos 
25*3117ece4Schristos 
26*3117ece4Schristos 
/*-*******************************************************
 *  Constants
 *********************************************************/
30*3117ece4Schristos static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31*3117ece4Schristos                  0,    1,    2,     3,     4,     5,     6,      7,
32*3117ece4Schristos                  8,    9,   10,    11,    12,    13,    14,     15,
33*3117ece4Schristos                 16,   18,   20,    22,    24,    28,    32,     40,
34*3117ece4Schristos                 48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35*3117ece4Schristos                 0x2000, 0x4000, 0x8000, 0x10000 };
36*3117ece4Schristos 
37*3117ece4Schristos static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38*3117ece4Schristos                  0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
39*3117ece4Schristos                  0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
40*3117ece4Schristos                  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41*3117ece4Schristos                  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42*3117ece4Schristos 
43*3117ece4Schristos static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44*3117ece4Schristos                      0,  1,  2,  3,  4,  5,  6,  7,
45*3117ece4Schristos                      8,  9, 10, 11, 12, 13, 14, 15,
46*3117ece4Schristos                     16, 17, 18, 19, 20, 21, 22, 23,
47*3117ece4Schristos                     24, 25, 26, 27, 28, 29, 30, 31 };
48*3117ece4Schristos 
49*3117ece4Schristos static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50*3117ece4Schristos                      3,  4,  5,    6,     7,     8,     9,    10,
51*3117ece4Schristos                     11, 12, 13,   14,    15,    16,    17,    18,
52*3117ece4Schristos                     19, 20, 21,   22,    23,    24,    25,    26,
53*3117ece4Schristos                     27, 28, 29,   30,    31,    32,    33,    34,
54*3117ece4Schristos                     35, 37, 39,   41,    43,    47,    51,    59,
55*3117ece4Schristos                     67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
56*3117ece4Schristos                     0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
57*3117ece4Schristos 
58*3117ece4Schristos 
/*-*******************************************************
 *  Decompression types
 *********************************************************/
62*3117ece4Schristos  typedef struct {
63*3117ece4Schristos      U32 fastMode;
64*3117ece4Schristos      U32 tableLog;
65*3117ece4Schristos  } ZSTD_seqSymbol_header;
66*3117ece4Schristos 
67*3117ece4Schristos  typedef struct {
68*3117ece4Schristos      U16  nextState;
69*3117ece4Schristos      BYTE nbAdditionalBits;
70*3117ece4Schristos      BYTE nbBits;
71*3117ece4Schristos      U32  baseValue;
72*3117ece4Schristos  } ZSTD_seqSymbol;
73*3117ece4Schristos 
/* Number of cells in a sequence-symbol table of log2 size `log` :
 * (1 << log) cells plus 1 extra cell. */
#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))

/* Size, in bytes, of the scratch area declared as `workspace` in ZSTD_entropyDTables_t */
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
/* Same size, expressed as a number of U32 elements (rounded up) */
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
/* log2 capacity used to size `hufTable` in ZSTD_entropyDTables_t */
#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12
79*3117ece4Schristos 
80*3117ece4Schristos typedef struct {
81*3117ece4Schristos     ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
82*3117ece4Schristos     ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
83*3117ece4Schristos     ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
84*3117ece4Schristos     HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)];  /* can accommodate HUF_decompress4X */
85*3117ece4Schristos     U32 rep[ZSTD_REP_NUM];
86*3117ece4Schristos     U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
87*3117ece4Schristos } ZSTD_entropyDTables_t;
88*3117ece4Schristos 
/* ZSTD_dStage :
 * Type of the `stage` member of struct ZSTD_DCtx_s.
 * Enumerators take consecutive values starting at 0, in declaration order. */
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
93*3117ece4Schristos 
/* ZSTD_dStreamStage :
 * Type of the `streamStage` member of struct ZSTD_DCtx_s.
 * zdss_init is explicitly 0; the following enumerators count up from there. */
typedef enum { zdss_init=0, zdss_loadHeader,
               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
96*3117ece4Schristos 
/* ZSTD_dictUses_e :
 * Type of the `dictUses` member of struct ZSTD_DCtx_s;
 * tells for how long the referenced dictionary remains in use. */
typedef enum {
    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
} ZSTD_dictUses_e;
102*3117ece4Schristos 
103*3117ece4Schristos /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
104*3117ece4Schristos typedef struct {
105*3117ece4Schristos     const ZSTD_DDict** ddictPtrTable;
106*3117ece4Schristos     size_t ddictPtrTableSize;
107*3117ece4Schristos     size_t ddictPtrCount;
108*3117ece4Schristos } ZSTD_DDictHashSet;
109*3117ece4Schristos 
/* Default value of ZSTD_DECODER_INTERNAL_BUFFER (1 << 16 == 65536).
 * Only applied when the macro is not already defined at this point,
 * so it can be overridden from the build. */
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
#  define ZSTD_DECODER_INTERNAL_BUFFER  (1 << 16)
#endif

/* Lower and upper limits passed to BOUNDED() below (64 bytes and 128 KB) */
#define ZSTD_LBMIN 64
#define ZSTD_LBMAX (128 << 10)

/* extra buffer, compensates when dst is not large enough to store litBuffer.
 * NOTE(review): BOUNDED() is defined outside this header; presumably it clamps
 * ZSTD_DECODER_INTERNAL_BUFFER into [ZSTD_LBMIN, ZSTD_LBMAX] - confirm. */
#define ZSTD_LITBUFFEREXTRASIZE  BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
119*3117ece4Schristos 
/* ZSTD_litLocation_e :
 * Type of the `litBufferLocation` member of struct ZSTD_DCtx_s;
 * tells where the literals buffer is stored. */
typedef enum {
    ZSTD_not_in_dst = 0,  /* Stored entirely within litExtraBuffer */
    ZSTD_in_dst = 1,      /* Stored entirely within dst (in memory after current output write) */
    ZSTD_split = 2        /* Split between litExtraBuffer and dst */
} ZSTD_litLocation_e;
125*3117ece4Schristos 
126*3117ece4Schristos struct ZSTD_DCtx_s
127*3117ece4Schristos {
128*3117ece4Schristos     const ZSTD_seqSymbol* LLTptr;
129*3117ece4Schristos     const ZSTD_seqSymbol* MLTptr;
130*3117ece4Schristos     const ZSTD_seqSymbol* OFTptr;
131*3117ece4Schristos     const HUF_DTable* HUFptr;
132*3117ece4Schristos     ZSTD_entropyDTables_t entropy;
133*3117ece4Schristos     U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
134*3117ece4Schristos     const void* previousDstEnd;   /* detect continuity */
135*3117ece4Schristos     const void* prefixStart;      /* start of current segment */
136*3117ece4Schristos     const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
137*3117ece4Schristos     const void* dictEnd;          /* end of previous segment */
138*3117ece4Schristos     size_t expected;
139*3117ece4Schristos     ZSTD_frameHeader fParams;
140*3117ece4Schristos     U64 processedCSize;
141*3117ece4Schristos     U64 decodedSize;
142*3117ece4Schristos     blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
143*3117ece4Schristos     ZSTD_dStage stage;
144*3117ece4Schristos     U32 litEntropy;
145*3117ece4Schristos     U32 fseEntropy;
146*3117ece4Schristos     XXH64_state_t xxhState;
147*3117ece4Schristos     size_t headerSize;
148*3117ece4Schristos     ZSTD_format_e format;
149*3117ece4Schristos     ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
150*3117ece4Schristos     U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
151*3117ece4Schristos     const BYTE* litPtr;
152*3117ece4Schristos     ZSTD_customMem customMem;
153*3117ece4Schristos     size_t litSize;
154*3117ece4Schristos     size_t rleSize;
155*3117ece4Schristos     size_t staticSize;
156*3117ece4Schristos     int isFrameDecompression;
157*3117ece4Schristos #if DYNAMIC_BMI2 != 0
158*3117ece4Schristos     int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
159*3117ece4Schristos #endif
160*3117ece4Schristos 
161*3117ece4Schristos     /* dictionary */
162*3117ece4Schristos     ZSTD_DDict* ddictLocal;
163*3117ece4Schristos     const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
164*3117ece4Schristos     U32 dictID;
165*3117ece4Schristos     int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
166*3117ece4Schristos     ZSTD_dictUses_e dictUses;
167*3117ece4Schristos     ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
168*3117ece4Schristos     ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
169*3117ece4Schristos     int disableHufAsm;
170*3117ece4Schristos     int maxBlockSizeParam;
171*3117ece4Schristos 
172*3117ece4Schristos     /* streaming */
173*3117ece4Schristos     ZSTD_dStreamStage streamStage;
174*3117ece4Schristos     char*  inBuff;
175*3117ece4Schristos     size_t inBuffSize;
176*3117ece4Schristos     size_t inPos;
177*3117ece4Schristos     size_t maxWindowSize;
178*3117ece4Schristos     char*  outBuff;
179*3117ece4Schristos     size_t outBuffSize;
180*3117ece4Schristos     size_t outStart;
181*3117ece4Schristos     size_t outEnd;
182*3117ece4Schristos     size_t lhSize;
183*3117ece4Schristos #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
184*3117ece4Schristos     void* legacyContext;
185*3117ece4Schristos     U32 previousLegacyVersion;
186*3117ece4Schristos     U32 legacyVersion;
187*3117ece4Schristos #endif
188*3117ece4Schristos     U32 hostageByte;
189*3117ece4Schristos     int noForwardProgress;
190*3117ece4Schristos     ZSTD_bufferMode_e outBufferMode;
191*3117ece4Schristos     ZSTD_outBuffer expectedOutBuffer;
192*3117ece4Schristos 
193*3117ece4Schristos     /* workspace */
194*3117ece4Schristos     BYTE* litBuffer;
195*3117ece4Schristos     const BYTE* litBufferEnd;
196*3117ece4Schristos     ZSTD_litLocation_e litBufferLocation;
197*3117ece4Schristos     BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
198*3117ece4Schristos     BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
199*3117ece4Schristos 
200*3117ece4Schristos     size_t oversizedDuration;
201*3117ece4Schristos 
202*3117ece4Schristos #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
203*3117ece4Schristos     void const* dictContentBeginForFuzzing;
204*3117ece4Schristos     void const* dictContentEndForFuzzing;
205*3117ece4Schristos #endif
206*3117ece4Schristos 
207*3117ece4Schristos     /* Tracing */
208*3117ece4Schristos #if ZSTD_TRACE
209*3117ece4Schristos     ZSTD_TraceCtx traceCtx;
210*3117ece4Schristos #endif
211*3117ece4Schristos };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
212*3117ece4Schristos 
213*3117ece4Schristos MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
214*3117ece4Schristos #if DYNAMIC_BMI2 != 0
215*3117ece4Schristos 	return dctx->bmi2;
216*3117ece4Schristos #else
217*3117ece4Schristos     (void)dctx;
218*3117ece4Schristos 	return 0;
219*3117ece4Schristos #endif
220*3117ece4Schristos }
221*3117ece4Schristos 
/*-*******************************************************
 *  Shared internal functions
 *********************************************************/
225*3117ece4Schristos 
226*3117ece4Schristos /*! ZSTD_loadDEntropy() :
227*3117ece4Schristos  *  dict : must point at beginning of a valid zstd dictionary.
228*3117ece4Schristos  * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
229*3117ece4Schristos size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
230*3117ece4Schristos                    const void* const dict, size_t const dictSize);
231*3117ece4Schristos 
232*3117ece4Schristos /*! ZSTD_checkContinuity() :
233*3117ece4Schristos  *  check if next `dst` follows previous position, where decompression ended.
234*3117ece4Schristos  *  If yes, do nothing (continue on current segment).
235*3117ece4Schristos  *  If not, classify previous segment as "external dictionary", and start a new segment.
236*3117ece4Schristos  *  This function cannot fail. */
237*3117ece4Schristos void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
238*3117ece4Schristos 
239*3117ece4Schristos 
240*3117ece4Schristos #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
241