xref: /freebsd-src/sys/contrib/openzfs/module/zstd/lib/decompress/zstd_decompress_internal.h (revision c03c5b1c80914ec656fbee84539355d1fad68bf9)
1*c03c5b1cSMartin Matuska /*
2*c03c5b1cSMartin Matuska  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3*c03c5b1cSMartin Matuska  * All rights reserved.
4*c03c5b1cSMartin Matuska  *
5*c03c5b1cSMartin Matuska  * This source code is licensed under both the BSD-style license (found in the
6*c03c5b1cSMartin Matuska  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*c03c5b1cSMartin Matuska  * in the COPYING file in the root directory of this source tree).
8*c03c5b1cSMartin Matuska  * You may select, at your option, one of the above-listed licenses.
9*c03c5b1cSMartin Matuska  */
10*c03c5b1cSMartin Matuska 
11*c03c5b1cSMartin Matuska 
/* zstd_decompress_internal :
 * objects and definitions shared between the lib/decompress modules. */
14*c03c5b1cSMartin Matuska 
15*c03c5b1cSMartin Matuska  #ifndef ZSTD_DECOMPRESS_INTERNAL_H
16*c03c5b1cSMartin Matuska  #define ZSTD_DECOMPRESS_INTERNAL_H
17*c03c5b1cSMartin Matuska 
18*c03c5b1cSMartin Matuska 
/*-*******************************************************
 *  Dependencies
 *********************************************************/
22*c03c5b1cSMartin Matuska #include "../common/mem.h"             /* BYTE, U16, U32 */
23*c03c5b1cSMartin Matuska #include "../common/zstd_internal.h"   /* ZSTD_seqSymbol */
24*c03c5b1cSMartin Matuska 
25*c03c5b1cSMartin Matuska 
26*c03c5b1cSMartin Matuska 
/*-*******************************************************
 *  Constants
 *********************************************************/
30*c03c5b1cSMartin Matuska static const U32 LL_base[MaxLL+1] = {
31*c03c5b1cSMartin Matuska                  0,    1,    2,     3,     4,     5,     6,      7,
32*c03c5b1cSMartin Matuska                  8,    9,   10,    11,    12,    13,    14,     15,
33*c03c5b1cSMartin Matuska                 16,   18,   20,    22,    24,    28,    32,     40,
34*c03c5b1cSMartin Matuska                 48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35*c03c5b1cSMartin Matuska                 0x2000, 0x4000, 0x8000, 0x10000 };
36*c03c5b1cSMartin Matuska 
37*c03c5b1cSMartin Matuska static const U32 OF_base[MaxOff+1] = {
38*c03c5b1cSMartin Matuska                  0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
39*c03c5b1cSMartin Matuska                  0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
40*c03c5b1cSMartin Matuska                  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41*c03c5b1cSMartin Matuska                  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42*c03c5b1cSMartin Matuska 
43*c03c5b1cSMartin Matuska static const U32 OF_bits[MaxOff+1] = {
44*c03c5b1cSMartin Matuska                      0,  1,  2,  3,  4,  5,  6,  7,
45*c03c5b1cSMartin Matuska                      8,  9, 10, 11, 12, 13, 14, 15,
46*c03c5b1cSMartin Matuska                     16, 17, 18, 19, 20, 21, 22, 23,
47*c03c5b1cSMartin Matuska                     24, 25, 26, 27, 28, 29, 30, 31 };
48*c03c5b1cSMartin Matuska 
49*c03c5b1cSMartin Matuska static const U32 ML_base[MaxML+1] = {
50*c03c5b1cSMartin Matuska                      3,  4,  5,    6,     7,     8,     9,    10,
51*c03c5b1cSMartin Matuska                     11, 12, 13,   14,    15,    16,    17,    18,
52*c03c5b1cSMartin Matuska                     19, 20, 21,   22,    23,    24,    25,    26,
53*c03c5b1cSMartin Matuska                     27, 28, 29,   30,    31,    32,    33,    34,
54*c03c5b1cSMartin Matuska                     35, 37, 39,   41,    43,    47,    51,    59,
55*c03c5b1cSMartin Matuska                     67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
56*c03c5b1cSMartin Matuska                     0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
57*c03c5b1cSMartin Matuska 
58*c03c5b1cSMartin Matuska 
/*-*******************************************************
 *  Decompression types
 *********************************************************/
62*c03c5b1cSMartin Matuska  typedef struct {
63*c03c5b1cSMartin Matuska      U32 fastMode;
64*c03c5b1cSMartin Matuska      U32 tableLog;
65*c03c5b1cSMartin Matuska  } ZSTD_seqSymbol_header;
66*c03c5b1cSMartin Matuska 
67*c03c5b1cSMartin Matuska  typedef struct {
68*c03c5b1cSMartin Matuska      U16  nextState;
69*c03c5b1cSMartin Matuska      BYTE nbAdditionalBits;
70*c03c5b1cSMartin Matuska      BYTE nbBits;
71*c03c5b1cSMartin Matuska      U32  baseValue;
72*c03c5b1cSMartin Matuska  } ZSTD_seqSymbol;
73*c03c5b1cSMartin Matuska 
/* SEQSYMBOL_TABLE_SIZE(log) :
 * number of cells in a table of the given log : 2^log cells plus 1.
 * `log` is evaluated once; it must be a non-negative int smaller than the
 * width of int, since the shift is performed on the int constant 1.
 * NOTE(review): the extra cell presumably holds the table header - confirm. */
#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
75*c03c5b1cSMartin Matuska 
76*c03c5b1cSMartin Matuska typedef struct {
77*c03c5b1cSMartin Matuska     ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
78*c03c5b1cSMartin Matuska     ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
79*c03c5b1cSMartin Matuska     ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
80*c03c5b1cSMartin Matuska     HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
81*c03c5b1cSMartin Matuska     U32 rep[ZSTD_REP_NUM];
82*c03c5b1cSMartin Matuska } ZSTD_entropyDTables_t;
83*c03c5b1cSMartin Matuska 
/* ZSTD_dStage :
 * stage identifiers stored in ZSTD_DCtx_s::stage.
 * Enumerators take implicit values starting at 0, in the order listed;
 * keep the order unchanged.
 * NOTE(review): presumably the states of the block-level decoder driven by
 * ZSTD_decompressContinue() - confirm in zstd_decompress.c. */
typedef enum {
    ZSTDds_getFrameHeaderSize,
    ZSTDds_decodeFrameHeader,
    ZSTDds_decodeBlockHeader,
    ZSTDds_decompressBlock,
    ZSTDds_decompressLastBlock,
    ZSTDds_checkChecksum,
    ZSTDds_decodeSkippableHeader,
    ZSTDds_skipFrame
} ZSTD_dStage;
88*c03c5b1cSMartin Matuska 
/* ZSTD_dStreamStage :
 * stage identifiers stored in ZSTD_DCtx_s::streamStage.
 * zdss_init is explicitly 0; the following enumerators take the next
 * consecutive values (1..4).
 * NOTE(review): presumably the states of the streaming decoder - confirm
 * in zstd_decompress.c. */
typedef enum {
    zdss_init = 0,
    zdss_loadHeader,
    zdss_read,
    zdss_load,
    zdss_flush
} ZSTD_dStreamStage;
91*c03c5b1cSMartin Matuska 
/* ZSTD_dictUses_e :
 * how long a referenced dictionary remains in use; stored in
 * ZSTD_DCtx_s::dictUses. Values are explicit (-1, 0, 1). */
typedef enum {
    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists, free it) */
    ZSTD_use_once = 1            /* Use the dictionary once, then switch to ZSTD_dont_use */
} ZSTD_dictUses_e;
97*c03c5b1cSMartin Matuska 
/* ZSTD_outBufferMode_e :
 * output buffering policy; stored in ZSTD_DCtx_s::outBufferMode.
 * NOTE(review): "stable" presumably means the caller keeps passing the same
 * ZSTD_outBuffer between calls (see expectedOutBuffer in ZSTD_DCtx_s) -
 * confirm against the public API documentation. */
typedef enum {
    ZSTD_obm_buffered = 0,  /* Buffer the output */
    ZSTD_obm_stable = 1     /* ZSTD_outBuffer is stable */
} ZSTD_outBufferMode_e;
102*c03c5b1cSMartin Matuska 
103*c03c5b1cSMartin Matuska struct ZSTD_DCtx_s
104*c03c5b1cSMartin Matuska {
105*c03c5b1cSMartin Matuska     const ZSTD_seqSymbol* LLTptr;
106*c03c5b1cSMartin Matuska     const ZSTD_seqSymbol* MLTptr;
107*c03c5b1cSMartin Matuska     const ZSTD_seqSymbol* OFTptr;
108*c03c5b1cSMartin Matuska     const HUF_DTable* HUFptr;
109*c03c5b1cSMartin Matuska     ZSTD_entropyDTables_t entropy;
110*c03c5b1cSMartin Matuska     U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
111*c03c5b1cSMartin Matuska     const void* previousDstEnd;   /* detect continuity */
112*c03c5b1cSMartin Matuska     const void* prefixStart;      /* start of current segment */
113*c03c5b1cSMartin Matuska     const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
114*c03c5b1cSMartin Matuska     const void* dictEnd;          /* end of previous segment */
115*c03c5b1cSMartin Matuska     size_t expected;
116*c03c5b1cSMartin Matuska     ZSTD_frameHeader fParams;
117*c03c5b1cSMartin Matuska     U64 decodedSize;
118*c03c5b1cSMartin Matuska     blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
119*c03c5b1cSMartin Matuska     ZSTD_dStage stage;
120*c03c5b1cSMartin Matuska     U32 litEntropy;
121*c03c5b1cSMartin Matuska     U32 fseEntropy;
122*c03c5b1cSMartin Matuska     XXH64_state_t xxhState;
123*c03c5b1cSMartin Matuska     size_t headerSize;
124*c03c5b1cSMartin Matuska     ZSTD_format_e format;
125*c03c5b1cSMartin Matuska     const BYTE* litPtr;
126*c03c5b1cSMartin Matuska     ZSTD_customMem customMem;
127*c03c5b1cSMartin Matuska     size_t litSize;
128*c03c5b1cSMartin Matuska     size_t rleSize;
129*c03c5b1cSMartin Matuska     size_t staticSize;
130*c03c5b1cSMartin Matuska     int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
131*c03c5b1cSMartin Matuska 
132*c03c5b1cSMartin Matuska     /* dictionary */
133*c03c5b1cSMartin Matuska     ZSTD_DDict* ddictLocal;
134*c03c5b1cSMartin Matuska     const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
135*c03c5b1cSMartin Matuska     U32 dictID;
136*c03c5b1cSMartin Matuska     int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
137*c03c5b1cSMartin Matuska     ZSTD_dictUses_e dictUses;
138*c03c5b1cSMartin Matuska 
139*c03c5b1cSMartin Matuska     /* streaming */
140*c03c5b1cSMartin Matuska     ZSTD_dStreamStage streamStage;
141*c03c5b1cSMartin Matuska     char*  inBuff;
142*c03c5b1cSMartin Matuska     size_t inBuffSize;
143*c03c5b1cSMartin Matuska     size_t inPos;
144*c03c5b1cSMartin Matuska     size_t maxWindowSize;
145*c03c5b1cSMartin Matuska     char*  outBuff;
146*c03c5b1cSMartin Matuska     size_t outBuffSize;
147*c03c5b1cSMartin Matuska     size_t outStart;
148*c03c5b1cSMartin Matuska     size_t outEnd;
149*c03c5b1cSMartin Matuska     size_t lhSize;
150*c03c5b1cSMartin Matuska     void* legacyContext;
151*c03c5b1cSMartin Matuska     U32 previousLegacyVersion;
152*c03c5b1cSMartin Matuska     U32 legacyVersion;
153*c03c5b1cSMartin Matuska     U32 hostageByte;
154*c03c5b1cSMartin Matuska     int noForwardProgress;
155*c03c5b1cSMartin Matuska     ZSTD_outBufferMode_e outBufferMode;
156*c03c5b1cSMartin Matuska     ZSTD_outBuffer expectedOutBuffer;
157*c03c5b1cSMartin Matuska 
158*c03c5b1cSMartin Matuska     /* workspace */
159*c03c5b1cSMartin Matuska     BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
160*c03c5b1cSMartin Matuska     BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
161*c03c5b1cSMartin Matuska 
162*c03c5b1cSMartin Matuska     size_t oversizedDuration;
163*c03c5b1cSMartin Matuska 
164*c03c5b1cSMartin Matuska #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
165*c03c5b1cSMartin Matuska     void const* dictContentBeginForFuzzing;
166*c03c5b1cSMartin Matuska     void const* dictContentEndForFuzzing;
167*c03c5b1cSMartin Matuska #endif
168*c03c5b1cSMartin Matuska };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
169*c03c5b1cSMartin Matuska 
170*c03c5b1cSMartin Matuska 
/*-*******************************************************
 *  Shared internal functions
 *********************************************************/
174*c03c5b1cSMartin Matuska 
175*c03c5b1cSMartin Matuska /*! ZSTD_loadDEntropy() :
176*c03c5b1cSMartin Matuska  *  dict : must point at beginning of a valid zstd dictionary.
177*c03c5b1cSMartin Matuska  * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
178*c03c5b1cSMartin Matuska size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
179*c03c5b1cSMartin Matuska                    const void* const dict, size_t const dictSize);
180*c03c5b1cSMartin Matuska 
181*c03c5b1cSMartin Matuska /*! ZSTD_checkContinuity() :
182*c03c5b1cSMartin Matuska  *  check if next `dst` follows previous position, where decompression ended.
183*c03c5b1cSMartin Matuska  *  If yes, do nothing (continue on current segment).
184*c03c5b1cSMartin Matuska  *  If not, classify previous segment as "external dictionary", and start a new segment.
185*c03c5b1cSMartin Matuska  *  This function cannot fail. */
186*c03c5b1cSMartin Matuska void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
187*c03c5b1cSMartin Matuska 
188*c03c5b1cSMartin Matuska 
189*c03c5b1cSMartin Matuska #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
190