/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10a0483764SConrad Meyer
11a0483764SConrad Meyer
/* zstd_decompress_internal:
 * objects and definitions shared within lib/decompress modules */
14a0483764SConrad Meyer
15a0483764SConrad Meyer #ifndef ZSTD_DECOMPRESS_INTERNAL_H
16a0483764SConrad Meyer #define ZSTD_DECOMPRESS_INTERNAL_H
17a0483764SConrad Meyer
18a0483764SConrad Meyer
/*-*******************************************************
 *  Dependencies
 *********************************************************/
2237f1f268SConrad Meyer #include "../common/mem.h" /* BYTE, U16, U32 */
23*5ff13fbcSAllan Jude #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
24a0483764SConrad Meyer
25a0483764SConrad Meyer
26a0483764SConrad Meyer
/*-*******************************************************
 *  Constants
 *********************************************************/
30f7cd7fe5SConrad Meyer static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
31a0483764SConrad Meyer 0, 1, 2, 3, 4, 5, 6, 7,
32a0483764SConrad Meyer 8, 9, 10, 11, 12, 13, 14, 15,
33a0483764SConrad Meyer 16, 18, 20, 22, 24, 28, 32, 40,
34a0483764SConrad Meyer 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
35a0483764SConrad Meyer 0x2000, 0x4000, 0x8000, 0x10000 };
36a0483764SConrad Meyer
37f7cd7fe5SConrad Meyer static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
38a0483764SConrad Meyer 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
39a0483764SConrad Meyer 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
40a0483764SConrad Meyer 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
41a0483764SConrad Meyer 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
42a0483764SConrad Meyer
43*5ff13fbcSAllan Jude static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
44a0483764SConrad Meyer 0, 1, 2, 3, 4, 5, 6, 7,
45a0483764SConrad Meyer 8, 9, 10, 11, 12, 13, 14, 15,
46a0483764SConrad Meyer 16, 17, 18, 19, 20, 21, 22, 23,
47a0483764SConrad Meyer 24, 25, 26, 27, 28, 29, 30, 31 };
48a0483764SConrad Meyer
49f7cd7fe5SConrad Meyer static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
50a0483764SConrad Meyer 3, 4, 5, 6, 7, 8, 9, 10,
51a0483764SConrad Meyer 11, 12, 13, 14, 15, 16, 17, 18,
52a0483764SConrad Meyer 19, 20, 21, 22, 23, 24, 25, 26,
53a0483764SConrad Meyer 27, 28, 29, 30, 31, 32, 33, 34,
54a0483764SConrad Meyer 35, 37, 39, 41, 43, 47, 51, 59,
55a0483764SConrad Meyer 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
56a0483764SConrad Meyer 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
57a0483764SConrad Meyer
58a0483764SConrad Meyer
/*-*******************************************************
 *  Decompression types
 *********************************************************/
62a0483764SConrad Meyer typedef struct {
63a0483764SConrad Meyer U32 fastMode;
64a0483764SConrad Meyer U32 tableLog;
65a0483764SConrad Meyer } ZSTD_seqSymbol_header;
66a0483764SConrad Meyer
67a0483764SConrad Meyer typedef struct {
68a0483764SConrad Meyer U16 nextState;
69a0483764SConrad Meyer BYTE nbAdditionalBits;
70a0483764SConrad Meyer BYTE nbBits;
71a0483764SConrad Meyer U32 baseValue;
72a0483764SConrad Meyer } ZSTD_seqSymbol;
73a0483764SConrad Meyer
/* SEQSYMBOL_TABLE_SIZE(log) :
 * Number of cells in a sequence-symbol table of the given log size :
 * 2^log cells plus 1 extra cell. `log` is evaluated exactly once. */
#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
75a0483764SConrad Meyer
/* ZSTD_BUILD_FSE_TABLE_WKSP_SIZE :
 * Workspace size in bytes : (MaxSeq + 1) S16 slots, plus (1 << MaxFSELog) bytes,
 * plus one U64.
 * ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 :
 * The same size expressed in U32 units, rounded up. */
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
78f7cd7fe5SConrad Meyer
79a0483764SConrad Meyer typedef struct {
80a0483764SConrad Meyer ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
81a0483764SConrad Meyer ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
82a0483764SConrad Meyer ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
83a0483764SConrad Meyer HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
84a0483764SConrad Meyer U32 rep[ZSTD_REP_NUM];
85f7cd7fe5SConrad Meyer U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
86a0483764SConrad Meyer } ZSTD_entropyDTables_t;
87a0483764SConrad Meyer
/* ZSTD_dStage :
 * Decoder stage stored in ZSTD_DCtx_s::stage.
 * Enumerators carry implicit values 0..7 in declaration order;
 * keep the order stable. */
typedef enum {
    ZSTDds_getFrameHeaderSize,
    ZSTDds_decodeFrameHeader,
    ZSTDds_decodeBlockHeader,
    ZSTDds_decompressBlock,
    ZSTDds_decompressLastBlock,
    ZSTDds_checkChecksum,
    ZSTDds_decodeSkippableHeader,
    ZSTDds_skipFrame
} ZSTD_dStage;
92a0483764SConrad Meyer
/* ZSTD_dStreamStage :
 * Streaming stage stored in ZSTD_DCtx_s::streamStage.
 * zdss_init is explicitly 0; the following enumerators take 1..4
 * in declaration order. */
typedef enum {
    zdss_init = 0,
    zdss_loadHeader,
    zdss_read,
    zdss_load,
    zdss_flush
} ZSTD_dStreamStage;
95a0483764SConrad Meyer
/* ZSTD_dictUses_e :
 * Dictionary usage policy stored in ZSTD_DCtx_s::dictUses. */
typedef enum {
    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
} ZSTD_dictUses_e;
1012b9c00cbSConrad Meyer
102*5ff13fbcSAllan Jude /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
103*5ff13fbcSAllan Jude typedef struct {
104*5ff13fbcSAllan Jude const ZSTD_DDict** ddictPtrTable;
105*5ff13fbcSAllan Jude size_t ddictPtrTableSize;
106*5ff13fbcSAllan Jude size_t ddictPtrCount;
107*5ff13fbcSAllan Jude } ZSTD_DDictHashSet;
108*5ff13fbcSAllan Jude
/* ZSTD_DECODER_INTERNAL_BUFFER :
 * Build-time tunable; defaults to 64 KB (1 << 16) unless already defined. */
#ifndef ZSTD_DECODER_INTERNAL_BUFFER
#  define ZSTD_DECODER_INTERNAL_BUFFER  (1 << 16)
#endif

/* Lower and upper limits applied to ZSTD_DECODER_INTERNAL_BUFFER : 64 bytes and 128 KB */
#define ZSTD_LBMIN 64
#define ZSTD_LBMAX (128 << 10)

/* extra buffer, compensates when dst is not large enough to store litBuffer.
 * NOTE(review): BOUNDED(min, val, max) is defined elsewhere and presumably
 * clamps val into [min, max] - confirm in zstd_internal.h. */
#define ZSTD_LITBUFFEREXTRASIZE  BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
118*5ff13fbcSAllan Jude
/* ZSTD_litLocation_e :
 * Where the literal buffer lives; stored in ZSTD_DCtx_s::litBufferLocation. */
typedef enum {
    ZSTD_not_in_dst = 0,  /* Stored entirely within litExtraBuffer */
    ZSTD_in_dst = 1,      /* Stored entirely within dst (in memory after current output write) */
    ZSTD_split = 2        /* Split between litExtraBuffer and dst */
} ZSTD_litLocation_e;
124*5ff13fbcSAllan Jude
125a0483764SConrad Meyer struct ZSTD_DCtx_s
126a0483764SConrad Meyer {
127a0483764SConrad Meyer const ZSTD_seqSymbol* LLTptr;
128a0483764SConrad Meyer const ZSTD_seqSymbol* MLTptr;
129a0483764SConrad Meyer const ZSTD_seqSymbol* OFTptr;
130a0483764SConrad Meyer const HUF_DTable* HUFptr;
131a0483764SConrad Meyer ZSTD_entropyDTables_t entropy;
132a0483764SConrad Meyer U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
133a0483764SConrad Meyer const void* previousDstEnd; /* detect continuity */
134a0483764SConrad Meyer const void* prefixStart; /* start of current segment */
135a0483764SConrad Meyer const void* virtualStart; /* virtual start of previous segment if it was just before current one */
136a0483764SConrad Meyer const void* dictEnd; /* end of previous segment */
137a0483764SConrad Meyer size_t expected;
138a0483764SConrad Meyer ZSTD_frameHeader fParams;
139*5ff13fbcSAllan Jude U64 processedCSize;
140a0483764SConrad Meyer U64 decodedSize;
141a0483764SConrad Meyer blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
142a0483764SConrad Meyer ZSTD_dStage stage;
143a0483764SConrad Meyer U32 litEntropy;
144a0483764SConrad Meyer U32 fseEntropy;
145a0483764SConrad Meyer XXH64_state_t xxhState;
146a0483764SConrad Meyer size_t headerSize;
147a0483764SConrad Meyer ZSTD_format_e format;
148f7cd7fe5SConrad Meyer ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
149f7cd7fe5SConrad Meyer U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
150a0483764SConrad Meyer const BYTE* litPtr;
151a0483764SConrad Meyer ZSTD_customMem customMem;
152a0483764SConrad Meyer size_t litSize;
153a0483764SConrad Meyer size_t rleSize;
154a0483764SConrad Meyer size_t staticSize;
155*5ff13fbcSAllan Jude #if DYNAMIC_BMI2 != 0
156a0483764SConrad Meyer int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
157*5ff13fbcSAllan Jude #endif
158a0483764SConrad Meyer
159a0483764SConrad Meyer /* dictionary */
160a0483764SConrad Meyer ZSTD_DDict* ddictLocal;
161a0483764SConrad Meyer const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
162a0483764SConrad Meyer U32 dictID;
163a0483764SConrad Meyer int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
1642b9c00cbSConrad Meyer ZSTD_dictUses_e dictUses;
165*5ff13fbcSAllan Jude ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
166*5ff13fbcSAllan Jude ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
167a0483764SConrad Meyer
168a0483764SConrad Meyer /* streaming */
169a0483764SConrad Meyer ZSTD_dStreamStage streamStage;
170a0483764SConrad Meyer char* inBuff;
171a0483764SConrad Meyer size_t inBuffSize;
172a0483764SConrad Meyer size_t inPos;
173a0483764SConrad Meyer size_t maxWindowSize;
174a0483764SConrad Meyer char* outBuff;
175a0483764SConrad Meyer size_t outBuffSize;
176a0483764SConrad Meyer size_t outStart;
177a0483764SConrad Meyer size_t outEnd;
178a0483764SConrad Meyer size_t lhSize;
179*5ff13fbcSAllan Jude #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
180a0483764SConrad Meyer void* legacyContext;
181a0483764SConrad Meyer U32 previousLegacyVersion;
182a0483764SConrad Meyer U32 legacyVersion;
183*5ff13fbcSAllan Jude #endif
184a0483764SConrad Meyer U32 hostageByte;
185a0483764SConrad Meyer int noForwardProgress;
186f7cd7fe5SConrad Meyer ZSTD_bufferMode_e outBufferMode;
18737f1f268SConrad Meyer ZSTD_outBuffer expectedOutBuffer;
188a0483764SConrad Meyer
189a0483764SConrad Meyer /* workspace */
190*5ff13fbcSAllan Jude BYTE* litBuffer;
191*5ff13fbcSAllan Jude const BYTE* litBufferEnd;
192*5ff13fbcSAllan Jude ZSTD_litLocation_e litBufferLocation;
193*5ff13fbcSAllan Jude BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
194a0483764SConrad Meyer BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
19537f1f268SConrad Meyer
19637f1f268SConrad Meyer size_t oversizedDuration;
19737f1f268SConrad Meyer
19837f1f268SConrad Meyer #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
19937f1f268SConrad Meyer void const* dictContentBeginForFuzzing;
20037f1f268SConrad Meyer void const* dictContentEndForFuzzing;
20137f1f268SConrad Meyer #endif
202*5ff13fbcSAllan Jude
203*5ff13fbcSAllan Jude /* Tracing */
204*5ff13fbcSAllan Jude #if ZSTD_TRACE
205*5ff13fbcSAllan Jude ZSTD_TraceCtx traceCtx;
206*5ff13fbcSAllan Jude #endif
207a0483764SConrad Meyer }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
208a0483764SConrad Meyer
ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s * dctx)209*5ff13fbcSAllan Jude MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
210*5ff13fbcSAllan Jude #if DYNAMIC_BMI2 != 0
211*5ff13fbcSAllan Jude return dctx->bmi2;
212*5ff13fbcSAllan Jude #else
213*5ff13fbcSAllan Jude (void)dctx;
214*5ff13fbcSAllan Jude return 0;
215*5ff13fbcSAllan Jude #endif
216*5ff13fbcSAllan Jude }
217a0483764SConrad Meyer
/*-*******************************************************
 *  Shared internal functions
 *********************************************************/
221a0483764SConrad Meyer
222a0483764SConrad Meyer /*! ZSTD_loadDEntropy() :
223a0483764SConrad Meyer * dict : must point at beginning of a valid zstd dictionary.
22437f1f268SConrad Meyer * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
225a0483764SConrad Meyer size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
226a0483764SConrad Meyer const void* const dict, size_t const dictSize);
227a0483764SConrad Meyer
228a0483764SConrad Meyer /*! ZSTD_checkContinuity() :
229a0483764SConrad Meyer * check if next `dst` follows previous position, where decompression ended.
230a0483764SConrad Meyer * If yes, do nothing (continue on current segment).
231a0483764SConrad Meyer * If not, classify previous segment as "external dictionary", and start a new segment.
232a0483764SConrad Meyer * This function cannot fail. */
233*5ff13fbcSAllan Jude void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
234a0483764SConrad Meyer
235a0483764SConrad Meyer
236a0483764SConrad Meyer #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
237