1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos 12*3117ece4Schristos /* zstd_decompress_internal: 13*3117ece4Schristos * objects and definitions shared within lib/decompress modules */ 14*3117ece4Schristos 15*3117ece4Schristos #ifndef ZSTD_DECOMPRESS_INTERNAL_H 16*3117ece4Schristos #define ZSTD_DECOMPRESS_INTERNAL_H 17*3117ece4Schristos 18*3117ece4Schristos 19*3117ece4Schristos /*-******************************************************* 20*3117ece4Schristos * Dependencies 21*3117ece4Schristos *********************************************************/ 22*3117ece4Schristos #include "../common/mem.h" /* BYTE, U16, U32 */ 23*3117ece4Schristos #include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */ 24*3117ece4Schristos 25*3117ece4Schristos 26*3117ece4Schristos 27*3117ece4Schristos /*-******************************************************* 28*3117ece4Schristos * Constants 29*3117ece4Schristos *********************************************************/ 30*3117ece4Schristos static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { 31*3117ece4Schristos 0, 1, 2, 3, 4, 5, 6, 7, 32*3117ece4Schristos 8, 9, 10, 11, 12, 13, 14, 15, 33*3117ece4Schristos 16, 18, 20, 22, 24, 28, 32, 40, 34*3117ece4Schristos 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 35*3117ece4Schristos 0x2000, 0x4000, 0x8000, 0x10000 }; 36*3117ece4Schristos 37*3117ece4Schristos static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { 38*3117ece4Schristos 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 39*3117ece4Schristos 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 40*3117ece4Schristos 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 41*3117ece4Schristos 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; 42*3117ece4Schristos 43*3117ece4Schristos static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = { 44*3117ece4Schristos 0, 1, 2, 3, 4, 5, 6, 7, 45*3117ece4Schristos 8, 9, 10, 11, 12, 13, 14, 15, 46*3117ece4Schristos 16, 17, 18, 19, 20, 21, 22, 23, 47*3117ece4Schristos 24, 25, 26, 27, 28, 29, 30, 31 }; 48*3117ece4Schristos 49*3117ece4Schristos static UNUSED_ATTR const U32 ML_base[MaxML+1] = { 50*3117ece4Schristos 3, 4, 5, 6, 7, 8, 9, 10, 51*3117ece4Schristos 11, 12, 13, 14, 15, 16, 17, 18, 52*3117ece4Schristos 19, 20, 21, 22, 23, 24, 25, 26, 53*3117ece4Schristos 27, 28, 29, 30, 31, 32, 33, 34, 54*3117ece4Schristos 35, 37, 39, 41, 43, 47, 51, 59, 55*3117ece4Schristos 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 56*3117ece4Schristos 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; 57*3117ece4Schristos 58*3117ece4Schristos 59*3117ece4Schristos /*-******************************************************* 60*3117ece4Schristos * Decompression types 61*3117ece4Schristos *********************************************************/ 62*3117ece4Schristos typedef struct { 63*3117ece4Schristos U32 fastMode; 64*3117ece4Schristos U32 tableLog; 65*3117ece4Schristos } ZSTD_seqSymbol_header; 66*3117ece4Schristos 67*3117ece4Schristos typedef struct { 68*3117ece4Schristos U16 nextState; 69*3117ece4Schristos BYTE nbAdditionalBits; 70*3117ece4Schristos BYTE nbBits; 71*3117ece4Schristos U32 baseValue; 72*3117ece4Schristos } ZSTD_seqSymbol; 73*3117ece4Schristos 74*3117ece4Schristos #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) 75*3117ece4Schristos 76*3117ece4Schristos #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) 77*3117ece4Schristos #define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) 78*3117ece4Schristos #define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 79*3117ece4Schristos 80*3117ece4Schristos typedef struct { 81*3117ece4Schristos ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ 82*3117ece4Schristos ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ 83*3117ece4Schristos ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ 84*3117ece4Schristos HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ 85*3117ece4Schristos U32 rep[ZSTD_REP_NUM]; 86*3117ece4Schristos U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; 87*3117ece4Schristos } ZSTD_entropyDTables_t; 88*3117ece4Schristos 89*3117ece4Schristos typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, 90*3117ece4Schristos ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, 91*3117ece4Schristos ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, 92*3117ece4Schristos ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; 93*3117ece4Schristos 94*3117ece4Schristos typedef enum { zdss_init=0, zdss_loadHeader, 95*3117ece4Schristos zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; 96*3117ece4Schristos 97*3117ece4Schristos typedef enum { 98*3117ece4Schristos ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ 99*3117ece4Schristos ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ 100*3117ece4Schristos ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ 101*3117ece4Schristos } ZSTD_dictUses_e; 102*3117ece4Schristos 103*3117ece4Schristos /* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ 104*3117ece4Schristos typedef struct { 105*3117ece4Schristos const ZSTD_DDict** ddictPtrTable; 106*3117ece4Schristos size_t ddictPtrTableSize; 107*3117ece4Schristos size_t ddictPtrCount; 108*3117ece4Schristos } ZSTD_DDictHashSet; 109*3117ece4Schristos 110*3117ece4Schristos #ifndef ZSTD_DECODER_INTERNAL_BUFFER 111*3117ece4Schristos # define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16) 112*3117ece4Schristos #endif 113*3117ece4Schristos 114*3117ece4Schristos #define ZSTD_LBMIN 64 115*3117ece4Schristos #define ZSTD_LBMAX (128 << 10) 116*3117ece4Schristos 117*3117ece4Schristos /* extra buffer, compensates when dst is not large enough to store litBuffer */ 118*3117ece4Schristos #define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX) 119*3117ece4Schristos 120*3117ece4Schristos typedef enum { 121*3117ece4Schristos ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */ 122*3117ece4Schristos ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */ 123*3117ece4Schristos ZSTD_split = 2 /* Split between litExtraBuffer and dst */ 124*3117ece4Schristos } ZSTD_litLocation_e; 125*3117ece4Schristos 126*3117ece4Schristos struct ZSTD_DCtx_s 127*3117ece4Schristos { 128*3117ece4Schristos const ZSTD_seqSymbol* LLTptr; 129*3117ece4Schristos const ZSTD_seqSymbol* MLTptr; 130*3117ece4Schristos const ZSTD_seqSymbol* OFTptr; 131*3117ece4Schristos const HUF_DTable* HUFptr; 132*3117ece4Schristos ZSTD_entropyDTables_t entropy; 133*3117ece4Schristos U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ 134*3117ece4Schristos const void* previousDstEnd; /* detect continuity */ 135*3117ece4Schristos const void* prefixStart; /* start of current segment */ 136*3117ece4Schristos const void* virtualStart; /* virtual start of previous segment if it was just before current one */ 137*3117ece4Schristos const void* dictEnd; /* end of previous segment */ 138*3117ece4Schristos size_t expected; 139*3117ece4Schristos ZSTD_frameHeader fParams; 140*3117ece4Schristos U64 processedCSize; 141*3117ece4Schristos U64 decodedSize; 142*3117ece4Schristos blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ 143*3117ece4Schristos ZSTD_dStage stage; 144*3117ece4Schristos U32 litEntropy; 145*3117ece4Schristos U32 fseEntropy; 146*3117ece4Schristos XXH64_state_t xxhState; 147*3117ece4Schristos size_t headerSize; 148*3117ece4Schristos ZSTD_format_e format; 149*3117ece4Schristos ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ 150*3117ece4Schristos U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ 151*3117ece4Schristos const BYTE* litPtr; 152*3117ece4Schristos ZSTD_customMem customMem; 153*3117ece4Schristos size_t litSize; 154*3117ece4Schristos size_t rleSize; 155*3117ece4Schristos size_t staticSize; 156*3117ece4Schristos int isFrameDecompression; 157*3117ece4Schristos #if DYNAMIC_BMI2 != 0 158*3117ece4Schristos int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ 159*3117ece4Schristos #endif 160*3117ece4Schristos 161*3117ece4Schristos /* dictionary */ 162*3117ece4Schristos ZSTD_DDict* ddictLocal; 163*3117ece4Schristos const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ 164*3117ece4Schristos U32 dictID; 165*3117ece4Schristos int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ 166*3117ece4Schristos ZSTD_dictUses_e dictUses; 167*3117ece4Schristos ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ 168*3117ece4Schristos ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ 169*3117ece4Schristos int disableHufAsm; 170*3117ece4Schristos int maxBlockSizeParam; 171*3117ece4Schristos 172*3117ece4Schristos /* streaming */ 173*3117ece4Schristos ZSTD_dStreamStage streamStage; 174*3117ece4Schristos char* inBuff; 175*3117ece4Schristos size_t inBuffSize; 176*3117ece4Schristos size_t inPos; 177*3117ece4Schristos size_t maxWindowSize; 178*3117ece4Schristos char* outBuff; 179*3117ece4Schristos size_t outBuffSize; 180*3117ece4Schristos size_t outStart; 181*3117ece4Schristos size_t outEnd; 182*3117ece4Schristos size_t lhSize; 183*3117ece4Schristos #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) 184*3117ece4Schristos void* legacyContext; 185*3117ece4Schristos U32 previousLegacyVersion; 186*3117ece4Schristos U32 legacyVersion; 187*3117ece4Schristos #endif 188*3117ece4Schristos U32 hostageByte; 189*3117ece4Schristos int noForwardProgress; 190*3117ece4Schristos ZSTD_bufferMode_e outBufferMode; 191*3117ece4Schristos ZSTD_outBuffer expectedOutBuffer; 192*3117ece4Schristos 193*3117ece4Schristos /* workspace */ 194*3117ece4Schristos BYTE* litBuffer; 195*3117ece4Schristos const BYTE* litBufferEnd; 196*3117ece4Schristos ZSTD_litLocation_e litBufferLocation; 197*3117ece4Schristos BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */ 198*3117ece4Schristos BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; 199*3117ece4Schristos 200*3117ece4Schristos size_t oversizedDuration; 201*3117ece4Schristos 202*3117ece4Schristos #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 203*3117ece4Schristos void const* dictContentBeginForFuzzing; 204*3117ece4Schristos void const* dictContentEndForFuzzing; 205*3117ece4Schristos #endif 206*3117ece4Schristos 207*3117ece4Schristos /* Tracing */ 208*3117ece4Schristos #if ZSTD_TRACE 209*3117ece4Schristos ZSTD_TraceCtx traceCtx; 210*3117ece4Schristos #endif 211*3117ece4Schristos }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ 212*3117ece4Schristos 213*3117ece4Schristos MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { 214*3117ece4Schristos #if DYNAMIC_BMI2 != 0 215*3117ece4Schristos return dctx->bmi2; 216*3117ece4Schristos #else 217*3117ece4Schristos (void)dctx; 218*3117ece4Schristos return 0; 219*3117ece4Schristos #endif 220*3117ece4Schristos } 221*3117ece4Schristos 222*3117ece4Schristos /*-******************************************************* 223*3117ece4Schristos * Shared internal functions 224*3117ece4Schristos *********************************************************/ 225*3117ece4Schristos 226*3117ece4Schristos /*! ZSTD_loadDEntropy() : 227*3117ece4Schristos * dict : must point at beginning of a valid zstd dictionary. 228*3117ece4Schristos * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ 229*3117ece4Schristos size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, 230*3117ece4Schristos const void* const dict, size_t const dictSize); 231*3117ece4Schristos 232*3117ece4Schristos /*! ZSTD_checkContinuity() : 233*3117ece4Schristos * check if next `dst` follows previous position, where decompression ended. 234*3117ece4Schristos * If yes, do nothing (continue on current segment). 235*3117ece4Schristos * If not, classify previous segment as "external dictionary", and start a new segment. 236*3117ece4Schristos * This function cannot fail. */ 237*3117ece4Schristos void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); 238*3117ece4Schristos 239*3117ece4Schristos 240*3117ece4Schristos #endif /* ZSTD_DECOMPRESS_INTERNAL_H */ 241