1*c03c5b1cSMartin Matuska /* 2*c03c5b1cSMartin Matuska * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3*c03c5b1cSMartin Matuska * All rights reserved. 4*c03c5b1cSMartin Matuska * 5*c03c5b1cSMartin Matuska * This source code is licensed under both the BSD-style license (found in the 6*c03c5b1cSMartin Matuska * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*c03c5b1cSMartin Matuska * in the COPYING file in the root directory of this source tree). 8*c03c5b1cSMartin Matuska * You may select, at your option, one of the above-listed licenses. 9*c03c5b1cSMartin Matuska */ 10*c03c5b1cSMartin Matuska 11*c03c5b1cSMartin Matuska 12*c03c5b1cSMartin Matuska /* zstd_decompress_internal: 13*c03c5b1cSMartin Matuska * objects and definitions shared within lib/decompress modules */ 14*c03c5b1cSMartin Matuska 15*c03c5b1cSMartin Matuska #ifndef ZSTD_DECOMPRESS_INTERNAL_H 16*c03c5b1cSMartin Matuska #define ZSTD_DECOMPRESS_INTERNAL_H 17*c03c5b1cSMartin Matuska 18*c03c5b1cSMartin Matuska 19*c03c5b1cSMartin Matuska /*-******************************************************* 20*c03c5b1cSMartin Matuska * Dependencies 21*c03c5b1cSMartin Matuska *********************************************************/ 22*c03c5b1cSMartin Matuska #include "../common/mem.h" /* BYTE, U16, U32 */ 23*c03c5b1cSMartin Matuska #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */ 24*c03c5b1cSMartin Matuska 25*c03c5b1cSMartin Matuska 26*c03c5b1cSMartin Matuska 27*c03c5b1cSMartin Matuska /*-******************************************************* 28*c03c5b1cSMartin Matuska * Constants 29*c03c5b1cSMartin Matuska *********************************************************/ 30*c03c5b1cSMartin Matuska static const U32 LL_base[MaxLL+1] = { 31*c03c5b1cSMartin Matuska 0, 1, 2, 3, 4, 5, 6, 7, 32*c03c5b1cSMartin Matuska 8, 9, 10, 11, 12, 13, 14, 15, 33*c03c5b1cSMartin Matuska 16, 18, 20, 22, 24, 28, 32, 40, 34*c03c5b1cSMartin Matuska 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 35*c03c5b1cSMartin Matuska 0x2000, 0x4000, 0x8000, 0x10000 }; 36*c03c5b1cSMartin Matuska 37*c03c5b1cSMartin Matuska static const U32 OF_base[MaxOff+1] = { 38*c03c5b1cSMartin Matuska 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 39*c03c5b1cSMartin Matuska 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 40*c03c5b1cSMartin Matuska 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 41*c03c5b1cSMartin Matuska 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; 42*c03c5b1cSMartin Matuska 43*c03c5b1cSMartin Matuska static const U32 OF_bits[MaxOff+1] = { 44*c03c5b1cSMartin Matuska 0, 1, 2, 3, 4, 5, 6, 7, 45*c03c5b1cSMartin Matuska 8, 9, 10, 11, 12, 13, 14, 15, 46*c03c5b1cSMartin Matuska 16, 17, 18, 19, 20, 21, 22, 23, 47*c03c5b1cSMartin Matuska 24, 25, 26, 27, 28, 29, 30, 31 }; 48*c03c5b1cSMartin Matuska 49*c03c5b1cSMartin Matuska static const U32 ML_base[MaxML+1] = { 50*c03c5b1cSMartin Matuska 3, 4, 5, 6, 7, 8, 9, 10, 51*c03c5b1cSMartin Matuska 11, 12, 13, 14, 15, 16, 17, 18, 52*c03c5b1cSMartin Matuska 19, 20, 21, 22, 23, 24, 25, 26, 53*c03c5b1cSMartin Matuska 27, 28, 29, 30, 31, 32, 33, 34, 54*c03c5b1cSMartin Matuska 35, 37, 39, 41, 43, 47, 51, 59, 55*c03c5b1cSMartin Matuska 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 56*c03c5b1cSMartin Matuska 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; 57*c03c5b1cSMartin Matuska 58*c03c5b1cSMartin Matuska 59*c03c5b1cSMartin Matuska /*-******************************************************* 60*c03c5b1cSMartin Matuska * Decompression types 61*c03c5b1cSMartin Matuska *********************************************************/ 62*c03c5b1cSMartin Matuska typedef struct { 63*c03c5b1cSMartin Matuska U32 fastMode; 64*c03c5b1cSMartin Matuska U32 tableLog; 65*c03c5b1cSMartin Matuska } ZSTD_seqSymbol_header; 66*c03c5b1cSMartin Matuska 67*c03c5b1cSMartin Matuska typedef struct { 68*c03c5b1cSMartin Matuska U16 nextState; 69*c03c5b1cSMartin Matuska BYTE nbAdditionalBits; 70*c03c5b1cSMartin Matuska BYTE nbBits; 71*c03c5b1cSMartin Matuska U32 baseValue; 72*c03c5b1cSMartin Matuska } ZSTD_seqSymbol; 73*c03c5b1cSMartin Matuska 74*c03c5b1cSMartin Matuska #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) 75*c03c5b1cSMartin Matuska 76*c03c5b1cSMartin Matuska typedef struct { 77*c03c5b1cSMartin Matuska ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ 78*c03c5b1cSMartin Matuska ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ 79*c03c5b1cSMartin Matuska ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ 80*c03c5b1cSMartin Matuska HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ 81*c03c5b1cSMartin Matuska U32 rep[ZSTD_REP_NUM]; 82*c03c5b1cSMartin Matuska } ZSTD_entropyDTables_t; 83*c03c5b1cSMartin Matuska 84*c03c5b1cSMartin Matuska typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, 85*c03c5b1cSMartin Matuska ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, 86*c03c5b1cSMartin Matuska ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, 87*c03c5b1cSMartin Matuska ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; 88*c03c5b1cSMartin Matuska 89*c03c5b1cSMartin Matuska typedef enum { zdss_init=0, zdss_loadHeader, 90*c03c5b1cSMartin Matuska zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; 91*c03c5b1cSMartin Matuska 92*c03c5b1cSMartin Matuska typedef enum { 93*c03c5b1cSMartin Matuska ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ 94*c03c5b1cSMartin Matuska ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ 95*c03c5b1cSMartin Matuska ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ 96*c03c5b1cSMartin Matuska } ZSTD_dictUses_e; 97*c03c5b1cSMartin Matuska 98*c03c5b1cSMartin Matuska typedef enum { 99*c03c5b1cSMartin Matuska ZSTD_obm_buffered = 0, /* Buffer the output */ 100*c03c5b1cSMartin Matuska ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ 101*c03c5b1cSMartin Matuska } ZSTD_outBufferMode_e; 102*c03c5b1cSMartin Matuska 103*c03c5b1cSMartin Matuska struct ZSTD_DCtx_s 104*c03c5b1cSMartin Matuska { 105*c03c5b1cSMartin Matuska const ZSTD_seqSymbol* LLTptr; 106*c03c5b1cSMartin Matuska const ZSTD_seqSymbol* MLTptr; 107*c03c5b1cSMartin Matuska const ZSTD_seqSymbol* OFTptr; 108*c03c5b1cSMartin Matuska const HUF_DTable* HUFptr; 109*c03c5b1cSMartin Matuska ZSTD_entropyDTables_t entropy; 110*c03c5b1cSMartin Matuska U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ 111*c03c5b1cSMartin Matuska const void* previousDstEnd; /* detect continuity */ 112*c03c5b1cSMartin Matuska const void* prefixStart; /* start of current segment */ 113*c03c5b1cSMartin Matuska const void* virtualStart; /* virtual start of previous segment if it was just before current one */ 114*c03c5b1cSMartin Matuska const void* dictEnd; /* end of previous segment */ 115*c03c5b1cSMartin Matuska size_t expected; 116*c03c5b1cSMartin Matuska ZSTD_frameHeader fParams; 117*c03c5b1cSMartin Matuska U64 decodedSize; 118*c03c5b1cSMartin Matuska blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ 119*c03c5b1cSMartin Matuska ZSTD_dStage stage; 120*c03c5b1cSMartin Matuska U32 litEntropy; 121*c03c5b1cSMartin Matuska U32 fseEntropy; 122*c03c5b1cSMartin Matuska XXH64_state_t xxhState; 123*c03c5b1cSMartin Matuska size_t headerSize; 124*c03c5b1cSMartin Matuska ZSTD_format_e format; 125*c03c5b1cSMartin Matuska const BYTE* litPtr; 126*c03c5b1cSMartin Matuska ZSTD_customMem customMem; 127*c03c5b1cSMartin Matuska size_t litSize; 128*c03c5b1cSMartin Matuska size_t rleSize; 129*c03c5b1cSMartin Matuska size_t staticSize; 130*c03c5b1cSMartin Matuska int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ 131*c03c5b1cSMartin Matuska 132*c03c5b1cSMartin Matuska /* dictionary */ 133*c03c5b1cSMartin Matuska ZSTD_DDict* ddictLocal; 134*c03c5b1cSMartin Matuska const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ 135*c03c5b1cSMartin Matuska U32 dictID; 136*c03c5b1cSMartin Matuska int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ 137*c03c5b1cSMartin Matuska ZSTD_dictUses_e dictUses; 138*c03c5b1cSMartin Matuska 139*c03c5b1cSMartin Matuska /* streaming */ 140*c03c5b1cSMartin Matuska ZSTD_dStreamStage streamStage; 141*c03c5b1cSMartin Matuska char* inBuff; 142*c03c5b1cSMartin Matuska size_t inBuffSize; 143*c03c5b1cSMartin Matuska size_t inPos; 144*c03c5b1cSMartin Matuska size_t maxWindowSize; 145*c03c5b1cSMartin Matuska char* outBuff; 146*c03c5b1cSMartin Matuska size_t outBuffSize; 147*c03c5b1cSMartin Matuska size_t outStart; 148*c03c5b1cSMartin Matuska size_t outEnd; 149*c03c5b1cSMartin Matuska size_t lhSize; 150*c03c5b1cSMartin Matuska void* legacyContext; 151*c03c5b1cSMartin Matuska U32 previousLegacyVersion; 152*c03c5b1cSMartin Matuska U32 legacyVersion; 153*c03c5b1cSMartin Matuska U32 hostageByte; 154*c03c5b1cSMartin Matuska int noForwardProgress; 155*c03c5b1cSMartin Matuska ZSTD_outBufferMode_e outBufferMode; 156*c03c5b1cSMartin Matuska ZSTD_outBuffer expectedOutBuffer; 157*c03c5b1cSMartin Matuska 158*c03c5b1cSMartin Matuska /* workspace */ 159*c03c5b1cSMartin Matuska BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; 160*c03c5b1cSMartin Matuska BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; 161*c03c5b1cSMartin Matuska 162*c03c5b1cSMartin Matuska size_t oversizedDuration; 163*c03c5b1cSMartin Matuska 164*c03c5b1cSMartin Matuska #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 165*c03c5b1cSMartin Matuska void const* dictContentBeginForFuzzing; 166*c03c5b1cSMartin Matuska void const* dictContentEndForFuzzing; 167*c03c5b1cSMartin Matuska #endif 168*c03c5b1cSMartin Matuska }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ 169*c03c5b1cSMartin Matuska 170*c03c5b1cSMartin Matuska 171*c03c5b1cSMartin Matuska /*-******************************************************* 172*c03c5b1cSMartin Matuska * Shared internal functions 173*c03c5b1cSMartin Matuska *********************************************************/ 174*c03c5b1cSMartin Matuska 175*c03c5b1cSMartin Matuska /*! ZSTD_loadDEntropy() : 176*c03c5b1cSMartin Matuska * dict : must point at beginning of a valid zstd dictionary. 177*c03c5b1cSMartin Matuska * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ 178*c03c5b1cSMartin Matuska size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, 179*c03c5b1cSMartin Matuska const void* const dict, size_t const dictSize); 180*c03c5b1cSMartin Matuska 181*c03c5b1cSMartin Matuska /*! ZSTD_checkContinuity() : 182*c03c5b1cSMartin Matuska * check if next `dst` follows previous position, where decompression ended. 183*c03c5b1cSMartin Matuska * If yes, do nothing (continue on current segment). 184*c03c5b1cSMartin Matuska * If not, classify previous segment as "external dictionary", and start a new segment. 185*c03c5b1cSMartin Matuska * This function cannot fail. */ 186*c03c5b1cSMartin Matuska void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); 187*c03c5b1cSMartin Matuska 188*c03c5b1cSMartin Matuska 189*c03c5b1cSMartin Matuska #endif /* ZSTD_DECOMPRESS_INTERNAL_H */ 190