/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10*c03c5b1cSMartin Matuska
/* zstd_ddict.c :
 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13*c03c5b1cSMartin Matuska
/*-*******************************************************
*  Dependencies
*********************************************************/
17*c03c5b1cSMartin Matuska #include <string.h> /* memcpy, memmove, memset */
18*c03c5b1cSMartin Matuska #include "../common/cpu.h" /* bmi2 */
19*c03c5b1cSMartin Matuska #include "../common/mem.h" /* low level memory routines */
20*c03c5b1cSMartin Matuska #define FSE_STATIC_LINKING_ONLY
21*c03c5b1cSMartin Matuska #include "../common/fse.h"
22*c03c5b1cSMartin Matuska #define HUF_STATIC_LINKING_ONLY
23*c03c5b1cSMartin Matuska #include "../common/huf.h"
24*c03c5b1cSMartin Matuska #include "zstd_decompress_internal.h"
25*c03c5b1cSMartin Matuska #include "zstd_ddict.h"
26*c03c5b1cSMartin Matuska
27*c03c5b1cSMartin Matuska #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28*c03c5b1cSMartin Matuska # include "../legacy/zstd_legacy.h"
29*c03c5b1cSMartin Matuska #endif
30*c03c5b1cSMartin Matuska
31*c03c5b1cSMartin Matuska
32*c03c5b1cSMartin Matuska
/*-*******************************************************
*  Types
*********************************************************/
36*c03c5b1cSMartin Matuska struct ZSTD_DDict_s {
37*c03c5b1cSMartin Matuska void* dictBuffer;
38*c03c5b1cSMartin Matuska const void* dictContent;
39*c03c5b1cSMartin Matuska size_t dictSize;
40*c03c5b1cSMartin Matuska ZSTD_entropyDTables_t entropy;
41*c03c5b1cSMartin Matuska U32 dictID;
42*c03c5b1cSMartin Matuska U32 entropyPresent;
43*c03c5b1cSMartin Matuska ZSTD_customMem cMem;
44*c03c5b1cSMartin Matuska }; /* typedef'd to ZSTD_DDict within "zstd.h" */
45*c03c5b1cSMartin Matuska
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)46*c03c5b1cSMartin Matuska const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47*c03c5b1cSMartin Matuska {
48*c03c5b1cSMartin Matuska assert(ddict != NULL);
49*c03c5b1cSMartin Matuska return ddict->dictContent;
50*c03c5b1cSMartin Matuska }
51*c03c5b1cSMartin Matuska
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)52*c03c5b1cSMartin Matuska size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53*c03c5b1cSMartin Matuska {
54*c03c5b1cSMartin Matuska assert(ddict != NULL);
55*c03c5b1cSMartin Matuska return ddict->dictSize;
56*c03c5b1cSMartin Matuska }
57*c03c5b1cSMartin Matuska
/*! ZSTD_copyDDictParameters() :
 *  Sets up `dctx` so that it uses the dictionary described by `ddict` :
 *  - dictID is copied,
 *  - prefixStart / virtualStart point at the first dictionary byte,
 *  - dictEnd / previousDstEnd point just past the last dictionary byte,
 *  - when `ddict` carries entropy tables, `dctx` references them (no copy)
 *    and the 3 repeat offsets are copied ; otherwise both entropy flags are cleared.
 *  Neither pointer may be NULL (checked by assert only). */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and window boundaries */
    dctx->dictID = ddict->dictID;
    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
    dctx->previousDstEnd = dctx->dictEnd;
    dctx->virtualStart = ddict->dictContent;
    dctx->prefixStart = ddict->dictContent;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* no entropy in this dictionary : just clear the flags */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* entropy available : reference the tables owned by ddict */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->HUFptr = ddict->entropy.hufTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->LLTptr = ddict->entropy.LLTable;
    {   size_t repIdx;
        for (repIdx = 0; repIdx < 3; repIdx++) {
            dctx->entropy.rep[repIdx] = ddict->entropy.rep[repIdx];
        }
    }
}
87*c03c5b1cSMartin Matuska
88*c03c5b1cSMartin Matuska
89*c03c5b1cSMartin Matuska static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)90*c03c5b1cSMartin Matuska ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
91*c03c5b1cSMartin Matuska ZSTD_dictContentType_e dictContentType)
92*c03c5b1cSMartin Matuska {
93*c03c5b1cSMartin Matuska ddict->dictID = 0;
94*c03c5b1cSMartin Matuska ddict->entropyPresent = 0;
95*c03c5b1cSMartin Matuska if (dictContentType == ZSTD_dct_rawContent) return 0;
96*c03c5b1cSMartin Matuska
97*c03c5b1cSMartin Matuska if (ddict->dictSize < 8) {
98*c03c5b1cSMartin Matuska if (dictContentType == ZSTD_dct_fullDict)
99*c03c5b1cSMartin Matuska return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
100*c03c5b1cSMartin Matuska return 0; /* pure content mode */
101*c03c5b1cSMartin Matuska }
102*c03c5b1cSMartin Matuska { U32 const magic = MEM_readLE32(ddict->dictContent);
103*c03c5b1cSMartin Matuska if (magic != ZSTD_MAGIC_DICTIONARY) {
104*c03c5b1cSMartin Matuska if (dictContentType == ZSTD_dct_fullDict)
105*c03c5b1cSMartin Matuska return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
106*c03c5b1cSMartin Matuska return 0; /* pure content mode */
107*c03c5b1cSMartin Matuska }
108*c03c5b1cSMartin Matuska }
109*c03c5b1cSMartin Matuska ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
110*c03c5b1cSMartin Matuska
111*c03c5b1cSMartin Matuska /* load entropy tables */
112*c03c5b1cSMartin Matuska RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
113*c03c5b1cSMartin Matuska &ddict->entropy, ddict->dictContent, ddict->dictSize)),
114*c03c5b1cSMartin Matuska dictionary_corrupted, "");
115*c03c5b1cSMartin Matuska ddict->entropyPresent = 1;
116*c03c5b1cSMartin Matuska return 0;
117*c03c5b1cSMartin Matuska }
118*c03c5b1cSMartin Matuska
119*c03c5b1cSMartin Matuska
/* ZSTD_initDDict_internal() :
 * Attaches dictionary content to `ddict`, then parses it.
 * A private copy is allocated through ddict->cMem only when a copy is requested
 * AND there is something to copy (`dict` non-NULL, `dictSize` non-zero) ;
 * in every other case `dict` is merely referenced and dictBuffer is NULL.
 * ddict->cMem must be set by the caller before a copy can be made.
 * @return : 0 on success, memory_allocation if the copy cannot be allocated,
 *           or the error forwarded from ZSTD_loadEntropy_intoDDict(). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const makeCopy = (dictLoadMethod != ZSTD_dlm_byRef)
                      && (dict != NULL)
                      && (dictSize != 0);

    if (makeCopy) {
        void* const ownedCopy = ZSTD_malloc(dictSize, ddict->cMem);
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;   /* no content : ignore the announced size */
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
144*c03c5b1cSMartin Matuska
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)145*c03c5b1cSMartin Matuska ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
146*c03c5b1cSMartin Matuska ZSTD_dictLoadMethod_e dictLoadMethod,
147*c03c5b1cSMartin Matuska ZSTD_dictContentType_e dictContentType,
148*c03c5b1cSMartin Matuska ZSTD_customMem customMem)
149*c03c5b1cSMartin Matuska {
150*c03c5b1cSMartin Matuska if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
151*c03c5b1cSMartin Matuska
152*c03c5b1cSMartin Matuska { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
153*c03c5b1cSMartin Matuska if (ddict == NULL) return NULL;
154*c03c5b1cSMartin Matuska ddict->cMem = customMem;
155*c03c5b1cSMartin Matuska { size_t const initResult = ZSTD_initDDict_internal(ddict,
156*c03c5b1cSMartin Matuska dict, dictSize,
157*c03c5b1cSMartin Matuska dictLoadMethod, dictContentType);
158*c03c5b1cSMartin Matuska if (ZSTD_isError(initResult)) {
159*c03c5b1cSMartin Matuska ZSTD_freeDDict(ddict);
160*c03c5b1cSMartin Matuska return NULL;
161*c03c5b1cSMartin Matuska } }
162*c03c5b1cSMartin Matuska return ddict;
163*c03c5b1cSMartin Matuska }
164*c03c5b1cSMartin Matuska }
165*c03c5b1cSMartin Matuska
166*c03c5b1cSMartin Matuska /*! ZSTD_createDDict() :
167*c03c5b1cSMartin Matuska * Create a digested dictionary, to start decompression without startup delay.
168*c03c5b1cSMartin Matuska * `dict` content is copied inside DDict.
169*c03c5b1cSMartin Matuska * Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)170*c03c5b1cSMartin Matuska ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
171*c03c5b1cSMartin Matuska {
172*c03c5b1cSMartin Matuska ZSTD_customMem const allocator = { NULL, NULL, NULL };
173*c03c5b1cSMartin Matuska return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
174*c03c5b1cSMartin Matuska }
175*c03c5b1cSMartin Matuska
176*c03c5b1cSMartin Matuska /*! ZSTD_createDDict_byReference() :
177*c03c5b1cSMartin Matuska * Create a digested dictionary, to start decompression without startup delay.
178*c03c5b1cSMartin Matuska * Dictionary content is simply referenced, it will be accessed during decompression.
179*c03c5b1cSMartin Matuska * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)180*c03c5b1cSMartin Matuska ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
181*c03c5b1cSMartin Matuska {
182*c03c5b1cSMartin Matuska ZSTD_customMem const allocator = { NULL, NULL, NULL };
183*c03c5b1cSMartin Matuska return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
184*c03c5b1cSMartin Matuska }
185*c03c5b1cSMartin Matuska
186*c03c5b1cSMartin Matuska
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)187*c03c5b1cSMartin Matuska const ZSTD_DDict* ZSTD_initStaticDDict(
188*c03c5b1cSMartin Matuska void* sBuffer, size_t sBufferSize,
189*c03c5b1cSMartin Matuska const void* dict, size_t dictSize,
190*c03c5b1cSMartin Matuska ZSTD_dictLoadMethod_e dictLoadMethod,
191*c03c5b1cSMartin Matuska ZSTD_dictContentType_e dictContentType)
192*c03c5b1cSMartin Matuska {
193*c03c5b1cSMartin Matuska size_t const neededSpace = sizeof(ZSTD_DDict)
194*c03c5b1cSMartin Matuska + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
195*c03c5b1cSMartin Matuska ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
196*c03c5b1cSMartin Matuska assert(sBuffer != NULL);
197*c03c5b1cSMartin Matuska assert(dict != NULL);
198*c03c5b1cSMartin Matuska if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
199*c03c5b1cSMartin Matuska if (sBufferSize < neededSpace) return NULL;
200*c03c5b1cSMartin Matuska if (dictLoadMethod == ZSTD_dlm_byCopy) {
201*c03c5b1cSMartin Matuska memcpy(ddict+1, dict, dictSize); /* local copy */
202*c03c5b1cSMartin Matuska dict = ddict+1;
203*c03c5b1cSMartin Matuska }
204*c03c5b1cSMartin Matuska if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
205*c03c5b1cSMartin Matuska dict, dictSize,
206*c03c5b1cSMartin Matuska ZSTD_dlm_byRef, dictContentType) ))
207*c03c5b1cSMartin Matuska return NULL;
208*c03c5b1cSMartin Matuska return ddict;
209*c03c5b1cSMartin Matuska }
210*c03c5b1cSMartin Matuska
211*c03c5b1cSMartin Matuska
/*! ZSTD_freeDDict() :
 *  Releases the private dictionary copy (if any) and then the DDict itself,
 *  both through the allocator stored in the DDict.
 *  Accepts NULL, in which case nothing is done.
 * @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* keep a copy of the allocator : ddict->cMem disappears with ddict */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_free(ddict->dictBuffer, allocator);
        ZSTD_free(ddict, allocator);
    }
    return 0;
}
221*c03c5b1cSMartin Matuska
222*c03c5b1cSMartin Matuska /*! ZSTD_estimateDDictSize() :
223*c03c5b1cSMartin Matuska * Estimate amount of memory that will be needed to create a dictionary for decompression.
224*c03c5b1cSMartin Matuska * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)225*c03c5b1cSMartin Matuska size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
226*c03c5b1cSMartin Matuska {
227*c03c5b1cSMartin Matuska return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
228*c03c5b1cSMartin Matuska }
229*c03c5b1cSMartin Matuska
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)230*c03c5b1cSMartin Matuska size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
231*c03c5b1cSMartin Matuska {
232*c03c5b1cSMartin Matuska if (ddict==NULL) return 0; /* support sizeof on NULL */
233*c03c5b1cSMartin Matuska return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
234*c03c5b1cSMartin Matuska }
235*c03c5b1cSMartin Matuska
236*c03c5b1cSMartin Matuska /*! ZSTD_getDictID_fromDDict() :
237*c03c5b1cSMartin Matuska * Provides the dictID of the dictionary loaded into `ddict`.
238*c03c5b1cSMartin Matuska * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
239*c03c5b1cSMartin Matuska * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)240*c03c5b1cSMartin Matuska unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
241*c03c5b1cSMartin Matuska {
242*c03c5b1cSMartin Matuska if (ddict==NULL) return 0;
243*c03c5b1cSMartin Matuska return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
244*c03c5b1cSMartin Matuska }
245