xref: /dflybsd-src/contrib/zstd/lib/decompress/zstd_ddict.c (revision a28cd43d19e8b720a6c852a4bbc5ae147a26165a)
1*a28cd43dSSascha Wildner /*
2*a28cd43dSSascha Wildner  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3*a28cd43dSSascha Wildner  * All rights reserved.
4*a28cd43dSSascha Wildner  *
5*a28cd43dSSascha Wildner  * This source code is licensed under both the BSD-style license (found in the
6*a28cd43dSSascha Wildner  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*a28cd43dSSascha Wildner  * in the COPYING file in the root directory of this source tree).
8*a28cd43dSSascha Wildner  * You may select, at your option, one of the above-listed licenses.
9*a28cd43dSSascha Wildner  */
10*a28cd43dSSascha Wildner 
11*a28cd43dSSascha Wildner /* zstd_ddict.c :
12*a28cd43dSSascha Wildner  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13*a28cd43dSSascha Wildner 
14*a28cd43dSSascha Wildner /*-*******************************************************
15*a28cd43dSSascha Wildner *  Dependencies
16*a28cd43dSSascha Wildner *********************************************************/
17*a28cd43dSSascha Wildner #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18*a28cd43dSSascha Wildner #include "../common/cpu.h"         /* bmi2 */
19*a28cd43dSSascha Wildner #include "../common/mem.h"         /* low level memory routines */
20*a28cd43dSSascha Wildner #define FSE_STATIC_LINKING_ONLY
21*a28cd43dSSascha Wildner #include "../common/fse.h"
22*a28cd43dSSascha Wildner #define HUF_STATIC_LINKING_ONLY
23*a28cd43dSSascha Wildner #include "../common/huf.h"
24*a28cd43dSSascha Wildner #include "zstd_decompress_internal.h"
25*a28cd43dSSascha Wildner #include "zstd_ddict.h"
26*a28cd43dSSascha Wildner 
27*a28cd43dSSascha Wildner #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28*a28cd43dSSascha Wildner #  include "../legacy/zstd_legacy.h"
29*a28cd43dSSascha Wildner #endif
30*a28cd43dSSascha Wildner 
31*a28cd43dSSascha Wildner 
32*a28cd43dSSascha Wildner 
33*a28cd43dSSascha Wildner /*-*******************************************************
34*a28cd43dSSascha Wildner *  Types
35*a28cd43dSSascha Wildner *********************************************************/
36*a28cd43dSSascha Wildner struct ZSTD_DDict_s {
37*a28cd43dSSascha Wildner     void* dictBuffer;
38*a28cd43dSSascha Wildner     const void* dictContent;
39*a28cd43dSSascha Wildner     size_t dictSize;
40*a28cd43dSSascha Wildner     ZSTD_entropyDTables_t entropy;
41*a28cd43dSSascha Wildner     U32 dictID;
42*a28cd43dSSascha Wildner     U32 entropyPresent;
43*a28cd43dSSascha Wildner     ZSTD_customMem cMem;
44*a28cd43dSSascha Wildner };  /* typedef'd to ZSTD_DDict within "zstd.h" */
45*a28cd43dSSascha Wildner 
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)46*a28cd43dSSascha Wildner const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47*a28cd43dSSascha Wildner {
48*a28cd43dSSascha Wildner     assert(ddict != NULL);
49*a28cd43dSSascha Wildner     return ddict->dictContent;
50*a28cd43dSSascha Wildner }
51*a28cd43dSSascha Wildner 
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)52*a28cd43dSSascha Wildner size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53*a28cd43dSSascha Wildner {
54*a28cd43dSSascha Wildner     assert(ddict != NULL);
55*a28cd43dSSascha Wildner     return ddict->dictSize;
56*a28cd43dSSascha Wildner }
57*a28cd43dSSascha Wildner 
/* ZSTD_copyDDictParameters() :
 * Makes `dctx` use the dictionary described by `ddict` :
 * copies the dictionary ID, sets the content boundaries, and,
 * when the DDict carries entropy tables, references those tables
 * and copies the 3 repeat offsets.
 * Neither `dctx` nor `ddict` may be NULL. */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and content boundaries */
    {   const void* const dictStart = ddict->dictContent;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = (const BYTE*)dictStart + ddict->dictSize;
        dctx->previousDstEnd = dctx->dictEnd;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        dctx->dictContentBeginForFuzzing = dctx->prefixStart;
        dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif
    }

    /* content-only dictionary : no table to reference */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* tables are referenced in place (not copied) ; only repeat offsets are duplicated */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    dctx->entropy.rep[0] = ddict->entropy.rep[0];
    dctx->entropy.rep[1] = ddict->entropy.rep[1];
    dctx->entropy.rep[2] = ddict->entropy.rep[2];
}
87*a28cd43dSSascha Wildner 
88*a28cd43dSSascha Wildner 
89*a28cd43dSSascha Wildner static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)90*a28cd43dSSascha Wildner ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
91*a28cd43dSSascha Wildner                            ZSTD_dictContentType_e dictContentType)
92*a28cd43dSSascha Wildner {
93*a28cd43dSSascha Wildner     ddict->dictID = 0;
94*a28cd43dSSascha Wildner     ddict->entropyPresent = 0;
95*a28cd43dSSascha Wildner     if (dictContentType == ZSTD_dct_rawContent) return 0;
96*a28cd43dSSascha Wildner 
97*a28cd43dSSascha Wildner     if (ddict->dictSize < 8) {
98*a28cd43dSSascha Wildner         if (dictContentType == ZSTD_dct_fullDict)
99*a28cd43dSSascha Wildner             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
100*a28cd43dSSascha Wildner         return 0;   /* pure content mode */
101*a28cd43dSSascha Wildner     }
102*a28cd43dSSascha Wildner     {   U32 const magic = MEM_readLE32(ddict->dictContent);
103*a28cd43dSSascha Wildner         if (magic != ZSTD_MAGIC_DICTIONARY) {
104*a28cd43dSSascha Wildner             if (dictContentType == ZSTD_dct_fullDict)
105*a28cd43dSSascha Wildner                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
106*a28cd43dSSascha Wildner             return 0;   /* pure content mode */
107*a28cd43dSSascha Wildner         }
108*a28cd43dSSascha Wildner     }
109*a28cd43dSSascha Wildner     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
110*a28cd43dSSascha Wildner 
111*a28cd43dSSascha Wildner     /* load entropy tables */
112*a28cd43dSSascha Wildner     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
113*a28cd43dSSascha Wildner             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
114*a28cd43dSSascha Wildner         dictionary_corrupted, "");
115*a28cd43dSSascha Wildner     ddict->entropyPresent = 1;
116*a28cd43dSSascha Wildner     return 0;
117*a28cd43dSSascha Wildner }
118*a28cd43dSSascha Wildner 
119*a28cd43dSSascha Wildner 
/* ZSTD_initDDict_internal() :
 * Attaches `dict` to `ddict`, then parses it.
 * The content is duplicated with ddict->cMem only when a copy is requested
 * AND there is something to copy ; otherwise it is merely referenced
 * (a NULL `dict` forces dictSize to 0).
 * `ddict->cMem` must be set by the caller before a copy can be requested.
 * @return : 0 on success, or an error code (memory_allocation, or parsing error). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const needsCopy = (dictLoadMethod != ZSTD_dlm_byRef) && (dict != NULL) && (dictSize != 0);

    if (needsCopy) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        /* both fields are written before the failure check, so they are never left unset on the error path */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
144*a28cd43dSSascha Wildner 
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)145*a28cd43dSSascha Wildner ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
146*a28cd43dSSascha Wildner                                       ZSTD_dictLoadMethod_e dictLoadMethod,
147*a28cd43dSSascha Wildner                                       ZSTD_dictContentType_e dictContentType,
148*a28cd43dSSascha Wildner                                       ZSTD_customMem customMem)
149*a28cd43dSSascha Wildner {
150*a28cd43dSSascha Wildner     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
151*a28cd43dSSascha Wildner 
152*a28cd43dSSascha Wildner     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
153*a28cd43dSSascha Wildner         if (ddict == NULL) return NULL;
154*a28cd43dSSascha Wildner         ddict->cMem = customMem;
155*a28cd43dSSascha Wildner         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
156*a28cd43dSSascha Wildner                                             dict, dictSize,
157*a28cd43dSSascha Wildner                                             dictLoadMethod, dictContentType);
158*a28cd43dSSascha Wildner             if (ZSTD_isError(initResult)) {
159*a28cd43dSSascha Wildner                 ZSTD_freeDDict(ddict);
160*a28cd43dSSascha Wildner                 return NULL;
161*a28cd43dSSascha Wildner         }   }
162*a28cd43dSSascha Wildner         return ddict;
163*a28cd43dSSascha Wildner     }
164*a28cd43dSSascha Wildner }
165*a28cd43dSSascha Wildner 
166*a28cd43dSSascha Wildner /*! ZSTD_createDDict() :
167*a28cd43dSSascha Wildner *   Create a digested dictionary, to start decompression without startup delay.
168*a28cd43dSSascha Wildner *   `dict` content is copied inside DDict.
169*a28cd43dSSascha Wildner *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)170*a28cd43dSSascha Wildner ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
171*a28cd43dSSascha Wildner {
172*a28cd43dSSascha Wildner     ZSTD_customMem const allocator = { NULL, NULL, NULL };
173*a28cd43dSSascha Wildner     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
174*a28cd43dSSascha Wildner }
175*a28cd43dSSascha Wildner 
176*a28cd43dSSascha Wildner /*! ZSTD_createDDict_byReference() :
177*a28cd43dSSascha Wildner  *  Create a digested dictionary, to start decompression without startup delay.
178*a28cd43dSSascha Wildner  *  Dictionary content is simply referenced, it will be accessed during decompression.
179*a28cd43dSSascha Wildner  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)180*a28cd43dSSascha Wildner ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
181*a28cd43dSSascha Wildner {
182*a28cd43dSSascha Wildner     ZSTD_customMem const allocator = { NULL, NULL, NULL };
183*a28cd43dSSascha Wildner     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
184*a28cd43dSSascha Wildner }
185*a28cd43dSSascha Wildner 
186*a28cd43dSSascha Wildner 
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)187*a28cd43dSSascha Wildner const ZSTD_DDict* ZSTD_initStaticDDict(
188*a28cd43dSSascha Wildner                                 void* sBuffer, size_t sBufferSize,
189*a28cd43dSSascha Wildner                                 const void* dict, size_t dictSize,
190*a28cd43dSSascha Wildner                                 ZSTD_dictLoadMethod_e dictLoadMethod,
191*a28cd43dSSascha Wildner                                 ZSTD_dictContentType_e dictContentType)
192*a28cd43dSSascha Wildner {
193*a28cd43dSSascha Wildner     size_t const neededSpace = sizeof(ZSTD_DDict)
194*a28cd43dSSascha Wildner                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
195*a28cd43dSSascha Wildner     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
196*a28cd43dSSascha Wildner     assert(sBuffer != NULL);
197*a28cd43dSSascha Wildner     assert(dict != NULL);
198*a28cd43dSSascha Wildner     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
199*a28cd43dSSascha Wildner     if (sBufferSize < neededSpace) return NULL;
200*a28cd43dSSascha Wildner     if (dictLoadMethod == ZSTD_dlm_byCopy) {
201*a28cd43dSSascha Wildner         ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
202*a28cd43dSSascha Wildner         dict = ddict+1;
203*a28cd43dSSascha Wildner     }
204*a28cd43dSSascha Wildner     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
205*a28cd43dSSascha Wildner                                               dict, dictSize,
206*a28cd43dSSascha Wildner                                               ZSTD_dlm_byRef, dictContentType) ))
207*a28cd43dSSascha Wildner         return NULL;
208*a28cd43dSSascha Wildner     return ddict;
209*a28cd43dSSascha Wildner }
210*a28cd43dSSascha Wildner 
211*a28cd43dSSascha Wildner 
/*! ZSTD_freeDDict() :
 *  Releases the private dictionary copy (if any), then the DDict itself,
 *  using the allocator recorded in the DDict.
 *  Accepts NULL, in which case nothing happens.
 *  @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        ZSTD_customMem const cMem = ddict->cMem;   /* saved first : ddict itself is released below */
        ZSTD_customFree(ddict->dictBuffer, cMem);
        ZSTD_customFree(ddict, cMem);
    }
    return 0;
}
221*a28cd43dSSascha Wildner 
222*a28cd43dSSascha Wildner /*! ZSTD_estimateDDictSize() :
223*a28cd43dSSascha Wildner  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
224*a28cd43dSSascha Wildner  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)225*a28cd43dSSascha Wildner size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
226*a28cd43dSSascha Wildner {
227*a28cd43dSSascha Wildner     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
228*a28cd43dSSascha Wildner }
229*a28cd43dSSascha Wildner 
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)230*a28cd43dSSascha Wildner size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
231*a28cd43dSSascha Wildner {
232*a28cd43dSSascha Wildner     if (ddict==NULL) return 0;   /* support sizeof on NULL */
233*a28cd43dSSascha Wildner     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
234*a28cd43dSSascha Wildner }
235*a28cd43dSSascha Wildner 
236*a28cd43dSSascha Wildner /*! ZSTD_getDictID_fromDDict() :
237*a28cd43dSSascha Wildner  *  Provides the dictID of the dictionary loaded into `ddict`.
238*a28cd43dSSascha Wildner  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
239*a28cd43dSSascha Wildner  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)240*a28cd43dSSascha Wildner unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
241*a28cd43dSSascha Wildner {
242*a28cd43dSSascha Wildner     if (ddict==NULL) return 0;
243*a28cd43dSSascha Wildner     return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
244*a28cd43dSSascha Wildner }
245