xref: /netbsd-src/external/bsd/zstd/dist/lib/decompress/zstd_ddict.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos /* zstd_ddict.c :
12*3117ece4Schristos  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13*3117ece4Schristos 
14*3117ece4Schristos /*-*******************************************************
15*3117ece4Schristos *  Dependencies
16*3117ece4Schristos *********************************************************/
17*3117ece4Schristos #include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
18*3117ece4Schristos #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
19*3117ece4Schristos #include "../common/cpu.h"         /* bmi2 */
20*3117ece4Schristos #include "../common/mem.h"         /* low level memory routines */
21*3117ece4Schristos #define FSE_STATIC_LINKING_ONLY
22*3117ece4Schristos #include "../common/fse.h"
23*3117ece4Schristos #include "../common/huf.h"
24*3117ece4Schristos #include "zstd_decompress_internal.h"
25*3117ece4Schristos #include "zstd_ddict.h"
26*3117ece4Schristos 
27*3117ece4Schristos #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28*3117ece4Schristos #  include "../legacy/zstd_legacy.h"
29*3117ece4Schristos #endif
30*3117ece4Schristos 
31*3117ece4Schristos 
32*3117ece4Schristos 
33*3117ece4Schristos /*-*******************************************************
34*3117ece4Schristos *  Types
35*3117ece4Schristos *********************************************************/
36*3117ece4Schristos struct ZSTD_DDict_s {
37*3117ece4Schristos     void* dictBuffer;
38*3117ece4Schristos     const void* dictContent;
39*3117ece4Schristos     size_t dictSize;
40*3117ece4Schristos     ZSTD_entropyDTables_t entropy;
41*3117ece4Schristos     U32 dictID;
42*3117ece4Schristos     U32 entropyPresent;
43*3117ece4Schristos     ZSTD_customMem cMem;
44*3117ece4Schristos };  /* typedef'd to ZSTD_DDict within "zstd.h" */
45*3117ece4Schristos 
46*3117ece4Schristos const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47*3117ece4Schristos {
48*3117ece4Schristos     assert(ddict != NULL);
49*3117ece4Schristos     return ddict->dictContent;
50*3117ece4Schristos }
51*3117ece4Schristos 
52*3117ece4Schristos size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53*3117ece4Schristos {
54*3117ece4Schristos     assert(ddict != NULL);
55*3117ece4Schristos     return ddict->dictSize;
56*3117ece4Schristos }
57*3117ece4Schristos 
58*3117ece4Schristos void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
59*3117ece4Schristos {
60*3117ece4Schristos     DEBUGLOG(4, "ZSTD_copyDDictParameters");
61*3117ece4Schristos     assert(dctx != NULL);
62*3117ece4Schristos     assert(ddict != NULL);
63*3117ece4Schristos     dctx->dictID = ddict->dictID;
64*3117ece4Schristos     dctx->prefixStart = ddict->dictContent;
65*3117ece4Schristos     dctx->virtualStart = ddict->dictContent;
66*3117ece4Schristos     dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
67*3117ece4Schristos     dctx->previousDstEnd = dctx->dictEnd;
68*3117ece4Schristos #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
69*3117ece4Schristos     dctx->dictContentBeginForFuzzing = dctx->prefixStart;
70*3117ece4Schristos     dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
71*3117ece4Schristos #endif
72*3117ece4Schristos     if (ddict->entropyPresent) {
73*3117ece4Schristos         dctx->litEntropy = 1;
74*3117ece4Schristos         dctx->fseEntropy = 1;
75*3117ece4Schristos         dctx->LLTptr = ddict->entropy.LLTable;
76*3117ece4Schristos         dctx->MLTptr = ddict->entropy.MLTable;
77*3117ece4Schristos         dctx->OFTptr = ddict->entropy.OFTable;
78*3117ece4Schristos         dctx->HUFptr = ddict->entropy.hufTable;
79*3117ece4Schristos         dctx->entropy.rep[0] = ddict->entropy.rep[0];
80*3117ece4Schristos         dctx->entropy.rep[1] = ddict->entropy.rep[1];
81*3117ece4Schristos         dctx->entropy.rep[2] = ddict->entropy.rep[2];
82*3117ece4Schristos     } else {
83*3117ece4Schristos         dctx->litEntropy = 0;
84*3117ece4Schristos         dctx->fseEntropy = 0;
85*3117ece4Schristos     }
86*3117ece4Schristos }
87*3117ece4Schristos 
88*3117ece4Schristos 
89*3117ece4Schristos static size_t
90*3117ece4Schristos ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
91*3117ece4Schristos                            ZSTD_dictContentType_e dictContentType)
92*3117ece4Schristos {
93*3117ece4Schristos     ddict->dictID = 0;
94*3117ece4Schristos     ddict->entropyPresent = 0;
95*3117ece4Schristos     if (dictContentType == ZSTD_dct_rawContent) return 0;
96*3117ece4Schristos 
97*3117ece4Schristos     if (ddict->dictSize < 8) {
98*3117ece4Schristos         if (dictContentType == ZSTD_dct_fullDict)
99*3117ece4Schristos             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
100*3117ece4Schristos         return 0;   /* pure content mode */
101*3117ece4Schristos     }
102*3117ece4Schristos     {   U32 const magic = MEM_readLE32(ddict->dictContent);
103*3117ece4Schristos         if (magic != ZSTD_MAGIC_DICTIONARY) {
104*3117ece4Schristos             if (dictContentType == ZSTD_dct_fullDict)
105*3117ece4Schristos                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
106*3117ece4Schristos             return 0;   /* pure content mode */
107*3117ece4Schristos         }
108*3117ece4Schristos     }
109*3117ece4Schristos     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
110*3117ece4Schristos 
111*3117ece4Schristos     /* load entropy tables */
112*3117ece4Schristos     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
113*3117ece4Schristos             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
114*3117ece4Schristos         dictionary_corrupted, "");
115*3117ece4Schristos     ddict->entropyPresent = 1;
116*3117ece4Schristos     return 0;
117*3117ece4Schristos }
118*3117ece4Schristos 
119*3117ece4Schristos 
120*3117ece4Schristos static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
121*3117ece4Schristos                                       const void* dict, size_t dictSize,
122*3117ece4Schristos                                       ZSTD_dictLoadMethod_e dictLoadMethod,
123*3117ece4Schristos                                       ZSTD_dictContentType_e dictContentType)
124*3117ece4Schristos {
125*3117ece4Schristos     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
126*3117ece4Schristos         ddict->dictBuffer = NULL;
127*3117ece4Schristos         ddict->dictContent = dict;
128*3117ece4Schristos         if (!dict) dictSize = 0;
129*3117ece4Schristos     } else {
130*3117ece4Schristos         void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
131*3117ece4Schristos         ddict->dictBuffer = internalBuffer;
132*3117ece4Schristos         ddict->dictContent = internalBuffer;
133*3117ece4Schristos         if (!internalBuffer) return ERROR(memory_allocation);
134*3117ece4Schristos         ZSTD_memcpy(internalBuffer, dict, dictSize);
135*3117ece4Schristos     }
136*3117ece4Schristos     ddict->dictSize = dictSize;
137*3117ece4Schristos     ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001);  /* cover both little and big endian */
138*3117ece4Schristos 
139*3117ece4Schristos     /* parse dictionary content */
140*3117ece4Schristos     FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
141*3117ece4Schristos 
142*3117ece4Schristos     return 0;
143*3117ece4Schristos }
144*3117ece4Schristos 
145*3117ece4Schristos ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
146*3117ece4Schristos                                       ZSTD_dictLoadMethod_e dictLoadMethod,
147*3117ece4Schristos                                       ZSTD_dictContentType_e dictContentType,
148*3117ece4Schristos                                       ZSTD_customMem customMem)
149*3117ece4Schristos {
150*3117ece4Schristos     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
151*3117ece4Schristos 
152*3117ece4Schristos     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
153*3117ece4Schristos         if (ddict == NULL) return NULL;
154*3117ece4Schristos         ddict->cMem = customMem;
155*3117ece4Schristos         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
156*3117ece4Schristos                                             dict, dictSize,
157*3117ece4Schristos                                             dictLoadMethod, dictContentType);
158*3117ece4Schristos             if (ZSTD_isError(initResult)) {
159*3117ece4Schristos                 ZSTD_freeDDict(ddict);
160*3117ece4Schristos                 return NULL;
161*3117ece4Schristos         }   }
162*3117ece4Schristos         return ddict;
163*3117ece4Schristos     }
164*3117ece4Schristos }
165*3117ece4Schristos 
166*3117ece4Schristos /*! ZSTD_createDDict() :
167*3117ece4Schristos *   Create a digested dictionary, to start decompression without startup delay.
168*3117ece4Schristos *   `dict` content is copied inside DDict.
169*3117ece4Schristos *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
170*3117ece4Schristos ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
171*3117ece4Schristos {
172*3117ece4Schristos     ZSTD_customMem const allocator = { NULL, NULL, NULL };
173*3117ece4Schristos     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
174*3117ece4Schristos }
175*3117ece4Schristos 
176*3117ece4Schristos /*! ZSTD_createDDict_byReference() :
177*3117ece4Schristos  *  Create a digested dictionary, to start decompression without startup delay.
178*3117ece4Schristos  *  Dictionary content is simply referenced, it will be accessed during decompression.
179*3117ece4Schristos  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
180*3117ece4Schristos ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
181*3117ece4Schristos {
182*3117ece4Schristos     ZSTD_customMem const allocator = { NULL, NULL, NULL };
183*3117ece4Schristos     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
184*3117ece4Schristos }
185*3117ece4Schristos 
186*3117ece4Schristos 
187*3117ece4Schristos const ZSTD_DDict* ZSTD_initStaticDDict(
188*3117ece4Schristos                                 void* sBuffer, size_t sBufferSize,
189*3117ece4Schristos                                 const void* dict, size_t dictSize,
190*3117ece4Schristos                                 ZSTD_dictLoadMethod_e dictLoadMethod,
191*3117ece4Schristos                                 ZSTD_dictContentType_e dictContentType)
192*3117ece4Schristos {
193*3117ece4Schristos     size_t const neededSpace = sizeof(ZSTD_DDict)
194*3117ece4Schristos                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
195*3117ece4Schristos     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
196*3117ece4Schristos     assert(sBuffer != NULL);
197*3117ece4Schristos     assert(dict != NULL);
198*3117ece4Schristos     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
199*3117ece4Schristos     if (sBufferSize < neededSpace) return NULL;
200*3117ece4Schristos     if (dictLoadMethod == ZSTD_dlm_byCopy) {
201*3117ece4Schristos         ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
202*3117ece4Schristos         dict = ddict+1;
203*3117ece4Schristos     }
204*3117ece4Schristos     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
205*3117ece4Schristos                                               dict, dictSize,
206*3117ece4Schristos                                               ZSTD_dlm_byRef, dictContentType) ))
207*3117ece4Schristos         return NULL;
208*3117ece4Schristos     return ddict;
209*3117ece4Schristos }
210*3117ece4Schristos 
211*3117ece4Schristos 
212*3117ece4Schristos size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
213*3117ece4Schristos {
214*3117ece4Schristos     if (ddict==NULL) return 0;   /* support free on NULL */
215*3117ece4Schristos     {   ZSTD_customMem const cMem = ddict->cMem;
216*3117ece4Schristos         ZSTD_customFree(ddict->dictBuffer, cMem);
217*3117ece4Schristos         ZSTD_customFree(ddict, cMem);
218*3117ece4Schristos         return 0;
219*3117ece4Schristos     }
220*3117ece4Schristos }
221*3117ece4Schristos 
222*3117ece4Schristos /*! ZSTD_estimateDDictSize() :
223*3117ece4Schristos  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
224*3117ece4Schristos  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
225*3117ece4Schristos size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
226*3117ece4Schristos {
227*3117ece4Schristos     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
228*3117ece4Schristos }
229*3117ece4Schristos 
230*3117ece4Schristos size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
231*3117ece4Schristos {
232*3117ece4Schristos     if (ddict==NULL) return 0;   /* support sizeof on NULL */
233*3117ece4Schristos     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
234*3117ece4Schristos }
235*3117ece4Schristos 
236*3117ece4Schristos /*! ZSTD_getDictID_fromDDict() :
237*3117ece4Schristos  *  Provides the dictID of the dictionary loaded into `ddict`.
238*3117ece4Schristos  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
239*3117ece4Schristos  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
240*3117ece4Schristos unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
241*3117ece4Schristos {
242*3117ece4Schristos     if (ddict==NULL) return 0;
243*3117ece4Schristos     return ddict->dictID;
244*3117ece4Schristos }
245