/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10a28cd43dSSascha Wildner
/* zstd_ddict.c :
 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13a28cd43dSSascha Wildner
/*-*******************************************************
 *  Dependencies
 *********************************************************/
17a28cd43dSSascha Wildner #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18a28cd43dSSascha Wildner #include "../common/cpu.h" /* bmi2 */
19a28cd43dSSascha Wildner #include "../common/mem.h" /* low level memory routines */
20a28cd43dSSascha Wildner #define FSE_STATIC_LINKING_ONLY
21a28cd43dSSascha Wildner #include "../common/fse.h"
22a28cd43dSSascha Wildner #define HUF_STATIC_LINKING_ONLY
23a28cd43dSSascha Wildner #include "../common/huf.h"
24a28cd43dSSascha Wildner #include "zstd_decompress_internal.h"
25a28cd43dSSascha Wildner #include "zstd_ddict.h"
26a28cd43dSSascha Wildner
27a28cd43dSSascha Wildner #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28a28cd43dSSascha Wildner # include "../legacy/zstd_legacy.h"
29a28cd43dSSascha Wildner #endif
30a28cd43dSSascha Wildner
31a28cd43dSSascha Wildner
32a28cd43dSSascha Wildner
/*-*******************************************************
 *  Types
 *********************************************************/
36a28cd43dSSascha Wildner struct ZSTD_DDict_s {
37a28cd43dSSascha Wildner void* dictBuffer;
38a28cd43dSSascha Wildner const void* dictContent;
39a28cd43dSSascha Wildner size_t dictSize;
40a28cd43dSSascha Wildner ZSTD_entropyDTables_t entropy;
41a28cd43dSSascha Wildner U32 dictID;
42a28cd43dSSascha Wildner U32 entropyPresent;
43a28cd43dSSascha Wildner ZSTD_customMem cMem;
44a28cd43dSSascha Wildner }; /* typedef'd to ZSTD_DDict within "zstd.h" */
45a28cd43dSSascha Wildner
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)46a28cd43dSSascha Wildner const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47a28cd43dSSascha Wildner {
48a28cd43dSSascha Wildner assert(ddict != NULL);
49a28cd43dSSascha Wildner return ddict->dictContent;
50a28cd43dSSascha Wildner }
51a28cd43dSSascha Wildner
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)52a28cd43dSSascha Wildner size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53a28cd43dSSascha Wildner {
54a28cd43dSSascha Wildner assert(ddict != NULL);
55a28cd43dSSascha Wildner return ddict->dictSize;
56a28cd43dSSascha Wildner }
57a28cd43dSSascha Wildner
/*! ZSTD_copyDDictParameters() :
 *  Makes `dctx` use `ddict` : its dictID, its content as the reference window,
 *  and, when the dictionary carries entropy tables, those tables and repcodes.
 *  Tables are referenced, not copied; only the 3 repcodes are copied by value.
 *  Both pointers must be non-NULL (checked by assert only). */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* window : the dictionary content sits right before the data to decode */
    dctx->dictID = ddict->dictID;
    {   const void* const dictStart = ddict->dictContent;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = (const BYTE*)dictStart + ddict->dictSize;
    }
    dctx->previousDstEnd = dctx->dictEnd;
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
#endif

    /* no entropy in the dictionary : just clear the flags */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* entropy present : point the context at the dictionary's tables */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   int r;
        for (r = 0; r < 3; r++) {
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
        }
    }
}
87a28cd43dSSascha Wildner
88a28cd43dSSascha Wildner
89a28cd43dSSascha Wildner static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)90a28cd43dSSascha Wildner ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
91a28cd43dSSascha Wildner ZSTD_dictContentType_e dictContentType)
92a28cd43dSSascha Wildner {
93a28cd43dSSascha Wildner ddict->dictID = 0;
94a28cd43dSSascha Wildner ddict->entropyPresent = 0;
95a28cd43dSSascha Wildner if (dictContentType == ZSTD_dct_rawContent) return 0;
96a28cd43dSSascha Wildner
97a28cd43dSSascha Wildner if (ddict->dictSize < 8) {
98a28cd43dSSascha Wildner if (dictContentType == ZSTD_dct_fullDict)
99a28cd43dSSascha Wildner return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
100a28cd43dSSascha Wildner return 0; /* pure content mode */
101a28cd43dSSascha Wildner }
102a28cd43dSSascha Wildner { U32 const magic = MEM_readLE32(ddict->dictContent);
103a28cd43dSSascha Wildner if (magic != ZSTD_MAGIC_DICTIONARY) {
104a28cd43dSSascha Wildner if (dictContentType == ZSTD_dct_fullDict)
105a28cd43dSSascha Wildner return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
106a28cd43dSSascha Wildner return 0; /* pure content mode */
107a28cd43dSSascha Wildner }
108a28cd43dSSascha Wildner }
109a28cd43dSSascha Wildner ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
110a28cd43dSSascha Wildner
111a28cd43dSSascha Wildner /* load entropy tables */
112a28cd43dSSascha Wildner RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
113a28cd43dSSascha Wildner &ddict->entropy, ddict->dictContent, ddict->dictSize)),
114a28cd43dSSascha Wildner dictionary_corrupted, "");
115a28cd43dSSascha Wildner ddict->entropyPresent = 1;
116a28cd43dSSascha Wildner return 0;
117a28cd43dSSascha Wildner }
118a28cd43dSSascha Wildner
119a28cd43dSSascha Wildner
/*! ZSTD_initDDict_internal() :
 *  Attaches `dict` to `ddict`, then parses it.
 *  The content is duplicated with ddict->cMem only when the load method is
 *  not ZSTD_dlm_byRef and the dictionary is non-NULL and non-empty;
 *  otherwise `dict` is merely referenced and no buffer is owned.
 *  ddict->cMem must be set by the caller before a copy is requested.
 *  @return : 0 on success, or an error code
 *            (memory_allocation, or whatever the parsing stage reports). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const needsCopy = (dictLoadMethod != ZSTD_dlm_byRef)
                       && (dict != NULL)
                       && (dictSize != 0);

    if (needsCopy) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        /* both fields are written before the NULL check,
         * so a failed allocation still leaves dictBuffer in a defined (NULL) state */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
144a28cd43dSSascha Wildner
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)145a28cd43dSSascha Wildner ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
146a28cd43dSSascha Wildner ZSTD_dictLoadMethod_e dictLoadMethod,
147a28cd43dSSascha Wildner ZSTD_dictContentType_e dictContentType,
148a28cd43dSSascha Wildner ZSTD_customMem customMem)
149a28cd43dSSascha Wildner {
150a28cd43dSSascha Wildner if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
151a28cd43dSSascha Wildner
152a28cd43dSSascha Wildner { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
153a28cd43dSSascha Wildner if (ddict == NULL) return NULL;
154a28cd43dSSascha Wildner ddict->cMem = customMem;
155a28cd43dSSascha Wildner { size_t const initResult = ZSTD_initDDict_internal(ddict,
156a28cd43dSSascha Wildner dict, dictSize,
157a28cd43dSSascha Wildner dictLoadMethod, dictContentType);
158a28cd43dSSascha Wildner if (ZSTD_isError(initResult)) {
159a28cd43dSSascha Wildner ZSTD_freeDDict(ddict);
160a28cd43dSSascha Wildner return NULL;
161a28cd43dSSascha Wildner } }
162a28cd43dSSascha Wildner return ddict;
163a28cd43dSSascha Wildner }
164a28cd43dSSascha Wildner }
165a28cd43dSSascha Wildner
166a28cd43dSSascha Wildner /*! ZSTD_createDDict() :
167a28cd43dSSascha Wildner * Create a digested dictionary, to start decompression without startup delay.
168a28cd43dSSascha Wildner * `dict` content is copied inside DDict.
169a28cd43dSSascha Wildner * Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)170a28cd43dSSascha Wildner ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
171a28cd43dSSascha Wildner {
172a28cd43dSSascha Wildner ZSTD_customMem const allocator = { NULL, NULL, NULL };
173a28cd43dSSascha Wildner return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
174a28cd43dSSascha Wildner }
175a28cd43dSSascha Wildner
176a28cd43dSSascha Wildner /*! ZSTD_createDDict_byReference() :
177a28cd43dSSascha Wildner * Create a digested dictionary, to start decompression without startup delay.
178a28cd43dSSascha Wildner * Dictionary content is simply referenced, it will be accessed during decompression.
179a28cd43dSSascha Wildner * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)180a28cd43dSSascha Wildner ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
181a28cd43dSSascha Wildner {
182a28cd43dSSascha Wildner ZSTD_customMem const allocator = { NULL, NULL, NULL };
183a28cd43dSSascha Wildner return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
184a28cd43dSSascha Wildner }
185a28cd43dSSascha Wildner
186a28cd43dSSascha Wildner
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)187a28cd43dSSascha Wildner const ZSTD_DDict* ZSTD_initStaticDDict(
188a28cd43dSSascha Wildner void* sBuffer, size_t sBufferSize,
189a28cd43dSSascha Wildner const void* dict, size_t dictSize,
190a28cd43dSSascha Wildner ZSTD_dictLoadMethod_e dictLoadMethod,
191a28cd43dSSascha Wildner ZSTD_dictContentType_e dictContentType)
192a28cd43dSSascha Wildner {
193a28cd43dSSascha Wildner size_t const neededSpace = sizeof(ZSTD_DDict)
194a28cd43dSSascha Wildner + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
195a28cd43dSSascha Wildner ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
196a28cd43dSSascha Wildner assert(sBuffer != NULL);
197a28cd43dSSascha Wildner assert(dict != NULL);
198a28cd43dSSascha Wildner if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
199a28cd43dSSascha Wildner if (sBufferSize < neededSpace) return NULL;
200a28cd43dSSascha Wildner if (dictLoadMethod == ZSTD_dlm_byCopy) {
201a28cd43dSSascha Wildner ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
202a28cd43dSSascha Wildner dict = ddict+1;
203a28cd43dSSascha Wildner }
204a28cd43dSSascha Wildner if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
205a28cd43dSSascha Wildner dict, dictSize,
206a28cd43dSSascha Wildner ZSTD_dlm_byRef, dictContentType) ))
207a28cd43dSSascha Wildner return NULL;
208a28cd43dSSascha Wildner return ddict;
209a28cd43dSSascha Wildner }
210a28cd43dSSascha Wildner
211a28cd43dSSascha Wildner
/*! ZSTD_freeDDict() :
 *  Releases the internal dictionary copy (if any) and the DDict itself,
 *  using the allocator stored in the DDict.
 *  Accepts NULL, in which case nothing happens.
 *  @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* copy the allocator out first : `ddict` is released by the second call */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_customFree(ddict->dictBuffer, allocator);
        ZSTD_customFree(ddict, allocator);
    }
    return 0;
}
221a28cd43dSSascha Wildner
222a28cd43dSSascha Wildner /*! ZSTD_estimateDDictSize() :
223a28cd43dSSascha Wildner * Estimate amount of memory that will be needed to create a dictionary for decompression.
224a28cd43dSSascha Wildner * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)225a28cd43dSSascha Wildner size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
226a28cd43dSSascha Wildner {
227a28cd43dSSascha Wildner return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
228a28cd43dSSascha Wildner }
229a28cd43dSSascha Wildner
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)230a28cd43dSSascha Wildner size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
231a28cd43dSSascha Wildner {
232a28cd43dSSascha Wildner if (ddict==NULL) return 0; /* support sizeof on NULL */
233a28cd43dSSascha Wildner return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
234a28cd43dSSascha Wildner }
235a28cd43dSSascha Wildner
236a28cd43dSSascha Wildner /*! ZSTD_getDictID_fromDDict() :
237a28cd43dSSascha Wildner * Provides the dictID of the dictionary loaded into `ddict`.
238a28cd43dSSascha Wildner * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
239a28cd43dSSascha Wildner * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)240a28cd43dSSascha Wildner unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
241a28cd43dSSascha Wildner {
242a28cd43dSSascha Wildner if (ddict==NULL) return 0;
243a28cd43dSSascha Wildner return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
244a28cd43dSSascha Wildner }
245