1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos /* zstd_ddict.c : 12*3117ece4Schristos * concentrates all logic that needs to know the internals of ZSTD_DDict object */ 13*3117ece4Schristos 14*3117ece4Schristos /*-******************************************************* 15*3117ece4Schristos * Dependencies 16*3117ece4Schristos *********************************************************/ 17*3117ece4Schristos #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ 18*3117ece4Schristos #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ 19*3117ece4Schristos #include "../common/cpu.h" /* bmi2 */ 20*3117ece4Schristos #include "../common/mem.h" /* low level memory routines */ 21*3117ece4Schristos #define FSE_STATIC_LINKING_ONLY 22*3117ece4Schristos #include "../common/fse.h" 23*3117ece4Schristos #include "../common/huf.h" 24*3117ece4Schristos #include "zstd_decompress_internal.h" 25*3117ece4Schristos #include "zstd_ddict.h" 26*3117ece4Schristos 27*3117ece4Schristos #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) 28*3117ece4Schristos # include "../legacy/zstd_legacy.h" 29*3117ece4Schristos #endif 30*3117ece4Schristos 31*3117ece4Schristos 32*3117ece4Schristos 33*3117ece4Schristos /*-******************************************************* 34*3117ece4Schristos * Types 35*3117ece4Schristos *********************************************************/ 36*3117ece4Schristos struct ZSTD_DDict_s { 37*3117ece4Schristos void* dictBuffer; 38*3117ece4Schristos const void* dictContent; 39*3117ece4Schristos size_t dictSize; 40*3117ece4Schristos ZSTD_entropyDTables_t entropy; 41*3117ece4Schristos U32 dictID; 42*3117ece4Schristos U32 entropyPresent; 43*3117ece4Schristos ZSTD_customMem cMem; 44*3117ece4Schristos }; /* typedef'd to ZSTD_DDict within "zstd.h" */ 45*3117ece4Schristos 46*3117ece4Schristos const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) 47*3117ece4Schristos { 48*3117ece4Schristos assert(ddict != NULL); 49*3117ece4Schristos return ddict->dictContent; 50*3117ece4Schristos } 51*3117ece4Schristos 52*3117ece4Schristos size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) 53*3117ece4Schristos { 54*3117ece4Schristos assert(ddict != NULL); 55*3117ece4Schristos return ddict->dictSize; 56*3117ece4Schristos } 57*3117ece4Schristos 58*3117ece4Schristos void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) 59*3117ece4Schristos { 60*3117ece4Schristos DEBUGLOG(4, "ZSTD_copyDDictParameters"); 61*3117ece4Schristos assert(dctx != NULL); 62*3117ece4Schristos assert(ddict != NULL); 63*3117ece4Schristos dctx->dictID = ddict->dictID; 64*3117ece4Schristos dctx->prefixStart = ddict->dictContent; 65*3117ece4Schristos dctx->virtualStart = ddict->dictContent; 66*3117ece4Schristos dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; 67*3117ece4Schristos dctx->previousDstEnd = dctx->dictEnd; 68*3117ece4Schristos #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 69*3117ece4Schristos dctx->dictContentBeginForFuzzing = dctx->prefixStart; 70*3117ece4Schristos dctx->dictContentEndForFuzzing = dctx->previousDstEnd; 71*3117ece4Schristos #endif 72*3117ece4Schristos if (ddict->entropyPresent) { 73*3117ece4Schristos dctx->litEntropy = 1; 74*3117ece4Schristos dctx->fseEntropy = 1; 75*3117ece4Schristos dctx->LLTptr = ddict->entropy.LLTable; 76*3117ece4Schristos dctx->MLTptr = ddict->entropy.MLTable; 77*3117ece4Schristos dctx->OFTptr = ddict->entropy.OFTable; 78*3117ece4Schristos dctx->HUFptr = ddict->entropy.hufTable; 79*3117ece4Schristos dctx->entropy.rep[0] = ddict->entropy.rep[0]; 80*3117ece4Schristos dctx->entropy.rep[1] = ddict->entropy.rep[1]; 81*3117ece4Schristos dctx->entropy.rep[2] = ddict->entropy.rep[2]; 82*3117ece4Schristos } else { 83*3117ece4Schristos dctx->litEntropy = 0; 84*3117ece4Schristos dctx->fseEntropy = 0; 85*3117ece4Schristos } 86*3117ece4Schristos } 87*3117ece4Schristos 88*3117ece4Schristos 89*3117ece4Schristos static size_t 90*3117ece4Schristos ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, 91*3117ece4Schristos ZSTD_dictContentType_e dictContentType) 92*3117ece4Schristos { 93*3117ece4Schristos ddict->dictID = 0; 94*3117ece4Schristos ddict->entropyPresent = 0; 95*3117ece4Schristos if (dictContentType == ZSTD_dct_rawContent) return 0; 96*3117ece4Schristos 97*3117ece4Schristos if (ddict->dictSize < 8) { 98*3117ece4Schristos if (dictContentType == ZSTD_dct_fullDict) 99*3117ece4Schristos return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ 100*3117ece4Schristos return 0; /* pure content mode */ 101*3117ece4Schristos } 102*3117ece4Schristos { U32 const magic = MEM_readLE32(ddict->dictContent); 103*3117ece4Schristos if (magic != ZSTD_MAGIC_DICTIONARY) { 104*3117ece4Schristos if (dictContentType == ZSTD_dct_fullDict) 105*3117ece4Schristos return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ 106*3117ece4Schristos return 0; /* pure content mode */ 107*3117ece4Schristos } 108*3117ece4Schristos } 109*3117ece4Schristos ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); 110*3117ece4Schristos 111*3117ece4Schristos /* load entropy tables */ 112*3117ece4Schristos RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( 113*3117ece4Schristos &ddict->entropy, ddict->dictContent, ddict->dictSize)), 114*3117ece4Schristos dictionary_corrupted, ""); 115*3117ece4Schristos ddict->entropyPresent = 1; 116*3117ece4Schristos return 0; 117*3117ece4Schristos } 118*3117ece4Schristos 119*3117ece4Schristos 120*3117ece4Schristos static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, 121*3117ece4Schristos const void* dict, size_t dictSize, 122*3117ece4Schristos ZSTD_dictLoadMethod_e dictLoadMethod, 123*3117ece4Schristos ZSTD_dictContentType_e dictContentType) 124*3117ece4Schristos { 125*3117ece4Schristos if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { 126*3117ece4Schristos ddict->dictBuffer = NULL; 127*3117ece4Schristos ddict->dictContent = dict; 128*3117ece4Schristos if (!dict) dictSize = 0; 129*3117ece4Schristos } else { 130*3117ece4Schristos void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); 131*3117ece4Schristos ddict->dictBuffer = internalBuffer; 132*3117ece4Schristos ddict->dictContent = internalBuffer; 133*3117ece4Schristos if (!internalBuffer) return ERROR(memory_allocation); 134*3117ece4Schristos ZSTD_memcpy(internalBuffer, dict, dictSize); 135*3117ece4Schristos } 136*3117ece4Schristos ddict->dictSize = dictSize; 137*3117ece4Schristos ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ 138*3117ece4Schristos 139*3117ece4Schristos /* parse dictionary content */ 140*3117ece4Schristos FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); 141*3117ece4Schristos 142*3117ece4Schristos return 0; 143*3117ece4Schristos } 144*3117ece4Schristos 145*3117ece4Schristos ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, 146*3117ece4Schristos ZSTD_dictLoadMethod_e dictLoadMethod, 147*3117ece4Schristos ZSTD_dictContentType_e dictContentType, 148*3117ece4Schristos ZSTD_customMem customMem) 149*3117ece4Schristos { 150*3117ece4Schristos if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; 151*3117ece4Schristos 152*3117ece4Schristos { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); 153*3117ece4Schristos if (ddict == NULL) return NULL; 154*3117ece4Schristos ddict->cMem = customMem; 155*3117ece4Schristos { size_t const initResult = ZSTD_initDDict_internal(ddict, 156*3117ece4Schristos dict, dictSize, 157*3117ece4Schristos dictLoadMethod, dictContentType); 158*3117ece4Schristos if (ZSTD_isError(initResult)) { 159*3117ece4Schristos ZSTD_freeDDict(ddict); 160*3117ece4Schristos return NULL; 161*3117ece4Schristos } } 162*3117ece4Schristos return ddict; 163*3117ece4Schristos } 164*3117ece4Schristos } 165*3117ece4Schristos 166*3117ece4Schristos /*! ZSTD_createDDict() : 167*3117ece4Schristos * Create a digested dictionary, to start decompression without startup delay. 168*3117ece4Schristos * `dict` content is copied inside DDict. 169*3117ece4Schristos * Consequently, `dict` can be released after `ZSTD_DDict` creation */ 170*3117ece4Schristos ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) 171*3117ece4Schristos { 172*3117ece4Schristos ZSTD_customMem const allocator = { NULL, NULL, NULL }; 173*3117ece4Schristos return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); 174*3117ece4Schristos } 175*3117ece4Schristos 176*3117ece4Schristos /*! ZSTD_createDDict_byReference() : 177*3117ece4Schristos * Create a digested dictionary, to start decompression without startup delay. 178*3117ece4Schristos * Dictionary content is simply referenced, it will be accessed during decompression. 179*3117ece4Schristos * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ 180*3117ece4Schristos ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) 181*3117ece4Schristos { 182*3117ece4Schristos ZSTD_customMem const allocator = { NULL, NULL, NULL }; 183*3117ece4Schristos return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); 184*3117ece4Schristos } 185*3117ece4Schristos 186*3117ece4Schristos 187*3117ece4Schristos const ZSTD_DDict* ZSTD_initStaticDDict( 188*3117ece4Schristos void* sBuffer, size_t sBufferSize, 189*3117ece4Schristos const void* dict, size_t dictSize, 190*3117ece4Schristos ZSTD_dictLoadMethod_e dictLoadMethod, 191*3117ece4Schristos ZSTD_dictContentType_e dictContentType) 192*3117ece4Schristos { 193*3117ece4Schristos size_t const neededSpace = sizeof(ZSTD_DDict) 194*3117ece4Schristos + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); 195*3117ece4Schristos ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; 196*3117ece4Schristos assert(sBuffer != NULL); 197*3117ece4Schristos assert(dict != NULL); 198*3117ece4Schristos if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ 199*3117ece4Schristos if (sBufferSize < neededSpace) return NULL; 200*3117ece4Schristos if (dictLoadMethod == ZSTD_dlm_byCopy) { 201*3117ece4Schristos ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ 202*3117ece4Schristos dict = ddict+1; 203*3117ece4Schristos } 204*3117ece4Schristos if (ZSTD_isError( ZSTD_initDDict_internal(ddict, 205*3117ece4Schristos dict, dictSize, 206*3117ece4Schristos ZSTD_dlm_byRef, dictContentType) )) 207*3117ece4Schristos return NULL; 208*3117ece4Schristos return ddict; 209*3117ece4Schristos } 210*3117ece4Schristos 211*3117ece4Schristos 212*3117ece4Schristos size_t ZSTD_freeDDict(ZSTD_DDict* ddict) 213*3117ece4Schristos { 214*3117ece4Schristos if (ddict==NULL) return 0; /* support free on NULL */ 215*3117ece4Schristos { ZSTD_customMem const cMem = ddict->cMem; 216*3117ece4Schristos ZSTD_customFree(ddict->dictBuffer, cMem); 217*3117ece4Schristos ZSTD_customFree(ddict, cMem); 218*3117ece4Schristos return 0; 219*3117ece4Schristos } 220*3117ece4Schristos } 221*3117ece4Schristos 222*3117ece4Schristos /*! ZSTD_estimateDDictSize() : 223*3117ece4Schristos * Estimate amount of memory that will be needed to create a dictionary for decompression. 224*3117ece4Schristos * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ 225*3117ece4Schristos size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) 226*3117ece4Schristos { 227*3117ece4Schristos return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); 228*3117ece4Schristos } 229*3117ece4Schristos 230*3117ece4Schristos size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) 231*3117ece4Schristos { 232*3117ece4Schristos if (ddict==NULL) return 0; /* support sizeof on NULL */ 233*3117ece4Schristos return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; 234*3117ece4Schristos } 235*3117ece4Schristos 236*3117ece4Schristos /*! ZSTD_getDictID_fromDDict() : 237*3117ece4Schristos * Provides the dictID of the dictionary loaded into `ddict`. 238*3117ece4Schristos * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. 239*3117ece4Schristos * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ 240*3117ece4Schristos unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) 241*3117ece4Schristos { 242*3117ece4Schristos if (ddict==NULL) return 0; 243*3117ece4Schristos return ddict->dictID; 244*3117ece4Schristos } 245