1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 #define ZSTD_STATIC_LINKING_ONLY 12 #define ZDICT_STATIC_LINKING_ONLY 13 14 #include <string.h> 15 16 #include "zstd_helpers.h" 17 #include "fuzz_helpers.h" 18 #include "zstd.h" 19 #include "zdict.h" 20 #include "sequence_producer.h" 21 #include "fuzz_third_party_seq_prod.h" 22 23 const int kMinClevel = -3; 24 const int kMaxClevel = 19; 25 26 void* FUZZ_seqProdState = NULL; 27 28 static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) 29 { 30 FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); 31 } 32 33 static unsigned produceParamValue(unsigned min, unsigned max, 34 FUZZ_dataProducer_t *producer) { 35 return FUZZ_dataProducer_uint32Range(producer, min, max); 36 } 37 38 static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, 39 unsigned max, FUZZ_dataProducer_t *producer) { 40 unsigned const value = produceParamValue(min, max, producer); 41 set(cctx, param, value); 42 } 43 44 ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer) 45 { 46 /* Select compression parameters */ 47 ZSTD_compressionParameters cParams; 48 cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15); 49 cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15); 50 cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16); 51 cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9); 52 cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, 53 ZSTD_MINMATCH_MAX); 54 cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512); 55 cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); 56 return ZSTD_adjustCParams(cParams, srcSize, 0); 57 } 58 59 ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer) 60 { 61 /* Select frame parameters */ 62 ZSTD_frameParameters fParams; 63 fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); 64 fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); 65 fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); 66 return fParams; 67 } 68 69 ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) 70 { 71 ZSTD_parameters params; 72 params.cParams = FUZZ_randomCParams(srcSize, producer); 73 params.fParams = FUZZ_randomFParams(producer); 74 return params; 75 } 76 77 static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) { 78 #ifdef FUZZ_THIRD_PARTY_SEQ_PROD 79 ZSTD_registerSequenceProducer( 80 cctx, 81 FUZZ_seqProdState, 82 FUZZ_thirdPartySeqProd 83 ); 84 #else 85 ZSTD_registerSequenceProducer( 86 cctx, 87 NULL, 88 simpleSequenceProducer 89 ); 90 #endif 91 92 #ifdef FUZZ_THIRD_PARTY_SEQ_PROD 93 FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1)); 94 #else 95 setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer); 96 #endif 97 FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0)); 98 FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable)); 99 } 100 101 void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer) 102 { 103 ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); 104 set(cctx, ZSTD_c_windowLog, cParams.windowLog); 105 set(cctx, ZSTD_c_hashLog, cParams.hashLog); 106 set(cctx, ZSTD_c_chainLog, cParams.chainLog); 107 set(cctx, ZSTD_c_searchLog, cParams.searchLog); 108 set(cctx, ZSTD_c_minMatch, cParams.minMatch); 109 set(cctx, ZSTD_c_targetLength, cParams.targetLength); 110 set(cctx, ZSTD_c_strategy, cParams.strategy); 111 /* Select frame parameters */ 112 setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer); 113 setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); 114 setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); 115 /* Select long distance matching parameters */ 116 setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer); 117 setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); 118 setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, 119 ZSTD_LDM_MINMATCH_MAX, producer); 120 setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, 121 producer); 122 setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, 123 ZSTD_LDM_HASHRATELOG_MAX, producer); 124 /* Set misc parameters */ 125 #ifndef ZSTD_MULTITHREAD 126 // To reproduce with or without ZSTD_MULTITHREAD, we are going to use 127 // the same amount of entropy. 128 unsigned const nbWorkers_value = produceParamValue(0, 2, producer); 129 unsigned const rsyncable_value = produceParamValue(0, 1, producer); 130 (void)nbWorkers_value; 131 (void)rsyncable_value; 132 set(cctx, ZSTD_c_nbWorkers, 0); 133 set(cctx, ZSTD_c_rsyncable, 0); 134 #else 135 setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); 136 setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); 137 #endif 138 setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer); 139 setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer); 140 setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); 141 setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); 142 setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); 143 setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer); 144 setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer); 145 setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer); 146 setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer); 147 setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer); 148 setRand(cctx, ZSTD_c_searchForExternalRepcodes, 0, 2, producer); 149 if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { 150 setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); 151 } 152 if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { 153 setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer); 154 } 155 156 #ifdef FUZZ_THIRD_PARTY_SEQ_PROD 157 setSequenceProducerParams(cctx, producer); 158 #else 159 if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) { 160 setSequenceProducerParams(cctx, producer); 161 } else { 162 ZSTD_registerSequenceProducer(cctx, NULL, NULL); 163 } 164 #endif 165 } 166 167 FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer) 168 { 169 size_t const dictSize = MAX(srcSize / 8, 1024); 170 size_t const totalSampleSize = dictSize * 11; 171 FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize }; 172 char* const samples = (char*)FUZZ_malloc(totalSampleSize); 173 unsigned nbSamples = 100; 174 size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples); 175 size_t pos = 0; 176 size_t sample = 0; 177 ZDICT_fastCover_params_t params; 178 179 for (sample = 0; sample < nbSamples; ++sample) { 180 size_t const remaining = totalSampleSize - pos; 181 size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); 182 size_t const limit = MIN(srcSize - offset, remaining); 183 size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); 184 memcpy(samples + pos, (const char*)src + offset, toCopy); 185 pos += toCopy; 186 samplesSizes[sample] = toCopy; 187 } 188 memset(samples + pos, 0, totalSampleSize - pos); 189 190 memset(¶ms, 0, sizeof(params)); 191 params.accel = 5; 192 params.k = 40; 193 params.d = 8; 194 params.f = 14; 195 params.zParams.compressionLevel = 1; 196 dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, 197 samples, samplesSizes, nbSamples, params); 198 if (ZSTD_isError(dict.size)) { 199 free(dict.buff); 200 memset(&dict, 0, sizeof(dict)); 201 } 202 203 free(samplesSizes); 204 free(samples); 205 206 return dict; 207 } 208