1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos #define ZSTD_STATIC_LINKING_ONLY 12*3117ece4Schristos #define ZDICT_STATIC_LINKING_ONLY 13*3117ece4Schristos 14*3117ece4Schristos #include <string.h> 15*3117ece4Schristos 16*3117ece4Schristos #include "zstd_helpers.h" 17*3117ece4Schristos #include "fuzz_helpers.h" 18*3117ece4Schristos #include "zstd.h" 19*3117ece4Schristos #include "zdict.h" 20*3117ece4Schristos #include "sequence_producer.h" 21*3117ece4Schristos #include "fuzz_third_party_seq_prod.h" 22*3117ece4Schristos 23*3117ece4Schristos const int kMinClevel = -3; 24*3117ece4Schristos const int kMaxClevel = 19; 25*3117ece4Schristos 26*3117ece4Schristos void* FUZZ_seqProdState = NULL; 27*3117ece4Schristos 28*3117ece4Schristos static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value) 29*3117ece4Schristos { 30*3117ece4Schristos FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value)); 31*3117ece4Schristos } 32*3117ece4Schristos 33*3117ece4Schristos static unsigned produceParamValue(unsigned min, unsigned max, 34*3117ece4Schristos FUZZ_dataProducer_t *producer) { 35*3117ece4Schristos return FUZZ_dataProducer_uint32Range(producer, min, max); 36*3117ece4Schristos } 37*3117ece4Schristos 38*3117ece4Schristos static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min, 39*3117ece4Schristos unsigned max, FUZZ_dataProducer_t *producer) { 40*3117ece4Schristos unsigned const value = produceParamValue(min, max, producer); 41*3117ece4Schristos set(cctx, param, value); 42*3117ece4Schristos } 43*3117ece4Schristos 44*3117ece4Schristos ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer) 45*3117ece4Schristos { 46*3117ece4Schristos /* Select compression parameters */ 47*3117ece4Schristos ZSTD_compressionParameters cParams; 48*3117ece4Schristos cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15); 49*3117ece4Schristos cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15); 50*3117ece4Schristos cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16); 51*3117ece4Schristos cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9); 52*3117ece4Schristos cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, 53*3117ece4Schristos ZSTD_MINMATCH_MAX); 54*3117ece4Schristos cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512); 55*3117ece4Schristos cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX); 56*3117ece4Schristos return ZSTD_adjustCParams(cParams, srcSize, 0); 57*3117ece4Schristos } 58*3117ece4Schristos 59*3117ece4Schristos ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer) 60*3117ece4Schristos { 61*3117ece4Schristos /* Select frame parameters */ 62*3117ece4Schristos ZSTD_frameParameters fParams; 63*3117ece4Schristos fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); 64*3117ece4Schristos fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); 65*3117ece4Schristos fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1); 66*3117ece4Schristos return fParams; 67*3117ece4Schristos } 68*3117ece4Schristos 69*3117ece4Schristos ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer) 70*3117ece4Schristos { 71*3117ece4Schristos ZSTD_parameters params; 72*3117ece4Schristos params.cParams = FUZZ_randomCParams(srcSize, producer); 73*3117ece4Schristos params.fParams = FUZZ_randomFParams(producer); 74*3117ece4Schristos return params; 75*3117ece4Schristos } 76*3117ece4Schristos 77*3117ece4Schristos static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) { 78*3117ece4Schristos #ifdef FUZZ_THIRD_PARTY_SEQ_PROD 79*3117ece4Schristos ZSTD_registerSequenceProducer( 80*3117ece4Schristos cctx, 81*3117ece4Schristos FUZZ_seqProdState, 82*3117ece4Schristos FUZZ_thirdPartySeqProd 83*3117ece4Schristos ); 84*3117ece4Schristos #else 85*3117ece4Schristos ZSTD_registerSequenceProducer( 86*3117ece4Schristos cctx, 87*3117ece4Schristos NULL, 88*3117ece4Schristos simpleSequenceProducer 89*3117ece4Schristos ); 90*3117ece4Schristos #endif 91*3117ece4Schristos 92*3117ece4Schristos #ifdef FUZZ_THIRD_PARTY_SEQ_PROD 93*3117ece4Schristos FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1)); 94*3117ece4Schristos #else 95*3117ece4Schristos setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer); 96*3117ece4Schristos #endif 97*3117ece4Schristos FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0)); 98*3117ece4Schristos FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable)); 99*3117ece4Schristos } 100*3117ece4Schristos 101*3117ece4Schristos void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer) 102*3117ece4Schristos { 103*3117ece4Schristos ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer); 104*3117ece4Schristos set(cctx, ZSTD_c_windowLog, cParams.windowLog); 105*3117ece4Schristos set(cctx, ZSTD_c_hashLog, cParams.hashLog); 106*3117ece4Schristos set(cctx, ZSTD_c_chainLog, cParams.chainLog); 107*3117ece4Schristos set(cctx, ZSTD_c_searchLog, cParams.searchLog); 108*3117ece4Schristos set(cctx, ZSTD_c_minMatch, cParams.minMatch); 109*3117ece4Schristos set(cctx, ZSTD_c_targetLength, cParams.targetLength); 110*3117ece4Schristos set(cctx, ZSTD_c_strategy, cParams.strategy); 111*3117ece4Schristos /* Select frame parameters */ 112*3117ece4Schristos setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer); 113*3117ece4Schristos setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); 114*3117ece4Schristos setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); 115*3117ece4Schristos /* Select long distance matching parameters */ 116*3117ece4Schristos setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer); 117*3117ece4Schristos setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); 118*3117ece4Schristos setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, 119*3117ece4Schristos ZSTD_LDM_MINMATCH_MAX, producer); 120*3117ece4Schristos setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX, 121*3117ece4Schristos producer); 122*3117ece4Schristos setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, 123*3117ece4Schristos ZSTD_LDM_HASHRATELOG_MAX, producer); 124*3117ece4Schristos /* Set misc parameters */ 125*3117ece4Schristos #ifndef ZSTD_MULTITHREAD 126*3117ece4Schristos // To reproduce with or without ZSTD_MULTITHREAD, we are going to use 127*3117ece4Schristos // the same amount of entropy. 128*3117ece4Schristos unsigned const nbWorkers_value = produceParamValue(0, 2, producer); 129*3117ece4Schristos unsigned const rsyncable_value = produceParamValue(0, 1, producer); 130*3117ece4Schristos (void)nbWorkers_value; 131*3117ece4Schristos (void)rsyncable_value; 132*3117ece4Schristos set(cctx, ZSTD_c_nbWorkers, 0); 133*3117ece4Schristos set(cctx, ZSTD_c_rsyncable, 0); 134*3117ece4Schristos #else 135*3117ece4Schristos setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); 136*3117ece4Schristos setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); 137*3117ece4Schristos #endif 138*3117ece4Schristos setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer); 139*3117ece4Schristos setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer); 140*3117ece4Schristos setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); 141*3117ece4Schristos setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); 142*3117ece4Schristos setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); 143*3117ece4Schristos setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer); 144*3117ece4Schristos setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer); 145*3117ece4Schristos setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer); 146*3117ece4Schristos setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer); 147*3117ece4Schristos setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer); 148*3117ece4Schristos setRand(cctx, ZSTD_c_searchForExternalRepcodes, 0, 2, producer); 149*3117ece4Schristos if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { 150*3117ece4Schristos setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); 151*3117ece4Schristos } 152*3117ece4Schristos if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { 153*3117ece4Schristos setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer); 154*3117ece4Schristos } 155*3117ece4Schristos 156*3117ece4Schristos #ifdef FUZZ_THIRD_PARTY_SEQ_PROD 157*3117ece4Schristos setSequenceProducerParams(cctx, producer); 158*3117ece4Schristos #else 159*3117ece4Schristos if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) { 160*3117ece4Schristos setSequenceProducerParams(cctx, producer); 161*3117ece4Schristos } else { 162*3117ece4Schristos ZSTD_registerSequenceProducer(cctx, NULL, NULL); 163*3117ece4Schristos } 164*3117ece4Schristos #endif 165*3117ece4Schristos } 166*3117ece4Schristos 167*3117ece4Schristos FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer) 168*3117ece4Schristos { 169*3117ece4Schristos size_t const dictSize = MAX(srcSize / 8, 1024); 170*3117ece4Schristos size_t const totalSampleSize = dictSize * 11; 171*3117ece4Schristos FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize }; 172*3117ece4Schristos char* const samples = (char*)FUZZ_malloc(totalSampleSize); 173*3117ece4Schristos unsigned nbSamples = 100; 174*3117ece4Schristos size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples); 175*3117ece4Schristos size_t pos = 0; 176*3117ece4Schristos size_t sample = 0; 177*3117ece4Schristos ZDICT_fastCover_params_t params; 178*3117ece4Schristos 179*3117ece4Schristos for (sample = 0; sample < nbSamples; ++sample) { 180*3117ece4Schristos size_t const remaining = totalSampleSize - pos; 181*3117ece4Schristos size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1); 182*3117ece4Schristos size_t const limit = MIN(srcSize - offset, remaining); 183*3117ece4Schristos size_t const toCopy = MIN(limit, remaining / (nbSamples - sample)); 184*3117ece4Schristos memcpy(samples + pos, (const char*)src + offset, toCopy); 185*3117ece4Schristos pos += toCopy; 186*3117ece4Schristos samplesSizes[sample] = toCopy; 187*3117ece4Schristos } 188*3117ece4Schristos memset(samples + pos, 0, totalSampleSize - pos); 189*3117ece4Schristos 190*3117ece4Schristos memset(¶ms, 0, sizeof(params)); 191*3117ece4Schristos params.accel = 5; 192*3117ece4Schristos params.k = 40; 193*3117ece4Schristos params.d = 8; 194*3117ece4Schristos params.f = 14; 195*3117ece4Schristos params.zParams.compressionLevel = 1; 196*3117ece4Schristos dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize, 197*3117ece4Schristos samples, samplesSizes, nbSamples, params); 198*3117ece4Schristos if (ZSTD_isError(dict.size)) { 199*3117ece4Schristos free(dict.buff); 200*3117ece4Schristos memset(&dict, 0, sizeof(dict)); 201*3117ece4Schristos } 202*3117ece4Schristos 203*3117ece4Schristos free(samplesSizes); 204*3117ece4Schristos free(samples); 205*3117ece4Schristos 206*3117ece4Schristos return dict; 207*3117ece4Schristos } 208