xref: /netbsd-src/external/bsd/zstd/dist/tests/fuzz/zstd_helpers.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos #define ZSTD_STATIC_LINKING_ONLY
12*3117ece4Schristos #define ZDICT_STATIC_LINKING_ONLY
13*3117ece4Schristos 
14*3117ece4Schristos #include <string.h>
15*3117ece4Schristos 
16*3117ece4Schristos #include "zstd_helpers.h"
17*3117ece4Schristos #include "fuzz_helpers.h"
18*3117ece4Schristos #include "zstd.h"
19*3117ece4Schristos #include "zdict.h"
20*3117ece4Schristos #include "sequence_producer.h"
21*3117ece4Schristos #include "fuzz_third_party_seq_prod.h"
22*3117ece4Schristos 
/* Compression-level bounds shared by the fuzz helpers. They are not referenced
 * in this translation unit; presumably fuzz targets read them through
 * zstd_helpers.h (confirm against the header). */
const int kMinClevel = -3;
const int kMaxClevel = 19;

/* Opaque state handed to ZSTD_registerSequenceProducer() when
 * FUZZ_THIRD_PARTY_SEQ_PROD is defined; starts out NULL. */
void *FUZZ_seqProdState = NULL;
27*3117ece4Schristos 
28*3117ece4Schristos static void set(ZSTD_CCtx *cctx, ZSTD_cParameter param, int value)
29*3117ece4Schristos {
30*3117ece4Schristos     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, param, value));
31*3117ece4Schristos }
32*3117ece4Schristos 
33*3117ece4Schristos static unsigned produceParamValue(unsigned min, unsigned max,
34*3117ece4Schristos                                   FUZZ_dataProducer_t *producer) {
35*3117ece4Schristos     return FUZZ_dataProducer_uint32Range(producer, min, max);
36*3117ece4Schristos }
37*3117ece4Schristos 
38*3117ece4Schristos static void setRand(ZSTD_CCtx *cctx, ZSTD_cParameter param, unsigned min,
39*3117ece4Schristos                     unsigned max, FUZZ_dataProducer_t *producer) {
40*3117ece4Schristos     unsigned const value = produceParamValue(min, max, producer);
41*3117ece4Schristos     set(cctx, param, value);
42*3117ece4Schristos }
43*3117ece4Schristos 
44*3117ece4Schristos ZSTD_compressionParameters FUZZ_randomCParams(size_t srcSize, FUZZ_dataProducer_t *producer)
45*3117ece4Schristos {
46*3117ece4Schristos     /* Select compression parameters */
47*3117ece4Schristos     ZSTD_compressionParameters cParams;
48*3117ece4Schristos     cParams.windowLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, 15);
49*3117ece4Schristos     cParams.hashLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_HASHLOG_MIN, 15);
50*3117ece4Schristos     cParams.chainLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_CHAINLOG_MIN, 16);
51*3117ece4Schristos     cParams.searchLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_SEARCHLOG_MIN, 9);
52*3117ece4Schristos     cParams.minMatch = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN,
53*3117ece4Schristos                                           ZSTD_MINMATCH_MAX);
54*3117ece4Schristos     cParams.targetLength = FUZZ_dataProducer_uint32Range(producer, 0, 512);
55*3117ece4Schristos     cParams.strategy = FUZZ_dataProducer_uint32Range(producer, ZSTD_STRATEGY_MIN, ZSTD_STRATEGY_MAX);
56*3117ece4Schristos     return ZSTD_adjustCParams(cParams, srcSize, 0);
57*3117ece4Schristos }
58*3117ece4Schristos 
59*3117ece4Schristos ZSTD_frameParameters FUZZ_randomFParams(FUZZ_dataProducer_t *producer)
60*3117ece4Schristos {
61*3117ece4Schristos     /* Select frame parameters */
62*3117ece4Schristos     ZSTD_frameParameters fParams;
63*3117ece4Schristos     fParams.contentSizeFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
64*3117ece4Schristos     fParams.checksumFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
65*3117ece4Schristos     fParams.noDictIDFlag = FUZZ_dataProducer_uint32Range(producer, 0, 1);
66*3117ece4Schristos     return fParams;
67*3117ece4Schristos }
68*3117ece4Schristos 
69*3117ece4Schristos ZSTD_parameters FUZZ_randomParams(size_t srcSize, FUZZ_dataProducer_t *producer)
70*3117ece4Schristos {
71*3117ece4Schristos     ZSTD_parameters params;
72*3117ece4Schristos     params.cParams = FUZZ_randomCParams(srcSize, producer);
73*3117ece4Schristos     params.fParams = FUZZ_randomFParams(producer);
74*3117ece4Schristos     return params;
75*3117ece4Schristos }
76*3117ece4Schristos 
77*3117ece4Schristos static void setSequenceProducerParams(ZSTD_CCtx *cctx, FUZZ_dataProducer_t *producer) {
78*3117ece4Schristos #ifdef FUZZ_THIRD_PARTY_SEQ_PROD
79*3117ece4Schristos     ZSTD_registerSequenceProducer(
80*3117ece4Schristos         cctx,
81*3117ece4Schristos         FUZZ_seqProdState,
82*3117ece4Schristos         FUZZ_thirdPartySeqProd
83*3117ece4Schristos     );
84*3117ece4Schristos #else
85*3117ece4Schristos     ZSTD_registerSequenceProducer(
86*3117ece4Schristos         cctx,
87*3117ece4Schristos         NULL,
88*3117ece4Schristos         simpleSequenceProducer
89*3117ece4Schristos     );
90*3117ece4Schristos #endif
91*3117ece4Schristos 
92*3117ece4Schristos #ifdef FUZZ_THIRD_PARTY_SEQ_PROD
93*3117ece4Schristos     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableSeqProducerFallback, 1));
94*3117ece4Schristos #else
95*3117ece4Schristos     setRand(cctx, ZSTD_c_enableSeqProducerFallback, 0, 1, producer);
96*3117ece4Schristos #endif
97*3117ece4Schristos     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0));
98*3117ece4Schristos     FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable));
99*3117ece4Schristos }
100*3117ece4Schristos 
101*3117ece4Schristos void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer_t *producer)
102*3117ece4Schristos {
103*3117ece4Schristos     ZSTD_compressionParameters cParams = FUZZ_randomCParams(srcSize, producer);
104*3117ece4Schristos     set(cctx, ZSTD_c_windowLog, cParams.windowLog);
105*3117ece4Schristos     set(cctx, ZSTD_c_hashLog, cParams.hashLog);
106*3117ece4Schristos     set(cctx, ZSTD_c_chainLog, cParams.chainLog);
107*3117ece4Schristos     set(cctx, ZSTD_c_searchLog, cParams.searchLog);
108*3117ece4Schristos     set(cctx, ZSTD_c_minMatch, cParams.minMatch);
109*3117ece4Schristos     set(cctx, ZSTD_c_targetLength, cParams.targetLength);
110*3117ece4Schristos     set(cctx, ZSTD_c_strategy, cParams.strategy);
111*3117ece4Schristos     /* Select frame parameters */
112*3117ece4Schristos     setRand(cctx, ZSTD_c_contentSizeFlag, 0, 1, producer);
113*3117ece4Schristos     setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer);
114*3117ece4Schristos     setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer);
115*3117ece4Schristos     /* Select long distance matching parameters */
116*3117ece4Schristos     setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer);
117*3117ece4Schristos     setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer);
118*3117ece4Schristos     setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN,
119*3117ece4Schristos             ZSTD_LDM_MINMATCH_MAX, producer);
120*3117ece4Schristos     setRand(cctx, ZSTD_c_ldmBucketSizeLog, 0, ZSTD_LDM_BUCKETSIZELOG_MAX,
121*3117ece4Schristos             producer);
122*3117ece4Schristos     setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN,
123*3117ece4Schristos             ZSTD_LDM_HASHRATELOG_MAX, producer);
124*3117ece4Schristos     /* Set misc parameters */
125*3117ece4Schristos #ifndef ZSTD_MULTITHREAD
126*3117ece4Schristos     // To reproduce with or without ZSTD_MULTITHREAD, we are going to use
127*3117ece4Schristos     // the same amount of entropy.
128*3117ece4Schristos     unsigned const nbWorkers_value = produceParamValue(0, 2, producer);
129*3117ece4Schristos     unsigned const rsyncable_value = produceParamValue(0, 1, producer);
130*3117ece4Schristos     (void)nbWorkers_value;
131*3117ece4Schristos     (void)rsyncable_value;
132*3117ece4Schristos     set(cctx, ZSTD_c_nbWorkers, 0);
133*3117ece4Schristos     set(cctx, ZSTD_c_rsyncable, 0);
134*3117ece4Schristos #else
135*3117ece4Schristos     setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer);
136*3117ece4Schristos     setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer);
137*3117ece4Schristos #endif
138*3117ece4Schristos     setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer);
139*3117ece4Schristos     setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer);
140*3117ece4Schristos     setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer);
141*3117ece4Schristos     setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer);
142*3117ece4Schristos     setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer);
143*3117ece4Schristos     setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer);
144*3117ece4Schristos     setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer);
145*3117ece4Schristos     setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer);
146*3117ece4Schristos     setRand(cctx, ZSTD_c_maxBlockSize, ZSTD_BLOCKSIZE_MAX_MIN, ZSTD_BLOCKSIZE_MAX, producer);
147*3117ece4Schristos     setRand(cctx, ZSTD_c_validateSequences, 0, 1, producer);
148*3117ece4Schristos     setRand(cctx, ZSTD_c_searchForExternalRepcodes, 0, 2, producer);
149*3117ece4Schristos     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
150*3117ece4Schristos       setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer);
151*3117ece4Schristos     }
152*3117ece4Schristos     if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) {
153*3117ece4Schristos       setRand(cctx, ZSTD_c_targetCBlockSize, ZSTD_TARGETCBLOCKSIZE_MIN, ZSTD_TARGETCBLOCKSIZE_MAX, producer);
154*3117ece4Schristos     }
155*3117ece4Schristos 
156*3117ece4Schristos #ifdef FUZZ_THIRD_PARTY_SEQ_PROD
157*3117ece4Schristos     setSequenceProducerParams(cctx, producer);
158*3117ece4Schristos #else
159*3117ece4Schristos     if (FUZZ_dataProducer_uint32Range(producer, 0, 10) == 1) {
160*3117ece4Schristos         setSequenceProducerParams(cctx, producer);
161*3117ece4Schristos     } else {
162*3117ece4Schristos         ZSTD_registerSequenceProducer(cctx, NULL, NULL);
163*3117ece4Schristos     }
164*3117ece4Schristos #endif
165*3117ece4Schristos }
166*3117ece4Schristos 
167*3117ece4Schristos FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, FUZZ_dataProducer_t *producer)
168*3117ece4Schristos {
169*3117ece4Schristos     size_t const dictSize = MAX(srcSize / 8, 1024);
170*3117ece4Schristos     size_t const totalSampleSize = dictSize * 11;
171*3117ece4Schristos     FUZZ_dict_t dict = { FUZZ_malloc(dictSize), dictSize };
172*3117ece4Schristos     char* const samples = (char*)FUZZ_malloc(totalSampleSize);
173*3117ece4Schristos     unsigned nbSamples = 100;
174*3117ece4Schristos     size_t* const samplesSizes = (size_t*)FUZZ_malloc(sizeof(size_t) * nbSamples);
175*3117ece4Schristos     size_t pos = 0;
176*3117ece4Schristos     size_t sample = 0;
177*3117ece4Schristos     ZDICT_fastCover_params_t params;
178*3117ece4Schristos 
179*3117ece4Schristos     for (sample = 0; sample < nbSamples; ++sample) {
180*3117ece4Schristos       size_t const remaining = totalSampleSize - pos;
181*3117ece4Schristos       size_t const offset = FUZZ_dataProducer_uint32Range(producer, 0, MAX(srcSize, 1) - 1);
182*3117ece4Schristos       size_t const limit = MIN(srcSize - offset, remaining);
183*3117ece4Schristos       size_t const toCopy = MIN(limit, remaining / (nbSamples - sample));
184*3117ece4Schristos       memcpy(samples + pos, (const char*)src + offset, toCopy);
185*3117ece4Schristos       pos += toCopy;
186*3117ece4Schristos       samplesSizes[sample] = toCopy;
187*3117ece4Schristos     }
188*3117ece4Schristos     memset(samples + pos, 0, totalSampleSize - pos);
189*3117ece4Schristos 
190*3117ece4Schristos     memset(&params, 0, sizeof(params));
191*3117ece4Schristos     params.accel = 5;
192*3117ece4Schristos     params.k = 40;
193*3117ece4Schristos     params.d = 8;
194*3117ece4Schristos     params.f = 14;
195*3117ece4Schristos     params.zParams.compressionLevel = 1;
196*3117ece4Schristos     dict.size = ZDICT_trainFromBuffer_fastCover(dict.buff, dictSize,
197*3117ece4Schristos         samples, samplesSizes, nbSamples, params);
198*3117ece4Schristos     if (ZSTD_isError(dict.size)) {
199*3117ece4Schristos         free(dict.buff);
200*3117ece4Schristos         memset(&dict, 0, sizeof(dict));
201*3117ece4Schristos     }
202*3117ece4Schristos 
203*3117ece4Schristos     free(samplesSizes);
204*3117ece4Schristos     free(samples);
205*3117ece4Schristos 
206*3117ece4Schristos     return dict;
207*3117ece4Schristos }
208