1 /* 2 * Copyright (c) Meta Platforms, Inc. and affiliates. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 /** 12 * This fuzz target performs a zstd round-trip test by generating an arbitrary 13 * array of sequences, generating the associated source buffer, calling 14 * ZSTD_compressSequences(), and then decompresses and compares the result with 15 * the original generated source buffer. 16 */ 17 18 #define ZSTD_STATIC_LINKING_ONLY 19 20 #include <stddef.h> 21 #include <stdlib.h> 22 #include <stdio.h> 23 #include <string.h> 24 #include <time.h> 25 #include "fuzz_helpers.h" 26 #include "zstd_helpers.h" 27 #include "fuzz_data_producer.h" 28 #include "fuzz_third_party_seq_prod.h" 29 30 static ZSTD_CCtx* cctx = NULL; 31 static ZSTD_DCtx* dctx = NULL; 32 static void* literalsBuffer = NULL; 33 static void* generatedSrc = NULL; 34 static ZSTD_Sequence* generatedSequences = NULL; 35 36 static void* dictBuffer = NULL; 37 static ZSTD_CDict* cdict = NULL; 38 static ZSTD_DDict* ddict = NULL; 39 40 #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */ 41 #define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */ 42 #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */ 43 #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */ 44 #define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */ 45 46 /* Deterministic random number generator */ 47 #define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) 48 static uint32_t FUZZ_RDG_rand(uint32_t* src) 49 { 50 static const uint32_t prime1 = 2654435761U; 51 static const uint32_t prime2 = 2246822519U; 52 uint32_t rand32 = *src; 53 rand32 *= prime1; 54 rand32 ^= prime2; 55 rand32 = FUZZ_RDG_rotl32(rand32, 13); 56 *src = rand32; 57 return rand32 >> 5; 58 } 59 60 /* Make a pseudorandom string - this simple function exists to avoid 61 * taking a dependency on datagen.h to have RDG_genBuffer(). 62 */ 63 static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) { 64 const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_"; 65 uint32_t seed = FUZZ_dataProducer_uint32(producer); 66 if (size) { 67 for (size_t n = 0; n < size; n++) { 68 int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1); 69 str[n] = charset[key]; 70 } 71 } 72 return str; 73 } 74 75 /* Returns size of source buffer */ 76 static size_t decodeSequences(void* dst, size_t nbSequences, 77 size_t literalsSize, 78 const void* dict, size_t dictSize, 79 ZSTD_sequenceFormat_e mode) 80 { 81 const uint8_t* litPtr = literalsBuffer; 82 const uint8_t* const litBegin = literalsBuffer; 83 const uint8_t* const litEnd = litBegin + literalsSize; 84 const uint8_t* dictPtr = dict; 85 uint8_t* op = dst; 86 const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE; 87 size_t generatedSrcBufferSize = 0; 88 size_t bytesWritten = 0; 89 90 for (size_t i = 0; i < nbSequences; ++i) { 91 /* block boundary */ 92 if (generatedSequences[i].offset == 0) 93 FUZZ_ASSERT(generatedSequences[i].matchLength == 0); 94 95 if (litPtr + generatedSequences[i].litLength > litEnd) { 96 litPtr = litBegin; 97 } 98 memcpy(op, litPtr, generatedSequences[i].litLength); 99 bytesWritten += generatedSequences[i].litLength; 100 op += generatedSequences[i].litLength; 101 litPtr += generatedSequences[i].litLength; 102 103 /* Copy over the match */ 104 { size_t matchLength = generatedSequences[i].matchLength; 105 size_t j = 0; 106 size_t k = 0; 107 if (dictSize != 0) { 108 if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */ 109 size_t dictOffset = generatedSequences[i].offset - bytesWritten; 110 size_t matchInDict = MIN(matchLength, dictOffset); 111 for (; k < matchInDict; ++k) { 112 op[k] = dictPtr[dictSize - dictOffset + k]; 113 } 114 matchLength -= matchInDict; 115 op += matchInDict; 116 } 117 } 118 for (; j < matchLength; ++j) { 119 op[j] = op[(ptrdiff_t)(j - generatedSequences[i].offset)]; 120 } 121 op += j; 122 FUZZ_ASSERT(generatedSequences[i].matchLength == j + k); 123 bytesWritten += generatedSequences[i].matchLength; 124 } 125 } 126 generatedSrcBufferSize = bytesWritten; 127 FUZZ_ASSERT(litPtr <= litEnd); 128 if (mode == ZSTD_sf_noBlockDelimiters) { 129 const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr); 130 if (lastLLSize <= oend - op) { 131 memcpy(op, litPtr, lastLLSize); 132 generatedSrcBufferSize += lastLLSize; 133 } } 134 return generatedSrcBufferSize; 135 } 136 137 /* Returns nb sequences generated 138 * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode. 139 * However, it can fail with ZSTD_sf_explicitBlockDelimiters, 140 * due to potential lack of space in 141 */ 142 static size_t generateRandomSequences(FUZZ_dataProducer_t* producer, 143 size_t literalsSizeLimit, size_t dictSize, 144 size_t windowLog, ZSTD_sequenceFormat_e mode) 145 { 146 const uint32_t repCode = 0; /* not used by sequence ingestion api */ 147 size_t windowSize = 1ULL << windowLog; 148 size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); 149 uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE; 150 uint32_t bytesGenerated = 0; 151 uint32_t nbSeqGenerated = 0; 152 uint32_t isFirstSequence = 1; 153 uint32_t blockSize = 0; 154 155 if (mode == ZSTD_sf_explicitBlockDelimiters) { 156 /* ensure that no sequence can be larger than one block */ 157 literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2); 158 matchLengthMax = MIN(matchLengthMax, blockSizeMax/2); 159 } 160 161 while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */ 162 && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE 163 && !FUZZ_dataProducer_empty(producer)) { 164 uint32_t matchLength; 165 uint32_t matchBound = matchLengthMax; 166 uint32_t offset; 167 uint32_t offsetBound; 168 const uint32_t minLitLength = (isFirstSequence && (dictSize == 0)); 169 const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit); 170 bytesGenerated += litLength; 171 if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { 172 break; 173 } 174 offsetBound = (bytesGenerated > windowSize) ? windowSize : bytesGenerated + (uint32_t)dictSize; 175 offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound); 176 if (dictSize > 0 && bytesGenerated <= windowSize) { 177 /* Prevent match length from being such that it would be associated with an offset too large 178 * from the decoder's perspective. If not possible (match would be too small), 179 * then reduce the offset if necessary. 180 */ 181 const size_t bytesToReachWindowSize = windowSize - bytesGenerated; 182 if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) { 183 const uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound; 184 offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound); 185 } else { 186 matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize); 187 } 188 } 189 matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound); 190 bytesGenerated += matchLength; 191 if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) { 192 break; 193 } 194 { ZSTD_Sequence seq = {offset, litLength, matchLength, repCode}; 195 const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength); 196 #define SPLITPROB 6000 197 #define SPLITMARK 5234 198 const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK); 199 if (mode == ZSTD_sf_explicitBlockDelimiters) { 200 const size_t seqSize = seq.litLength + seq.matchLength; 201 if (blockSize + seqSize > blockSizeMax) { /* reaching limit : must end block now */ 202 const ZSTD_Sequence endBlock = {0, 0, 0, 0}; 203 generatedSequences[nbSeqGenerated++] = endBlock; 204 blockSize = seqSize; 205 } 206 if (split) { 207 const ZSTD_Sequence endBlock = {0, lastLits, 0, 0}; 208 generatedSequences[nbSeqGenerated++] = endBlock; 209 assert(lastLits <= seq.litLength); 210 seq.litLength -= lastLits; 211 blockSize = seqSize - lastLits; 212 } else { 213 blockSize += seqSize; 214 } 215 } 216 generatedSequences[nbSeqGenerated++] = seq; 217 isFirstSequence = 0; 218 } 219 } 220 221 if (mode == ZSTD_sf_explicitBlockDelimiters) { 222 /* always end sequences with a block delimiter */ 223 const ZSTD_Sequence endBlock = {0, 0, 0, 0}; 224 assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ); 225 generatedSequences[nbSeqGenerated++] = endBlock; 226 } 227 return nbSeqGenerated; 228 } 229 230 static size_t roundTripTest(void* result, size_t resultCapacity, 231 void* compressed, size_t compressedCapacity, 232 const void* src, size_t srcSize, 233 const ZSTD_Sequence* seqs, size_t seqSize, 234 unsigned hasDict, 235 ZSTD_sequenceFormat_e mode) 236 { 237 size_t cSize; 238 size_t dSize; 239 240 if (hasDict) { 241 FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict)); 242 FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict)); 243 } 244 245 cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity, 246 seqs, seqSize, 247 src, srcSize); 248 if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall) 249 && (mode == ZSTD_sf_explicitBlockDelimiters) ) { 250 /* Valid scenario : in explicit delimiter mode, 251 * it might be possible for the compressed size to outgrow dstCapacity. 252 * In which case, it's still a valid fuzzer scenario, 253 * but no roundtrip shall be possible */ 254 return 0; 255 } 256 /* round-trip */ 257 FUZZ_ZASSERT(cSize); 258 dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize); 259 FUZZ_ZASSERT(dSize); 260 FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size"); 261 FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!"); 262 return dSize; 263 } 264 265 int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size) 266 { 267 FUZZ_SEQ_PROD_SETUP(); 268 269 void* rBuf; 270 size_t rBufSize; 271 void* cBuf; 272 size_t cBufSize; 273 size_t generatedSrcSize; 274 size_t nbSequences; 275 size_t dictSize = 0; 276 unsigned hasDict; 277 unsigned wLog; 278 int cLevel; 279 ZSTD_sequenceFormat_e mode; 280 281 FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size); 282 FUZZ_ASSERT(producer); 283 284 if (!cctx) { 285 cctx = ZSTD_createCCtx(); 286 FUZZ_ASSERT(cctx); 287 } 288 if (!dctx) { 289 dctx = ZSTD_createDCtx(); 290 FUZZ_ASSERT(dctx); 291 } 292 293 /* Generate window log first so we don't generate offsets too large */ 294 wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); 295 cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22); 296 mode = (ZSTD_sequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1); 297 298 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); 299 ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0); 300 ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel); 301 ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog); 302 ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN); 303 ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1); 304 ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, mode); 305 ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach); 306 307 if (!literalsBuffer) { 308 literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE); 309 FUZZ_ASSERT(literalsBuffer); 310 literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer); 311 } 312 313 if (!dictBuffer) { /* Generate global dictionary buffer */ 314 ZSTD_compressionParameters cParams; 315 316 /* Generate a large dictionary buffer */ 317 dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1); 318 FUZZ_ASSERT(dictBuffer); 319 320 /* Create global cdict and ddict */ 321 cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE); 322 cParams.minMatch = ZSTD_MINMATCH_MIN; 323 cParams.hashLog = ZSTD_HASHLOG_MIN; 324 cParams.chainLog = ZSTD_CHAINLOG_MIN; 325 326 cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem); 327 ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem); 328 FUZZ_ASSERT(cdict); 329 FUZZ_ASSERT(ddict); 330 } 331 332 FUZZ_ASSERT(cdict); 333 FUZZ_ASSERT(ddict); 334 335 hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1); 336 if (hasDict) { 337 dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE; 338 } 339 340 if (!generatedSequences) { 341 generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ); 342 } 343 if (!generatedSrc) { 344 generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE); 345 } 346 347 nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode); 348 generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode); 349 350 /* Note : in explicit block delimiters mode, 351 * the fuzzer might generate a lot of small blocks. 352 * In which case, the final compressed size might be > ZSTD_compressBound(). 353 * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity. 354 * The test just doesn't roundtrip. */ 355 cBufSize = ZSTD_compressBound(generatedSrcSize); 356 cBuf = FUZZ_malloc(cBufSize); 357 358 rBufSize = generatedSrcSize; 359 rBuf = FUZZ_malloc(rBufSize); 360 361 { const size_t result = roundTripTest(rBuf, rBufSize, 362 cBuf, cBufSize, 363 generatedSrc, generatedSrcSize, 364 generatedSequences, nbSequences, 365 hasDict, mode); 366 FUZZ_ASSERT(result <= generatedSrcSize); /* can be 0 when no round-trip */ 367 } 368 369 free(rBuf); 370 free(cBuf); 371 FUZZ_dataProducer_free(producer); 372 #ifndef STATEFUL_FUZZING 373 ZSTD_freeCCtx(cctx); cctx = NULL; 374 ZSTD_freeDCtx(dctx); dctx = NULL; 375 free(generatedSequences); generatedSequences = NULL; 376 free(generatedSrc); generatedSrc = NULL; 377 free(literalsBuffer); literalsBuffer = NULL; 378 #endif 379 FUZZ_SEQ_PROD_TEARDOWN(); 380 return 0; 381 } 382