xref: /openbsd-src/gnu/llvm/compiler-rt/lib/fuzzer/FuzzerMerge.h (revision 810390e339a5425391477d5d41c78d7cab2424ac)
13cab2bb3Spatrick //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===//
23cab2bb3Spatrick //
33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information.
53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63cab2bb3Spatrick //
73cab2bb3Spatrick //===----------------------------------------------------------------------===//
83cab2bb3Spatrick // Merging Corpora.
93cab2bb3Spatrick //
103cab2bb3Spatrick // The task:
113cab2bb3Spatrick //   Take the existing corpus (possibly empty) and merge new inputs into
123cab2bb3Spatrick //   it so that only inputs with new coverage ('features') are added.
133cab2bb3Spatrick //   The process should tolerate the crashes, OOMs, leaks, etc.
143cab2bb3Spatrick //
153cab2bb3Spatrick // Algorithm:
161f9cb04fSpatrick //   The outer process collects the set of files and writes their names
173cab2bb3Spatrick //   into a temporary "control" file, then repeatedly launches the inner
183cab2bb3Spatrick //   process until all inputs are processed.
193cab2bb3Spatrick //   The outer process does not actually execute the target code.
203cab2bb3Spatrick //
213cab2bb3Spatrick //   The inner process reads the control file and sees a) list of all the inputs
223cab2bb3Spatrick //   and b) the last processed input. Then it starts processing the inputs one
233cab2bb3Spatrick //   by one. Before processing every input it writes one line to control file:
243cab2bb3Spatrick //   STARTED INPUT_ID INPUT_SIZE
251f9cb04fSpatrick //   After processing an input it writes the following lines:
261f9cb04fSpatrick //   FT INPUT_ID Feature1 Feature2 Feature3 ...
271f9cb04fSpatrick //   COV INPUT_ID Coverage1 Coverage2 Coverage3 ...
283cab2bb3Spatrick //   If a crash happens while processing an input the last line in the control
293cab2bb3Spatrick //   file will be "STARTED INPUT_ID" and so the next process will know
303cab2bb3Spatrick //   where to resume.
313cab2bb3Spatrick //
321f9cb04fSpatrick //   Once all inputs are processed by the inner process(es) the outer process
333cab2bb3Spatrick //   reads the control files and does the merge based entirely on the contents
343cab2bb3Spatrick //   of control file.
353cab2bb3Spatrick //   It uses a single pass greedy algorithm choosing first the smallest inputs
363cab2bb3Spatrick //   within the same size the inputs that have more new features.
373cab2bb3Spatrick //
383cab2bb3Spatrick //===----------------------------------------------------------------------===//
393cab2bb3Spatrick 
403cab2bb3Spatrick #ifndef LLVM_FUZZER_MERGE_H
413cab2bb3Spatrick #define LLVM_FUZZER_MERGE_H
423cab2bb3Spatrick 
433cab2bb3Spatrick #include "FuzzerDefs.h"
44*810390e3Srobert #include "FuzzerIO.h"
453cab2bb3Spatrick 
463cab2bb3Spatrick #include <istream>
473cab2bb3Spatrick #include <ostream>
483cab2bb3Spatrick #include <set>
493cab2bb3Spatrick #include <vector>
503cab2bb3Spatrick 
513cab2bb3Spatrick namespace fuzzer {
523cab2bb3Spatrick 
533cab2bb3Spatrick struct MergeFileInfo {
543cab2bb3Spatrick   std::string Name;
553cab2bb3Spatrick   size_t Size = 0;
56*810390e3Srobert   std::vector<uint32_t> Features, Cov;
573cab2bb3Spatrick };
583cab2bb3Spatrick 
593cab2bb3Spatrick struct Merger {
60*810390e3Srobert   std::vector<MergeFileInfo> Files;
613cab2bb3Spatrick   size_t NumFilesInFirstCorpus = 0;
623cab2bb3Spatrick   size_t FirstNotProcessedFile = 0;
633cab2bb3Spatrick   std::string LastFailure;
643cab2bb3Spatrick 
653cab2bb3Spatrick   bool Parse(std::istream &IS, bool ParseCoverage);
663cab2bb3Spatrick   bool Parse(const std::string &Str, bool ParseCoverage);
673cab2bb3Spatrick   void ParseOrExit(std::istream &IS, bool ParseCoverage);
68*810390e3Srobert   size_t Merge(const std::set<uint32_t> &InitialFeatures,
69*810390e3Srobert                std::set<uint32_t> *NewFeatures,
70*810390e3Srobert                const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov,
71*810390e3Srobert                std::vector<std::string> *NewFiles);
72*810390e3Srobert   size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
73*810390e3Srobert                        std::set<uint32_t> *NewFeatures,
74*810390e3Srobert                        const std::set<uint32_t> &InitialCov,
75*810390e3Srobert                        std::set<uint32_t> *NewCov,
76*810390e3Srobert                        std::vector<std::string> *NewFiles);
773cab2bb3Spatrick   size_t ApproximateMemoryConsumption() const;
78*810390e3Srobert   std::set<uint32_t> AllFeatures() const;
793cab2bb3Spatrick };
803cab2bb3Spatrick 
81*810390e3Srobert void CrashResistantMerge(const std::vector<std::string> &Args,
82*810390e3Srobert                          const std::vector<SizedFile> &OldCorpus,
83*810390e3Srobert                          const std::vector<SizedFile> &NewCorpus,
84*810390e3Srobert                          std::vector<std::string> *NewFiles,
85*810390e3Srobert                          const std::set<uint32_t> &InitialFeatures,
86*810390e3Srobert                          std::set<uint32_t> *NewFeatures,
87*810390e3Srobert                          const std::set<uint32_t> &InitialCov,
88*810390e3Srobert                          std::set<uint32_t> *NewCov, const std::string &CFPath,
89*810390e3Srobert                          bool Verbose, bool IsSetCoverMerge);
903cab2bb3Spatrick 
913cab2bb3Spatrick }  // namespace fuzzer
923cab2bb3Spatrick 
933cab2bb3Spatrick #endif  // LLVM_FUZZER_MERGE_H
94