xref: /freebsd-src/contrib/llvm-project/compiler-rt/lib/fuzzer/FuzzerMerge.h (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
10b57cec5SDimitry Andric //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric // Merging Corpora.
90b57cec5SDimitry Andric //
100b57cec5SDimitry Andric // The task:
110b57cec5SDimitry Andric //   Take the existing corpus (possibly empty) and merge new inputs into
120b57cec5SDimitry Andric //   it so that only inputs with new coverage ('features') are added.
130b57cec5SDimitry Andric //   The process should tolerate the crashes, OOMs, leaks, etc.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric // Algorithm:
165ffd83dbSDimitry Andric //   The outer process collects the set of files and writes their names
170b57cec5SDimitry Andric //   into a temporary "control" file, then repeatedly launches the inner
180b57cec5SDimitry Andric //   process until all inputs are processed.
190b57cec5SDimitry Andric //   The outer process does not actually execute the target code.
200b57cec5SDimitry Andric //
210b57cec5SDimitry Andric //   The inner process reads the control file and sees a) list of all the inputs
220b57cec5SDimitry Andric //   and b) the last processed input. Then it starts processing the inputs one
230b57cec5SDimitry Andric //   by one. Before processing every input it writes one line to control file:
240b57cec5SDimitry Andric //   STARTED INPUT_ID INPUT_SIZE
255ffd83dbSDimitry Andric //   After processing an input it writes the following lines:
265ffd83dbSDimitry Andric //   FT INPUT_ID Feature1 Feature2 Feature3 ...
275ffd83dbSDimitry Andric //   COV INPUT_ID Coverage1 Coverage2 Coverage3 ...
280b57cec5SDimitry Andric //   If a crash happens while processing an input the last line in the control
290b57cec5SDimitry Andric //   file will be "STARTED INPUT_ID" and so the next process will know
300b57cec5SDimitry Andric //   where to resume.
310b57cec5SDimitry Andric //
325ffd83dbSDimitry Andric //   Once all inputs are processed by the inner process(es) the outer process
330b57cec5SDimitry Andric //   reads the control files and does the merge based entirely on the contents
340b57cec5SDimitry Andric //   of control file.
350b57cec5SDimitry Andric //   It uses a single pass greedy algorithm choosing first the smallest inputs
360b57cec5SDimitry Andric //   within the same size the inputs that have more new features.
370b57cec5SDimitry Andric //
380b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric #ifndef LLVM_FUZZER_MERGE_H
410b57cec5SDimitry Andric #define LLVM_FUZZER_MERGE_H
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric #include "FuzzerDefs.h"
44*349cc55cSDimitry Andric #include "FuzzerIO.h"
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric #include <istream>
470b57cec5SDimitry Andric #include <ostream>
480b57cec5SDimitry Andric #include <set>
490b57cec5SDimitry Andric #include <vector>
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric namespace fuzzer {
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric struct MergeFileInfo {
540b57cec5SDimitry Andric   std::string Name;
550b57cec5SDimitry Andric   size_t Size = 0;
56*349cc55cSDimitry Andric   std::vector<uint32_t> Features, Cov;
570b57cec5SDimitry Andric };
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric struct Merger {
60*349cc55cSDimitry Andric   std::vector<MergeFileInfo> Files;
610b57cec5SDimitry Andric   size_t NumFilesInFirstCorpus = 0;
620b57cec5SDimitry Andric   size_t FirstNotProcessedFile = 0;
630b57cec5SDimitry Andric   std::string LastFailure;
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   bool Parse(std::istream &IS, bool ParseCoverage);
660b57cec5SDimitry Andric   bool Parse(const std::string &Str, bool ParseCoverage);
670b57cec5SDimitry Andric   void ParseOrExit(std::istream &IS, bool ParseCoverage);
68*349cc55cSDimitry Andric   size_t Merge(const std::set<uint32_t> &InitialFeatures,
69*349cc55cSDimitry Andric                std::set<uint32_t> *NewFeatures,
70*349cc55cSDimitry Andric                const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov,
71*349cc55cSDimitry Andric                std::vector<std::string> *NewFiles);
72*349cc55cSDimitry Andric   size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
73*349cc55cSDimitry Andric                        std::set<uint32_t> *NewFeatures,
74*349cc55cSDimitry Andric                        const std::set<uint32_t> &InitialCov,
75*349cc55cSDimitry Andric                        std::set<uint32_t> *NewCov,
76*349cc55cSDimitry Andric                        std::vector<std::string> *NewFiles);
770b57cec5SDimitry Andric   size_t ApproximateMemoryConsumption() const;
78*349cc55cSDimitry Andric   std::set<uint32_t> AllFeatures() const;
790b57cec5SDimitry Andric };
800b57cec5SDimitry Andric 
81*349cc55cSDimitry Andric void CrashResistantMerge(const std::vector<std::string> &Args,
82*349cc55cSDimitry Andric                          const std::vector<SizedFile> &OldCorpus,
83*349cc55cSDimitry Andric                          const std::vector<SizedFile> &NewCorpus,
84*349cc55cSDimitry Andric                          std::vector<std::string> *NewFiles,
85*349cc55cSDimitry Andric                          const std::set<uint32_t> &InitialFeatures,
86*349cc55cSDimitry Andric                          std::set<uint32_t> *NewFeatures,
87*349cc55cSDimitry Andric                          const std::set<uint32_t> &InitialCov,
88*349cc55cSDimitry Andric                          std::set<uint32_t> *NewCov, const std::string &CFPath,
89*349cc55cSDimitry Andric                          bool Verbose, bool IsSetCoverMerge);
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric }  // namespace fuzzer
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric #endif  // LLVM_FUZZER_MERGE_H
94