xref: /llvm-project/compiler-rt/lib/fuzzer/FuzzerMerge.h (revision e6597dbae84034ce8ef97802584039b723adb526)
110ab2aceSGeorge Karpenkov //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===//
210ab2aceSGeorge Karpenkov //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
610ab2aceSGeorge Karpenkov //
710ab2aceSGeorge Karpenkov //===----------------------------------------------------------------------===//
810ab2aceSGeorge Karpenkov // Merging Corpora.
910ab2aceSGeorge Karpenkov //
1010ab2aceSGeorge Karpenkov // The task:
1110ab2aceSGeorge Karpenkov //   Take the existing corpus (possibly empty) and merge new inputs into
1210ab2aceSGeorge Karpenkov //   it so that only inputs with new coverage ('features') are added.
1310ab2aceSGeorge Karpenkov //   The process should tolerate the crashes, OOMs, leaks, etc.
1410ab2aceSGeorge Karpenkov //
1510ab2aceSGeorge Karpenkov // Algorithm:
164034d0ceSAdrian Herrera //   The outer process collects the set of files and writes their names
1710ab2aceSGeorge Karpenkov //   into a temporary "control" file, then repeatedly launches the inner
1810ab2aceSGeorge Karpenkov //   process until all inputs are processed.
1910ab2aceSGeorge Karpenkov //   The outer process does not actually execute the target code.
2010ab2aceSGeorge Karpenkov //
2110ab2aceSGeorge Karpenkov //   The inner process reads the control file and sees a) list of all the inputs
2210ab2aceSGeorge Karpenkov //   and b) the last processed input. Then it starts processing the inputs one
2310ab2aceSGeorge Karpenkov //   by one. Before processing every input it writes one line to control file:
2410ab2aceSGeorge Karpenkov //   STARTED INPUT_ID INPUT_SIZE
254034d0ceSAdrian Herrera //   After processing an input it writes the following lines:
264034d0ceSAdrian Herrera //   FT INPUT_ID Feature1 Feature2 Feature3 ...
274034d0ceSAdrian Herrera //   COV INPUT_ID Coverage1 Coverage2 Coverage3 ...
2810ab2aceSGeorge Karpenkov //   If a crash happens while processing an input the last line in the control
2910ab2aceSGeorge Karpenkov //   file will be "STARTED INPUT_ID" and so the next process will know
3010ab2aceSGeorge Karpenkov //   where to resume.
3110ab2aceSGeorge Karpenkov //
324034d0ceSAdrian Herrera //   Once all inputs are processed by the inner process(es) the outer process
3310ab2aceSGeorge Karpenkov //   reads the control files and does the merge based entirely on the contents
3410ab2aceSGeorge Karpenkov //   of control file.
3510ab2aceSGeorge Karpenkov //   It uses a single pass greedy algorithm choosing first the smallest inputs
3610ab2aceSGeorge Karpenkov //   within the same size the inputs that have more new features.
3710ab2aceSGeorge Karpenkov //
3810ab2aceSGeorge Karpenkov //===----------------------------------------------------------------------===//
3910ab2aceSGeorge Karpenkov 
4010ab2aceSGeorge Karpenkov #ifndef LLVM_FUZZER_MERGE_H
4110ab2aceSGeorge Karpenkov #define LLVM_FUZZER_MERGE_H
4210ab2aceSGeorge Karpenkov 
4310ab2aceSGeorge Karpenkov #include "FuzzerDefs.h"
44ff163ef1SKostya Serebryany #include "FuzzerIO.h"
4510ab2aceSGeorge Karpenkov 
4610ab2aceSGeorge Karpenkov #include <istream>
4710ab2aceSGeorge Karpenkov #include <ostream>
4810ab2aceSGeorge Karpenkov #include <set>
4910ab2aceSGeorge Karpenkov #include <vector>
5010ab2aceSGeorge Karpenkov 
5110ab2aceSGeorge Karpenkov namespace fuzzer {
5210ab2aceSGeorge Karpenkov 
5310ab2aceSGeorge Karpenkov struct MergeFileInfo {
5410ab2aceSGeorge Karpenkov   std::string Name;
5510ab2aceSGeorge Karpenkov   size_t Size = 0;
567c921753SKostya Serebryany   std::vector<uint32_t> Features, Cov;
5710ab2aceSGeorge Karpenkov };
5810ab2aceSGeorge Karpenkov 
5910ab2aceSGeorge Karpenkov struct Merger {
607c921753SKostya Serebryany   std::vector<MergeFileInfo> Files;
6110ab2aceSGeorge Karpenkov   size_t NumFilesInFirstCorpus = 0;
6210ab2aceSGeorge Karpenkov   size_t FirstNotProcessedFile = 0;
6310ab2aceSGeorge Karpenkov   std::string LastFailure;
6410ab2aceSGeorge Karpenkov 
6510ab2aceSGeorge Karpenkov   bool Parse(std::istream &IS, bool ParseCoverage);
6610ab2aceSGeorge Karpenkov   bool Parse(const std::string &Str, bool ParseCoverage);
6710ab2aceSGeorge Karpenkov   void ParseOrExit(std::istream &IS, bool ParseCoverage);
687c921753SKostya Serebryany   size_t Merge(const std::set<uint32_t> &InitialFeatures,
697c921753SKostya Serebryany                std::set<uint32_t> *NewFeatures,
707c921753SKostya Serebryany                const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov,
717c921753SKostya Serebryany                std::vector<std::string> *NewFiles);
72*e6597dbaSaristotelis   size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
73*e6597dbaSaristotelis                        std::set<uint32_t> *NewFeatures,
74*e6597dbaSaristotelis                        const std::set<uint32_t> &InitialCov,
75*e6597dbaSaristotelis                        std::set<uint32_t> *NewCov,
76*e6597dbaSaristotelis                        std::vector<std::string> *NewFiles);
7710ab2aceSGeorge Karpenkov   size_t ApproximateMemoryConsumption() const;
787c921753SKostya Serebryany   std::set<uint32_t> AllFeatures() const;
7910ab2aceSGeorge Karpenkov };
8010ab2aceSGeorge Karpenkov 
817c921753SKostya Serebryany void CrashResistantMerge(const std::vector<std::string> &Args,
827c921753SKostya Serebryany                          const std::vector<SizedFile> &OldCorpus,
837c921753SKostya Serebryany                          const std::vector<SizedFile> &NewCorpus,
847c921753SKostya Serebryany                          std::vector<std::string> *NewFiles,
857c921753SKostya Serebryany                          const std::set<uint32_t> &InitialFeatures,
867c921753SKostya Serebryany                          std::set<uint32_t> *NewFeatures,
877c921753SKostya Serebryany                          const std::set<uint32_t> &InitialCov,
887c921753SKostya Serebryany                          std::set<uint32_t> *NewCov, const std::string &CFPath,
89*e6597dbaSaristotelis                          bool Verbose, bool IsSetCoverMerge);
90f762a115SKostya Serebryany 
9110ab2aceSGeorge Karpenkov }  // namespace fuzzer
9210ab2aceSGeorge Karpenkov 
9310ab2aceSGeorge Karpenkov #endif  // LLVM_FUZZER_MERGE_H
94