110ab2aceSGeorge Karpenkov //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===// 210ab2aceSGeorge Karpenkov // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 610ab2aceSGeorge Karpenkov // 710ab2aceSGeorge Karpenkov //===----------------------------------------------------------------------===// 810ab2aceSGeorge Karpenkov // Merging Corpora. 910ab2aceSGeorge Karpenkov // 1010ab2aceSGeorge Karpenkov // The task: 1110ab2aceSGeorge Karpenkov // Take the existing corpus (possibly empty) and merge new inputs into 1210ab2aceSGeorge Karpenkov // it so that only inputs with new coverage ('features') are added. 1310ab2aceSGeorge Karpenkov // The process should tolerate the crashes, OOMs, leaks, etc. 1410ab2aceSGeorge Karpenkov // 1510ab2aceSGeorge Karpenkov // Algorithm: 164034d0ceSAdrian Herrera // The outer process collects the set of files and writes their names 1710ab2aceSGeorge Karpenkov // into a temporary "control" file, then repeatedly launches the inner 1810ab2aceSGeorge Karpenkov // process until all inputs are processed. 1910ab2aceSGeorge Karpenkov // The outer process does not actually execute the target code. 2010ab2aceSGeorge Karpenkov // 2110ab2aceSGeorge Karpenkov // The inner process reads the control file and sees a) list of all the inputs 2210ab2aceSGeorge Karpenkov // and b) the last processed input. Then it starts processing the inputs one 2310ab2aceSGeorge Karpenkov // by one. Before processing every input it writes one line to control file: 2410ab2aceSGeorge Karpenkov // STARTED INPUT_ID INPUT_SIZE 254034d0ceSAdrian Herrera // After processing an input it writes the following lines: 264034d0ceSAdrian Herrera // FT INPUT_ID Feature1 Feature2 Feature3 ... 274034d0ceSAdrian Herrera // COV INPUT_ID Coverage1 Coverage2 Coverage3 ... 2810ab2aceSGeorge Karpenkov // If a crash happens while processing an input the last line in the control 2910ab2aceSGeorge Karpenkov // file will be "STARTED INPUT_ID" and so the next process will know 3010ab2aceSGeorge Karpenkov // where to resume. 3110ab2aceSGeorge Karpenkov // 324034d0ceSAdrian Herrera // Once all inputs are processed by the inner process(es) the outer process 3310ab2aceSGeorge Karpenkov // reads the control files and does the merge based entirely on the contents 3410ab2aceSGeorge Karpenkov // of control file. 3510ab2aceSGeorge Karpenkov // It uses a single pass greedy algorithm choosing first the smallest inputs 3610ab2aceSGeorge Karpenkov // within the same size the inputs that have more new features. 3710ab2aceSGeorge Karpenkov // 3810ab2aceSGeorge Karpenkov //===----------------------------------------------------------------------===// 3910ab2aceSGeorge Karpenkov 4010ab2aceSGeorge Karpenkov #ifndef LLVM_FUZZER_MERGE_H 4110ab2aceSGeorge Karpenkov #define LLVM_FUZZER_MERGE_H 4210ab2aceSGeorge Karpenkov 4310ab2aceSGeorge Karpenkov #include "FuzzerDefs.h" 44ff163ef1SKostya Serebryany #include "FuzzerIO.h" 4510ab2aceSGeorge Karpenkov 4610ab2aceSGeorge Karpenkov #include <istream> 4710ab2aceSGeorge Karpenkov #include <ostream> 4810ab2aceSGeorge Karpenkov #include <set> 4910ab2aceSGeorge Karpenkov #include <vector> 5010ab2aceSGeorge Karpenkov 5110ab2aceSGeorge Karpenkov namespace fuzzer { 5210ab2aceSGeorge Karpenkov 5310ab2aceSGeorge Karpenkov struct MergeFileInfo { 5410ab2aceSGeorge Karpenkov std::string Name; 5510ab2aceSGeorge Karpenkov size_t Size = 0; 567c921753SKostya Serebryany std::vector<uint32_t> Features, Cov; 5710ab2aceSGeorge Karpenkov }; 5810ab2aceSGeorge Karpenkov 5910ab2aceSGeorge Karpenkov struct Merger { 607c921753SKostya Serebryany std::vector<MergeFileInfo> Files; 6110ab2aceSGeorge Karpenkov size_t NumFilesInFirstCorpus = 0; 6210ab2aceSGeorge Karpenkov size_t FirstNotProcessedFile = 0; 6310ab2aceSGeorge Karpenkov std::string LastFailure; 6410ab2aceSGeorge Karpenkov 6510ab2aceSGeorge Karpenkov bool Parse(std::istream &IS, bool ParseCoverage); 6610ab2aceSGeorge Karpenkov bool Parse(const std::string &Str, bool ParseCoverage); 6710ab2aceSGeorge Karpenkov void ParseOrExit(std::istream &IS, bool ParseCoverage); 687c921753SKostya Serebryany size_t Merge(const std::set<uint32_t> &InitialFeatures, 697c921753SKostya Serebryany std::set<uint32_t> *NewFeatures, 707c921753SKostya Serebryany const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov, 717c921753SKostya Serebryany std::vector<std::string> *NewFiles); 72*e6597dbaSaristotelis size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures, 73*e6597dbaSaristotelis std::set<uint32_t> *NewFeatures, 74*e6597dbaSaristotelis const std::set<uint32_t> &InitialCov, 75*e6597dbaSaristotelis std::set<uint32_t> *NewCov, 76*e6597dbaSaristotelis std::vector<std::string> *NewFiles); 7710ab2aceSGeorge Karpenkov size_t ApproximateMemoryConsumption() const; 787c921753SKostya Serebryany std::set<uint32_t> AllFeatures() const; 7910ab2aceSGeorge Karpenkov }; 8010ab2aceSGeorge Karpenkov 817c921753SKostya Serebryany void CrashResistantMerge(const std::vector<std::string> &Args, 827c921753SKostya Serebryany const std::vector<SizedFile> &OldCorpus, 837c921753SKostya Serebryany const std::vector<SizedFile> &NewCorpus, 847c921753SKostya Serebryany std::vector<std::string> *NewFiles, 857c921753SKostya Serebryany const std::set<uint32_t> &InitialFeatures, 867c921753SKostya Serebryany std::set<uint32_t> *NewFeatures, 877c921753SKostya Serebryany const std::set<uint32_t> &InitialCov, 887c921753SKostya Serebryany std::set<uint32_t> *NewCov, const std::string &CFPath, 89*e6597dbaSaristotelis bool Verbose, bool IsSetCoverMerge); 90f762a115SKostya Serebryany 9110ab2aceSGeorge Karpenkov } // namespace fuzzer 9210ab2aceSGeorge Karpenkov 9310ab2aceSGeorge Karpenkov #endif // LLVM_FUZZER_MERGE_H 94