13cab2bb3Spatrick //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===// 23cab2bb3Spatrick // 33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information. 53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 63cab2bb3Spatrick // 73cab2bb3Spatrick //===----------------------------------------------------------------------===// 83cab2bb3Spatrick // Merging Corpora. 93cab2bb3Spatrick // 103cab2bb3Spatrick // The task: 113cab2bb3Spatrick // Take the existing corpus (possibly empty) and merge new inputs into 123cab2bb3Spatrick // it so that only inputs with new coverage ('features') are added. 133cab2bb3Spatrick // The process should tolerate the crashes, OOMs, leaks, etc. 143cab2bb3Spatrick // 153cab2bb3Spatrick // Algorithm: 161f9cb04fSpatrick // The outer process collects the set of files and writes their names 173cab2bb3Spatrick // into a temporary "control" file, then repeatedly launches the inner 183cab2bb3Spatrick // process until all inputs are processed. 193cab2bb3Spatrick // The outer process does not actually execute the target code. 203cab2bb3Spatrick // 213cab2bb3Spatrick // The inner process reads the control file and sees a) list of all the inputs 223cab2bb3Spatrick // and b) the last processed input. Then it starts processing the inputs one 233cab2bb3Spatrick // by one. Before processing every input it writes one line to control file: 243cab2bb3Spatrick // STARTED INPUT_ID INPUT_SIZE 251f9cb04fSpatrick // After processing an input it writes the following lines: 261f9cb04fSpatrick // FT INPUT_ID Feature1 Feature2 Feature3 ... 271f9cb04fSpatrick // COV INPUT_ID Coverage1 Coverage2 Coverage3 ... 283cab2bb3Spatrick // If a crash happens while processing an input the last line in the control 293cab2bb3Spatrick // file will be "STARTED INPUT_ID" and so the next process will know 303cab2bb3Spatrick // where to resume. 313cab2bb3Spatrick // 321f9cb04fSpatrick // Once all inputs are processed by the inner process(es) the outer process 333cab2bb3Spatrick // reads the control files and does the merge based entirely on the contents 343cab2bb3Spatrick // of control file. 353cab2bb3Spatrick // It uses a single pass greedy algorithm choosing first the smallest inputs 363cab2bb3Spatrick // within the same size the inputs that have more new features. 373cab2bb3Spatrick // 383cab2bb3Spatrick //===----------------------------------------------------------------------===// 393cab2bb3Spatrick 403cab2bb3Spatrick #ifndef LLVM_FUZZER_MERGE_H 413cab2bb3Spatrick #define LLVM_FUZZER_MERGE_H 423cab2bb3Spatrick 433cab2bb3Spatrick #include "FuzzerDefs.h" 44*810390e3Srobert #include "FuzzerIO.h" 453cab2bb3Spatrick 463cab2bb3Spatrick #include <istream> 473cab2bb3Spatrick #include <ostream> 483cab2bb3Spatrick #include <set> 493cab2bb3Spatrick #include <vector> 503cab2bb3Spatrick 513cab2bb3Spatrick namespace fuzzer { 523cab2bb3Spatrick 533cab2bb3Spatrick struct MergeFileInfo { 543cab2bb3Spatrick std::string Name; 553cab2bb3Spatrick size_t Size = 0; 56*810390e3Srobert std::vector<uint32_t> Features, Cov; 573cab2bb3Spatrick }; 583cab2bb3Spatrick 593cab2bb3Spatrick struct Merger { 60*810390e3Srobert std::vector<MergeFileInfo> Files; 613cab2bb3Spatrick size_t NumFilesInFirstCorpus = 0; 623cab2bb3Spatrick size_t FirstNotProcessedFile = 0; 633cab2bb3Spatrick std::string LastFailure; 643cab2bb3Spatrick 653cab2bb3Spatrick bool Parse(std::istream &IS, bool ParseCoverage); 663cab2bb3Spatrick bool Parse(const std::string &Str, bool ParseCoverage); 673cab2bb3Spatrick void ParseOrExit(std::istream &IS, bool ParseCoverage); 68*810390e3Srobert size_t Merge(const std::set<uint32_t> &InitialFeatures, 69*810390e3Srobert std::set<uint32_t> *NewFeatures, 70*810390e3Srobert const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov, 71*810390e3Srobert std::vector<std::string> *NewFiles); 72*810390e3Srobert size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures, 73*810390e3Srobert std::set<uint32_t> *NewFeatures, 74*810390e3Srobert const std::set<uint32_t> &InitialCov, 75*810390e3Srobert std::set<uint32_t> *NewCov, 76*810390e3Srobert std::vector<std::string> *NewFiles); 773cab2bb3Spatrick size_t ApproximateMemoryConsumption() const; 78*810390e3Srobert std::set<uint32_t> AllFeatures() const; 793cab2bb3Spatrick }; 803cab2bb3Spatrick 81*810390e3Srobert void CrashResistantMerge(const std::vector<std::string> &Args, 82*810390e3Srobert const std::vector<SizedFile> &OldCorpus, 83*810390e3Srobert const std::vector<SizedFile> &NewCorpus, 84*810390e3Srobert std::vector<std::string> *NewFiles, 85*810390e3Srobert const std::set<uint32_t> &InitialFeatures, 86*810390e3Srobert std::set<uint32_t> *NewFeatures, 87*810390e3Srobert const std::set<uint32_t> &InitialCov, 88*810390e3Srobert std::set<uint32_t> *NewCov, const std::string &CFPath, 89*810390e3Srobert bool Verbose, bool IsSetCoverMerge); 903cab2bb3Spatrick 913cab2bb3Spatrick } // namespace fuzzer 923cab2bb3Spatrick 933cab2bb3Spatrick #endif // LLVM_FUZZER_MERGE_H 94