10b57cec5SDimitry Andric //===- FuzzerMerge.h - merging corpa ----------------------------*- C++ -* ===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // Merging Corpora. 90b57cec5SDimitry Andric // 100b57cec5SDimitry Andric // The task: 110b57cec5SDimitry Andric // Take the existing corpus (possibly empty) and merge new inputs into 120b57cec5SDimitry Andric // it so that only inputs with new coverage ('features') are added. 130b57cec5SDimitry Andric // The process should tolerate the crashes, OOMs, leaks, etc. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric // Algorithm: 165ffd83dbSDimitry Andric // The outer process collects the set of files and writes their names 170b57cec5SDimitry Andric // into a temporary "control" file, then repeatedly launches the inner 180b57cec5SDimitry Andric // process until all inputs are processed. 190b57cec5SDimitry Andric // The outer process does not actually execute the target code. 200b57cec5SDimitry Andric // 210b57cec5SDimitry Andric // The inner process reads the control file and sees a) list of all the inputs 220b57cec5SDimitry Andric // and b) the last processed input. Then it starts processing the inputs one 230b57cec5SDimitry Andric // by one. Before processing every input it writes one line to control file: 240b57cec5SDimitry Andric // STARTED INPUT_ID INPUT_SIZE 255ffd83dbSDimitry Andric // After processing an input it writes the following lines: 265ffd83dbSDimitry Andric // FT INPUT_ID Feature1 Feature2 Feature3 ... 275ffd83dbSDimitry Andric // COV INPUT_ID Coverage1 Coverage2 Coverage3 ... 280b57cec5SDimitry Andric // If a crash happens while processing an input the last line in the control 290b57cec5SDimitry Andric // file will be "STARTED INPUT_ID" and so the next process will know 300b57cec5SDimitry Andric // where to resume. 310b57cec5SDimitry Andric // 325ffd83dbSDimitry Andric // Once all inputs are processed by the inner process(es) the outer process 330b57cec5SDimitry Andric // reads the control files and does the merge based entirely on the contents 340b57cec5SDimitry Andric // of control file. 350b57cec5SDimitry Andric // It uses a single pass greedy algorithm choosing first the smallest inputs 360b57cec5SDimitry Andric // within the same size the inputs that have more new features. 370b57cec5SDimitry Andric // 380b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric #ifndef LLVM_FUZZER_MERGE_H 410b57cec5SDimitry Andric #define LLVM_FUZZER_MERGE_H 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric #include "FuzzerDefs.h" 44*349cc55cSDimitry Andric #include "FuzzerIO.h" 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric #include <istream> 470b57cec5SDimitry Andric #include <ostream> 480b57cec5SDimitry Andric #include <set> 490b57cec5SDimitry Andric #include <vector> 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric namespace fuzzer { 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric struct MergeFileInfo { 540b57cec5SDimitry Andric std::string Name; 550b57cec5SDimitry Andric size_t Size = 0; 56*349cc55cSDimitry Andric std::vector<uint32_t> Features, Cov; 570b57cec5SDimitry Andric }; 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric struct Merger { 60*349cc55cSDimitry Andric std::vector<MergeFileInfo> Files; 610b57cec5SDimitry Andric size_t NumFilesInFirstCorpus = 0; 620b57cec5SDimitry Andric size_t FirstNotProcessedFile = 0; 630b57cec5SDimitry Andric std::string LastFailure; 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric bool Parse(std::istream &IS, bool ParseCoverage); 660b57cec5SDimitry Andric bool Parse(const std::string &Str, bool ParseCoverage); 670b57cec5SDimitry Andric void ParseOrExit(std::istream &IS, bool ParseCoverage); 68*349cc55cSDimitry Andric size_t Merge(const std::set<uint32_t> &InitialFeatures, 69*349cc55cSDimitry Andric std::set<uint32_t> *NewFeatures, 70*349cc55cSDimitry Andric const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov, 71*349cc55cSDimitry Andric std::vector<std::string> *NewFiles); 72*349cc55cSDimitry Andric size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures, 73*349cc55cSDimitry Andric std::set<uint32_t> *NewFeatures, 74*349cc55cSDimitry Andric const std::set<uint32_t> &InitialCov, 75*349cc55cSDimitry Andric std::set<uint32_t> *NewCov, 76*349cc55cSDimitry Andric std::vector<std::string> *NewFiles); 770b57cec5SDimitry Andric size_t ApproximateMemoryConsumption() const; 78*349cc55cSDimitry Andric std::set<uint32_t> AllFeatures() const; 790b57cec5SDimitry Andric }; 800b57cec5SDimitry Andric 81*349cc55cSDimitry Andric void CrashResistantMerge(const std::vector<std::string> &Args, 82*349cc55cSDimitry Andric const std::vector<SizedFile> &OldCorpus, 83*349cc55cSDimitry Andric const std::vector<SizedFile> &NewCorpus, 84*349cc55cSDimitry Andric std::vector<std::string> *NewFiles, 85*349cc55cSDimitry Andric const std::set<uint32_t> &InitialFeatures, 86*349cc55cSDimitry Andric std::set<uint32_t> *NewFeatures, 87*349cc55cSDimitry Andric const std::set<uint32_t> &InitialCov, 88*349cc55cSDimitry Andric std::set<uint32_t> *NewCov, const std::string &CFPath, 89*349cc55cSDimitry Andric bool Verbose, bool IsSetCoverMerge); 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric } // namespace fuzzer 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric #endif // LLVM_FUZZER_MERGE_H 94