10b57cec5SDimitry Andric //===- FuzzerDataFlowTrace.h - Internal header for the Fuzzer ---*- C++ -* ===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // fuzzer::DataFlowTrace; reads and handles a data-flow trace. 90b57cec5SDimitry Andric // 100b57cec5SDimitry Andric // A data flow trace is generated by e.g. dataflow/DataFlow.cpp 110b57cec5SDimitry Andric // and is stored on disk in a separate directory. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // The trace dir contains a file 'functions.txt' which lists function names, 140b57cec5SDimitry Andric // oner per line, e.g. 150b57cec5SDimitry Andric // ==> functions.txt <== 160b57cec5SDimitry Andric // Func2 170b57cec5SDimitry Andric // LLVMFuzzerTestOneInput 180b57cec5SDimitry Andric // Func1 190b57cec5SDimitry Andric // 200b57cec5SDimitry Andric // All other files in the dir are the traces, see dataflow/DataFlow.cpp. 210b57cec5SDimitry Andric // The name of the file is sha1 of the input used to generate the trace. 220b57cec5SDimitry Andric // 230b57cec5SDimitry Andric // Current status: 240b57cec5SDimitry Andric // the data is parsed and the summary is printed, but the data is not yet 250b57cec5SDimitry Andric // used in any other way. 260b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric #ifndef LLVM_FUZZER_DATA_FLOW_TRACE 290b57cec5SDimitry Andric #define LLVM_FUZZER_DATA_FLOW_TRACE 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric #include "FuzzerDefs.h" 320b57cec5SDimitry Andric #include "FuzzerIO.h" 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric #include <unordered_map> 350b57cec5SDimitry Andric #include <unordered_set> 360b57cec5SDimitry Andric #include <vector> 370b57cec5SDimitry Andric #include <string> 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric namespace fuzzer { 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath, 42*349cc55cSDimitry Andric const std::vector<SizedFile> &CorporaFiles); 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric class BlockCoverage { 450b57cec5SDimitry Andric public: 46fe6060f1SDimitry Andric // These functions guarantee no CoverageVector is longer than UINT32_MAX. 470b57cec5SDimitry Andric bool AppendCoverage(std::istream &IN); 480b57cec5SDimitry Andric bool AppendCoverage(const std::string &S); 490b57cec5SDimitry Andric NumCoveredFunctions()500b57cec5SDimitry Andric size_t NumCoveredFunctions() const { return Functions.size(); } 510b57cec5SDimitry Andric GetCounter(size_t FunctionId,size_t BasicBlockId)520b57cec5SDimitry Andric uint32_t GetCounter(size_t FunctionId, size_t BasicBlockId) { 530b57cec5SDimitry Andric auto It = Functions.find(FunctionId); 54fe6060f1SDimitry Andric if (It == Functions.end()) 55fe6060f1SDimitry Andric return 0; 560b57cec5SDimitry Andric const auto &Counters = It->second; 570b57cec5SDimitry Andric if (BasicBlockId < Counters.size()) 580b57cec5SDimitry Andric return Counters[BasicBlockId]; 590b57cec5SDimitry Andric return 0; 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric GetNumberOfBlocks(size_t FunctionId)620b57cec5SDimitry Andric uint32_t GetNumberOfBlocks(size_t FunctionId) { 630b57cec5SDimitry Andric auto It = Functions.find(FunctionId); 640b57cec5SDimitry Andric if (It == Functions.end()) return 0; 650b57cec5SDimitry Andric const auto &Counters = It->second; 66fe6060f1SDimitry Andric return static_cast<uint32_t>(Counters.size()); 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric GetNumberOfCoveredBlocks(size_t FunctionId)690b57cec5SDimitry Andric uint32_t GetNumberOfCoveredBlocks(size_t FunctionId) { 700b57cec5SDimitry Andric auto It = Functions.find(FunctionId); 710b57cec5SDimitry Andric if (It == Functions.end()) return 0; 720b57cec5SDimitry Andric const auto &Counters = It->second; 730b57cec5SDimitry Andric uint32_t Result = 0; 740b57cec5SDimitry Andric for (auto Cnt: Counters) 750b57cec5SDimitry Andric if (Cnt) 760b57cec5SDimitry Andric Result++; 770b57cec5SDimitry Andric return Result; 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric 80*349cc55cSDimitry Andric std::vector<double> FunctionWeights(size_t NumFunctions) const; clear()810b57cec5SDimitry Andric void clear() { Functions.clear(); } 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric private: 84*349cc55cSDimitry Andric typedef std::vector<uint32_t> CoverageVector; 850b57cec5SDimitry Andric NumberOfCoveredBlocks(const CoverageVector & Counters)860b57cec5SDimitry Andric uint32_t NumberOfCoveredBlocks(const CoverageVector &Counters) const { 870b57cec5SDimitry Andric uint32_t Res = 0; 880b57cec5SDimitry Andric for (auto Cnt : Counters) 890b57cec5SDimitry Andric if (Cnt) 900b57cec5SDimitry Andric Res++; 910b57cec5SDimitry Andric return Res; 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric NumberOfUncoveredBlocks(const CoverageVector & Counters)940b57cec5SDimitry Andric uint32_t NumberOfUncoveredBlocks(const CoverageVector &Counters) const { 95fe6060f1SDimitry Andric return static_cast<uint32_t>(Counters.size()) - 96fe6060f1SDimitry Andric NumberOfCoveredBlocks(Counters); 970b57cec5SDimitry Andric } 980b57cec5SDimitry Andric SmallestNonZeroCounter(const CoverageVector & Counters)990b57cec5SDimitry Andric uint32_t SmallestNonZeroCounter(const CoverageVector &Counters) const { 1000b57cec5SDimitry Andric assert(!Counters.empty()); 1010b57cec5SDimitry Andric uint32_t Res = Counters[0]; 1020b57cec5SDimitry Andric for (auto Cnt : Counters) 1030b57cec5SDimitry Andric if (Cnt) 1040b57cec5SDimitry Andric Res = Min(Res, Cnt); 1050b57cec5SDimitry Andric assert(Res); 1060b57cec5SDimitry Andric return Res; 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric // Function ID => vector of counters. 1100b57cec5SDimitry Andric // Each counter represents how many input files trigger the given basic block. 1110b57cec5SDimitry Andric std::unordered_map<size_t, CoverageVector> Functions; 1120b57cec5SDimitry Andric // Functions that have DFT entry. 1130b57cec5SDimitry Andric std::unordered_set<size_t> FunctionsWithDFT; 1140b57cec5SDimitry Andric }; 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric class DataFlowTrace { 1170b57cec5SDimitry Andric public: 1180b57cec5SDimitry Andric void ReadCoverage(const std::string &DirPath); 1190b57cec5SDimitry Andric bool Init(const std::string &DirPath, std::string *FocusFunction, 120*349cc55cSDimitry Andric std::vector<SizedFile> &CorporaFiles, Random &Rand); Clear()1210b57cec5SDimitry Andric void Clear() { Traces.clear(); } Get(const std::string & InputSha1)122*349cc55cSDimitry Andric const std::vector<uint8_t> *Get(const std::string &InputSha1) const { 1230b57cec5SDimitry Andric auto It = Traces.find(InputSha1); 1240b57cec5SDimitry Andric if (It != Traces.end()) 1250b57cec5SDimitry Andric return &It->second; 1260b57cec5SDimitry Andric return nullptr; 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric private: 1300b57cec5SDimitry Andric // Input's sha1 => DFT for the FocusFunction. 131*349cc55cSDimitry Andric std::unordered_map<std::string, std::vector<uint8_t>> Traces; 1320b57cec5SDimitry Andric BlockCoverage Coverage; 1330b57cec5SDimitry Andric std::unordered_set<std::string> CorporaHashes; 1340b57cec5SDimitry Andric }; 1350b57cec5SDimitry Andric } // namespace fuzzer 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric #endif // LLVM_FUZZER_DATA_FLOW_TRACE 138