xref: /openbsd-src/gnu/llvm/compiler-rt/lib/fuzzer/FuzzerCorpus.h (revision 810390e339a5425391477d5d41c78d7cab2424ac)
13cab2bb3Spatrick //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===//
23cab2bb3Spatrick //
33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information.
53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63cab2bb3Spatrick //
73cab2bb3Spatrick //===----------------------------------------------------------------------===//
83cab2bb3Spatrick // fuzzer::InputCorpus
93cab2bb3Spatrick //===----------------------------------------------------------------------===//
103cab2bb3Spatrick 
113cab2bb3Spatrick #ifndef LLVM_FUZZER_CORPUS
123cab2bb3Spatrick #define LLVM_FUZZER_CORPUS
133cab2bb3Spatrick 
143cab2bb3Spatrick #include "FuzzerDataFlowTrace.h"
153cab2bb3Spatrick #include "FuzzerDefs.h"
163cab2bb3Spatrick #include "FuzzerIO.h"
173cab2bb3Spatrick #include "FuzzerRandom.h"
183cab2bb3Spatrick #include "FuzzerSHA1.h"
193cab2bb3Spatrick #include "FuzzerTracePC.h"
203cab2bb3Spatrick #include <algorithm>
21d89ec533Spatrick #include <chrono>
223cab2bb3Spatrick #include <numeric>
233cab2bb3Spatrick #include <random>
243cab2bb3Spatrick #include <unordered_set>
253cab2bb3Spatrick 
263cab2bb3Spatrick namespace fuzzer {
273cab2bb3Spatrick 
283cab2bb3Spatrick struct InputInfo {
293cab2bb3Spatrick   Unit U;  // The actual input data.
30d89ec533Spatrick   std::chrono::microseconds TimeOfUnit;
313cab2bb3Spatrick   uint8_t Sha1[kSHA1NumBytes];  // Checksum.
323cab2bb3Spatrick   // Number of features that this input has and no smaller input has.
333cab2bb3Spatrick   size_t NumFeatures = 0;
343cab2bb3Spatrick   size_t Tmp = 0; // Used by ValidateFeatureSet.
353cab2bb3Spatrick   // Stats.
363cab2bb3Spatrick   size_t NumExecutedMutations = 0;
373cab2bb3Spatrick   size_t NumSuccessfullMutations = 0;
38d89ec533Spatrick   bool NeverReduce = false;
393cab2bb3Spatrick   bool MayDeleteFile = false;
403cab2bb3Spatrick   bool Reduced = false;
413cab2bb3Spatrick   bool HasFocusFunction = false;
42*810390e3Srobert   std::vector<uint32_t> UniqFeatureSet;
43*810390e3Srobert   std::vector<uint8_t> DataFlowTraceForFocusFunction;
441f9cb04fSpatrick   // Power schedule.
451f9cb04fSpatrick   bool NeedsEnergyUpdate = false;
461f9cb04fSpatrick   double Energy = 0.0;
47d89ec533Spatrick   double SumIncidence = 0.0;
48*810390e3Srobert   std::vector<std::pair<uint32_t, uint16_t>> FeatureFreqs;
491f9cb04fSpatrick 
501f9cb04fSpatrick   // Delete feature Idx and its frequency from FeatureFreqs.
DeleteFeatureFreqInputInfo511f9cb04fSpatrick   bool DeleteFeatureFreq(uint32_t Idx) {
521f9cb04fSpatrick     if (FeatureFreqs.empty())
531f9cb04fSpatrick       return false;
541f9cb04fSpatrick 
551f9cb04fSpatrick     // Binary search over local feature frequencies sorted by index.
561f9cb04fSpatrick     auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(),
571f9cb04fSpatrick                                   std::pair<uint32_t, uint16_t>(Idx, 0));
581f9cb04fSpatrick 
591f9cb04fSpatrick     if (Lower != FeatureFreqs.end() && Lower->first == Idx) {
601f9cb04fSpatrick       FeatureFreqs.erase(Lower);
611f9cb04fSpatrick       return true;
621f9cb04fSpatrick     }
631f9cb04fSpatrick     return false;
641f9cb04fSpatrick   }
651f9cb04fSpatrick 
661f9cb04fSpatrick   // Assign more energy to a high-entropy seed, i.e., that reveals more
67d89ec533Spatrick   // information about the globally rare features in the neighborhood of the
68d89ec533Spatrick   // seed. Since we do not know the entropy of a seed that has never been
69d89ec533Spatrick   // executed we assign fresh seeds maximum entropy and let II->Energy approach
70d89ec533Spatrick   // the true entropy from above. If ScalePerExecTime is true, the computed
71d89ec533Spatrick   // entropy is scaled based on how fast this input executes compared to the
72d89ec533Spatrick   // average execution time of inputs. The faster an input executes, the more
73d89ec533Spatrick   // energy gets assigned to the input.
UpdateEnergyInputInfo74d89ec533Spatrick   void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime,
75d89ec533Spatrick                     std::chrono::microseconds AverageUnitExecutionTime) {
761f9cb04fSpatrick     Energy = 0.0;
77d89ec533Spatrick     SumIncidence = 0.0;
781f9cb04fSpatrick 
791f9cb04fSpatrick     // Apply add-one smoothing to locally discovered features.
801f9cb04fSpatrick     for (auto F : FeatureFreqs) {
81d89ec533Spatrick       double LocalIncidence = F.second + 1;
82d89ec533Spatrick       Energy -= LocalIncidence * log(LocalIncidence);
831f9cb04fSpatrick       SumIncidence += LocalIncidence;
841f9cb04fSpatrick     }
851f9cb04fSpatrick 
861f9cb04fSpatrick     // Apply add-one smoothing to locally undiscovered features.
87d89ec533Spatrick     //   PreciseEnergy -= 0; // since log(1.0) == 0)
88d89ec533Spatrick     SumIncidence +=
89d89ec533Spatrick         static_cast<double>(GlobalNumberOfFeatures - FeatureFreqs.size());
901f9cb04fSpatrick 
911f9cb04fSpatrick     // Add a single locally abundant feature apply add-one smoothing.
92d89ec533Spatrick     double AbdIncidence = static_cast<double>(NumExecutedMutations + 1);
93d89ec533Spatrick     Energy -= AbdIncidence * log(AbdIncidence);
941f9cb04fSpatrick     SumIncidence += AbdIncidence;
951f9cb04fSpatrick 
961f9cb04fSpatrick     // Normalize.
971f9cb04fSpatrick     if (SumIncidence != 0)
98d89ec533Spatrick       Energy = Energy / SumIncidence + log(SumIncidence);
99d89ec533Spatrick 
100d89ec533Spatrick     if (ScalePerExecTime) {
101d89ec533Spatrick       // Scaling to favor inputs with lower execution time.
102d89ec533Spatrick       uint32_t PerfScore = 100;
103d89ec533Spatrick       if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10)
104d89ec533Spatrick         PerfScore = 10;
105d89ec533Spatrick       else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4)
106d89ec533Spatrick         PerfScore = 25;
107d89ec533Spatrick       else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2)
108d89ec533Spatrick         PerfScore = 50;
109d89ec533Spatrick       else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4)
110d89ec533Spatrick         PerfScore = 75;
111d89ec533Spatrick       else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count())
112d89ec533Spatrick         PerfScore = 300;
113d89ec533Spatrick       else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count())
114d89ec533Spatrick         PerfScore = 200;
115d89ec533Spatrick       else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count())
116d89ec533Spatrick         PerfScore = 150;
117d89ec533Spatrick 
118d89ec533Spatrick       Energy *= PerfScore;
119d89ec533Spatrick     }
1201f9cb04fSpatrick   }
1211f9cb04fSpatrick 
1221f9cb04fSpatrick   // Increment the frequency of the feature Idx.
UpdateFeatureFrequencyInputInfo1231f9cb04fSpatrick   void UpdateFeatureFrequency(uint32_t Idx) {
1241f9cb04fSpatrick     NeedsEnergyUpdate = true;
1251f9cb04fSpatrick 
1261f9cb04fSpatrick     // The local feature frequencies is an ordered vector of pairs.
1271f9cb04fSpatrick     // If there are no local feature frequencies, push_back preserves order.
1281f9cb04fSpatrick     // Set the feature frequency for feature Idx32 to 1.
1291f9cb04fSpatrick     if (FeatureFreqs.empty()) {
1301f9cb04fSpatrick       FeatureFreqs.push_back(std::pair<uint32_t, uint16_t>(Idx, 1));
1311f9cb04fSpatrick       return;
1321f9cb04fSpatrick     }
1331f9cb04fSpatrick 
1341f9cb04fSpatrick     // Binary search over local feature frequencies sorted by index.
1351f9cb04fSpatrick     auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(),
1361f9cb04fSpatrick                                   std::pair<uint32_t, uint16_t>(Idx, 0));
1371f9cb04fSpatrick 
1381f9cb04fSpatrick     // If feature Idx32 already exists, increment its frequency.
1391f9cb04fSpatrick     // Otherwise, insert a new pair right after the next lower index.
1401f9cb04fSpatrick     if (Lower != FeatureFreqs.end() && Lower->first == Idx) {
1411f9cb04fSpatrick       Lower->second++;
1421f9cb04fSpatrick     } else {
1431f9cb04fSpatrick       FeatureFreqs.insert(Lower, std::pair<uint32_t, uint16_t>(Idx, 1));
1441f9cb04fSpatrick     }
1451f9cb04fSpatrick   }
1461f9cb04fSpatrick };
1471f9cb04fSpatrick 
1481f9cb04fSpatrick struct EntropicOptions {
1491f9cb04fSpatrick   bool Enabled;
1501f9cb04fSpatrick   size_t NumberOfRarestFeatures;
1511f9cb04fSpatrick   size_t FeatureFrequencyThreshold;
152d89ec533Spatrick   bool ScalePerExecTime;
1533cab2bb3Spatrick };
1543cab2bb3Spatrick 
1553cab2bb3Spatrick class InputCorpus {
1561f9cb04fSpatrick   static const uint32_t kFeatureSetSize = 1 << 21;
1571f9cb04fSpatrick   static const uint8_t kMaxMutationFactor = 20;
1581f9cb04fSpatrick   static const size_t kSparseEnergyUpdates = 100;
1591f9cb04fSpatrick 
1601f9cb04fSpatrick   size_t NumExecutedMutations = 0;
1611f9cb04fSpatrick 
1621f9cb04fSpatrick   EntropicOptions Entropic;
1631f9cb04fSpatrick 
1643cab2bb3Spatrick public:
InputCorpus(const std::string & OutputCorpus,EntropicOptions Entropic)1651f9cb04fSpatrick   InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic)
1661f9cb04fSpatrick       : Entropic(Entropic), OutputCorpus(OutputCorpus) {
1673cab2bb3Spatrick     memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
1683cab2bb3Spatrick     memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
1693cab2bb3Spatrick   }
~InputCorpus()1703cab2bb3Spatrick   ~InputCorpus() {
1713cab2bb3Spatrick     for (auto II : Inputs)
1723cab2bb3Spatrick       delete II;
1733cab2bb3Spatrick   }
size()1743cab2bb3Spatrick   size_t size() const { return Inputs.size(); }
SizeInBytes()1753cab2bb3Spatrick   size_t SizeInBytes() const {
1763cab2bb3Spatrick     size_t Res = 0;
1773cab2bb3Spatrick     for (auto II : Inputs)
1783cab2bb3Spatrick       Res += II->U.size();
1793cab2bb3Spatrick     return Res;
1803cab2bb3Spatrick   }
NumActiveUnits()1813cab2bb3Spatrick   size_t NumActiveUnits() const {
1823cab2bb3Spatrick     size_t Res = 0;
1833cab2bb3Spatrick     for (auto II : Inputs)
1843cab2bb3Spatrick       Res += !II->U.empty();
1853cab2bb3Spatrick     return Res;
1863cab2bb3Spatrick   }
MaxInputSize()1873cab2bb3Spatrick   size_t MaxInputSize() const {
1883cab2bb3Spatrick     size_t Res = 0;
1893cab2bb3Spatrick     for (auto II : Inputs)
1903cab2bb3Spatrick         Res = std::max(Res, II->U.size());
1913cab2bb3Spatrick     return Res;
1923cab2bb3Spatrick   }
IncrementNumExecutedMutations()1931f9cb04fSpatrick   void IncrementNumExecutedMutations() { NumExecutedMutations++; }
1943cab2bb3Spatrick 
NumInputsThatTouchFocusFunction()1953cab2bb3Spatrick   size_t NumInputsThatTouchFocusFunction() {
1963cab2bb3Spatrick     return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
1973cab2bb3Spatrick       return II->HasFocusFunction;
1983cab2bb3Spatrick     });
1993cab2bb3Spatrick   }
2003cab2bb3Spatrick 
NumInputsWithDataFlowTrace()2013cab2bb3Spatrick   size_t NumInputsWithDataFlowTrace() {
2023cab2bb3Spatrick     return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
2033cab2bb3Spatrick       return !II->DataFlowTraceForFocusFunction.empty();
2043cab2bb3Spatrick     });
2053cab2bb3Spatrick   }
2063cab2bb3Spatrick 
empty()2073cab2bb3Spatrick   bool empty() const { return Inputs.empty(); }
2083cab2bb3Spatrick   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
AddToCorpus(const Unit & U,size_t NumFeatures,bool MayDeleteFile,bool HasFocusFunction,bool NeverReduce,std::chrono::microseconds TimeOfUnit,const std::vector<uint32_t> & FeatureSet,const DataFlowTrace & DFT,const InputInfo * BaseII)2093cab2bb3Spatrick   InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
210d89ec533Spatrick                          bool HasFocusFunction, bool NeverReduce,
211d89ec533Spatrick                          std::chrono::microseconds TimeOfUnit,
212*810390e3Srobert                          const std::vector<uint32_t> &FeatureSet,
2133cab2bb3Spatrick                          const DataFlowTrace &DFT, const InputInfo *BaseII) {
2143cab2bb3Spatrick     assert(!U.empty());
2153cab2bb3Spatrick     if (FeatureDebug)
2163cab2bb3Spatrick       Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
217d89ec533Spatrick     // Inputs.size() is cast to uint32_t below.
218d89ec533Spatrick     assert(Inputs.size() < std::numeric_limits<uint32_t>::max());
2193cab2bb3Spatrick     Inputs.push_back(new InputInfo());
2203cab2bb3Spatrick     InputInfo &II = *Inputs.back();
2213cab2bb3Spatrick     II.U = U;
2223cab2bb3Spatrick     II.NumFeatures = NumFeatures;
223d89ec533Spatrick     II.NeverReduce = NeverReduce;
224d89ec533Spatrick     II.TimeOfUnit = TimeOfUnit;
2253cab2bb3Spatrick     II.MayDeleteFile = MayDeleteFile;
2263cab2bb3Spatrick     II.UniqFeatureSet = FeatureSet;
2273cab2bb3Spatrick     II.HasFocusFunction = HasFocusFunction;
2281f9cb04fSpatrick     // Assign maximal energy to the new seed.
2291f9cb04fSpatrick     II.Energy = RareFeatures.empty() ? 1.0 : log(RareFeatures.size());
230d89ec533Spatrick     II.SumIncidence = static_cast<double>(RareFeatures.size());
2311f9cb04fSpatrick     II.NeedsEnergyUpdate = false;
2323cab2bb3Spatrick     std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
2333cab2bb3Spatrick     ComputeSHA1(U.data(), U.size(), II.Sha1);
2343cab2bb3Spatrick     auto Sha1Str = Sha1ToString(II.Sha1);
2353cab2bb3Spatrick     Hashes.insert(Sha1Str);
2363cab2bb3Spatrick     if (HasFocusFunction)
2373cab2bb3Spatrick       if (auto V = DFT.Get(Sha1Str))
2383cab2bb3Spatrick         II.DataFlowTraceForFocusFunction = *V;
2393cab2bb3Spatrick     // This is a gross heuristic.
2403cab2bb3Spatrick     // Ideally, when we add an element to a corpus we need to know its DFT.
2413cab2bb3Spatrick     // But if we don't, we'll use the DFT of its base input.
2423cab2bb3Spatrick     if (II.DataFlowTraceForFocusFunction.empty() && BaseII)
2433cab2bb3Spatrick       II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction;
2441f9cb04fSpatrick     DistributionNeedsUpdate = true;
2453cab2bb3Spatrick     PrintCorpus();
2463cab2bb3Spatrick     // ValidateFeatureSet();
2473cab2bb3Spatrick     return &II;
2483cab2bb3Spatrick   }
2493cab2bb3Spatrick 
2503cab2bb3Spatrick   // Debug-only
PrintUnit(const Unit & U)2513cab2bb3Spatrick   void PrintUnit(const Unit &U) {
2523cab2bb3Spatrick     if (!FeatureDebug) return;
2533cab2bb3Spatrick     for (uint8_t C : U) {
2543cab2bb3Spatrick       if (C != 'F' && C != 'U' && C != 'Z')
2553cab2bb3Spatrick         C = '.';
2563cab2bb3Spatrick       Printf("%c", C);
2573cab2bb3Spatrick     }
2583cab2bb3Spatrick   }
2593cab2bb3Spatrick 
2603cab2bb3Spatrick   // Debug-only
PrintFeatureSet(const std::vector<uint32_t> & FeatureSet)261*810390e3Srobert   void PrintFeatureSet(const std::vector<uint32_t> &FeatureSet) {
2623cab2bb3Spatrick     if (!FeatureDebug) return;
2633cab2bb3Spatrick     Printf("{");
2643cab2bb3Spatrick     for (uint32_t Feature: FeatureSet)
2653cab2bb3Spatrick       Printf("%u,", Feature);
2663cab2bb3Spatrick     Printf("}");
2673cab2bb3Spatrick   }
2683cab2bb3Spatrick 
2693cab2bb3Spatrick   // Debug-only
PrintCorpus()2703cab2bb3Spatrick   void PrintCorpus() {
2713cab2bb3Spatrick     if (!FeatureDebug) return;
2723cab2bb3Spatrick     Printf("======= CORPUS:\n");
2733cab2bb3Spatrick     int i = 0;
2743cab2bb3Spatrick     for (auto II : Inputs) {
2753cab2bb3Spatrick       if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) {
2763cab2bb3Spatrick         Printf("[%2d] ", i);
2773cab2bb3Spatrick         Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size());
2783cab2bb3Spatrick         PrintUnit(II->U);
2793cab2bb3Spatrick         Printf(" ");
2803cab2bb3Spatrick         PrintFeatureSet(II->UniqFeatureSet);
2813cab2bb3Spatrick         Printf("\n");
2823cab2bb3Spatrick       }
2833cab2bb3Spatrick       i++;
2843cab2bb3Spatrick     }
2853cab2bb3Spatrick   }
2863cab2bb3Spatrick 
Replace(InputInfo * II,const Unit & U,std::chrono::microseconds TimeOfUnit)287*810390e3Srobert   void Replace(InputInfo *II, const Unit &U,
288*810390e3Srobert                std::chrono::microseconds TimeOfUnit) {
2893cab2bb3Spatrick     assert(II->U.size() > U.size());
2903cab2bb3Spatrick     Hashes.erase(Sha1ToString(II->Sha1));
2913cab2bb3Spatrick     DeleteFile(*II);
2923cab2bb3Spatrick     ComputeSHA1(U.data(), U.size(), II->Sha1);
2933cab2bb3Spatrick     Hashes.insert(Sha1ToString(II->Sha1));
2943cab2bb3Spatrick     II->U = U;
2953cab2bb3Spatrick     II->Reduced = true;
296*810390e3Srobert     II->TimeOfUnit = TimeOfUnit;
2971f9cb04fSpatrick     DistributionNeedsUpdate = true;
2983cab2bb3Spatrick   }
2993cab2bb3Spatrick 
HasUnit(const Unit & U)3003cab2bb3Spatrick   bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
HasUnit(const std::string & H)3013cab2bb3Spatrick   bool HasUnit(const std::string &H) { return Hashes.count(H); }
ChooseUnitToMutate(Random & Rand)3023cab2bb3Spatrick   InputInfo &ChooseUnitToMutate(Random &Rand) {
3033cab2bb3Spatrick     InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)];
3043cab2bb3Spatrick     assert(!II.U.empty());
3053cab2bb3Spatrick     return II;
3063cab2bb3Spatrick   }
3073cab2bb3Spatrick 
ChooseUnitToCrossOverWith(Random & Rand,bool UniformDist)308d89ec533Spatrick   InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist) {
309d89ec533Spatrick     if (!UniformDist) {
310d89ec533Spatrick       return ChooseUnitToMutate(Rand);
311d89ec533Spatrick     }
312d89ec533Spatrick     InputInfo &II = *Inputs[Rand(Inputs.size())];
313d89ec533Spatrick     assert(!II.U.empty());
314d89ec533Spatrick     return II;
315d89ec533Spatrick   }
316d89ec533Spatrick 
3173cab2bb3Spatrick   // Returns an index of random unit from the corpus to mutate.
ChooseUnitIdxToMutate(Random & Rand)3183cab2bb3Spatrick   size_t ChooseUnitIdxToMutate(Random &Rand) {
3191f9cb04fSpatrick     UpdateCorpusDistribution(Rand);
3203cab2bb3Spatrick     size_t Idx = static_cast<size_t>(CorpusDistribution(Rand));
3213cab2bb3Spatrick     assert(Idx < Inputs.size());
3223cab2bb3Spatrick     return Idx;
3233cab2bb3Spatrick   }
3243cab2bb3Spatrick 
PrintStats()3253cab2bb3Spatrick   void PrintStats() {
3263cab2bb3Spatrick     for (size_t i = 0; i < Inputs.size(); i++) {
3273cab2bb3Spatrick       const auto &II = *Inputs[i];
3283cab2bb3Spatrick       Printf("  [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n", i,
3293cab2bb3Spatrick              Sha1ToString(II.Sha1).c_str(), II.U.size(),
330*810390e3Srobert              II.NumExecutedMutations, II.NumSuccessfullMutations,
331*810390e3Srobert              II.HasFocusFunction);
3323cab2bb3Spatrick     }
3333cab2bb3Spatrick   }
3343cab2bb3Spatrick 
PrintFeatureSet()3353cab2bb3Spatrick   void PrintFeatureSet() {
3363cab2bb3Spatrick     for (size_t i = 0; i < kFeatureSetSize; i++) {
3373cab2bb3Spatrick       if(size_t Sz = GetFeature(i))
3383cab2bb3Spatrick         Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz);
3393cab2bb3Spatrick     }
3403cab2bb3Spatrick     Printf("\n\t");
3413cab2bb3Spatrick     for (size_t i = 0; i < Inputs.size(); i++)
3423cab2bb3Spatrick       if (size_t N = Inputs[i]->NumFeatures)
3433cab2bb3Spatrick         Printf(" %zd=>%zd ", i, N);
3443cab2bb3Spatrick     Printf("\n");
3453cab2bb3Spatrick   }
3463cab2bb3Spatrick 
DeleteFile(const InputInfo & II)3473cab2bb3Spatrick   void DeleteFile(const InputInfo &II) {
3483cab2bb3Spatrick     if (!OutputCorpus.empty() && II.MayDeleteFile)
3493cab2bb3Spatrick       RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1)));
3503cab2bb3Spatrick   }
3513cab2bb3Spatrick 
DeleteInput(size_t Idx)3523cab2bb3Spatrick   void DeleteInput(size_t Idx) {
3533cab2bb3Spatrick     InputInfo &II = *Inputs[Idx];
3543cab2bb3Spatrick     DeleteFile(II);
3553cab2bb3Spatrick     Unit().swap(II.U);
3561f9cb04fSpatrick     II.Energy = 0.0;
3571f9cb04fSpatrick     II.NeedsEnergyUpdate = false;
3581f9cb04fSpatrick     DistributionNeedsUpdate = true;
3593cab2bb3Spatrick     if (FeatureDebug)
3603cab2bb3Spatrick       Printf("EVICTED %zd\n", Idx);
3613cab2bb3Spatrick   }
3623cab2bb3Spatrick 
AddRareFeature(uint32_t Idx)3631f9cb04fSpatrick   void AddRareFeature(uint32_t Idx) {
3641f9cb04fSpatrick     // Maintain *at least* TopXRarestFeatures many rare features
3651f9cb04fSpatrick     // and all features with a frequency below ConsideredRare.
3661f9cb04fSpatrick     // Remove all other features.
3671f9cb04fSpatrick     while (RareFeatures.size() > Entropic.NumberOfRarestFeatures &&
3681f9cb04fSpatrick            FreqOfMostAbundantRareFeature > Entropic.FeatureFrequencyThreshold) {
3691f9cb04fSpatrick 
3701f9cb04fSpatrick       // Find most and second most abbundant feature.
3711f9cb04fSpatrick       uint32_t MostAbundantRareFeatureIndices[2] = {RareFeatures[0],
3721f9cb04fSpatrick                                                     RareFeatures[0]};
3731f9cb04fSpatrick       size_t Delete = 0;
3741f9cb04fSpatrick       for (size_t i = 0; i < RareFeatures.size(); i++) {
3751f9cb04fSpatrick         uint32_t Idx2 = RareFeatures[i];
3761f9cb04fSpatrick         if (GlobalFeatureFreqs[Idx2] >=
3771f9cb04fSpatrick             GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0]]) {
3781f9cb04fSpatrick           MostAbundantRareFeatureIndices[1] = MostAbundantRareFeatureIndices[0];
3791f9cb04fSpatrick           MostAbundantRareFeatureIndices[0] = Idx2;
3801f9cb04fSpatrick           Delete = i;
3811f9cb04fSpatrick         }
3821f9cb04fSpatrick       }
3831f9cb04fSpatrick 
3841f9cb04fSpatrick       // Remove most abundant rare feature.
3851f9cb04fSpatrick       RareFeatures[Delete] = RareFeatures.back();
3861f9cb04fSpatrick       RareFeatures.pop_back();
3871f9cb04fSpatrick 
3881f9cb04fSpatrick       for (auto II : Inputs) {
3891f9cb04fSpatrick         if (II->DeleteFeatureFreq(MostAbundantRareFeatureIndices[0]))
3901f9cb04fSpatrick           II->NeedsEnergyUpdate = true;
3911f9cb04fSpatrick       }
3921f9cb04fSpatrick 
3931f9cb04fSpatrick       // Set 2nd most abundant as the new most abundant feature count.
3941f9cb04fSpatrick       FreqOfMostAbundantRareFeature =
3951f9cb04fSpatrick           GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1]];
3961f9cb04fSpatrick     }
3971f9cb04fSpatrick 
3981f9cb04fSpatrick     // Add rare feature, handle collisions, and update energy.
3991f9cb04fSpatrick     RareFeatures.push_back(Idx);
4001f9cb04fSpatrick     GlobalFeatureFreqs[Idx] = 0;
4011f9cb04fSpatrick     for (auto II : Inputs) {
4021f9cb04fSpatrick       II->DeleteFeatureFreq(Idx);
4031f9cb04fSpatrick 
4041f9cb04fSpatrick       // Apply add-one smoothing to this locally undiscovered feature.
4051f9cb04fSpatrick       // Zero energy seeds will never be fuzzed and remain zero energy.
4061f9cb04fSpatrick       if (II->Energy > 0.0) {
4071f9cb04fSpatrick         II->SumIncidence += 1;
408d89ec533Spatrick         II->Energy += log(II->SumIncidence) / II->SumIncidence;
4091f9cb04fSpatrick       }
4101f9cb04fSpatrick     }
4111f9cb04fSpatrick 
4121f9cb04fSpatrick     DistributionNeedsUpdate = true;
4131f9cb04fSpatrick   }
4141f9cb04fSpatrick 
AddFeature(size_t Idx,uint32_t NewSize,bool Shrink)4153cab2bb3Spatrick   bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
4163cab2bb3Spatrick     assert(NewSize);
4173cab2bb3Spatrick     Idx = Idx % kFeatureSetSize;
4183cab2bb3Spatrick     uint32_t OldSize = GetFeature(Idx);
4193cab2bb3Spatrick     if (OldSize == 0 || (Shrink && OldSize > NewSize)) {
4203cab2bb3Spatrick       if (OldSize > 0) {
4213cab2bb3Spatrick         size_t OldIdx = SmallestElementPerFeature[Idx];
4223cab2bb3Spatrick         InputInfo &II = *Inputs[OldIdx];
4233cab2bb3Spatrick         assert(II.NumFeatures > 0);
4243cab2bb3Spatrick         II.NumFeatures--;
4253cab2bb3Spatrick         if (II.NumFeatures == 0)
4263cab2bb3Spatrick           DeleteInput(OldIdx);
4273cab2bb3Spatrick       } else {
4283cab2bb3Spatrick         NumAddedFeatures++;
4291f9cb04fSpatrick         if (Entropic.Enabled)
4301f9cb04fSpatrick           AddRareFeature((uint32_t)Idx);
4313cab2bb3Spatrick       }
4323cab2bb3Spatrick       NumUpdatedFeatures++;
4333cab2bb3Spatrick       if (FeatureDebug)
4343cab2bb3Spatrick         Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
435d89ec533Spatrick       // Inputs.size() is guaranteed to be less than UINT32_MAX by AddToCorpus.
436d89ec533Spatrick       SmallestElementPerFeature[Idx] = static_cast<uint32_t>(Inputs.size());
4373cab2bb3Spatrick       InputSizesPerFeature[Idx] = NewSize;
4383cab2bb3Spatrick       return true;
4393cab2bb3Spatrick     }
4403cab2bb3Spatrick     return false;
4413cab2bb3Spatrick   }
4423cab2bb3Spatrick 
4431f9cb04fSpatrick   // Increment frequency of feature Idx globally and locally.
UpdateFeatureFrequency(InputInfo * II,size_t Idx)4441f9cb04fSpatrick   void UpdateFeatureFrequency(InputInfo *II, size_t Idx) {
4451f9cb04fSpatrick     uint32_t Idx32 = Idx % kFeatureSetSize;
4461f9cb04fSpatrick 
4471f9cb04fSpatrick     // Saturated increment.
4481f9cb04fSpatrick     if (GlobalFeatureFreqs[Idx32] == 0xFFFF)
4491f9cb04fSpatrick       return;
4501f9cb04fSpatrick     uint16_t Freq = GlobalFeatureFreqs[Idx32]++;
4511f9cb04fSpatrick 
4521f9cb04fSpatrick     // Skip if abundant.
4531f9cb04fSpatrick     if (Freq > FreqOfMostAbundantRareFeature ||
4541f9cb04fSpatrick         std::find(RareFeatures.begin(), RareFeatures.end(), Idx32) ==
4551f9cb04fSpatrick             RareFeatures.end())
4561f9cb04fSpatrick       return;
4571f9cb04fSpatrick 
4581f9cb04fSpatrick     // Update global frequencies.
4591f9cb04fSpatrick     if (Freq == FreqOfMostAbundantRareFeature)
4601f9cb04fSpatrick       FreqOfMostAbundantRareFeature++;
4611f9cb04fSpatrick 
4621f9cb04fSpatrick     // Update local frequencies.
4631f9cb04fSpatrick     if (II)
4641f9cb04fSpatrick       II->UpdateFeatureFrequency(Idx32);
4651f9cb04fSpatrick   }
4661f9cb04fSpatrick 
NumFeatures()4673cab2bb3Spatrick   size_t NumFeatures() const { return NumAddedFeatures; }
NumFeatureUpdates()4683cab2bb3Spatrick   size_t NumFeatureUpdates() const { return NumUpdatedFeatures; }
4693cab2bb3Spatrick 
4703cab2bb3Spatrick private:
4713cab2bb3Spatrick 
4723cab2bb3Spatrick   static const bool FeatureDebug = false;
4733cab2bb3Spatrick 
GetFeature(size_t Idx)474d89ec533Spatrick   uint32_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; }
4753cab2bb3Spatrick 
ValidateFeatureSet()4763cab2bb3Spatrick   void ValidateFeatureSet() {
4773cab2bb3Spatrick     if (FeatureDebug)
4783cab2bb3Spatrick       PrintFeatureSet();
4793cab2bb3Spatrick     for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++)
4803cab2bb3Spatrick       if (GetFeature(Idx))
4813cab2bb3Spatrick         Inputs[SmallestElementPerFeature[Idx]]->Tmp++;
4823cab2bb3Spatrick     for (auto II: Inputs) {
4833cab2bb3Spatrick       if (II->Tmp != II->NumFeatures)
4843cab2bb3Spatrick         Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures);
4853cab2bb3Spatrick       assert(II->Tmp == II->NumFeatures);
4863cab2bb3Spatrick       II->Tmp = 0;
4873cab2bb3Spatrick     }
4883cab2bb3Spatrick   }
4893cab2bb3Spatrick 
4903cab2bb3Spatrick   // Updates the probability distribution for the units in the corpus.
4913cab2bb3Spatrick   // Must be called whenever the corpus or unit weights are changed.
4923cab2bb3Spatrick   //
4931f9cb04fSpatrick   // Hypothesis: inputs that maximize information about globally rare features
4941f9cb04fSpatrick   // are interesting.
UpdateCorpusDistribution(Random & Rand)4951f9cb04fSpatrick   void UpdateCorpusDistribution(Random &Rand) {
4961f9cb04fSpatrick     // Skip update if no seeds or rare features were added/deleted.
4971f9cb04fSpatrick     // Sparse updates for local change of feature frequencies,
4981f9cb04fSpatrick     // i.e., randomly do not skip.
4991f9cb04fSpatrick     if (!DistributionNeedsUpdate &&
5001f9cb04fSpatrick         (!Entropic.Enabled || Rand(kSparseEnergyUpdates)))
5011f9cb04fSpatrick       return;
5021f9cb04fSpatrick 
5031f9cb04fSpatrick     DistributionNeedsUpdate = false;
5041f9cb04fSpatrick 
5053cab2bb3Spatrick     size_t N = Inputs.size();
5063cab2bb3Spatrick     assert(N);
5073cab2bb3Spatrick     Intervals.resize(N + 1);
5083cab2bb3Spatrick     Weights.resize(N);
5093cab2bb3Spatrick     std::iota(Intervals.begin(), Intervals.end(), 0);
5101f9cb04fSpatrick 
511d89ec533Spatrick     std::chrono::microseconds AverageUnitExecutionTime(0);
512d89ec533Spatrick     for (auto II : Inputs) {
513d89ec533Spatrick       AverageUnitExecutionTime += II->TimeOfUnit;
514d89ec533Spatrick     }
515d89ec533Spatrick     AverageUnitExecutionTime /= N;
516d89ec533Spatrick 
5171f9cb04fSpatrick     bool VanillaSchedule = true;
5181f9cb04fSpatrick     if (Entropic.Enabled) {
5191f9cb04fSpatrick       for (auto II : Inputs) {
5201f9cb04fSpatrick         if (II->NeedsEnergyUpdate && II->Energy != 0.0) {
5211f9cb04fSpatrick           II->NeedsEnergyUpdate = false;
522d89ec533Spatrick           II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime,
523d89ec533Spatrick                            AverageUnitExecutionTime);
5241f9cb04fSpatrick         }
5251f9cb04fSpatrick       }
5261f9cb04fSpatrick 
5271f9cb04fSpatrick       for (size_t i = 0; i < N; i++) {
5281f9cb04fSpatrick 
5291f9cb04fSpatrick         if (Inputs[i]->NumFeatures == 0) {
5301f9cb04fSpatrick           // If the seed doesn't represent any features, assign zero energy.
5311f9cb04fSpatrick           Weights[i] = 0.;
5321f9cb04fSpatrick         } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor >
5331f9cb04fSpatrick                    NumExecutedMutations / Inputs.size()) {
5341f9cb04fSpatrick           // If the seed was fuzzed a lot more than average, assign zero energy.
5351f9cb04fSpatrick           Weights[i] = 0.;
5361f9cb04fSpatrick         } else {
5371f9cb04fSpatrick           // Otherwise, simply assign the computed energy.
5381f9cb04fSpatrick           Weights[i] = Inputs[i]->Energy;
5391f9cb04fSpatrick         }
5401f9cb04fSpatrick 
5411f9cb04fSpatrick         // If energy for all seeds is zero, fall back to vanilla schedule.
5421f9cb04fSpatrick         if (Weights[i] > 0.0)
5431f9cb04fSpatrick           VanillaSchedule = false;
5441f9cb04fSpatrick       }
5451f9cb04fSpatrick     }
5461f9cb04fSpatrick 
5471f9cb04fSpatrick     if (VanillaSchedule) {
5483cab2bb3Spatrick       for (size_t i = 0; i < N; i++)
549d89ec533Spatrick         Weights[i] =
550d89ec533Spatrick             Inputs[i]->NumFeatures
551d89ec533Spatrick                 ? static_cast<double>((i + 1) *
552d89ec533Spatrick                                       (Inputs[i]->HasFocusFunction ? 1000 : 1))
5533cab2bb3Spatrick                 : 0.;
5541f9cb04fSpatrick     }
5551f9cb04fSpatrick 
5563cab2bb3Spatrick     if (FeatureDebug) {
5573cab2bb3Spatrick       for (size_t i = 0; i < N; i++)
5583cab2bb3Spatrick         Printf("%zd ", Inputs[i]->NumFeatures);
5593cab2bb3Spatrick       Printf("SCORE\n");
5603cab2bb3Spatrick       for (size_t i = 0; i < N; i++)
5613cab2bb3Spatrick         Printf("%f ", Weights[i]);
5623cab2bb3Spatrick       Printf("Weights\n");
5633cab2bb3Spatrick     }
5643cab2bb3Spatrick     CorpusDistribution = std::piecewise_constant_distribution<double>(
5653cab2bb3Spatrick         Intervals.begin(), Intervals.end(), Weights.begin());
5663cab2bb3Spatrick   }
5673cab2bb3Spatrick   std::piecewise_constant_distribution<double> CorpusDistribution;
5683cab2bb3Spatrick 
569*810390e3Srobert   std::vector<double> Intervals;
570*810390e3Srobert   std::vector<double> Weights;
5713cab2bb3Spatrick 
5723cab2bb3Spatrick   std::unordered_set<std::string> Hashes;
573*810390e3Srobert   std::vector<InputInfo *> Inputs;
5743cab2bb3Spatrick 
5753cab2bb3Spatrick   size_t NumAddedFeatures = 0;
5763cab2bb3Spatrick   size_t NumUpdatedFeatures = 0;
5773cab2bb3Spatrick   uint32_t InputSizesPerFeature[kFeatureSetSize];
5783cab2bb3Spatrick   uint32_t SmallestElementPerFeature[kFeatureSetSize];
5793cab2bb3Spatrick 
5801f9cb04fSpatrick   bool DistributionNeedsUpdate = true;
5811f9cb04fSpatrick   uint16_t FreqOfMostAbundantRareFeature = 0;
5821f9cb04fSpatrick   uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {};
583*810390e3Srobert   std::vector<uint32_t> RareFeatures;
5841f9cb04fSpatrick 
5853cab2bb3Spatrick   std::string OutputCorpus;
5863cab2bb3Spatrick };
5873cab2bb3Spatrick 
5883cab2bb3Spatrick }  // namespace fuzzer
5893cab2bb3Spatrick 
5903cab2bb3Spatrick #endif  // LLVM_FUZZER_CORPUS
591