13cab2bb3Spatrick //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// 23cab2bb3Spatrick // 33cab2bb3Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 43cab2bb3Spatrick // See https://llvm.org/LICENSE.txt for license information. 53cab2bb3Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 63cab2bb3Spatrick // 73cab2bb3Spatrick //===----------------------------------------------------------------------===// 83cab2bb3Spatrick // fuzzer::InputCorpus 93cab2bb3Spatrick //===----------------------------------------------------------------------===// 103cab2bb3Spatrick 113cab2bb3Spatrick #ifndef LLVM_FUZZER_CORPUS 123cab2bb3Spatrick #define LLVM_FUZZER_CORPUS 133cab2bb3Spatrick 143cab2bb3Spatrick #include "FuzzerDataFlowTrace.h" 153cab2bb3Spatrick #include "FuzzerDefs.h" 163cab2bb3Spatrick #include "FuzzerIO.h" 173cab2bb3Spatrick #include "FuzzerRandom.h" 183cab2bb3Spatrick #include "FuzzerSHA1.h" 193cab2bb3Spatrick #include "FuzzerTracePC.h" 203cab2bb3Spatrick #include <algorithm> 21d89ec533Spatrick #include <chrono> 223cab2bb3Spatrick #include <numeric> 233cab2bb3Spatrick #include <random> 243cab2bb3Spatrick #include <unordered_set> 253cab2bb3Spatrick 263cab2bb3Spatrick namespace fuzzer { 273cab2bb3Spatrick 283cab2bb3Spatrick struct InputInfo { 293cab2bb3Spatrick Unit U; // The actual input data. 30d89ec533Spatrick std::chrono::microseconds TimeOfUnit; 313cab2bb3Spatrick uint8_t Sha1[kSHA1NumBytes]; // Checksum. 323cab2bb3Spatrick // Number of features that this input has and no smaller input has. 333cab2bb3Spatrick size_t NumFeatures = 0; 343cab2bb3Spatrick size_t Tmp = 0; // Used by ValidateFeatureSet. 353cab2bb3Spatrick // Stats. 363cab2bb3Spatrick size_t NumExecutedMutations = 0; 373cab2bb3Spatrick size_t NumSuccessfullMutations = 0; 38d89ec533Spatrick bool NeverReduce = false; 393cab2bb3Spatrick bool MayDeleteFile = false; 403cab2bb3Spatrick bool Reduced = false; 413cab2bb3Spatrick bool HasFocusFunction = false; 42*810390e3Srobert std::vector<uint32_t> UniqFeatureSet; 43*810390e3Srobert std::vector<uint8_t> DataFlowTraceForFocusFunction; 441f9cb04fSpatrick // Power schedule. 451f9cb04fSpatrick bool NeedsEnergyUpdate = false; 461f9cb04fSpatrick double Energy = 0.0; 47d89ec533Spatrick double SumIncidence = 0.0; 48*810390e3Srobert std::vector<std::pair<uint32_t, uint16_t>> FeatureFreqs; 491f9cb04fSpatrick 501f9cb04fSpatrick // Delete feature Idx and its frequency from FeatureFreqs. DeleteFeatureFreqInputInfo511f9cb04fSpatrick bool DeleteFeatureFreq(uint32_t Idx) { 521f9cb04fSpatrick if (FeatureFreqs.empty()) 531f9cb04fSpatrick return false; 541f9cb04fSpatrick 551f9cb04fSpatrick // Binary search over local feature frequencies sorted by index. 561f9cb04fSpatrick auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), 571f9cb04fSpatrick std::pair<uint32_t, uint16_t>(Idx, 0)); 581f9cb04fSpatrick 591f9cb04fSpatrick if (Lower != FeatureFreqs.end() && Lower->first == Idx) { 601f9cb04fSpatrick FeatureFreqs.erase(Lower); 611f9cb04fSpatrick return true; 621f9cb04fSpatrick } 631f9cb04fSpatrick return false; 641f9cb04fSpatrick } 651f9cb04fSpatrick 661f9cb04fSpatrick // Assign more energy to a high-entropy seed, i.e., that reveals more 67d89ec533Spatrick // information about the globally rare features in the neighborhood of the 68d89ec533Spatrick // seed. Since we do not know the entropy of a seed that has never been 69d89ec533Spatrick // executed we assign fresh seeds maximum entropy and let II->Energy approach 70d89ec533Spatrick // the true entropy from above. If ScalePerExecTime is true, the computed 71d89ec533Spatrick // entropy is scaled based on how fast this input executes compared to the 72d89ec533Spatrick // average execution time of inputs. The faster an input executes, the more 73d89ec533Spatrick // energy gets assigned to the input. UpdateEnergyInputInfo74d89ec533Spatrick void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime, 75d89ec533Spatrick std::chrono::microseconds AverageUnitExecutionTime) { 761f9cb04fSpatrick Energy = 0.0; 77d89ec533Spatrick SumIncidence = 0.0; 781f9cb04fSpatrick 791f9cb04fSpatrick // Apply add-one smoothing to locally discovered features. 801f9cb04fSpatrick for (auto F : FeatureFreqs) { 81d89ec533Spatrick double LocalIncidence = F.second + 1; 82d89ec533Spatrick Energy -= LocalIncidence * log(LocalIncidence); 831f9cb04fSpatrick SumIncidence += LocalIncidence; 841f9cb04fSpatrick } 851f9cb04fSpatrick 861f9cb04fSpatrick // Apply add-one smoothing to locally undiscovered features. 87d89ec533Spatrick // PreciseEnergy -= 0; // since log(1.0) == 0) 88d89ec533Spatrick SumIncidence += 89d89ec533Spatrick static_cast<double>(GlobalNumberOfFeatures - FeatureFreqs.size()); 901f9cb04fSpatrick 911f9cb04fSpatrick // Add a single locally abundant feature apply add-one smoothing. 92d89ec533Spatrick double AbdIncidence = static_cast<double>(NumExecutedMutations + 1); 93d89ec533Spatrick Energy -= AbdIncidence * log(AbdIncidence); 941f9cb04fSpatrick SumIncidence += AbdIncidence; 951f9cb04fSpatrick 961f9cb04fSpatrick // Normalize. 971f9cb04fSpatrick if (SumIncidence != 0) 98d89ec533Spatrick Energy = Energy / SumIncidence + log(SumIncidence); 99d89ec533Spatrick 100d89ec533Spatrick if (ScalePerExecTime) { 101d89ec533Spatrick // Scaling to favor inputs with lower execution time. 102d89ec533Spatrick uint32_t PerfScore = 100; 103d89ec533Spatrick if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10) 104d89ec533Spatrick PerfScore = 10; 105d89ec533Spatrick else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4) 106d89ec533Spatrick PerfScore = 25; 107d89ec533Spatrick else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2) 108d89ec533Spatrick PerfScore = 50; 109d89ec533Spatrick else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4) 110d89ec533Spatrick PerfScore = 75; 111d89ec533Spatrick else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count()) 112d89ec533Spatrick PerfScore = 300; 113d89ec533Spatrick else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count()) 114d89ec533Spatrick PerfScore = 200; 115d89ec533Spatrick else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count()) 116d89ec533Spatrick PerfScore = 150; 117d89ec533Spatrick 118d89ec533Spatrick Energy *= PerfScore; 119d89ec533Spatrick } 1201f9cb04fSpatrick } 1211f9cb04fSpatrick 1221f9cb04fSpatrick // Increment the frequency of the feature Idx. UpdateFeatureFrequencyInputInfo1231f9cb04fSpatrick void UpdateFeatureFrequency(uint32_t Idx) { 1241f9cb04fSpatrick NeedsEnergyUpdate = true; 1251f9cb04fSpatrick 1261f9cb04fSpatrick // The local feature frequencies is an ordered vector of pairs. 1271f9cb04fSpatrick // If there are no local feature frequencies, push_back preserves order. 1281f9cb04fSpatrick // Set the feature frequency for feature Idx32 to 1. 1291f9cb04fSpatrick if (FeatureFreqs.empty()) { 1301f9cb04fSpatrick FeatureFreqs.push_back(std::pair<uint32_t, uint16_t>(Idx, 1)); 1311f9cb04fSpatrick return; 1321f9cb04fSpatrick } 1331f9cb04fSpatrick 1341f9cb04fSpatrick // Binary search over local feature frequencies sorted by index. 1351f9cb04fSpatrick auto Lower = std::lower_bound(FeatureFreqs.begin(), FeatureFreqs.end(), 1361f9cb04fSpatrick std::pair<uint32_t, uint16_t>(Idx, 0)); 1371f9cb04fSpatrick 1381f9cb04fSpatrick // If feature Idx32 already exists, increment its frequency. 1391f9cb04fSpatrick // Otherwise, insert a new pair right after the next lower index. 1401f9cb04fSpatrick if (Lower != FeatureFreqs.end() && Lower->first == Idx) { 1411f9cb04fSpatrick Lower->second++; 1421f9cb04fSpatrick } else { 1431f9cb04fSpatrick FeatureFreqs.insert(Lower, std::pair<uint32_t, uint16_t>(Idx, 1)); 1441f9cb04fSpatrick } 1451f9cb04fSpatrick } 1461f9cb04fSpatrick }; 1471f9cb04fSpatrick 1481f9cb04fSpatrick struct EntropicOptions { 1491f9cb04fSpatrick bool Enabled; 1501f9cb04fSpatrick size_t NumberOfRarestFeatures; 1511f9cb04fSpatrick size_t FeatureFrequencyThreshold; 152d89ec533Spatrick bool ScalePerExecTime; 1533cab2bb3Spatrick }; 1543cab2bb3Spatrick 1553cab2bb3Spatrick class InputCorpus { 1561f9cb04fSpatrick static const uint32_t kFeatureSetSize = 1 << 21; 1571f9cb04fSpatrick static const uint8_t kMaxMutationFactor = 20; 1581f9cb04fSpatrick static const size_t kSparseEnergyUpdates = 100; 1591f9cb04fSpatrick 1601f9cb04fSpatrick size_t NumExecutedMutations = 0; 1611f9cb04fSpatrick 1621f9cb04fSpatrick EntropicOptions Entropic; 1631f9cb04fSpatrick 1643cab2bb3Spatrick public: InputCorpus(const std::string & OutputCorpus,EntropicOptions Entropic)1651f9cb04fSpatrick InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic) 1661f9cb04fSpatrick : Entropic(Entropic), OutputCorpus(OutputCorpus) { 1673cab2bb3Spatrick memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); 1683cab2bb3Spatrick memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); 1693cab2bb3Spatrick } ~InputCorpus()1703cab2bb3Spatrick ~InputCorpus() { 1713cab2bb3Spatrick for (auto II : Inputs) 1723cab2bb3Spatrick delete II; 1733cab2bb3Spatrick } size()1743cab2bb3Spatrick size_t size() const { return Inputs.size(); } SizeInBytes()1753cab2bb3Spatrick size_t SizeInBytes() const { 1763cab2bb3Spatrick size_t Res = 0; 1773cab2bb3Spatrick for (auto II : Inputs) 1783cab2bb3Spatrick Res += II->U.size(); 1793cab2bb3Spatrick return Res; 1803cab2bb3Spatrick } NumActiveUnits()1813cab2bb3Spatrick size_t NumActiveUnits() const { 1823cab2bb3Spatrick size_t Res = 0; 1833cab2bb3Spatrick for (auto II : Inputs) 1843cab2bb3Spatrick Res += !II->U.empty(); 1853cab2bb3Spatrick return Res; 1863cab2bb3Spatrick } MaxInputSize()1873cab2bb3Spatrick size_t MaxInputSize() const { 1883cab2bb3Spatrick size_t Res = 0; 1893cab2bb3Spatrick for (auto II : Inputs) 1903cab2bb3Spatrick Res = std::max(Res, II->U.size()); 1913cab2bb3Spatrick return Res; 1923cab2bb3Spatrick } IncrementNumExecutedMutations()1931f9cb04fSpatrick void IncrementNumExecutedMutations() { NumExecutedMutations++; } 1943cab2bb3Spatrick NumInputsThatTouchFocusFunction()1953cab2bb3Spatrick size_t NumInputsThatTouchFocusFunction() { 1963cab2bb3Spatrick return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) { 1973cab2bb3Spatrick return II->HasFocusFunction; 1983cab2bb3Spatrick }); 1993cab2bb3Spatrick } 2003cab2bb3Spatrick NumInputsWithDataFlowTrace()2013cab2bb3Spatrick size_t NumInputsWithDataFlowTrace() { 2023cab2bb3Spatrick return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) { 2033cab2bb3Spatrick return !II->DataFlowTraceForFocusFunction.empty(); 2043cab2bb3Spatrick }); 2053cab2bb3Spatrick } 2063cab2bb3Spatrick empty()2073cab2bb3Spatrick bool empty() const { return Inputs.empty(); } 2083cab2bb3Spatrick const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } AddToCorpus(const Unit & U,size_t NumFeatures,bool MayDeleteFile,bool HasFocusFunction,bool NeverReduce,std::chrono::microseconds TimeOfUnit,const std::vector<uint32_t> & FeatureSet,const DataFlowTrace & DFT,const InputInfo * BaseII)2093cab2bb3Spatrick InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, 210d89ec533Spatrick bool HasFocusFunction, bool NeverReduce, 211d89ec533Spatrick std::chrono::microseconds TimeOfUnit, 212*810390e3Srobert const std::vector<uint32_t> &FeatureSet, 2133cab2bb3Spatrick const DataFlowTrace &DFT, const InputInfo *BaseII) { 2143cab2bb3Spatrick assert(!U.empty()); 2153cab2bb3Spatrick if (FeatureDebug) 2163cab2bb3Spatrick Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); 217d89ec533Spatrick // Inputs.size() is cast to uint32_t below. 218d89ec533Spatrick assert(Inputs.size() < std::numeric_limits<uint32_t>::max()); 2193cab2bb3Spatrick Inputs.push_back(new InputInfo()); 2203cab2bb3Spatrick InputInfo &II = *Inputs.back(); 2213cab2bb3Spatrick II.U = U; 2223cab2bb3Spatrick II.NumFeatures = NumFeatures; 223d89ec533Spatrick II.NeverReduce = NeverReduce; 224d89ec533Spatrick II.TimeOfUnit = TimeOfUnit; 2253cab2bb3Spatrick II.MayDeleteFile = MayDeleteFile; 2263cab2bb3Spatrick II.UniqFeatureSet = FeatureSet; 2273cab2bb3Spatrick II.HasFocusFunction = HasFocusFunction; 2281f9cb04fSpatrick // Assign maximal energy to the new seed. 2291f9cb04fSpatrick II.Energy = RareFeatures.empty() ? 1.0 : log(RareFeatures.size()); 230d89ec533Spatrick II.SumIncidence = static_cast<double>(RareFeatures.size()); 2311f9cb04fSpatrick II.NeedsEnergyUpdate = false; 2323cab2bb3Spatrick std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); 2333cab2bb3Spatrick ComputeSHA1(U.data(), U.size(), II.Sha1); 2343cab2bb3Spatrick auto Sha1Str = Sha1ToString(II.Sha1); 2353cab2bb3Spatrick Hashes.insert(Sha1Str); 2363cab2bb3Spatrick if (HasFocusFunction) 2373cab2bb3Spatrick if (auto V = DFT.Get(Sha1Str)) 2383cab2bb3Spatrick II.DataFlowTraceForFocusFunction = *V; 2393cab2bb3Spatrick // This is a gross heuristic. 2403cab2bb3Spatrick // Ideally, when we add an element to a corpus we need to know its DFT. 2413cab2bb3Spatrick // But if we don't, we'll use the DFT of its base input. 2423cab2bb3Spatrick if (II.DataFlowTraceForFocusFunction.empty() && BaseII) 2433cab2bb3Spatrick II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; 2441f9cb04fSpatrick DistributionNeedsUpdate = true; 2453cab2bb3Spatrick PrintCorpus(); 2463cab2bb3Spatrick // ValidateFeatureSet(); 2473cab2bb3Spatrick return &II; 2483cab2bb3Spatrick } 2493cab2bb3Spatrick 2503cab2bb3Spatrick // Debug-only PrintUnit(const Unit & U)2513cab2bb3Spatrick void PrintUnit(const Unit &U) { 2523cab2bb3Spatrick if (!FeatureDebug) return; 2533cab2bb3Spatrick for (uint8_t C : U) { 2543cab2bb3Spatrick if (C != 'F' && C != 'U' && C != 'Z') 2553cab2bb3Spatrick C = '.'; 2563cab2bb3Spatrick Printf("%c", C); 2573cab2bb3Spatrick } 2583cab2bb3Spatrick } 2593cab2bb3Spatrick 2603cab2bb3Spatrick // Debug-only PrintFeatureSet(const std::vector<uint32_t> & FeatureSet)261*810390e3Srobert void PrintFeatureSet(const std::vector<uint32_t> &FeatureSet) { 2623cab2bb3Spatrick if (!FeatureDebug) return; 2633cab2bb3Spatrick Printf("{"); 2643cab2bb3Spatrick for (uint32_t Feature: FeatureSet) 2653cab2bb3Spatrick Printf("%u,", Feature); 2663cab2bb3Spatrick Printf("}"); 2673cab2bb3Spatrick } 2683cab2bb3Spatrick 2693cab2bb3Spatrick // Debug-only PrintCorpus()2703cab2bb3Spatrick void PrintCorpus() { 2713cab2bb3Spatrick if (!FeatureDebug) return; 2723cab2bb3Spatrick Printf("======= CORPUS:\n"); 2733cab2bb3Spatrick int i = 0; 2743cab2bb3Spatrick for (auto II : Inputs) { 2753cab2bb3Spatrick if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) { 2763cab2bb3Spatrick Printf("[%2d] ", i); 2773cab2bb3Spatrick Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size()); 2783cab2bb3Spatrick PrintUnit(II->U); 2793cab2bb3Spatrick Printf(" "); 2803cab2bb3Spatrick PrintFeatureSet(II->UniqFeatureSet); 2813cab2bb3Spatrick Printf("\n"); 2823cab2bb3Spatrick } 2833cab2bb3Spatrick i++; 2843cab2bb3Spatrick } 2853cab2bb3Spatrick } 2863cab2bb3Spatrick Replace(InputInfo * II,const Unit & U,std::chrono::microseconds TimeOfUnit)287*810390e3Srobert void Replace(InputInfo *II, const Unit &U, 288*810390e3Srobert std::chrono::microseconds TimeOfUnit) { 2893cab2bb3Spatrick assert(II->U.size() > U.size()); 2903cab2bb3Spatrick Hashes.erase(Sha1ToString(II->Sha1)); 2913cab2bb3Spatrick DeleteFile(*II); 2923cab2bb3Spatrick ComputeSHA1(U.data(), U.size(), II->Sha1); 2933cab2bb3Spatrick Hashes.insert(Sha1ToString(II->Sha1)); 2943cab2bb3Spatrick II->U = U; 2953cab2bb3Spatrick II->Reduced = true; 296*810390e3Srobert II->TimeOfUnit = TimeOfUnit; 2971f9cb04fSpatrick DistributionNeedsUpdate = true; 2983cab2bb3Spatrick } 2993cab2bb3Spatrick HasUnit(const Unit & U)3003cab2bb3Spatrick bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } HasUnit(const std::string & H)3013cab2bb3Spatrick bool HasUnit(const std::string &H) { return Hashes.count(H); } ChooseUnitToMutate(Random & Rand)3023cab2bb3Spatrick InputInfo &ChooseUnitToMutate(Random &Rand) { 3033cab2bb3Spatrick InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; 3043cab2bb3Spatrick assert(!II.U.empty()); 3053cab2bb3Spatrick return II; 3063cab2bb3Spatrick } 3073cab2bb3Spatrick ChooseUnitToCrossOverWith(Random & Rand,bool UniformDist)308d89ec533Spatrick InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist) { 309d89ec533Spatrick if (!UniformDist) { 310d89ec533Spatrick return ChooseUnitToMutate(Rand); 311d89ec533Spatrick } 312d89ec533Spatrick InputInfo &II = *Inputs[Rand(Inputs.size())]; 313d89ec533Spatrick assert(!II.U.empty()); 314d89ec533Spatrick return II; 315d89ec533Spatrick } 316d89ec533Spatrick 3173cab2bb3Spatrick // Returns an index of random unit from the corpus to mutate. ChooseUnitIdxToMutate(Random & Rand)3183cab2bb3Spatrick size_t ChooseUnitIdxToMutate(Random &Rand) { 3191f9cb04fSpatrick UpdateCorpusDistribution(Rand); 3203cab2bb3Spatrick size_t Idx = static_cast<size_t>(CorpusDistribution(Rand)); 3213cab2bb3Spatrick assert(Idx < Inputs.size()); 3223cab2bb3Spatrick return Idx; 3233cab2bb3Spatrick } 3243cab2bb3Spatrick PrintStats()3253cab2bb3Spatrick void PrintStats() { 3263cab2bb3Spatrick for (size_t i = 0; i < Inputs.size(); i++) { 3273cab2bb3Spatrick const auto &II = *Inputs[i]; 3283cab2bb3Spatrick Printf(" [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n", i, 3293cab2bb3Spatrick Sha1ToString(II.Sha1).c_str(), II.U.size(), 330*810390e3Srobert II.NumExecutedMutations, II.NumSuccessfullMutations, 331*810390e3Srobert II.HasFocusFunction); 3323cab2bb3Spatrick } 3333cab2bb3Spatrick } 3343cab2bb3Spatrick PrintFeatureSet()3353cab2bb3Spatrick void PrintFeatureSet() { 3363cab2bb3Spatrick for (size_t i = 0; i < kFeatureSetSize; i++) { 3373cab2bb3Spatrick if(size_t Sz = GetFeature(i)) 3383cab2bb3Spatrick Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz); 3393cab2bb3Spatrick } 3403cab2bb3Spatrick Printf("\n\t"); 3413cab2bb3Spatrick for (size_t i = 0; i < Inputs.size(); i++) 3423cab2bb3Spatrick if (size_t N = Inputs[i]->NumFeatures) 3433cab2bb3Spatrick Printf(" %zd=>%zd ", i, N); 3443cab2bb3Spatrick Printf("\n"); 3453cab2bb3Spatrick } 3463cab2bb3Spatrick DeleteFile(const InputInfo & II)3473cab2bb3Spatrick void DeleteFile(const InputInfo &II) { 3483cab2bb3Spatrick if (!OutputCorpus.empty() && II.MayDeleteFile) 3493cab2bb3Spatrick RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); 3503cab2bb3Spatrick } 3513cab2bb3Spatrick DeleteInput(size_t Idx)3523cab2bb3Spatrick void DeleteInput(size_t Idx) { 3533cab2bb3Spatrick InputInfo &II = *Inputs[Idx]; 3543cab2bb3Spatrick DeleteFile(II); 3553cab2bb3Spatrick Unit().swap(II.U); 3561f9cb04fSpatrick II.Energy = 0.0; 3571f9cb04fSpatrick II.NeedsEnergyUpdate = false; 3581f9cb04fSpatrick DistributionNeedsUpdate = true; 3593cab2bb3Spatrick if (FeatureDebug) 3603cab2bb3Spatrick Printf("EVICTED %zd\n", Idx); 3613cab2bb3Spatrick } 3623cab2bb3Spatrick AddRareFeature(uint32_t Idx)3631f9cb04fSpatrick void AddRareFeature(uint32_t Idx) { 3641f9cb04fSpatrick // Maintain *at least* TopXRarestFeatures many rare features 3651f9cb04fSpatrick // and all features with a frequency below ConsideredRare. 3661f9cb04fSpatrick // Remove all other features. 3671f9cb04fSpatrick while (RareFeatures.size() > Entropic.NumberOfRarestFeatures && 3681f9cb04fSpatrick FreqOfMostAbundantRareFeature > Entropic.FeatureFrequencyThreshold) { 3691f9cb04fSpatrick 3701f9cb04fSpatrick // Find most and second most abbundant feature. 3711f9cb04fSpatrick uint32_t MostAbundantRareFeatureIndices[2] = {RareFeatures[0], 3721f9cb04fSpatrick RareFeatures[0]}; 3731f9cb04fSpatrick size_t Delete = 0; 3741f9cb04fSpatrick for (size_t i = 0; i < RareFeatures.size(); i++) { 3751f9cb04fSpatrick uint32_t Idx2 = RareFeatures[i]; 3761f9cb04fSpatrick if (GlobalFeatureFreqs[Idx2] >= 3771f9cb04fSpatrick GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0]]) { 3781f9cb04fSpatrick MostAbundantRareFeatureIndices[1] = MostAbundantRareFeatureIndices[0]; 3791f9cb04fSpatrick MostAbundantRareFeatureIndices[0] = Idx2; 3801f9cb04fSpatrick Delete = i; 3811f9cb04fSpatrick } 3821f9cb04fSpatrick } 3831f9cb04fSpatrick 3841f9cb04fSpatrick // Remove most abundant rare feature. 3851f9cb04fSpatrick RareFeatures[Delete] = RareFeatures.back(); 3861f9cb04fSpatrick RareFeatures.pop_back(); 3871f9cb04fSpatrick 3881f9cb04fSpatrick for (auto II : Inputs) { 3891f9cb04fSpatrick if (II->DeleteFeatureFreq(MostAbundantRareFeatureIndices[0])) 3901f9cb04fSpatrick II->NeedsEnergyUpdate = true; 3911f9cb04fSpatrick } 3921f9cb04fSpatrick 3931f9cb04fSpatrick // Set 2nd most abundant as the new most abundant feature count. 3941f9cb04fSpatrick FreqOfMostAbundantRareFeature = 3951f9cb04fSpatrick GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1]]; 3961f9cb04fSpatrick } 3971f9cb04fSpatrick 3981f9cb04fSpatrick // Add rare feature, handle collisions, and update energy. 3991f9cb04fSpatrick RareFeatures.push_back(Idx); 4001f9cb04fSpatrick GlobalFeatureFreqs[Idx] = 0; 4011f9cb04fSpatrick for (auto II : Inputs) { 4021f9cb04fSpatrick II->DeleteFeatureFreq(Idx); 4031f9cb04fSpatrick 4041f9cb04fSpatrick // Apply add-one smoothing to this locally undiscovered feature. 4051f9cb04fSpatrick // Zero energy seeds will never be fuzzed and remain zero energy. 4061f9cb04fSpatrick if (II->Energy > 0.0) { 4071f9cb04fSpatrick II->SumIncidence += 1; 408d89ec533Spatrick II->Energy += log(II->SumIncidence) / II->SumIncidence; 4091f9cb04fSpatrick } 4101f9cb04fSpatrick } 4111f9cb04fSpatrick 4121f9cb04fSpatrick DistributionNeedsUpdate = true; 4131f9cb04fSpatrick } 4141f9cb04fSpatrick AddFeature(size_t Idx,uint32_t NewSize,bool Shrink)4153cab2bb3Spatrick bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { 4163cab2bb3Spatrick assert(NewSize); 4173cab2bb3Spatrick Idx = Idx % kFeatureSetSize; 4183cab2bb3Spatrick uint32_t OldSize = GetFeature(Idx); 4193cab2bb3Spatrick if (OldSize == 0 || (Shrink && OldSize > NewSize)) { 4203cab2bb3Spatrick if (OldSize > 0) { 4213cab2bb3Spatrick size_t OldIdx = SmallestElementPerFeature[Idx]; 4223cab2bb3Spatrick InputInfo &II = *Inputs[OldIdx]; 4233cab2bb3Spatrick assert(II.NumFeatures > 0); 4243cab2bb3Spatrick II.NumFeatures--; 4253cab2bb3Spatrick if (II.NumFeatures == 0) 4263cab2bb3Spatrick DeleteInput(OldIdx); 4273cab2bb3Spatrick } else { 4283cab2bb3Spatrick NumAddedFeatures++; 4291f9cb04fSpatrick if (Entropic.Enabled) 4301f9cb04fSpatrick AddRareFeature((uint32_t)Idx); 4313cab2bb3Spatrick } 4323cab2bb3Spatrick NumUpdatedFeatures++; 4333cab2bb3Spatrick if (FeatureDebug) 4343cab2bb3Spatrick Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); 435d89ec533Spatrick // Inputs.size() is guaranteed to be less than UINT32_MAX by AddToCorpus. 436d89ec533Spatrick SmallestElementPerFeature[Idx] = static_cast<uint32_t>(Inputs.size()); 4373cab2bb3Spatrick InputSizesPerFeature[Idx] = NewSize; 4383cab2bb3Spatrick return true; 4393cab2bb3Spatrick } 4403cab2bb3Spatrick return false; 4413cab2bb3Spatrick } 4423cab2bb3Spatrick 4431f9cb04fSpatrick // Increment frequency of feature Idx globally and locally. UpdateFeatureFrequency(InputInfo * II,size_t Idx)4441f9cb04fSpatrick void UpdateFeatureFrequency(InputInfo *II, size_t Idx) { 4451f9cb04fSpatrick uint32_t Idx32 = Idx % kFeatureSetSize; 4461f9cb04fSpatrick 4471f9cb04fSpatrick // Saturated increment. 4481f9cb04fSpatrick if (GlobalFeatureFreqs[Idx32] == 0xFFFF) 4491f9cb04fSpatrick return; 4501f9cb04fSpatrick uint16_t Freq = GlobalFeatureFreqs[Idx32]++; 4511f9cb04fSpatrick 4521f9cb04fSpatrick // Skip if abundant. 4531f9cb04fSpatrick if (Freq > FreqOfMostAbundantRareFeature || 4541f9cb04fSpatrick std::find(RareFeatures.begin(), RareFeatures.end(), Idx32) == 4551f9cb04fSpatrick RareFeatures.end()) 4561f9cb04fSpatrick return; 4571f9cb04fSpatrick 4581f9cb04fSpatrick // Update global frequencies. 4591f9cb04fSpatrick if (Freq == FreqOfMostAbundantRareFeature) 4601f9cb04fSpatrick FreqOfMostAbundantRareFeature++; 4611f9cb04fSpatrick 4621f9cb04fSpatrick // Update local frequencies. 4631f9cb04fSpatrick if (II) 4641f9cb04fSpatrick II->UpdateFeatureFrequency(Idx32); 4651f9cb04fSpatrick } 4661f9cb04fSpatrick NumFeatures()4673cab2bb3Spatrick size_t NumFeatures() const { return NumAddedFeatures; } NumFeatureUpdates()4683cab2bb3Spatrick size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } 4693cab2bb3Spatrick 4703cab2bb3Spatrick private: 4713cab2bb3Spatrick 4723cab2bb3Spatrick static const bool FeatureDebug = false; 4733cab2bb3Spatrick GetFeature(size_t Idx)474d89ec533Spatrick uint32_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } 4753cab2bb3Spatrick ValidateFeatureSet()4763cab2bb3Spatrick void ValidateFeatureSet() { 4773cab2bb3Spatrick if (FeatureDebug) 4783cab2bb3Spatrick PrintFeatureSet(); 4793cab2bb3Spatrick for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) 4803cab2bb3Spatrick if (GetFeature(Idx)) 4813cab2bb3Spatrick Inputs[SmallestElementPerFeature[Idx]]->Tmp++; 4823cab2bb3Spatrick for (auto II: Inputs) { 4833cab2bb3Spatrick if (II->Tmp != II->NumFeatures) 4843cab2bb3Spatrick Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures); 4853cab2bb3Spatrick assert(II->Tmp == II->NumFeatures); 4863cab2bb3Spatrick II->Tmp = 0; 4873cab2bb3Spatrick } 4883cab2bb3Spatrick } 4893cab2bb3Spatrick 4903cab2bb3Spatrick // Updates the probability distribution for the units in the corpus. 4913cab2bb3Spatrick // Must be called whenever the corpus or unit weights are changed. 4923cab2bb3Spatrick // 4931f9cb04fSpatrick // Hypothesis: inputs that maximize information about globally rare features 4941f9cb04fSpatrick // are interesting. UpdateCorpusDistribution(Random & Rand)4951f9cb04fSpatrick void UpdateCorpusDistribution(Random &Rand) { 4961f9cb04fSpatrick // Skip update if no seeds or rare features were added/deleted. 4971f9cb04fSpatrick // Sparse updates for local change of feature frequencies, 4981f9cb04fSpatrick // i.e., randomly do not skip. 4991f9cb04fSpatrick if (!DistributionNeedsUpdate && 5001f9cb04fSpatrick (!Entropic.Enabled || Rand(kSparseEnergyUpdates))) 5011f9cb04fSpatrick return; 5021f9cb04fSpatrick 5031f9cb04fSpatrick DistributionNeedsUpdate = false; 5041f9cb04fSpatrick 5053cab2bb3Spatrick size_t N = Inputs.size(); 5063cab2bb3Spatrick assert(N); 5073cab2bb3Spatrick Intervals.resize(N + 1); 5083cab2bb3Spatrick Weights.resize(N); 5093cab2bb3Spatrick std::iota(Intervals.begin(), Intervals.end(), 0); 5101f9cb04fSpatrick 511d89ec533Spatrick std::chrono::microseconds AverageUnitExecutionTime(0); 512d89ec533Spatrick for (auto II : Inputs) { 513d89ec533Spatrick AverageUnitExecutionTime += II->TimeOfUnit; 514d89ec533Spatrick } 515d89ec533Spatrick AverageUnitExecutionTime /= N; 516d89ec533Spatrick 5171f9cb04fSpatrick bool VanillaSchedule = true; 5181f9cb04fSpatrick if (Entropic.Enabled) { 5191f9cb04fSpatrick for (auto II : Inputs) { 5201f9cb04fSpatrick if (II->NeedsEnergyUpdate && II->Energy != 0.0) { 5211f9cb04fSpatrick II->NeedsEnergyUpdate = false; 522d89ec533Spatrick II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime, 523d89ec533Spatrick AverageUnitExecutionTime); 5241f9cb04fSpatrick } 5251f9cb04fSpatrick } 5261f9cb04fSpatrick 5271f9cb04fSpatrick for (size_t i = 0; i < N; i++) { 5281f9cb04fSpatrick 5291f9cb04fSpatrick if (Inputs[i]->NumFeatures == 0) { 5301f9cb04fSpatrick // If the seed doesn't represent any features, assign zero energy. 5311f9cb04fSpatrick Weights[i] = 0.; 5321f9cb04fSpatrick } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor > 5331f9cb04fSpatrick NumExecutedMutations / Inputs.size()) { 5341f9cb04fSpatrick // If the seed was fuzzed a lot more than average, assign zero energy. 5351f9cb04fSpatrick Weights[i] = 0.; 5361f9cb04fSpatrick } else { 5371f9cb04fSpatrick // Otherwise, simply assign the computed energy. 5381f9cb04fSpatrick Weights[i] = Inputs[i]->Energy; 5391f9cb04fSpatrick } 5401f9cb04fSpatrick 5411f9cb04fSpatrick // If energy for all seeds is zero, fall back to vanilla schedule. 5421f9cb04fSpatrick if (Weights[i] > 0.0) 5431f9cb04fSpatrick VanillaSchedule = false; 5441f9cb04fSpatrick } 5451f9cb04fSpatrick } 5461f9cb04fSpatrick 5471f9cb04fSpatrick if (VanillaSchedule) { 5483cab2bb3Spatrick for (size_t i = 0; i < N; i++) 549d89ec533Spatrick Weights[i] = 550d89ec533Spatrick Inputs[i]->NumFeatures 551d89ec533Spatrick ? static_cast<double>((i + 1) * 552d89ec533Spatrick (Inputs[i]->HasFocusFunction ? 1000 : 1)) 5533cab2bb3Spatrick : 0.; 5541f9cb04fSpatrick } 5551f9cb04fSpatrick 5563cab2bb3Spatrick if (FeatureDebug) { 5573cab2bb3Spatrick for (size_t i = 0; i < N; i++) 5583cab2bb3Spatrick Printf("%zd ", Inputs[i]->NumFeatures); 5593cab2bb3Spatrick Printf("SCORE\n"); 5603cab2bb3Spatrick for (size_t i = 0; i < N; i++) 5613cab2bb3Spatrick Printf("%f ", Weights[i]); 5623cab2bb3Spatrick Printf("Weights\n"); 5633cab2bb3Spatrick } 5643cab2bb3Spatrick CorpusDistribution = std::piecewise_constant_distribution<double>( 5653cab2bb3Spatrick Intervals.begin(), Intervals.end(), Weights.begin()); 5663cab2bb3Spatrick } 5673cab2bb3Spatrick std::piecewise_constant_distribution<double> CorpusDistribution; 5683cab2bb3Spatrick 569*810390e3Srobert std::vector<double> Intervals; 570*810390e3Srobert std::vector<double> Weights; 5713cab2bb3Spatrick 5723cab2bb3Spatrick std::unordered_set<std::string> Hashes; 573*810390e3Srobert std::vector<InputInfo *> Inputs; 5743cab2bb3Spatrick 5753cab2bb3Spatrick size_t NumAddedFeatures = 0; 5763cab2bb3Spatrick size_t NumUpdatedFeatures = 0; 5773cab2bb3Spatrick uint32_t InputSizesPerFeature[kFeatureSetSize]; 5783cab2bb3Spatrick uint32_t SmallestElementPerFeature[kFeatureSetSize]; 5793cab2bb3Spatrick 5801f9cb04fSpatrick bool DistributionNeedsUpdate = true; 5811f9cb04fSpatrick uint16_t FreqOfMostAbundantRareFeature = 0; 5821f9cb04fSpatrick uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {}; 583*810390e3Srobert std::vector<uint32_t> RareFeatures; 5841f9cb04fSpatrick 5853cab2bb3Spatrick std::string OutputCorpus; 5863cab2bb3Spatrick }; 5873cab2bb3Spatrick 5883cab2bb3Spatrick } // namespace fuzzer 5893cab2bb3Spatrick 5903cab2bb3Spatrick #endif // LLVM_FUZZER_CORPUS 591