11f05b1a9Swlei //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===// 21f05b1a9Swlei // 31f05b1a9Swlei // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 41f05b1a9Swlei // See https://llvm.org/LICENSE.txt for license information. 51f05b1a9Swlei // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61f05b1a9Swlei // 71f05b1a9Swlei //===----------------------------------------------------------------------===// 81f05b1a9Swlei #include "ProfileGenerator.h" 941a681ceSwlei #include "ErrorHandling.h" 105d7950a4SHongtao Yu #include "MissingFrameInferrer.h" 11937924ebSHongtao Yu #include "PerfReader.h" 12eca03d27SWenlei He #include "ProfiledBinary.h" 13db29f437Sserge-sans-paille #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 14ce6bfe94SWenlei He #include "llvm/ProfileData/ProfileCommon.h" 153f970168SHongtao Yu #include <algorithm> 16c2e08abaSwlei #include <float.h> 17a8a38ef3Swlei #include <unordered_set> 18937924ebSHongtao Yu #include <utility> 191f05b1a9Swlei 20964053d5Swlei cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), 211f05b1a9Swlei cl::Required, 221f05b1a9Swlei cl::desc("Output profile file")); 23426e326aSwlei static cl::alias OutputA("o", cl::desc("Alias for --output"), 24426e326aSwlei cl::aliasopt(OutputFilename)); 251f05b1a9Swlei 261f05b1a9Swlei static cl::opt<SampleProfileFormat> OutputFormat( 27aaa826faSWenlei He "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary), 281f05b1a9Swlei cl::values( 291f05b1a9Swlei clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"), 301f05b1a9Swlei clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"), 311f05b1a9Swlei clEnumValN(SPF_Text, "text", "Text encoding"), 321f05b1a9Swlei clEnumValN(SPF_GCC, "gcc", 331f05b1a9Swlei "GCC encoding (only meaningful for -sample)"))); 341f05b1a9Swlei 35c2250d8bSFangrui Song static cl::opt<bool> UseMD5( 36c2250d8bSFangrui Song "use-md5", cl::Hidden, 377ca80300SHongtao Yu cl::desc("Use md5 to represent function names in the output profile (only " 387ca80300SHongtao Yu "meaningful for -extbinary)")); 397ca80300SHongtao Yu 4016516f89Swlei static cl::opt<bool> PopulateProfileSymbolList( 412f8196dbSwlei "populate-profile-symbol-list", cl::init(false), cl::Hidden, 4216516f89Swlei cl::desc("Populate profile symbol list (only meaningful for -extbinary)")); 4316516f89Swlei 443f3103c6Swlei static cl::opt<bool> FillZeroForAllFuncs( 453f3103c6Swlei "fill-zero-for-all-funcs", cl::init(false), cl::Hidden, 463f3103c6Swlei cl::desc("Attribute all functions' range with zero count " 473f3103c6Swlei "even it's not hit by any samples.")); 483f3103c6Swlei 49ac14bb14Swlei static cl::opt<int32_t, true> RecursionCompression( 50ac14bb14Swlei "compress-recursion", 51ac14bb14Swlei cl::desc("Compressing recursion by deduplicating adjacent frame " 52ac14bb14Swlei "sequences up to the specified size. -1 means no size limit."), 53ac14bb14Swlei cl::Hidden, 54ac14bb14Swlei cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); 55ac14bb14Swlei 5627cb3707Swlei static cl::opt<bool> 57557efc9aSFangrui Song TrimColdProfile("trim-cold-profile", 5827cb3707Swlei cl::desc("If the total count of the profile is smaller " 5927cb3707Swlei "than threshold, it will be trimmed.")); 6027cb3707Swlei 61ce6bfe94SWenlei He static cl::opt<bool> CSProfMergeColdContext( 62557efc9aSFangrui Song "csprof-merge-cold-context", cl::init(true), 63fa14fd30SWenlei He cl::desc("If the total count of context profile is smaller than " 64fa14fd30SWenlei He "the threshold, it will be merged into context-less base " 65fa14fd30SWenlei He "profile.")); 66ce6bfe94SWenlei He 67856a6a50Swlei static cl::opt<uint32_t> CSProfMaxColdContextDepth( 68557efc9aSFangrui Song "csprof-max-cold-context-depth", cl::init(1), 69856a6a50Swlei cl::desc("Keep the last K contexts while merging cold profile. 1 means the " 70863184ddSwlei "context-less base profile")); 71863184ddSwlei 72856a6a50Swlei static cl::opt<int, true> CSProfMaxContextDepth( 73557efc9aSFangrui Song "csprof-max-context-depth", 74856a6a50Swlei cl::desc("Keep the last K contexts while merging profile. -1 means no " 75856a6a50Swlei "depth limit."), 76856a6a50Swlei cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth)); 77856a6a50Swlei 78b9d40a7aSLei Wang static cl::opt<double> ProfileDensityThreshold( 79b9d40a7aSLei Wang "profile-density-threshold", llvm::cl::init(50), 80b9d40a7aSLei Wang llvm::cl::desc("If the profile density is below the given threshold, it " 81b9d40a7aSLei Wang "will be suggested to increase the sampling rate."), 82c2e08abaSwlei llvm::cl::Optional); 83c2e08abaSwlei static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false), 84c2e08abaSwlei llvm::cl::desc("show profile density details"), 85c2e08abaSwlei llvm::cl::Optional); 86b9d40a7aSLei Wang static cl::opt<int> ProfileDensityCutOffHot( 87b9d40a7aSLei Wang "profile-density-cutoff-hot", llvm::cl::init(990000), 88b9d40a7aSLei Wang llvm::cl::desc("Total samples cutoff for functions used to calculate " 89b9d40a7aSLei Wang "profile density.")); 90c2e08abaSwlei 91484a569eSwlei static cl::opt<bool> UpdateTotalSamples( 92484a569eSwlei "update-total-samples", llvm::cl::init(false), 93484a569eSwlei llvm::cl::desc( 94484a569eSwlei "Update total samples by accumulating all its body samples."), 95484a569eSwlei llvm::cl::Optional); 96484a569eSwlei 975740bb80SHongtao Yu static cl::opt<bool> GenCSNestedProfile( 98bc380c09SHongtao Yu "gen-cs-nested-profile", cl::Hidden, cl::init(true), 995740bb80SHongtao Yu cl::desc("Generate nested function profiles for CSSPGO")); 1005740bb80SHongtao Yu 1015d7950a4SHongtao Yu cl::opt<bool> InferMissingFrames( 1025d7950a4SHongtao Yu "infer-missing-frames", llvm::cl::init(true), 1035d7950a4SHongtao Yu llvm::cl::desc( 1045d7950a4SHongtao Yu "Infer missing call frames due to compiler tail call elimination."), 1055d7950a4SHongtao Yu llvm::cl::Optional); 1065d7950a4SHongtao Yu 1071f05b1a9Swlei using namespace llvm; 1081f05b1a9Swlei using namespace sampleprof; 1091f05b1a9Swlei 1101f05b1a9Swlei namespace llvm { 111c2250d8bSFangrui Song extern cl::opt<int> ProfileSummaryCutoffHot; 112c2250d8bSFangrui Song extern cl::opt<bool> UseContextLessSummary; 113c2250d8bSFangrui Song 1141f05b1a9Swlei namespace sampleprof { 1151f05b1a9Swlei 116ac14bb14Swlei // Initialize the MaxCompressionSize to -1 which means no size limit 117ac14bb14Swlei int32_t CSProfileGenerator::MaxCompressionSize = -1; 118ac14bb14Swlei 119856a6a50Swlei int CSProfileGenerator::MaxContextDepth = -1; 120856a6a50Swlei 12141a681ceSwlei bool ProfileGeneratorBase::UseFSDiscriminator = false; 12241a681ceSwlei 123d5f20130Swlei std::unique_ptr<ProfileGeneratorBase> 124d5f20130Swlei ProfileGeneratorBase::create(ProfiledBinary *Binary, 125937924ebSHongtao Yu const ContextSampleCounterMap *SampleCounters, 126e36786d1SHongtao Yu bool ProfileIsCS) { 127d5f20130Swlei std::unique_ptr<ProfileGeneratorBase> Generator; 128e36786d1SHongtao Yu if (ProfileIsCS) { 129d5f20130Swlei Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); 130c681400bSwlei } else { 131a03cf331Swlei Generator.reset(new ProfileGenerator(Binary, SampleCounters)); 1321f05b1a9Swlei } 13341a681ceSwlei ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 13441a681ceSwlei FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 1351f05b1a9Swlei 136d5f20130Swlei return Generator; 1371f05b1a9Swlei } 1381f05b1a9Swlei 139937924ebSHongtao Yu std::unique_ptr<ProfileGeneratorBase> 140aa58b7b1Swlei ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles, 141e36786d1SHongtao Yu bool ProfileIsCS) { 142937924ebSHongtao Yu std::unique_ptr<ProfileGeneratorBase> Generator; 143e36786d1SHongtao Yu if (ProfileIsCS) { 144aa58b7b1Swlei Generator.reset(new CSProfileGenerator(Binary, Profiles)); 145937924ebSHongtao Yu } else { 146937924ebSHongtao Yu Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); 147937924ebSHongtao Yu } 148937924ebSHongtao Yu ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator(); 149937924ebSHongtao Yu FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator(); 150937924ebSHongtao Yu 151937924ebSHongtao Yu return Generator; 152937924ebSHongtao Yu } 153937924ebSHongtao Yu 154d5f20130Swlei void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer, 155b9db7036SHongtao Yu SampleProfileMap &ProfileMap) { 15616516f89Swlei // Populate profile symbol list if extended binary format is used. 15716516f89Swlei ProfileSymbolList SymbolList; 15816516f89Swlei 15916516f89Swlei if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) { 1602f8196dbSwlei Binary->populateSymbolListFromDWARF(SymbolList); 16116516f89Swlei Writer->setProfileSymbolList(&SymbolList); 16216516f89Swlei } 16316516f89Swlei 16400ef28efSWenlei He if (std::error_code EC = Writer->write(ProfileMap)) 16500ef28efSWenlei He exitWithError(std::move(EC)); 166c3aeabaeSwlei } 167c3aeabaeSwlei 168d5f20130Swlei void ProfileGeneratorBase::write() { 1691f05b1a9Swlei auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat); 1701f05b1a9Swlei if (std::error_code EC = WriterOrErr.getError()) 1711f05b1a9Swlei exitWithError(EC, OutputFilename); 1727ca80300SHongtao Yu 1737ca80300SHongtao Yu if (UseMD5) { 1747ca80300SHongtao Yu if (OutputFormat != SPF_Ext_Binary) 1757ca80300SHongtao Yu WithColor::warning() << "-use-md5 is ignored. Specify " 1767ca80300SHongtao Yu "--format=extbinary to enable it\n"; 1777ca80300SHongtao Yu else 1787ca80300SHongtao Yu WriterOrErr.get()->setUseMD5(); 1797ca80300SHongtao Yu } 1807ca80300SHongtao Yu 181c3aeabaeSwlei write(std::move(WriterOrErr.get()), ProfileMap); 1821f05b1a9Swlei } 1831f05b1a9Swlei 184c2e08abaSwlei void ProfileGeneratorBase::showDensitySuggestion(double Density) { 185c2e08abaSwlei if (Density == 0.0) 186b9d40a7aSLei Wang WithColor::warning() << "The output profile is empty or the " 187b9d40a7aSLei Wang "--profile-density-cutoff-hot option is " 188c2e08abaSwlei "set too low. Please check your command.\n"; 189b9d40a7aSLei Wang else if (Density < ProfileDensityThreshold) 190c2e08abaSwlei WithColor::warning() 1911a0d23efSWenlei He << "Sample PGO is estimated to optimize better with " 192b9d40a7aSLei Wang << format("%.1f", ProfileDensityThreshold / Density) 193c2e08abaSwlei << "x more samples. Please consider increasing sampling rate or " 194c2e08abaSwlei "profiling for longer duration to get more samples.\n"; 195c2e08abaSwlei 196c2e08abaSwlei if (ShowDensity) 197b9d40a7aSLei Wang outs() << "Functions with density >= " << format("%.1f", Density) 198b9d40a7aSLei Wang << " account for " 199c2e08abaSwlei << format("%.2f", 200b9d40a7aSLei Wang static_cast<double>(ProfileDensityCutOffHot) / 10000) 201b9d40a7aSLei Wang << "% total sample counts.\n"; 202c2e08abaSwlei } 203c2e08abaSwlei 20424f02517SLei Wang bool ProfileGeneratorBase::filterAmbiguousProfile(FunctionSamples &FS) { 20524f02517SLei Wang for (const auto &Prefix : FuncPrefixsToFilter) { 20624f02517SLei Wang if (FS.getFuncName().starts_with(Prefix)) 20724f02517SLei Wang return true; 20824f02517SLei Wang } 20924f02517SLei Wang 21024f02517SLei Wang // Filter the function profiles for the inlinees. It's useful for fuzzy 21124f02517SLei Wang // profile matching which flattens the profile and inlinees' samples are 21224f02517SLei Wang // merged into top-level function. 21324f02517SLei Wang for (auto &Callees : 21424f02517SLei Wang const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) { 21524f02517SLei Wang auto &CalleesMap = Callees.second; 21624f02517SLei Wang for (auto I = CalleesMap.begin(); I != CalleesMap.end();) { 21724f02517SLei Wang auto FS = I++; 21824f02517SLei Wang if (filterAmbiguousProfile(FS->second)) 21924f02517SLei Wang CalleesMap.erase(FS); 22024f02517SLei Wang } 22124f02517SLei Wang } 22224f02517SLei Wang return false; 22324f02517SLei Wang } 22424f02517SLei Wang 22524f02517SLei Wang // For built-in local initialization function such as __cxx_global_var_init, 22624f02517SLei Wang // __tls_init prefix function, there could be multiple versions of the functions 22724f02517SLei Wang // in the final binary. However, in the profile generation, we call 22824f02517SLei Wang // getCanonicalFnName to canonicalize the names which strips the suffixes. 22924f02517SLei Wang // Therefore, samples from different functions queries the same profile and the 23024f02517SLei Wang // samples are merged. As the functions are essentially different, entries of 23124f02517SLei Wang // the merged profile are ambiguous. In sample loader, the IR from one version 23224f02517SLei Wang // would be attributed towards a merged entries, which is inaccurate. Especially 23324f02517SLei Wang // for fuzzy profile matching, it gets multiple callsites(from different 23424f02517SLei Wang // function) but used to match one callsite, which misleads the matching and 23524f02517SLei Wang // causes a lot of false positives report. Hence, we want to filter them out 23624f02517SLei Wang // from the profile map during the profile generation time. The profiles are all 23724f02517SLei Wang // cold functions, it won't have perf impact. 23824f02517SLei Wang void ProfileGeneratorBase::filterAmbiguousProfile(SampleProfileMap &Profiles) { 23924f02517SLei Wang for (auto I = ProfileMap.begin(); I != ProfileMap.end();) { 24024f02517SLei Wang auto FS = I++; 24124f02517SLei Wang if (filterAmbiguousProfile(FS->second)) 24224f02517SLei Wang ProfileMap.erase(FS); 24324f02517SLei Wang } 24424f02517SLei Wang } 24524f02517SLei Wang 246d5f20130Swlei void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges, 2471f05b1a9Swlei const RangeSample &Ranges) { 2481f05b1a9Swlei 2491f05b1a9Swlei /* 2501f05b1a9Swlei Regions may overlap with each other. Using the boundary info, find all 2511f05b1a9Swlei disjoint ranges and their sample count. BoundaryPoint contains the count 252c82b24f4Swlei multiple samples begin/end at this points. 2531f05b1a9Swlei 2541f05b1a9Swlei |<--100-->| Sample1 2551f05b1a9Swlei |<------200------>| Sample2 2561f05b1a9Swlei A B C 2571f05b1a9Swlei 2581f05b1a9Swlei In the example above, 2591f05b1a9Swlei Sample1 begins at A, ends at B, its value is 100. 2601f05b1a9Swlei Sample2 beings at A, ends at C, its value is 200. 2611f05b1a9Swlei For A, BeginCount is the sum of sample begins at A, which is 300 and no 2621f05b1a9Swlei samples ends at A, so EndCount is 0. 2631f05b1a9Swlei Then boundary points A, B, and C with begin/end counts are: 2641f05b1a9Swlei A: (300, 0) 2651f05b1a9Swlei B: (0, 100) 2661f05b1a9Swlei C: (0, 200) 2671f05b1a9Swlei */ 2681f05b1a9Swlei struct BoundaryPoint { 2691f05b1a9Swlei // Sum of sample counts beginning at this point 27028277e9bSwlei uint64_t BeginCount = UINT64_MAX; 2711f05b1a9Swlei // Sum of sample counts ending at this point 27228277e9bSwlei uint64_t EndCount = UINT64_MAX; 27328277e9bSwlei // Is the begin point of a zero range. 27428277e9bSwlei bool IsZeroRangeBegin = false; 27528277e9bSwlei // Is the end point of a zero range. 27628277e9bSwlei bool IsZeroRangeEnd = false; 2771f05b1a9Swlei 27828277e9bSwlei void addBeginCount(uint64_t Count) { 27928277e9bSwlei if (BeginCount == UINT64_MAX) 28028277e9bSwlei BeginCount = 0; 28128277e9bSwlei BeginCount += Count; 28228277e9bSwlei } 2831f05b1a9Swlei 28428277e9bSwlei void addEndCount(uint64_t Count) { 28528277e9bSwlei if (EndCount == UINT64_MAX) 28628277e9bSwlei EndCount = 0; 28728277e9bSwlei EndCount += Count; 28828277e9bSwlei } 2891f05b1a9Swlei }; 2901f05b1a9Swlei 2911f05b1a9Swlei /* 2921f05b1a9Swlei For the above example. With boundary points, follwing logic finds two 2931f05b1a9Swlei disjoint region of 2941f05b1a9Swlei 2951f05b1a9Swlei [A,B]: 300 2961f05b1a9Swlei [B+1,C]: 200 2971f05b1a9Swlei 2981f05b1a9Swlei If there is a boundary point that both begin and end, the point itself 2991f05b1a9Swlei becomes a separate disjoint region. For example, if we have original 3001f05b1a9Swlei ranges of 3011f05b1a9Swlei 3021f05b1a9Swlei |<--- 100 --->| 3031f05b1a9Swlei |<--- 200 --->| 3041f05b1a9Swlei A B C 3051f05b1a9Swlei 3061f05b1a9Swlei there are three boundary points with their begin/end counts of 3071f05b1a9Swlei 3081f05b1a9Swlei A: (100, 0) 3091f05b1a9Swlei B: (200, 100) 3101f05b1a9Swlei C: (0, 200) 3111f05b1a9Swlei 3121f05b1a9Swlei the disjoint ranges would be 3131f05b1a9Swlei 3141f05b1a9Swlei [A, B-1]: 100 3151f05b1a9Swlei [B, B]: 300 3161f05b1a9Swlei [B+1, C]: 200. 31728277e9bSwlei 31828277e9bSwlei Example for zero value range: 31928277e9bSwlei 32028277e9bSwlei |<--- 100 --->| 32128277e9bSwlei |<--- 200 --->| 32228277e9bSwlei |<--------------- 0 ----------------->| 32328277e9bSwlei A B C D E F 32428277e9bSwlei 32528277e9bSwlei [A, B-1] : 0 32628277e9bSwlei [B, C] : 100 32728277e9bSwlei [C+1, D-1]: 0 32828277e9bSwlei [D, E] : 200 32928277e9bSwlei [E+1, F] : 0 3301f05b1a9Swlei */ 3311f05b1a9Swlei std::map<uint64_t, BoundaryPoint> Boundaries; 3321f05b1a9Swlei 33392ba979cSSimon Pilgrim for (const auto &Item : Ranges) { 33428277e9bSwlei assert(Item.first.first <= Item.first.second && 33528277e9bSwlei "Invalid instruction range"); 33628277e9bSwlei auto &BeginPoint = Boundaries[Item.first.first]; 33728277e9bSwlei auto &EndPoint = Boundaries[Item.first.second]; 3381f05b1a9Swlei uint64_t Count = Item.second; 3391f05b1a9Swlei 34028277e9bSwlei BeginPoint.addBeginCount(Count); 34128277e9bSwlei EndPoint.addEndCount(Count); 34228277e9bSwlei if (Count == 0) { 34328277e9bSwlei BeginPoint.IsZeroRangeBegin = true; 34428277e9bSwlei EndPoint.IsZeroRangeEnd = true; 34528277e9bSwlei } 3461f05b1a9Swlei } 3471f05b1a9Swlei 34828277e9bSwlei // Use UINT64_MAX to indicate there is no existing range between BeginAddress 34928277e9bSwlei // and the next valid address 350fb19aa0cSHongtao Yu uint64_t BeginAddress = UINT64_MAX; 35128277e9bSwlei int ZeroRangeDepth = 0; 3528cbbd7e0SHongtao Yu uint64_t Count = 0; 35392ba979cSSimon Pilgrim for (const auto &Item : Boundaries) { 3541f05b1a9Swlei uint64_t Address = Item.first; 35592ba979cSSimon Pilgrim const BoundaryPoint &Point = Item.second; 35628277e9bSwlei if (Point.BeginCount != UINT64_MAX) { 357fb19aa0cSHongtao Yu if (BeginAddress != UINT64_MAX) 3581f05b1a9Swlei DisjointRanges[{BeginAddress, Address - 1}] = Count; 3591f05b1a9Swlei Count += Point.BeginCount; 3601f05b1a9Swlei BeginAddress = Address; 36128277e9bSwlei ZeroRangeDepth += Point.IsZeroRangeBegin; 3621f05b1a9Swlei } 36328277e9bSwlei if (Point.EndCount != UINT64_MAX) { 364fb19aa0cSHongtao Yu assert((BeginAddress != UINT64_MAX) && 365fb19aa0cSHongtao Yu "First boundary point cannot be 'end' point"); 3661f05b1a9Swlei DisjointRanges[{BeginAddress, Address}] = Count; 3678cbbd7e0SHongtao Yu assert(Count >= Point.EndCount && "Mismatched live ranges"); 3681f05b1a9Swlei Count -= Point.EndCount; 3691f05b1a9Swlei BeginAddress = Address + 1; 37028277e9bSwlei ZeroRangeDepth -= Point.IsZeroRangeEnd; 37128277e9bSwlei // If the remaining count is zero and it's no longer in a zero range, this 37228277e9bSwlei // means we consume all the ranges before, thus mark BeginAddress as 37328277e9bSwlei // UINT64_MAX. e.g. supposing we have two non-overlapping ranges: 37428277e9bSwlei // [<---- 10 ---->] 37528277e9bSwlei // [<---- 20 ---->] 37628277e9bSwlei // A B C D 37728277e9bSwlei // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't 37828277e9bSwlei // have the [B+1, C-1] zero range. 37928277e9bSwlei if (Count == 0 && ZeroRangeDepth == 0) 38028277e9bSwlei BeginAddress = UINT64_MAX; 3811f05b1a9Swlei } 3821f05b1a9Swlei } 3831f05b1a9Swlei } 3841f05b1a9Swlei 385d5f20130Swlei void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( 386d5f20130Swlei FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc, 387d5f20130Swlei uint64_t Count) { 388d5f20130Swlei // Use the maximum count of samples with same line location 38946cf7d75Swlei uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); 390e8c245dcSWenlei He 39123609a38STim Creech // Use duplication factor to compensated for loop unroll/vectorization. 39223609a38STim Creech // Note that this is only needed when we're taking MAX of the counts at 39323609a38STim Creech // the location instead of SUM. 394e8c245dcSWenlei He Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); 395e8c245dcSWenlei He 39623609a38STim Creech ErrorOr<uint64_t> R = 39723609a38STim Creech FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); 39846cf7d75Swlei 399d5f20130Swlei uint64_t PreviousCount = R ? R.get() : 0; 40028277e9bSwlei if (PreviousCount <= Count) { 40146cf7d75Swlei FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, 402d5f20130Swlei Count - PreviousCount); 403d5f20130Swlei } 404d5f20130Swlei } 405d5f20130Swlei 406f5537643Swlei void ProfileGeneratorBase::updateTotalSamples() { 407f5537643Swlei for (auto &Item : ProfileMap) { 408f5537643Swlei FunctionSamples &FunctionProfile = Item.second; 409f5537643Swlei FunctionProfile.updateTotalSamples(); 410f5537643Swlei } 411f5537643Swlei } 412f5537643Swlei 413acfd0a34SHongtao Yu void ProfileGeneratorBase::updateCallsiteSamples() { 414acfd0a34SHongtao Yu for (auto &Item : ProfileMap) { 415acfd0a34SHongtao Yu FunctionSamples &FunctionProfile = Item.second; 416acfd0a34SHongtao Yu FunctionProfile.updateCallsiteSamples(); 417acfd0a34SHongtao Yu } 418acfd0a34SHongtao Yu } 419acfd0a34SHongtao Yu 420acfd0a34SHongtao Yu void ProfileGeneratorBase::updateFunctionSamples() { 421acfd0a34SHongtao Yu updateCallsiteSamples(); 422acfd0a34SHongtao Yu 423acfd0a34SHongtao Yu if (UpdateTotalSamples) 424acfd0a34SHongtao Yu updateTotalSamples(); 425acfd0a34SHongtao Yu } 426acfd0a34SHongtao Yu 4273f970168SHongtao Yu void ProfileGeneratorBase::collectProfiledFunctions() { 4283f970168SHongtao Yu std::unordered_set<const BinaryFunction *> ProfiledFunctions; 429aa58b7b1Swlei if (collectFunctionsFromRawProfile(ProfiledFunctions)) 430aa58b7b1Swlei Binary->setProfiledFunctions(ProfiledFunctions); 431aa58b7b1Swlei else if (collectFunctionsFromLLVMProfile(ProfiledFunctions)) 432aa58b7b1Swlei Binary->setProfiledFunctions(ProfiledFunctions); 433aa58b7b1Swlei else 434aa58b7b1Swlei llvm_unreachable("Unsupported input profile"); 435aa58b7b1Swlei } 436aa58b7b1Swlei 437aa58b7b1Swlei bool ProfileGeneratorBase::collectFunctionsFromRawProfile( 438aa58b7b1Swlei std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 439aa58b7b1Swlei if (!SampleCounters) 440aa58b7b1Swlei return false; 441937924ebSHongtao Yu // Go through all the stacks, ranges and branches in sample counters, use 442937924ebSHongtao Yu // the start of the range to look up the function it belongs and record the 4433f970168SHongtao Yu // function. 444937924ebSHongtao Yu for (const auto &CI : *SampleCounters) { 4453f970168SHongtao Yu if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { 44646765248Swlei for (auto StackAddr : CtxKey->Context) { 44746765248Swlei if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) 4483f970168SHongtao Yu ProfiledFunctions.insert(FRange->Func); 4493f970168SHongtao Yu } 4503f970168SHongtao Yu } 4513f970168SHongtao Yu 4523f970168SHongtao Yu for (auto Item : CI.second.RangeCounter) { 45346765248Swlei uint64_t StartAddress = Item.first.first; 45446765248Swlei if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) 4553f970168SHongtao Yu ProfiledFunctions.insert(FRange->Func); 4563f970168SHongtao Yu } 4573f970168SHongtao Yu 4583f970168SHongtao Yu for (auto Item : CI.second.BranchCounter) { 45946765248Swlei uint64_t SourceAddress = Item.first.first; 460c5667778SHongtao Yu uint64_t TargetAddress = Item.first.second; 46146765248Swlei if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) 4623f970168SHongtao Yu ProfiledFunctions.insert(FRange->Func); 46346765248Swlei if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) 4643f970168SHongtao Yu ProfiledFunctions.insert(FRange->Func); 4653f970168SHongtao Yu } 4663f970168SHongtao Yu } 467aa58b7b1Swlei return true; 468aa58b7b1Swlei } 469aa58b7b1Swlei 470aa58b7b1Swlei bool ProfileGenerator::collectFunctionsFromLLVMProfile( 471aa58b7b1Swlei std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 472937924ebSHongtao Yu for (const auto &FS : ProfileMap) { 473ef0e0adcSWilliam Junda Huang if (auto *Func = Binary->getBinaryFunction(FS.second.getFunction())) 474937924ebSHongtao Yu ProfiledFunctions.insert(Func); 475937924ebSHongtao Yu } 476aa58b7b1Swlei return true; 477937924ebSHongtao Yu } 4783f970168SHongtao Yu 479aa58b7b1Swlei bool CSProfileGenerator::collectFunctionsFromLLVMProfile( 480aa58b7b1Swlei std::unordered_set<const BinaryFunction *> &ProfiledFunctions) { 4817e86b13cSwlei for (auto *Node : ContextTracker) { 482aa58b7b1Swlei if (!Node->getFuncName().empty()) 483aa58b7b1Swlei if (auto *Func = Binary->getBinaryFunction(Node->getFuncName())) 484aa58b7b1Swlei ProfiledFunctions.insert(Func); 485aa58b7b1Swlei } 486aa58b7b1Swlei return true; 4873f970168SHongtao Yu } 4883f970168SHongtao Yu 489d5f20130Swlei FunctionSamples & 490ef0e0adcSWilliam Junda Huang ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) { 491d5f20130Swlei SampleContext Context(FuncName); 492ce03155aSMircea Trofin return ProfileMap.create(Context); 493d5f20130Swlei } 494d5f20130Swlei 495d5f20130Swlei void ProfileGenerator::generateProfile() { 4963f970168SHongtao Yu collectProfiledFunctions(); 497937924ebSHongtao Yu 498937924ebSHongtao Yu if (Binary->usePseudoProbes()) 499937924ebSHongtao Yu Binary->decodePseudoProbe(); 500937924ebSHongtao Yu 501937924ebSHongtao Yu if (SampleCounters) { 502d5f20130Swlei if (Binary->usePseudoProbes()) { 50323391febSHongtao Yu generateProbeBasedProfile(); 504d5f20130Swlei } else { 505d5f20130Swlei generateLineNumBasedProfile(); 506d5f20130Swlei } 507937924ebSHongtao Yu } 508937924ebSHongtao Yu 509c2e08abaSwlei postProcessProfiles(); 510c2e08abaSwlei } 511c2e08abaSwlei 512c2e08abaSwlei void ProfileGenerator::postProcessProfiles() { 513aa58b7b1Swlei computeSummaryAndThreshold(ProfileMap); 51427cb3707Swlei trimColdProfiles(ProfileMap, ColdCountThreshold); 51524f02517SLei Wang filterAmbiguousProfile(ProfileMap); 516c2e08abaSwlei calculateAndShowDensity(ProfileMap); 517d5f20130Swlei } 518d5f20130Swlei 51927cb3707Swlei void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles, 52027cb3707Swlei uint64_t ColdCntThreshold) { 52127cb3707Swlei if (!TrimColdProfile) 52227cb3707Swlei return; 52327cb3707Swlei 52427cb3707Swlei // Move cold profiles into a tmp container. 5257624de5bSWilliam Huang std::vector<hash_code> ColdProfileHashes; 52627cb3707Swlei for (const auto &I : ProfileMap) { 52727cb3707Swlei if (I.second.getTotalSamples() < ColdCntThreshold) 5287624de5bSWilliam Huang ColdProfileHashes.emplace_back(I.first); 52927cb3707Swlei } 53027cb3707Swlei 53127cb3707Swlei // Remove the cold profile from ProfileMap. 5327624de5bSWilliam Huang for (const auto &I : ColdProfileHashes) 53327cb3707Swlei ProfileMap.erase(I); 53427cb3707Swlei } 53527cb3707Swlei 536d5f20130Swlei void ProfileGenerator::generateLineNumBasedProfile() { 537937924ebSHongtao Yu assert(SampleCounters->size() == 1 && 538d5f20130Swlei "Must have one entry for profile generation."); 539937924ebSHongtao Yu const SampleCounter &SC = SampleCounters->begin()->second; 540d5f20130Swlei // Fill in function body samples 541d5f20130Swlei populateBodySamplesForAllFunctions(SC.RangeCounter); 542d5f20130Swlei // Fill in boundary sample counts as well as call site samples for calls 543d5f20130Swlei populateBoundarySamplesForAllFunctions(SC.BranchCounter); 544f5537643Swlei 545acfd0a34SHongtao Yu updateFunctionSamples(); 546d5f20130Swlei } 547d5f20130Swlei 54823391febSHongtao Yu void ProfileGenerator::generateProbeBasedProfile() { 549937924ebSHongtao Yu assert(SampleCounters->size() == 1 && 55023391febSHongtao Yu "Must have one entry for profile generation."); 55123391febSHongtao Yu // Enable pseudo probe functionalities in SampleProf 55223391febSHongtao Yu FunctionSamples::ProfileIsProbeBased = true; 553937924ebSHongtao Yu const SampleCounter &SC = SampleCounters->begin()->second; 55423391febSHongtao Yu // Fill in function body samples 55523391febSHongtao Yu populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter); 55623391febSHongtao Yu // Fill in boundary sample counts as well as call site samples for calls 55723391febSHongtao Yu populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter); 55823391febSHongtao Yu 559acfd0a34SHongtao Yu updateFunctionSamples(); 56023391febSHongtao Yu } 56123391febSHongtao Yu 56223391febSHongtao Yu void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions( 56323391febSHongtao Yu const RangeSample &RangeCounter) { 56423391febSHongtao Yu ProbeCounterMap ProbeCounter; 5653f970168SHongtao Yu // preprocessRangeCounter returns disjoint ranges, so no longer to redo it 5663f970168SHongtao Yu // inside extractProbesFromRange. 5673f970168SHongtao Yu extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter, 5683f970168SHongtao Yu false); 56923391febSHongtao Yu 57023391febSHongtao Yu for (const auto &PI : ProbeCounter) { 57123391febSHongtao Yu const MCDecodedPseudoProbe *Probe = PI.first; 57223391febSHongtao Yu uint64_t Count = PI.second; 57323391febSHongtao Yu SampleContextFrameVector FrameVec; 57423391febSHongtao Yu Binary->getInlineContextForProbe(Probe, FrameVec, true); 5753f970168SHongtao Yu FunctionSamples &FunctionProfile = 5763f970168SHongtao Yu getLeafProfileAndAddTotalSamples(FrameVec, Count); 577345fd0c1SHongtao Yu FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(), 578345fd0c1SHongtao Yu Count); 57923391febSHongtao Yu if (Probe->isEntry()) 58023391febSHongtao Yu FunctionProfile.addHeadSamples(Count); 58123391febSHongtao Yu } 58223391febSHongtao Yu } 58323391febSHongtao Yu 58423391febSHongtao Yu void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions( 58523391febSHongtao Yu const BranchSample &BranchCounters) { 58623391febSHongtao Yu for (const auto &Entry : BranchCounters) { 58746765248Swlei uint64_t SourceAddress = Entry.first.first; 58846765248Swlei uint64_t TargetAddress = Entry.first.second; 58923391febSHongtao Yu uint64_t Count = Entry.second; 59023391febSHongtao Yu assert(Count != 0 && "Unexpected zero weight branch"); 59123391febSHongtao Yu 59246765248Swlei StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 59323391febSHongtao Yu if (CalleeName.size() == 0) 59423391febSHongtao Yu continue; 59523391febSHongtao Yu 59623391febSHongtao Yu const MCDecodedPseudoProbe *CallProbe = 59723391febSHongtao Yu Binary->getCallProbeForAddr(SourceAddress); 59823391febSHongtao Yu if (CallProbe == nullptr) 59923391febSHongtao Yu continue; 60023391febSHongtao Yu 60123391febSHongtao Yu // Record called target sample and its count. 60223391febSHongtao Yu SampleContextFrameVector FrameVec; 60323391febSHongtao Yu Binary->getInlineContextForProbe(CallProbe, FrameVec, true); 60423391febSHongtao Yu 60523391febSHongtao Yu if (!FrameVec.empty()) { 60623391febSHongtao Yu FunctionSamples &FunctionProfile = 60723391febSHongtao Yu getLeafProfileAndAddTotalSamples(FrameVec, 0); 60823391febSHongtao Yu FunctionProfile.addCalledTargetSamples( 609345fd0c1SHongtao Yu FrameVec.back().Location.LineOffset, 610345fd0c1SHongtao Yu FrameVec.back().Location.Discriminator, 611ef0e0adcSWilliam Junda Huang FunctionId(CalleeName), Count); 61223391febSHongtao Yu } 61323391febSHongtao Yu } 61423391febSHongtao Yu } 61523391febSHongtao Yu 616484a569eSwlei FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples( 617484a569eSwlei const SampleContextFrameVector &FrameVec, uint64_t Count) { 618d5f20130Swlei // Get top level profile 619d5f20130Swlei FunctionSamples *FunctionProfile = 620ef0e0adcSWilliam Junda Huang &getTopLevelFunctionProfile(FrameVec[0].Func); 621484a569eSwlei FunctionProfile->addTotalSamples(Count); 62223391febSHongtao Yu if (Binary->usePseudoProbes()) { 6233f970168SHongtao Yu const auto *FuncDesc = Binary->getFuncDescForGUID( 624ef0e0adcSWilliam Junda Huang FunctionProfile->getFunction().getHashCode()); 62523391febSHongtao Yu FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 62623391febSHongtao Yu } 627d5f20130Swlei 628d5f20130Swlei for (size_t I = 1; I < FrameVec.size(); I++) { 62946cf7d75Swlei LineLocation Callsite( 63046cf7d75Swlei FrameVec[I - 1].Location.LineOffset, 63146cf7d75Swlei getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator)); 632d5f20130Swlei FunctionSamplesMap &SamplesMap = 63346cf7d75Swlei FunctionProfile->functionSamplesAt(Callsite); 634d5f20130Swlei auto Ret = 635ef0e0adcSWilliam Junda Huang SamplesMap.emplace(FrameVec[I].Func, FunctionSamples()); 636d5f20130Swlei if (Ret.second) { 637ef0e0adcSWilliam Junda Huang SampleContext Context(FrameVec[I].Func); 638d5f20130Swlei Ret.first->second.setContext(Context); 639d5f20130Swlei } 640d5f20130Swlei FunctionProfile = &Ret.first->second; 641484a569eSwlei FunctionProfile->addTotalSamples(Count); 64223391febSHongtao Yu if (Binary->usePseudoProbes()) { 6433f970168SHongtao Yu const auto *FuncDesc = Binary->getFuncDescForGUID( 644ef0e0adcSWilliam Junda Huang FunctionProfile->getFunction().getHashCode()); 64523391febSHongtao Yu FunctionProfile->setFunctionHash(FuncDesc->FuncHash); 64623391febSHongtao Yu } 647d5f20130Swlei } 648d5f20130Swlei 649d5f20130Swlei return *FunctionProfile; 650d5f20130Swlei } 651d5f20130Swlei 65228277e9bSwlei RangeSample 65328277e9bSwlei ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) { 65428277e9bSwlei RangeSample Ranges(RangeCounter.begin(), RangeCounter.end()); 6553f3103c6Swlei if (FillZeroForAllFuncs) { 6563f3103c6Swlei for (auto &FuncI : Binary->getAllBinaryFunctions()) { 6573f3103c6Swlei for (auto &R : FuncI.second.Ranges) { 6585bf191a3Swlei Ranges[{R.first, R.second - 1}] += 0; 6593f3103c6Swlei } 6603f3103c6Swlei } 6613f3103c6Swlei } else { 6623f3103c6Swlei // For each range, we search for all ranges of the function it belongs to 6633f3103c6Swlei // and initialize it with zero count, so it remains zero if doesn't hit any 66428277e9bSwlei // samples. This is to be consistent with compiler that interpret zero count 66528277e9bSwlei // as unexecuted(cold). 66692ba979cSSimon Pilgrim for (const auto &I : RangeCounter) { 66746765248Swlei uint64_t StartAddress = I.first.first; 66846765248Swlei for (const auto &Range : Binary->getRanges(StartAddress)) 66940ca4112Swlei Ranges[{Range.first, Range.second - 1}] += 0; 67028277e9bSwlei } 6713f3103c6Swlei } 67228277e9bSwlei RangeSample DisjointRanges; 67328277e9bSwlei findDisjointRanges(DisjointRanges, Ranges); 67428277e9bSwlei return DisjointRanges; 67528277e9bSwlei } 67628277e9bSwlei 677d5f20130Swlei void ProfileGenerator::populateBodySamplesForAllFunctions( 678d5f20130Swlei const RangeSample &RangeCounter) { 67992ba979cSSimon Pilgrim for (const auto &Range : preprocessRangeCounter(RangeCounter)) { 68046765248Swlei uint64_t RangeBegin = Range.first.first; 68146765248Swlei uint64_t RangeEnd = Range.first.second; 682d5f20130Swlei uint64_t Count = Range.second; 683d5f20130Swlei 684d5f20130Swlei InstructionPointer IP(Binary, RangeBegin, true); 685d5f20130Swlei // Disjoint ranges may have range in the middle of two instr, 686d5f20130Swlei // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 687d5f20130Swlei // can be Addr1+1 to Addr2-1. We should ignore such range. 6885bf191a3Swlei if (IP.Address > RangeEnd) 6895bf191a3Swlei continue; 6905bf191a3Swlei 6915bf191a3Swlei do { 69291cc53d5Swlei const SampleContextFrameVector FrameVec = 69346765248Swlei Binary->getFrameLocationStack(IP.Address); 694d5f20130Swlei if (!FrameVec.empty()) { 695484a569eSwlei // FIXME: As accumulating total count per instruction caused some 696484a569eSwlei // regression, we changed to accumulate total count per byte as a 697484a569eSwlei // workaround. Tuning hotness threshold on the compiler side might be 698484a569eSwlei // necessary in the future. 699484a569eSwlei FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples( 70046765248Swlei FrameVec, Count * Binary->getInstSize(IP.Address)); 701d5f20130Swlei updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(), 702e8c245dcSWenlei He Count); 703d5f20130Swlei } 7045bf191a3Swlei } while (IP.advance() && IP.Address <= RangeEnd); 705d5f20130Swlei } 706d5f20130Swlei } 707d5f20130Swlei 70846765248Swlei StringRef 70946765248Swlei ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) { 71040ca4112Swlei // Get the function range by branch target if it's a call branch. 71146765248Swlei auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress); 712b1a45c62Swlei 71340ca4112Swlei // We won't accumulate sample count for a range whose start is not the real 71440ca4112Swlei // function entry such as outlined function or inner labels. 71540ca4112Swlei if (!FRange || !FRange->IsFuncEntry) 716b1a45c62Swlei return StringRef(); 717b1a45c62Swlei 71840ca4112Swlei return FunctionSamples::getCanonicalFnName(FRange->getFuncName()); 719b1a45c62Swlei } 720b1a45c62Swlei 721d5f20130Swlei void ProfileGenerator::populateBoundarySamplesForAllFunctions( 722d5f20130Swlei const BranchSample &BranchCounters) { 72392ba979cSSimon Pilgrim for (const auto &Entry : BranchCounters) { 72446765248Swlei uint64_t SourceAddress = Entry.first.first; 72546765248Swlei uint64_t TargetAddress = Entry.first.second; 726d5f20130Swlei uint64_t Count = Entry.second; 727d5f20130Swlei assert(Count != 0 && "Unexpected zero weight branch"); 728d5f20130Swlei 72946765248Swlei StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 730d5f20130Swlei if (CalleeName.size() == 0) 731d5f20130Swlei continue; 732d5f20130Swlei // Record called target sample and its count. 733d5f20130Swlei const SampleContextFrameVector &FrameVec = 73491cc53d5Swlei Binary->getCachedFrameLocationStack(SourceAddress); 735d5f20130Swlei if (!FrameVec.empty()) { 736484a569eSwlei FunctionSamples &FunctionProfile = 737484a569eSwlei getLeafProfileAndAddTotalSamples(FrameVec, 0); 738d5f20130Swlei FunctionProfile.addCalledTargetSamples( 739fb29d812Swlei FrameVec.back().Location.LineOffset, 74046cf7d75Swlei getBaseDiscriminator(FrameVec.back().Location.Discriminator), 741ef0e0adcSWilliam Junda Huang FunctionId(CalleeName), Count); 742d5f20130Swlei } 743d5f20130Swlei // Add head samples for callee. 744ef0e0adcSWilliam Junda Huang FunctionSamples &CalleeProfile = 745ef0e0adcSWilliam Junda Huang getTopLevelFunctionProfile(FunctionId(CalleeName)); 746d5f20130Swlei CalleeProfile.addHeadSamples(Count); 747d5f20130Swlei } 748d5f20130Swlei } 749d5f20130Swlei 750b9d40a7aSLei Wang void ProfileGeneratorBase::calculateBodySamplesAndSize( 751b9d40a7aSLei Wang const FunctionSamples &FSamples, uint64_t &TotalBodySamples, 752b9d40a7aSLei Wang uint64_t &FuncBodySize) { 753b9d40a7aSLei Wang // Note that ideally the size should be the number of function instruction. 754b9d40a7aSLei Wang // However, for probe-based profile, we don't have the accurate instruction 755b9d40a7aSLei Wang // count for each probe, instead, the probe sample is the samples count for 756b9d40a7aSLei Wang // the block, which is equivelant to 757b9d40a7aSLei Wang // total_instruction_samples/num_of_instruction in one block. Hence, we use 758b9d40a7aSLei Wang // the number of probe as a proxy for the function's size. 759b9d40a7aSLei Wang FuncBodySize += FSamples.getBodySamples().size(); 760b9d40a7aSLei Wang 761b9d40a7aSLei Wang // The accumulated body samples re-calculated here could be different from the 762b9d40a7aSLei Wang // TotalSamples(getTotalSamples) field of FunctionSamples for line-number 763b9d40a7aSLei Wang // based profile. The reason is that TotalSamples is the sum of all the 764b9d40a7aSLei Wang // samples of the machine instruction in one source-code line, however, the 765b9d40a7aSLei Wang // entry of Bodysamples is the only max number of them, so the TotalSamples is 766b9d40a7aSLei Wang // usually much bigger than the accumulated body samples as one souce-code 767b9d40a7aSLei Wang // line can emit many machine instructions. We observed a regression when we 768b9d40a7aSLei Wang // switched to use the accumulated body samples(by using 769b9d40a7aSLei Wang // -update-total-samples). Hence, it's safer to re-calculate here to avoid 770b9d40a7aSLei Wang // such discrepancy. There is no problem for probe-based profile, as the 771b9d40a7aSLei Wang // TotalSamples is exactly the same as the accumulated body samples. 772b9d40a7aSLei Wang for (const auto &I : FSamples.getBodySamples()) 773b9d40a7aSLei Wang TotalBodySamples += I.second.getSamples(); 774b9d40a7aSLei Wang 775b9d40a7aSLei Wang for (const auto &CallsiteSamples : FSamples.getCallsiteSamples()) 776b9d40a7aSLei Wang for (const auto &Callee : CallsiteSamples.second) { 777b9d40a7aSLei Wang // For binary-level density, the inlinees' samples and size should be 778b9d40a7aSLei Wang // included in the calculation. 779b9d40a7aSLei Wang calculateBodySamplesAndSize(Callee.second, TotalBodySamples, 780b9d40a7aSLei Wang FuncBodySize); 781b9d40a7aSLei Wang } 782b9d40a7aSLei Wang } 783b9d40a7aSLei Wang 784b9d40a7aSLei Wang // Calculate Profile-density: 785b9d40a7aSLei Wang // Calculate the density for each function and sort them in descending order, 786b9d40a7aSLei Wang // keep accumulating their total samples unitl it exceeds the 787b9d40a7aSLei Wang // percentage_threshold(cut-off) of total profile samples, the profile-density 788b9d40a7aSLei Wang // is the last(minimum) function-density of the processed functions, which means 789b9d40a7aSLei Wang // all the functions hot to perf are on good density if the profile-density is 790b9d40a7aSLei Wang // good. The percentage_threshold(--profile-density-cutoff-hot) is configurable 791b9d40a7aSLei Wang // depending on how much regression the system want to tolerate. 792b9d40a7aSLei Wang double 793b9d40a7aSLei Wang ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles) { 794b9d40a7aSLei Wang double ProfileDensity = 0.0; 795b9d40a7aSLei Wang 796b9d40a7aSLei Wang uint64_t TotalProfileSamples = 0; 797b9d40a7aSLei Wang // A list of the function profile density and its total samples. 798b9d40a7aSLei Wang std::vector<std::pair<double, uint64_t>> FuncDensityList; 799b9d40a7aSLei Wang for (const auto &I : Profiles) { 800b9d40a7aSLei Wang uint64_t TotalBodySamples = 0; 801b9d40a7aSLei Wang uint64_t FuncBodySize = 0; 802b9d40a7aSLei Wang calculateBodySamplesAndSize(I.second, TotalBodySamples, FuncBodySize); 803b9d40a7aSLei Wang 804b9d40a7aSLei Wang if (FuncBodySize == 0) 805b9d40a7aSLei Wang continue; 806b9d40a7aSLei Wang 807b9d40a7aSLei Wang double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize; 808b9d40a7aSLei Wang TotalProfileSamples += TotalBodySamples; 809b9d40a7aSLei Wang FuncDensityList.emplace_back(FuncDensity, TotalBodySamples); 810b9d40a7aSLei Wang } 811b9d40a7aSLei Wang 812b9d40a7aSLei Wang // Sorted by the density in descending order. 813b9d40a7aSLei Wang llvm::stable_sort(FuncDensityList, [&](const std::pair<double, uint64_t> &A, 814b9d40a7aSLei Wang const std::pair<double, uint64_t> &B) { 815b9d40a7aSLei Wang if (A.first != B.first) 816b9d40a7aSLei Wang return A.first > B.first; 817b9d40a7aSLei Wang return A.second < B.second; 818b9d40a7aSLei Wang }); 819b9d40a7aSLei Wang 820b9d40a7aSLei Wang uint64_t AccumulatedSamples = 0; 821b9d40a7aSLei Wang uint32_t I = 0; 822b9d40a7aSLei Wang assert(ProfileDensityCutOffHot <= 1000000 && 823b9d40a7aSLei Wang "The cutoff value is greater than 1000000(100%)"); 824b9d40a7aSLei Wang while (AccumulatedSamples < TotalProfileSamples * 825b9d40a7aSLei Wang static_cast<float>(ProfileDensityCutOffHot) / 826b9d40a7aSLei Wang 1000000 && 827b9d40a7aSLei Wang I < FuncDensityList.size()) { 828b9d40a7aSLei Wang AccumulatedSamples += FuncDensityList[I].second; 829b9d40a7aSLei Wang ProfileDensity = FuncDensityList[I].first; 830b9d40a7aSLei Wang I++; 831b9d40a7aSLei Wang } 832b9d40a7aSLei Wang 833b9d40a7aSLei Wang return ProfileDensity; 834b9d40a7aSLei Wang } 835b9d40a7aSLei Wang 836c2e08abaSwlei void ProfileGeneratorBase::calculateAndShowDensity( 837c2e08abaSwlei const SampleProfileMap &Profiles) { 838b9d40a7aSLei Wang double Density = calculateDensity(Profiles); 839c2e08abaSwlei showDensitySuggestion(Density); 840c2e08abaSwlei } 841c2e08abaSwlei 842eba57492Swlei FunctionSamples * 843eba57492Swlei CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode, 844eba57492Swlei bool WasLeafInlined) { 845eba57492Swlei FunctionSamples *FProfile = ContextNode->getFunctionSamples(); 846eba57492Swlei if (!FProfile) { 847eba57492Swlei FSamplesList.emplace_back(); 848eba57492Swlei FProfile = &FSamplesList.back(); 849ef0e0adcSWilliam Junda Huang FProfile->setFunction(ContextNode->getFuncName()); 850eba57492Swlei ContextNode->setFunctionSamples(FProfile); 851eba57492Swlei } 8524c2b57aeSHongtao Yu // Update ContextWasInlined attribute for existing contexts. 8534c2b57aeSHongtao Yu // The current function can be called in two ways: 8544c2b57aeSHongtao Yu // - when processing a probe of the current frame 8554c2b57aeSHongtao Yu // - when processing the entry probe of an inlinee's frame, which 8564c2b57aeSHongtao Yu // is then used to update the callsite count of the current frame. 8574c2b57aeSHongtao Yu // The two can happen in any order, hence here we are making sure 8584c2b57aeSHongtao Yu // `ContextWasInlined` is always set as expected. 8594c2b57aeSHongtao Yu // TODO: Note that the former does not always happen if no probes of the 8604c2b57aeSHongtao Yu // current frame has samples, and if the latter happens, we could lose the 8614c2b57aeSHongtao Yu // attribute. This should be fixed. 8624c2b57aeSHongtao Yu if (WasLeafInlined) 863eba57492Swlei FProfile->getContext().setAttribute(ContextWasInlined); 864eba57492Swlei return FProfile; 8651f05b1a9Swlei } 8664c2b57aeSHongtao Yu 867eba57492Swlei ContextTrieNode * 868eba57492Swlei CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context, 869eba57492Swlei bool WasLeafInlined) { 870eba57492Swlei ContextTrieNode *ContextNode = 871eba57492Swlei ContextTracker.getOrCreateContextPath(Context, true); 872eba57492Swlei getOrCreateFunctionSamples(ContextNode, WasLeafInlined); 873eba57492Swlei return ContextNode; 8741ed69bb8Swlei } 8751f05b1a9Swlei 876a5d30421SWenlei He void CSProfileGenerator::generateProfile() { 877e36786d1SHongtao Yu FunctionSamples::ProfileIsCS = true; 878ce40843aSwlei 8793f970168SHongtao Yu collectProfiledFunctions(); 880ce40843aSwlei 8815d7950a4SHongtao Yu if (Binary->usePseudoProbes()) { 882937924ebSHongtao Yu Binary->decodePseudoProbe(); 8835d7950a4SHongtao Yu if (InferMissingFrames) 8845d7950a4SHongtao Yu initializeMissingFrameInferrer(); 8855d7950a4SHongtao Yu } 886937924ebSHongtao Yu 887937924ebSHongtao Yu if (SampleCounters) { 888d5f20130Swlei if (Binary->usePseudoProbes()) { 889d5f20130Swlei generateProbeBasedProfile(); 890d5f20130Swlei } else { 891d5f20130Swlei generateLineNumBasedProfile(); 892d5f20130Swlei } 893937924ebSHongtao Yu } 8943f970168SHongtao Yu 8953f970168SHongtao Yu if (Binary->getTrackFuncContextSize()) 8963f970168SHongtao Yu computeSizeForProfiledFunctions(); 8973f970168SHongtao Yu 898d5f20130Swlei postProcessProfiles(); 899d5f20130Swlei } 900d5f20130Swlei 9015d7950a4SHongtao Yu void CSProfileGenerator::initializeMissingFrameInferrer() { 9025d7950a4SHongtao Yu Binary->getMissingContextInferrer()->initialize(SampleCounters); 9035d7950a4SHongtao Yu } 9045d7950a4SHongtao Yu 9055d7950a4SHongtao Yu void CSProfileGenerator::inferMissingFrames( 9065d7950a4SHongtao Yu const SmallVectorImpl<uint64_t> &Context, 9075d7950a4SHongtao Yu SmallVectorImpl<uint64_t> &NewContext) { 9085d7950a4SHongtao Yu Binary->inferMissingFrames(Context, NewContext); 9095d7950a4SHongtao Yu } 9105d7950a4SHongtao Yu 911ce40843aSwlei void CSProfileGenerator::computeSizeForProfiledFunctions() { 9123f970168SHongtao Yu for (auto *Func : Binary->getProfiledFunctions()) 91334e131b0SHongtao Yu Binary->computeInlinedContextSizeForFunc(Func); 91434e131b0SHongtao Yu 91534e131b0SHongtao Yu // Flush the symbolizer to save memory. 91634e131b0SHongtao Yu Binary->flushSymbolizer(); 917ce40843aSwlei } 918ce40843aSwlei 919eba57492Swlei void CSProfileGenerator::updateFunctionSamples() { 9207e86b13cSwlei for (auto *Node : ContextTracker) { 921eba57492Swlei FunctionSamples *FSamples = Node->getFunctionSamples(); 922eba57492Swlei if (FSamples) { 923eba57492Swlei if (UpdateTotalSamples) 924eba57492Swlei FSamples->updateTotalSamples(); 925eba57492Swlei FSamples->updateCallsiteSamples(); 926eba57492Swlei } 927eba57492Swlei } 928eba57492Swlei } 929eba57492Swlei 930d5f20130Swlei void CSProfileGenerator::generateLineNumBasedProfile() { 931937924ebSHongtao Yu for (const auto &CI : *SampleCounters) { 93286bbf01dSSimon Pilgrim const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr()); 93386bbf01dSSimon Pilgrim 934eba57492Swlei ContextTrieNode *ContextNode = &getRootContext(); 935bfcb2c11Swlei // Sample context will be empty if the jump is an external-to-internal call 936bfcb2c11Swlei // pattern, the head samples should be added for the internal function. 937bfcb2c11Swlei if (!CtxKey->Context.empty()) { 938a5d30421SWenlei He // Get or create function profile for the range 939eba57492Swlei ContextNode = 940eba57492Swlei getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined); 941a5d30421SWenlei He // Fill in function body samples 942eba57492Swlei populateBodySamplesForFunction(*ContextNode->getFunctionSamples(), 943eba57492Swlei CI.second.RangeCounter); 944bfcb2c11Swlei } 945a5d30421SWenlei He // Fill in boundary sample counts as well as call site samples for calls 946eba57492Swlei populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter); 947a5d30421SWenlei He } 948a5d30421SWenlei He // Fill in call site value sample for inlined calls and also use context to 949a5d30421SWenlei He // infer missing samples. Since we don't have call count for inlined 950a5d30421SWenlei He // functions, we estimate it from inlinee's profile using the entry of the 951a5d30421SWenlei He // body sample. 952eba57492Swlei populateInferredFunctionSamples(getRootContext()); 953f5537643Swlei 954acfd0a34SHongtao Yu updateFunctionSamples(); 955a5d30421SWenlei He } 956a5d30421SWenlei He 957d5f20130Swlei void CSProfileGenerator::populateBodySamplesForFunction( 958f812c192Swlei FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) { 9591f05b1a9Swlei // Compute disjoint ranges first, so we can use MAX 9601f05b1a9Swlei // for calculating count for each location. 9611f05b1a9Swlei RangeSample Ranges; 962414930b9Swlei findDisjointRanges(Ranges, RangeCounter); 96392ba979cSSimon Pilgrim for (const auto &Range : Ranges) { 96446765248Swlei uint64_t RangeBegin = Range.first.first; 96546765248Swlei uint64_t RangeEnd = Range.first.second; 9661f05b1a9Swlei uint64_t Count = Range.second; 9671f05b1a9Swlei // Disjoint ranges have introduce zero-filled gap that 9681f05b1a9Swlei // doesn't belong to current context, filter them out. 9691f05b1a9Swlei if (Count == 0) 9701f05b1a9Swlei continue; 9711f05b1a9Swlei 9721f05b1a9Swlei InstructionPointer IP(Binary, RangeBegin, true); 9731f05b1a9Swlei // Disjoint ranges may have range in the middle of two instr, 9741f05b1a9Swlei // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 9751f05b1a9Swlei // can be Addr1+1 to Addr2-1. We should ignore such range. 9765bf191a3Swlei if (IP.Address > RangeEnd) 9775bf191a3Swlei continue; 9785bf191a3Swlei 9795bf191a3Swlei do { 98046765248Swlei auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address); 981e0e687a6SKazu Hirata if (LeafLoc) { 9821f05b1a9Swlei // Recording body sample for this specific context 983e8c245dcSWenlei He updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); 984484a569eSwlei FunctionProfile.addTotalSamples(Count); 985afd8bd60Swlei } 9865bf191a3Swlei } while (IP.advance() && IP.Address <= RangeEnd); 9871f05b1a9Swlei } 9881f05b1a9Swlei } 9891f05b1a9Swlei 990d5f20130Swlei void CSProfileGenerator::populateBoundarySamplesForFunction( 991eba57492Swlei ContextTrieNode *Node, const BranchSample &BranchCounters) { 9921f05b1a9Swlei 99392ba979cSSimon Pilgrim for (const auto &Entry : BranchCounters) { 99446765248Swlei uint64_t SourceAddress = Entry.first.first; 99546765248Swlei uint64_t TargetAddress = Entry.first.second; 9961f05b1a9Swlei uint64_t Count = Entry.second; 997d5f20130Swlei assert(Count != 0 && "Unexpected zero weight branch"); 998d5f20130Swlei 99946765248Swlei StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 10001f05b1a9Swlei if (CalleeName.size() == 0) 10011f05b1a9Swlei continue; 10021f05b1a9Swlei 1003eba57492Swlei ContextTrieNode *CallerNode = Node; 1004eba57492Swlei LineLocation CalleeCallSite(0, 0); 1005eba57492Swlei if (CallerNode != &getRootContext()) { 10061f05b1a9Swlei // Record called target sample and its count 100746765248Swlei auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress); 1008e0e687a6SKazu Hirata if (LeafLoc) { 1009eba57492Swlei CallerNode->getFunctionSamples()->addCalledTargetSamples( 101046cf7d75Swlei LeafLoc->Location.LineOffset, 1011ef0e0adcSWilliam Junda Huang getBaseDiscriminator(LeafLoc->Location.Discriminator), 1012ef0e0adcSWilliam Junda Huang FunctionId(CalleeName), 101346cf7d75Swlei Count); 10141f05b1a9Swlei // Record head sample for called target(callee) 1015eba57492Swlei CalleeCallSite = LeafLoc->Location; 10161f05b1a9Swlei } 10171f05b1a9Swlei } 10181f05b1a9Swlei 1019eba57492Swlei ContextTrieNode *CalleeNode = 1020ef0e0adcSWilliam Junda Huang CallerNode->getOrCreateChildContext(CalleeCallSite, 1021ef0e0adcSWilliam Junda Huang FunctionId(CalleeName)); 1022eba57492Swlei FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode); 1023eba57492Swlei CalleeProfile->addHeadSamples(Count); 1024eba57492Swlei } 10251f05b1a9Swlei } 10261f05b1a9Swlei 1027eba57492Swlei void CSProfileGenerator::populateInferredFunctionSamples( 1028eba57492Swlei ContextTrieNode &Node) { 1029eba57492Swlei // There is no call jmp sample between the inliner and inlinee, we need to use 1030eba57492Swlei // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s 1031eba57492Swlei // sample depends on child(inlinee)'s sample, so traverse the tree in 1032eba57492Swlei // post-order. 1033eba57492Swlei for (auto &It : Node.getAllChildContext()) 1034eba57492Swlei populateInferredFunctionSamples(It.second); 10351f05b1a9Swlei 1036eba57492Swlei FunctionSamples *CalleeProfile = Node.getFunctionSamples(); 1037eba57492Swlei if (!CalleeProfile) 1038eba57492Swlei return; 10391f05b1a9Swlei // If we already have head sample counts, we must have value profile 10401f05b1a9Swlei // for call sites added already. Skip to avoid double counting. 1041eba57492Swlei if (CalleeProfile->getHeadSamples()) 1042eba57492Swlei return; 1043eba57492Swlei ContextTrieNode *CallerNode = Node.getParentContext(); 10441f05b1a9Swlei // If we don't have context, nothing to do for caller's call site. 10451f05b1a9Swlei // This could happen for entry point function. 1046eba57492Swlei if (CallerNode == &getRootContext()) 1047eba57492Swlei return; 10481f05b1a9Swlei 1049eba57492Swlei LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc(); 1050eba57492Swlei FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); 10511f05b1a9Swlei // Since we don't have call count for inlined functions, we 10521f05b1a9Swlei // estimate it from inlinee's profile using entry body sample. 10537b81a81dSMircea Trofin uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate(); 10541f05b1a9Swlei // If we don't have samples with location, use 1 to indicate live. 1055eba57492Swlei if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size()) 10561f05b1a9Swlei EstimatedCallCount = 1; 1057eba57492Swlei CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset, 1058eba57492Swlei CallerLeafFrameLoc.Discriminator, 1059eba57492Swlei Node.getFuncName(), EstimatedCallCount); 1060eba57492Swlei CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset, 1061eba57492Swlei CallerLeafFrameLoc.Discriminator, 10621f05b1a9Swlei EstimatedCallCount); 1063484a569eSwlei CallerProfile.addTotalSamples(EstimatedCallCount); 10641f05b1a9Swlei } 1065eba57492Swlei 1066eba57492Swlei void CSProfileGenerator::convertToProfileMap( 1067eba57492Swlei ContextTrieNode &Node, SampleContextFrameVector &Context) { 1068eba57492Swlei FunctionSamples *FProfile = Node.getFunctionSamples(); 1069eba57492Swlei if (FProfile) { 1070eba57492Swlei Context.emplace_back(Node.getFuncName(), LineLocation(0, 0)); 1071eba57492Swlei // Save the new context for future references. 1072eba57492Swlei SampleContextFrames NewContext = *Contexts.insert(Context).first; 1073eba57492Swlei auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile)); 1074eba57492Swlei FunctionSamples &NewProfile = Ret.first->second; 1075eba57492Swlei NewProfile.getContext().setContext(NewContext); 1076eba57492Swlei Context.pop_back(); 1077eba57492Swlei } 1078eba57492Swlei 1079eba57492Swlei for (auto &It : Node.getAllChildContext()) { 1080eba57492Swlei ContextTrieNode &ChildNode = It.second; 1081eba57492Swlei Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc()); 1082eba57492Swlei convertToProfileMap(ChildNode, Context); 1083eba57492Swlei Context.pop_back(); 1084eba57492Swlei } 1085eba57492Swlei } 1086eba57492Swlei 1087eba57492Swlei void CSProfileGenerator::convertToProfileMap() { 1088eba57492Swlei assert(ProfileMap.empty() && 1089eba57492Swlei "ProfileMap should be empty before converting from the trie"); 1090eba57492Swlei assert(IsProfileValidOnTrie && 1091eba57492Swlei "Do not convert the trie twice, it's already destroyed"); 1092eba57492Swlei 1093eba57492Swlei SampleContextFrameVector Context; 1094eba57492Swlei for (auto &It : getRootContext().getAllChildContext()) 1095eba57492Swlei convertToProfileMap(It.second, Context); 1096eba57492Swlei 1097eba57492Swlei IsProfileValidOnTrie = false; 10981f05b1a9Swlei } 10991f05b1a9Swlei 110030b02323SWenlei He void CSProfileGenerator::postProcessProfiles() { 110130b02323SWenlei He // Compute hot/cold threshold based on profile. This will be used for cold 110230b02323SWenlei He // context profile merging/trimming. 110330b02323SWenlei He computeSummaryAndThreshold(); 110430b02323SWenlei He 110530b02323SWenlei He // Run global pre-inliner to adjust/merge context profile based on estimated 110630b02323SWenlei He // inline decisions. 1107eca03d27SWenlei He if (EnableCSPreInliner) { 11087e86b13cSwlei ContextTracker.populateFuncToCtxtMap(); 11097e86b13cSwlei CSPreInliner(ContextTracker, *Binary, Summary.get()).run(); 1110259e4c56SHongtao Yu // Turn off the profile merger by default unless it is explicitly enabled. 1111259e4c56SHongtao Yu if (!CSProfMergeColdContext.getNumOccurrences()) 1112259e4c56SHongtao Yu CSProfMergeColdContext = false; 1113eca03d27SWenlei He } 111430b02323SWenlei He 11157e86b13cSwlei convertToProfileMap(); 11167e86b13cSwlei 1117259e4c56SHongtao Yu // Trim and merge cold context profile using cold threshold above. 111827cb3707Swlei if (TrimColdProfile || CSProfMergeColdContext) { 111900ef28efSWenlei He SampleContextTrimmer(ProfileMap) 112000ef28efSWenlei He .trimAndMergeColdContextProfiles( 112127cb3707Swlei HotCountThreshold, TrimColdProfile, CSProfMergeColdContext, 1122259e4c56SHongtao Yu CSProfMaxColdContextDepth, EnableCSPreInliner); 112330b02323SWenlei He } 1124c2e08abaSwlei 11255740bb80SHongtao Yu if (GenCSNestedProfile) { 1126339b8a00Swlei ProfileConverter CSConverter(ProfileMap); 1127339b8a00Swlei CSConverter.convertCSProfiles(); 1128e36786d1SHongtao Yu FunctionSamples::ProfileIsCS = false; 11295740bb80SHongtao Yu } 113024f02517SLei Wang filterAmbiguousProfile(ProfileMap); 1131b9d40a7aSLei Wang ProfileGeneratorBase::calculateAndShowDensity(ProfileMap); 1132c2e08abaSwlei } 1133c2e08abaSwlei 1134aa58b7b1Swlei void ProfileGeneratorBase::computeSummaryAndThreshold( 1135aa58b7b1Swlei SampleProfileMap &Profiles) { 1136ce6bfe94SWenlei He SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1137aa58b7b1Swlei Summary = Builder.computeSummaryForProfiles(Profiles); 1138dff83158SWenlei He HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold( 1139dff83158SWenlei He (Summary->getDetailedSummary())); 1140dff83158SWenlei He ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold( 1141dff83158SWenlei He (Summary->getDetailedSummary())); 1142e10b73f6Swlei } 1143e10b73f6Swlei 1144aa58b7b1Swlei void CSProfileGenerator::computeSummaryAndThreshold() { 1145aa58b7b1Swlei // Always merge and use context-less profile map to compute summary. 1146aa58b7b1Swlei SampleProfileMap ContextLessProfiles; 1147aa58b7b1Swlei ContextTracker.createContextLessProfileMap(ContextLessProfiles); 1148aa58b7b1Swlei 1149aa58b7b1Swlei // Set the flag below to avoid merging the profile again in 1150aa58b7b1Swlei // computeSummaryAndThreshold 1151aa58b7b1Swlei FunctionSamples::ProfileIsCS = false; 1152aa58b7b1Swlei assert( 1153aa58b7b1Swlei (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) && 1154aa58b7b1Swlei "Don't set --profile-summary-contextless to false for profile " 1155aa58b7b1Swlei "generation"); 1156aa58b7b1Swlei ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles); 1157aa58b7b1Swlei // Recover the old value. 1158aa58b7b1Swlei FunctionSamples::ProfileIsCS = true; 1159aa58b7b1Swlei } 1160aa58b7b1Swlei 116123391febSHongtao Yu void ProfileGeneratorBase::extractProbesFromRange( 116223391febSHongtao Yu const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter, 116323391febSHongtao Yu bool FindDisjointRanges) { 116423391febSHongtao Yu const RangeSample *PRanges = &RangeCounter; 1165c82b24f4Swlei RangeSample Ranges; 116623391febSHongtao Yu if (FindDisjointRanges) { 1167c82b24f4Swlei findDisjointRanges(Ranges, RangeCounter); 116823391febSHongtao Yu PRanges = &Ranges; 116923391febSHongtao Yu } 117023391febSHongtao Yu 117123391febSHongtao Yu for (const auto &Range : *PRanges) { 117246765248Swlei uint64_t RangeBegin = Range.first.first; 117346765248Swlei uint64_t RangeEnd = Range.first.second; 1174c82b24f4Swlei uint64_t Count = Range.second; 1175c82b24f4Swlei 1176c82b24f4Swlei InstructionPointer IP(Binary, RangeBegin, true); 1177c82b24f4Swlei // Disjoint ranges may have range in the middle of two instr, 1178c82b24f4Swlei // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range 1179c82b24f4Swlei // can be Addr1+1 to Addr2-1. We should ignore such range. 1180c82b24f4Swlei if (IP.Address > RangeEnd) 1181c82b24f4Swlei continue; 1182c82b24f4Swlei 11835bf191a3Swlei do { 1184c82b24f4Swlei const AddressProbesMap &Address2ProbesMap = 1185c82b24f4Swlei Binary->getAddress2ProbesMap(); 1186*ee09f7d1SAmir Ayupov for (const MCDecodedPseudoProbe &Probe : 1187*ee09f7d1SAmir Ayupov Address2ProbesMap.find(IP.Address)) { 1188c82b24f4Swlei ProbeCounter[&Probe] += Count; 1189c82b24f4Swlei } 11905bf191a3Swlei } while (IP.advance() && IP.Address <= RangeEnd); 1191c82b24f4Swlei } 1192c82b24f4Swlei } 1193c82b24f4Swlei 119446765248Swlei static void extractPrefixContextStack(SampleContextFrameVector &ContextStack, 119546765248Swlei const SmallVectorImpl<uint64_t> &AddrVec, 119623391febSHongtao Yu ProfiledBinary *Binary) { 11973f970168SHongtao Yu SmallVector<const MCDecodedPseudoProbe *, 16> Probes; 119846765248Swlei for (auto Address : reverse(AddrVec)) { 119946765248Swlei const MCDecodedPseudoProbe *CallProbe = 120046765248Swlei Binary->getCallProbeForAddr(Address); 12013f970168SHongtao Yu // These could be the cases when a probe is not found at a calliste. Cutting 12023f970168SHongtao Yu // off the context from here since the inliner will not know how to consume 12033f970168SHongtao Yu // a context with unknown callsites. 12043f970168SHongtao Yu // 1. for functions that are not sampled when 12053f970168SHongtao Yu // --decode-probe-for-profiled-functions-only is on. 12063f970168SHongtao Yu // 2. for a merged callsite. Callsite merging may cause the loss of original 12073f970168SHongtao Yu // probe IDs. 12083f970168SHongtao Yu // 3. for an external callsite. 12093f970168SHongtao Yu if (!CallProbe) 12103f970168SHongtao Yu break; 12113f970168SHongtao Yu Probes.push_back(CallProbe); 12123f970168SHongtao Yu } 12133f970168SHongtao Yu 12143f970168SHongtao Yu std::reverse(Probes.begin(), Probes.end()); 12153f970168SHongtao Yu 12163f970168SHongtao Yu // Extract context stack for reusing, leaf context stack will be added 12173f970168SHongtao Yu // compressed while looking up function profile. 121823391febSHongtao Yu for (const auto *P : Probes) { 121923391febSHongtao Yu Binary->getInlineContextForProbe(P, ContextStack, true); 122023391febSHongtao Yu } 122123391febSHongtao Yu } 122223391febSHongtao Yu 122323391febSHongtao Yu void CSProfileGenerator::generateProbeBasedProfile() { 122423391febSHongtao Yu // Enable pseudo probe functionalities in SampleProf 122523391febSHongtao Yu FunctionSamples::ProfileIsProbeBased = true; 1226937924ebSHongtao Yu for (const auto &CI : *SampleCounters) { 12273f970168SHongtao Yu const AddrBasedCtxKey *CtxKey = 12283f970168SHongtao Yu dyn_cast<AddrBasedCtxKey>(CI.first.getPtr()); 122923391febSHongtao Yu // Fill in function body samples from probes, also infer caller's samples 123023391febSHongtao Yu // from callee's probe 12315d7950a4SHongtao Yu populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey); 123223391febSHongtao Yu // Fill in boundary samples for a call probe 12335d7950a4SHongtao Yu populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey); 123423391febSHongtao Yu } 123523391febSHongtao Yu } 123623391febSHongtao Yu 1237d5f20130Swlei void CSProfileGenerator::populateBodySamplesWithProbes( 12385d7950a4SHongtao Yu const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) { 1239c82b24f4Swlei ProbeCounterMap ProbeCounter; 1240c82b24f4Swlei // Extract the top frame probes by looking up each address among the range in 1241c82b24f4Swlei // the Address2ProbeMap 1242f812c192Swlei extractProbesFromRange(RangeCounter, ProbeCounter); 1243a8a38ef3Swlei std::unordered_map<MCDecodedPseudoProbeInlineTree *, 1244a8a38ef3Swlei std::unordered_set<FunctionSamples *>> 1245ee7d20e8Sjamesluox FrameSamples; 124692ba979cSSimon Pilgrim for (const auto &PI : ProbeCounter) { 1247ee7d20e8Sjamesluox const MCDecodedPseudoProbe *Probe = PI.first; 1248c82b24f4Swlei uint64_t Count = PI.second; 124923391febSHongtao Yu // Disjoint ranges have introduce zero-filled gap that 125023391febSHongtao Yu // doesn't belong to current context, filter them out. 125123391febSHongtao Yu if (!Probe->isBlock() || Count == 0) 125223391febSHongtao Yu continue; 1253eba57492Swlei 12545d7950a4SHongtao Yu ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe); 1255eba57492Swlei FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples(); 12561a719089SHongtao Yu // Record the current frame and FunctionProfile whenever samples are 12571a719089SHongtao Yu // collected for non-danglie probes. This is for reporting all of the 1258bd524955SHongtao Yu // zero count probes of the frame later. 1259a8a38ef3Swlei FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); 1260345fd0c1SHongtao Yu FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(), 1261345fd0c1SHongtao Yu Count); 1262c82b24f4Swlei FunctionProfile.addTotalSamples(Count); 1263c82b24f4Swlei if (Probe->isEntry()) { 1264c82b24f4Swlei FunctionProfile.addHeadSamples(Count); 1265c82b24f4Swlei // Look up for the caller's function profile 1266c82b24f4Swlei const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe); 1267eba57492Swlei ContextTrieNode *CallerNode = ContextNode->getParentContext(); 1268eba57492Swlei if (InlinerDesc != nullptr && CallerNode != &getRootContext()) { 1269c82b24f4Swlei // Since the context id will be compressed, we have to use callee's 1270c82b24f4Swlei // context id to infer caller's context id to ensure they share the 1271c82b24f4Swlei // same context prefix. 1272eba57492Swlei uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset; 1273345fd0c1SHongtao Yu uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator; 1274c82b24f4Swlei assert(CallerIndex && 1275c82b24f4Swlei "Inferred caller's location index shouldn't be zero!"); 1276345fd0c1SHongtao Yu assert(!CallerDiscriminator && 1277345fd0c1SHongtao Yu "Callsite probe should not have a discriminator!"); 1278c82b24f4Swlei FunctionSamples &CallerProfile = 1279eba57492Swlei *getOrCreateFunctionSamples(CallerNode); 1280c82b24f4Swlei CallerProfile.setFunctionHash(InlinerDesc->FuncHash); 1281345fd0c1SHongtao Yu CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count); 1282c82b24f4Swlei CallerProfile.addTotalSamples(Count); 1283345fd0c1SHongtao Yu CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator, 1284eba57492Swlei ContextNode->getFuncName(), Count); 1285c82b24f4Swlei } 1286c82b24f4Swlei } 1287a8a38ef3Swlei } 12881a719089SHongtao Yu 1289cef9b96bSHongtao Yu // Assign zero count for remaining probes without sample hits to 1290cef9b96bSHongtao Yu // differentiate from probes optimized away, of which the counts are unknown 1291cef9b96bSHongtao Yu // and will be inferred by the compiler. 12921a719089SHongtao Yu for (auto &I : FrameSamples) { 1293a8a38ef3Swlei for (auto *FunctionProfile : I.second) { 129404ebd190SAmir Ayupov for (const MCDecodedPseudoProbe &Probe : I.first->getProbes()) { 129504ebd190SAmir Ayupov FunctionProfile->addBodySamples(Probe.getIndex(), 129604ebd190SAmir Ayupov Probe.getDiscriminator(), 0); 12971a719089SHongtao Yu } 12981a719089SHongtao Yu } 1299c82b24f4Swlei } 1300c82b24f4Swlei } 1301c82b24f4Swlei 1302d5f20130Swlei void CSProfileGenerator::populateBoundarySamplesWithProbes( 13035d7950a4SHongtao Yu const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) { 130492ba979cSSimon Pilgrim for (const auto &BI : BranchCounter) { 130546765248Swlei uint64_t SourceAddress = BI.first.first; 130646765248Swlei uint64_t TargetAddress = BI.first.second; 1307c82b24f4Swlei uint64_t Count = BI.second; 1308ee7d20e8Sjamesluox const MCDecodedPseudoProbe *CallProbe = 1309ee7d20e8Sjamesluox Binary->getCallProbeForAddr(SourceAddress); 1310c82b24f4Swlei if (CallProbe == nullptr) 1311c82b24f4Swlei continue; 1312c82b24f4Swlei FunctionSamples &FunctionProfile = 13135d7950a4SHongtao Yu getFunctionProfileForLeafProbe(CtxKey, CallProbe); 1314ee7d20e8Sjamesluox FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count); 1315c82b24f4Swlei FunctionProfile.addTotalSamples(Count); 131646765248Swlei StringRef CalleeName = getCalleeNameForAddress(TargetAddress); 1317c82b24f4Swlei if (CalleeName.size() == 0) 1318c82b24f4Swlei continue; 1319345fd0c1SHongtao Yu FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 1320345fd0c1SHongtao Yu CallProbe->getDiscriminator(), 1321ef0e0adcSWilliam Junda Huang FunctionId(CalleeName), Count); 1322c82b24f4Swlei } 1323c82b24f4Swlei } 1324c82b24f4Swlei 1325eba57492Swlei ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe( 13265d7950a4SHongtao Yu const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { 13275d7950a4SHongtao Yu 13285d7950a4SHongtao Yu const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context; 13295d7950a4SHongtao Yu SmallVector<uint64_t, 16> NewContext; 13305d7950a4SHongtao Yu 13315d7950a4SHongtao Yu if (InferMissingFrames) { 13325d7950a4SHongtao Yu SmallVector<uint64_t, 16> Context = CtxKey->Context; 13335d7950a4SHongtao Yu // Append leaf frame for a complete inference. 13345d7950a4SHongtao Yu Context.push_back(LeafProbe->getAddress()); 13355d7950a4SHongtao Yu inferMissingFrames(Context, NewContext); 13365d7950a4SHongtao Yu // Pop out the leaf probe that was pushed in above. 13375d7950a4SHongtao Yu NewContext.pop_back(); 13385d7950a4SHongtao Yu PContext = &NewContext; 13395d7950a4SHongtao Yu } 13405d7950a4SHongtao Yu 13415d7950a4SHongtao Yu SampleContextFrameVector ContextStack; 13425d7950a4SHongtao Yu extractPrefixContextStack(ContextStack, *PContext, Binary); 13436bccdcdbSwlei 1344b9db7036SHongtao Yu // Explicitly copy the context for appending the leaf context 1345b9db7036SHongtao Yu SampleContextFrameVector NewContextStack(ContextStack.begin(), 1346b9db7036SHongtao Yu ContextStack.end()); 1347b9db7036SHongtao Yu Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true); 1348c82b24f4Swlei // For leaf inlined context with the top frame, we should strip off the top 1349c82b24f4Swlei // frame's probe id, like: 1350c82b24f4Swlei // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar" 1351b9db7036SHongtao Yu auto LeafFrame = NewContextStack.back(); 1352fb29d812Swlei LeafFrame.Location = LineLocation(0, 0); 1353b9db7036SHongtao Yu NewContextStack.pop_back(); 1354b9db7036SHongtao Yu // Compress the context string except for the leaf frame 1355b9db7036SHongtao Yu CSProfileGenerator::compressRecursionContext(NewContextStack); 1356b9db7036SHongtao Yu CSProfileGenerator::trimContext(NewContextStack); 1357b9db7036SHongtao Yu NewContextStack.push_back(LeafFrame); 1358c82b24f4Swlei 1359ee7d20e8Sjamesluox const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid()); 1360ee7d20e8Sjamesluox bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite(); 1361eba57492Swlei ContextTrieNode *ContextNode = 1362eba57492Swlei getOrCreateContextNode(NewContextStack, WasLeafInlined); 1363eba57492Swlei ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash); 1364eba57492Swlei return ContextNode; 1365eba57492Swlei } 1366eba57492Swlei 1367eba57492Swlei FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe( 13685d7950a4SHongtao Yu const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) { 13695d7950a4SHongtao Yu return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples(); 1370c82b24f4Swlei } 1371c82b24f4Swlei 13721f05b1a9Swlei } // end namespace sampleprof 13731f05b1a9Swlei } // end namespace llvm 1374