xref: /llvm-project/llvm/tools/llvm-profgen/ProfileGenerator.cpp (revision ee09f7d1fc173f2b495838c925f2cf39a2b55369)
11f05b1a9Swlei //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
21f05b1a9Swlei //
31f05b1a9Swlei // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41f05b1a9Swlei // See https://llvm.org/LICENSE.txt for license information.
51f05b1a9Swlei // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61f05b1a9Swlei //
71f05b1a9Swlei //===----------------------------------------------------------------------===//
81f05b1a9Swlei #include "ProfileGenerator.h"
941a681ceSwlei #include "ErrorHandling.h"
105d7950a4SHongtao Yu #include "MissingFrameInferrer.h"
11937924ebSHongtao Yu #include "PerfReader.h"
12eca03d27SWenlei He #include "ProfiledBinary.h"
13db29f437Sserge-sans-paille #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14ce6bfe94SWenlei He #include "llvm/ProfileData/ProfileCommon.h"
153f970168SHongtao Yu #include <algorithm>
16c2e08abaSwlei #include <float.h>
17a8a38ef3Swlei #include <unordered_set>
18937924ebSHongtao Yu #include <utility>
191f05b1a9Swlei 
20964053d5Swlei cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
211f05b1a9Swlei                                     cl::Required,
221f05b1a9Swlei                                     cl::desc("Output profile file"));
23426e326aSwlei static cl::alias OutputA("o", cl::desc("Alias for --output"),
24426e326aSwlei                          cl::aliasopt(OutputFilename));
251f05b1a9Swlei 
261f05b1a9Swlei static cl::opt<SampleProfileFormat> OutputFormat(
27aaa826faSWenlei He     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
281f05b1a9Swlei     cl::values(
291f05b1a9Swlei         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
301f05b1a9Swlei         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
311f05b1a9Swlei         clEnumValN(SPF_Text, "text", "Text encoding"),
321f05b1a9Swlei         clEnumValN(SPF_GCC, "gcc",
331f05b1a9Swlei                    "GCC encoding (only meaningful for -sample)")));
341f05b1a9Swlei 
35c2250d8bSFangrui Song static cl::opt<bool> UseMD5(
36c2250d8bSFangrui Song     "use-md5", cl::Hidden,
377ca80300SHongtao Yu     cl::desc("Use md5 to represent function names in the output profile (only "
387ca80300SHongtao Yu              "meaningful for -extbinary)"));
397ca80300SHongtao Yu 
4016516f89Swlei static cl::opt<bool> PopulateProfileSymbolList(
412f8196dbSwlei     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
4216516f89Swlei     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
4316516f89Swlei 
443f3103c6Swlei static cl::opt<bool> FillZeroForAllFuncs(
453f3103c6Swlei     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
463f3103c6Swlei     cl::desc("Attribute all functions' range with zero count "
473f3103c6Swlei              "even it's not hit by any samples."));
483f3103c6Swlei 
49ac14bb14Swlei static cl::opt<int32_t, true> RecursionCompression(
50ac14bb14Swlei     "compress-recursion",
51ac14bb14Swlei     cl::desc("Compressing recursion by deduplicating adjacent frame "
52ac14bb14Swlei              "sequences up to the specified size. -1 means no size limit."),
53ac14bb14Swlei     cl::Hidden,
54ac14bb14Swlei     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55ac14bb14Swlei 
5627cb3707Swlei static cl::opt<bool>
57557efc9aSFangrui Song     TrimColdProfile("trim-cold-profile",
5827cb3707Swlei                     cl::desc("If the total count of the profile is smaller "
5927cb3707Swlei                              "than threshold, it will be trimmed."));
6027cb3707Swlei 
61ce6bfe94SWenlei He static cl::opt<bool> CSProfMergeColdContext(
62557efc9aSFangrui Song     "csprof-merge-cold-context", cl::init(true),
63fa14fd30SWenlei He     cl::desc("If the total count of context profile is smaller than "
64fa14fd30SWenlei He              "the threshold, it will be merged into context-less base "
65fa14fd30SWenlei He              "profile."));
66ce6bfe94SWenlei He 
67856a6a50Swlei static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68557efc9aSFangrui Song     "csprof-max-cold-context-depth", cl::init(1),
69856a6a50Swlei     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70863184ddSwlei              "context-less base profile"));
71863184ddSwlei 
72856a6a50Swlei static cl::opt<int, true> CSProfMaxContextDepth(
73557efc9aSFangrui Song     "csprof-max-context-depth",
74856a6a50Swlei     cl::desc("Keep the last K contexts while merging profile. -1 means no "
75856a6a50Swlei              "depth limit."),
76856a6a50Swlei     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77856a6a50Swlei 
78b9d40a7aSLei Wang static cl::opt<double> ProfileDensityThreshold(
79b9d40a7aSLei Wang     "profile-density-threshold", llvm::cl::init(50),
80b9d40a7aSLei Wang     llvm::cl::desc("If the profile density is below the given threshold, it "
81b9d40a7aSLei Wang                    "will be suggested to increase the sampling rate."),
82c2e08abaSwlei     llvm::cl::Optional);
83c2e08abaSwlei static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84c2e08abaSwlei                                  llvm::cl::desc("show profile density details"),
85c2e08abaSwlei                                  llvm::cl::Optional);
86b9d40a7aSLei Wang static cl::opt<int> ProfileDensityCutOffHot(
87b9d40a7aSLei Wang     "profile-density-cutoff-hot", llvm::cl::init(990000),
88b9d40a7aSLei Wang     llvm::cl::desc("Total samples cutoff for functions used to calculate "
89b9d40a7aSLei Wang                    "profile density."));
90c2e08abaSwlei 
91484a569eSwlei static cl::opt<bool> UpdateTotalSamples(
92484a569eSwlei     "update-total-samples", llvm::cl::init(false),
93484a569eSwlei     llvm::cl::desc(
94484a569eSwlei         "Update total samples by accumulating all its body samples."),
95484a569eSwlei     llvm::cl::Optional);
96484a569eSwlei 
975740bb80SHongtao Yu static cl::opt<bool> GenCSNestedProfile(
98bc380c09SHongtao Yu     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
995740bb80SHongtao Yu     cl::desc("Generate nested function profiles for CSSPGO"));
1005740bb80SHongtao Yu 
1015d7950a4SHongtao Yu cl::opt<bool> InferMissingFrames(
1025d7950a4SHongtao Yu     "infer-missing-frames", llvm::cl::init(true),
1035d7950a4SHongtao Yu     llvm::cl::desc(
1045d7950a4SHongtao Yu         "Infer missing call frames due to compiler tail call elimination."),
1055d7950a4SHongtao Yu     llvm::cl::Optional);
1065d7950a4SHongtao Yu 
1071f05b1a9Swlei using namespace llvm;
1081f05b1a9Swlei using namespace sampleprof;
1091f05b1a9Swlei 
1101f05b1a9Swlei namespace llvm {
111c2250d8bSFangrui Song extern cl::opt<int> ProfileSummaryCutoffHot;
112c2250d8bSFangrui Song extern cl::opt<bool> UseContextLessSummary;
113c2250d8bSFangrui Song 
1141f05b1a9Swlei namespace sampleprof {
1151f05b1a9Swlei 
116ac14bb14Swlei // Initialize the MaxCompressionSize to -1 which means no size limit
117ac14bb14Swlei int32_t CSProfileGenerator::MaxCompressionSize = -1;
118ac14bb14Swlei 
119856a6a50Swlei int CSProfileGenerator::MaxContextDepth = -1;
120856a6a50Swlei 
12141a681ceSwlei bool ProfileGeneratorBase::UseFSDiscriminator = false;
12241a681ceSwlei 
123d5f20130Swlei std::unique_ptr<ProfileGeneratorBase>
124d5f20130Swlei ProfileGeneratorBase::create(ProfiledBinary *Binary,
125937924ebSHongtao Yu                              const ContextSampleCounterMap *SampleCounters,
126e36786d1SHongtao Yu                              bool ProfileIsCS) {
127d5f20130Swlei   std::unique_ptr<ProfileGeneratorBase> Generator;
128e36786d1SHongtao Yu   if (ProfileIsCS) {
129d5f20130Swlei     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
130c681400bSwlei   } else {
131a03cf331Swlei     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
1321f05b1a9Swlei   }
13341a681ceSwlei   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
13441a681ceSwlei   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
1351f05b1a9Swlei 
136d5f20130Swlei   return Generator;
1371f05b1a9Swlei }
1381f05b1a9Swlei 
139937924ebSHongtao Yu std::unique_ptr<ProfileGeneratorBase>
140aa58b7b1Swlei ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
141e36786d1SHongtao Yu                              bool ProfileIsCS) {
142937924ebSHongtao Yu   std::unique_ptr<ProfileGeneratorBase> Generator;
143e36786d1SHongtao Yu   if (ProfileIsCS) {
144aa58b7b1Swlei     Generator.reset(new CSProfileGenerator(Binary, Profiles));
145937924ebSHongtao Yu   } else {
146937924ebSHongtao Yu     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
147937924ebSHongtao Yu   }
148937924ebSHongtao Yu   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
149937924ebSHongtao Yu   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
150937924ebSHongtao Yu 
151937924ebSHongtao Yu   return Generator;
152937924ebSHongtao Yu }
153937924ebSHongtao Yu 
154d5f20130Swlei void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
155b9db7036SHongtao Yu                                  SampleProfileMap &ProfileMap) {
15616516f89Swlei   // Populate profile symbol list if extended binary format is used.
15716516f89Swlei   ProfileSymbolList SymbolList;
15816516f89Swlei 
15916516f89Swlei   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
1602f8196dbSwlei     Binary->populateSymbolListFromDWARF(SymbolList);
16116516f89Swlei     Writer->setProfileSymbolList(&SymbolList);
16216516f89Swlei   }
16316516f89Swlei 
16400ef28efSWenlei He   if (std::error_code EC = Writer->write(ProfileMap))
16500ef28efSWenlei He     exitWithError(std::move(EC));
166c3aeabaeSwlei }
167c3aeabaeSwlei 
168d5f20130Swlei void ProfileGeneratorBase::write() {
1691f05b1a9Swlei   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
1701f05b1a9Swlei   if (std::error_code EC = WriterOrErr.getError())
1711f05b1a9Swlei     exitWithError(EC, OutputFilename);
1727ca80300SHongtao Yu 
1737ca80300SHongtao Yu   if (UseMD5) {
1747ca80300SHongtao Yu     if (OutputFormat != SPF_Ext_Binary)
1757ca80300SHongtao Yu       WithColor::warning() << "-use-md5 is ignored. Specify "
1767ca80300SHongtao Yu                               "--format=extbinary to enable it\n";
1777ca80300SHongtao Yu     else
1787ca80300SHongtao Yu       WriterOrErr.get()->setUseMD5();
1797ca80300SHongtao Yu   }
1807ca80300SHongtao Yu 
181c3aeabaeSwlei   write(std::move(WriterOrErr.get()), ProfileMap);
1821f05b1a9Swlei }
1831f05b1a9Swlei 
184c2e08abaSwlei void ProfileGeneratorBase::showDensitySuggestion(double Density) {
185c2e08abaSwlei   if (Density == 0.0)
186b9d40a7aSLei Wang     WithColor::warning() << "The output profile is empty or the "
187b9d40a7aSLei Wang                             "--profile-density-cutoff-hot option is "
188c2e08abaSwlei                             "set too low. Please check your command.\n";
189b9d40a7aSLei Wang   else if (Density < ProfileDensityThreshold)
190c2e08abaSwlei     WithColor::warning()
1911a0d23efSWenlei He         << "Sample PGO is estimated to optimize better with "
192b9d40a7aSLei Wang         << format("%.1f", ProfileDensityThreshold / Density)
193c2e08abaSwlei         << "x more samples. Please consider increasing sampling rate or "
194c2e08abaSwlei            "profiling for longer duration to get more samples.\n";
195c2e08abaSwlei 
196c2e08abaSwlei   if (ShowDensity)
197b9d40a7aSLei Wang     outs() << "Functions with density >= " << format("%.1f", Density)
198b9d40a7aSLei Wang            << " account for "
199c2e08abaSwlei            << format("%.2f",
200b9d40a7aSLei Wang                      static_cast<double>(ProfileDensityCutOffHot) / 10000)
201b9d40a7aSLei Wang            << "% total sample counts.\n";
202c2e08abaSwlei }
203c2e08abaSwlei 
20424f02517SLei Wang bool ProfileGeneratorBase::filterAmbiguousProfile(FunctionSamples &FS) {
20524f02517SLei Wang   for (const auto &Prefix : FuncPrefixsToFilter) {
20624f02517SLei Wang     if (FS.getFuncName().starts_with(Prefix))
20724f02517SLei Wang       return true;
20824f02517SLei Wang   }
20924f02517SLei Wang 
21024f02517SLei Wang   // Filter the function profiles for the inlinees. It's useful for fuzzy
21124f02517SLei Wang   // profile matching which flattens the profile and inlinees' samples are
21224f02517SLei Wang   // merged into top-level function.
21324f02517SLei Wang   for (auto &Callees :
21424f02517SLei Wang        const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
21524f02517SLei Wang     auto &CalleesMap = Callees.second;
21624f02517SLei Wang     for (auto I = CalleesMap.begin(); I != CalleesMap.end();) {
21724f02517SLei Wang       auto FS = I++;
21824f02517SLei Wang       if (filterAmbiguousProfile(FS->second))
21924f02517SLei Wang         CalleesMap.erase(FS);
22024f02517SLei Wang     }
22124f02517SLei Wang   }
22224f02517SLei Wang   return false;
22324f02517SLei Wang }
22424f02517SLei Wang 
22524f02517SLei Wang // For built-in local initialization function such as __cxx_global_var_init,
22624f02517SLei Wang // __tls_init prefix function, there could be multiple versions of the functions
22724f02517SLei Wang // in the final binary. However, in the profile generation, we call
22824f02517SLei Wang // getCanonicalFnName to canonicalize the names which strips the suffixes.
22924f02517SLei Wang // Therefore, samples from different functions queries the same profile and the
23024f02517SLei Wang // samples are merged. As the functions are essentially different, entries of
23124f02517SLei Wang // the merged profile are ambiguous. In sample loader, the IR from one version
23224f02517SLei Wang // would be attributed towards a merged entries, which is inaccurate. Especially
23324f02517SLei Wang // for fuzzy profile matching, it gets multiple callsites(from different
23424f02517SLei Wang // function) but used to match one callsite, which misleads the matching and
23524f02517SLei Wang // causes a lot of false positives report. Hence, we want to filter them out
23624f02517SLei Wang // from the profile map during the profile generation time. The profiles are all
23724f02517SLei Wang // cold functions, it won't have perf impact.
23824f02517SLei Wang void ProfileGeneratorBase::filterAmbiguousProfile(SampleProfileMap &Profiles) {
23924f02517SLei Wang   for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
24024f02517SLei Wang     auto FS = I++;
24124f02517SLei Wang     if (filterAmbiguousProfile(FS->second))
24224f02517SLei Wang       ProfileMap.erase(FS);
24324f02517SLei Wang   }
24424f02517SLei Wang }
24524f02517SLei Wang 
246d5f20130Swlei void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
2471f05b1a9Swlei                                               const RangeSample &Ranges) {
2481f05b1a9Swlei 
2491f05b1a9Swlei   /*
2501f05b1a9Swlei   Regions may overlap with each other. Using the boundary info, find all
2511f05b1a9Swlei   disjoint ranges and their sample count. BoundaryPoint contains the count
252c82b24f4Swlei   multiple samples begin/end at this points.
2531f05b1a9Swlei 
2541f05b1a9Swlei   |<--100-->|           Sample1
2551f05b1a9Swlei   |<------200------>|   Sample2
2561f05b1a9Swlei   A         B       C
2571f05b1a9Swlei 
2581f05b1a9Swlei   In the example above,
2591f05b1a9Swlei   Sample1 begins at A, ends at B, its value is 100.
2601f05b1a9Swlei   Sample2 beings at A, ends at C, its value is 200.
2611f05b1a9Swlei   For A, BeginCount is the sum of sample begins at A, which is 300 and no
2621f05b1a9Swlei   samples ends at A, so EndCount is 0.
2631f05b1a9Swlei   Then boundary points A, B, and C with begin/end counts are:
2641f05b1a9Swlei   A: (300, 0)
2651f05b1a9Swlei   B: (0, 100)
2661f05b1a9Swlei   C: (0, 200)
2671f05b1a9Swlei   */
2681f05b1a9Swlei   struct BoundaryPoint {
2691f05b1a9Swlei     // Sum of sample counts beginning at this point
27028277e9bSwlei     uint64_t BeginCount = UINT64_MAX;
2711f05b1a9Swlei     // Sum of sample counts ending at this point
27228277e9bSwlei     uint64_t EndCount = UINT64_MAX;
27328277e9bSwlei     // Is the begin point of a zero range.
27428277e9bSwlei     bool IsZeroRangeBegin = false;
27528277e9bSwlei     // Is the end point of a zero range.
27628277e9bSwlei     bool IsZeroRangeEnd = false;
2771f05b1a9Swlei 
27828277e9bSwlei     void addBeginCount(uint64_t Count) {
27928277e9bSwlei       if (BeginCount == UINT64_MAX)
28028277e9bSwlei         BeginCount = 0;
28128277e9bSwlei       BeginCount += Count;
28228277e9bSwlei     }
2831f05b1a9Swlei 
28428277e9bSwlei     void addEndCount(uint64_t Count) {
28528277e9bSwlei       if (EndCount == UINT64_MAX)
28628277e9bSwlei         EndCount = 0;
28728277e9bSwlei       EndCount += Count;
28828277e9bSwlei     }
2891f05b1a9Swlei   };
2901f05b1a9Swlei 
2911f05b1a9Swlei   /*
2921f05b1a9Swlei   For the above example. With boundary points, follwing logic finds two
2931f05b1a9Swlei   disjoint region of
2941f05b1a9Swlei 
2951f05b1a9Swlei   [A,B]:   300
2961f05b1a9Swlei   [B+1,C]: 200
2971f05b1a9Swlei 
2981f05b1a9Swlei   If there is a boundary point that both begin and end, the point itself
2991f05b1a9Swlei   becomes a separate disjoint region. For example, if we have original
3001f05b1a9Swlei   ranges of
3011f05b1a9Swlei 
3021f05b1a9Swlei   |<--- 100 --->|
3031f05b1a9Swlei                 |<--- 200 --->|
3041f05b1a9Swlei   A             B             C
3051f05b1a9Swlei 
3061f05b1a9Swlei   there are three boundary points with their begin/end counts of
3071f05b1a9Swlei 
3081f05b1a9Swlei   A: (100, 0)
3091f05b1a9Swlei   B: (200, 100)
3101f05b1a9Swlei   C: (0, 200)
3111f05b1a9Swlei 
3121f05b1a9Swlei   the disjoint ranges would be
3131f05b1a9Swlei 
3141f05b1a9Swlei   [A, B-1]: 100
3151f05b1a9Swlei   [B, B]:   300
3161f05b1a9Swlei   [B+1, C]: 200.
31728277e9bSwlei 
31828277e9bSwlei   Example for zero value range:
31928277e9bSwlei 
32028277e9bSwlei     |<--- 100 --->|
32128277e9bSwlei                        |<--- 200 --->|
32228277e9bSwlei   |<---------------  0 ----------------->|
32328277e9bSwlei   A  B            C    D             E   F
32428277e9bSwlei 
32528277e9bSwlei   [A, B-1]  : 0
32628277e9bSwlei   [B, C]    : 100
32728277e9bSwlei   [C+1, D-1]: 0
32828277e9bSwlei   [D, E]    : 200
32928277e9bSwlei   [E+1, F]  : 0
3301f05b1a9Swlei   */
3311f05b1a9Swlei   std::map<uint64_t, BoundaryPoint> Boundaries;
3321f05b1a9Swlei 
33392ba979cSSimon Pilgrim   for (const auto &Item : Ranges) {
33428277e9bSwlei     assert(Item.first.first <= Item.first.second &&
33528277e9bSwlei            "Invalid instruction range");
33628277e9bSwlei     auto &BeginPoint = Boundaries[Item.first.first];
33728277e9bSwlei     auto &EndPoint = Boundaries[Item.first.second];
3381f05b1a9Swlei     uint64_t Count = Item.second;
3391f05b1a9Swlei 
34028277e9bSwlei     BeginPoint.addBeginCount(Count);
34128277e9bSwlei     EndPoint.addEndCount(Count);
34228277e9bSwlei     if (Count == 0) {
34328277e9bSwlei       BeginPoint.IsZeroRangeBegin = true;
34428277e9bSwlei       EndPoint.IsZeroRangeEnd = true;
34528277e9bSwlei     }
3461f05b1a9Swlei   }
3471f05b1a9Swlei 
34828277e9bSwlei   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
34928277e9bSwlei   // and the next valid address
350fb19aa0cSHongtao Yu   uint64_t BeginAddress = UINT64_MAX;
35128277e9bSwlei   int ZeroRangeDepth = 0;
3528cbbd7e0SHongtao Yu   uint64_t Count = 0;
35392ba979cSSimon Pilgrim   for (const auto &Item : Boundaries) {
3541f05b1a9Swlei     uint64_t Address = Item.first;
35592ba979cSSimon Pilgrim     const BoundaryPoint &Point = Item.second;
35628277e9bSwlei     if (Point.BeginCount != UINT64_MAX) {
357fb19aa0cSHongtao Yu       if (BeginAddress != UINT64_MAX)
3581f05b1a9Swlei         DisjointRanges[{BeginAddress, Address - 1}] = Count;
3591f05b1a9Swlei       Count += Point.BeginCount;
3601f05b1a9Swlei       BeginAddress = Address;
36128277e9bSwlei       ZeroRangeDepth += Point.IsZeroRangeBegin;
3621f05b1a9Swlei     }
36328277e9bSwlei     if (Point.EndCount != UINT64_MAX) {
364fb19aa0cSHongtao Yu       assert((BeginAddress != UINT64_MAX) &&
365fb19aa0cSHongtao Yu              "First boundary point cannot be 'end' point");
3661f05b1a9Swlei       DisjointRanges[{BeginAddress, Address}] = Count;
3678cbbd7e0SHongtao Yu       assert(Count >= Point.EndCount && "Mismatched live ranges");
3681f05b1a9Swlei       Count -= Point.EndCount;
3691f05b1a9Swlei       BeginAddress = Address + 1;
37028277e9bSwlei       ZeroRangeDepth -= Point.IsZeroRangeEnd;
37128277e9bSwlei       // If the remaining count is zero and it's no longer in a zero range, this
37228277e9bSwlei       // means we consume all the ranges before, thus mark BeginAddress as
37328277e9bSwlei       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
37428277e9bSwlei       //  [<---- 10 ---->]
37528277e9bSwlei       //                       [<---- 20 ---->]
37628277e9bSwlei       //   A             B     C              D
37728277e9bSwlei       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
37828277e9bSwlei       // have the [B+1, C-1] zero range.
37928277e9bSwlei       if (Count == 0 && ZeroRangeDepth == 0)
38028277e9bSwlei         BeginAddress = UINT64_MAX;
3811f05b1a9Swlei     }
3821f05b1a9Swlei   }
3831f05b1a9Swlei }
3841f05b1a9Swlei 
385d5f20130Swlei void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
386d5f20130Swlei     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
387d5f20130Swlei     uint64_t Count) {
388d5f20130Swlei   // Use the maximum count of samples with same line location
38946cf7d75Swlei   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
390e8c245dcSWenlei He 
39123609a38STim Creech   // Use duplication factor to compensated for loop unroll/vectorization.
39223609a38STim Creech   // Note that this is only needed when we're taking MAX of the counts at
39323609a38STim Creech   // the location instead of SUM.
394e8c245dcSWenlei He   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
395e8c245dcSWenlei He 
39623609a38STim Creech   ErrorOr<uint64_t> R =
39723609a38STim Creech       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
39846cf7d75Swlei 
399d5f20130Swlei   uint64_t PreviousCount = R ? R.get() : 0;
40028277e9bSwlei   if (PreviousCount <= Count) {
40146cf7d75Swlei     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
402d5f20130Swlei                                    Count - PreviousCount);
403d5f20130Swlei   }
404d5f20130Swlei }
405d5f20130Swlei 
406f5537643Swlei void ProfileGeneratorBase::updateTotalSamples() {
407f5537643Swlei   for (auto &Item : ProfileMap) {
408f5537643Swlei     FunctionSamples &FunctionProfile = Item.second;
409f5537643Swlei     FunctionProfile.updateTotalSamples();
410f5537643Swlei   }
411f5537643Swlei }
412f5537643Swlei 
413acfd0a34SHongtao Yu void ProfileGeneratorBase::updateCallsiteSamples() {
414acfd0a34SHongtao Yu   for (auto &Item : ProfileMap) {
415acfd0a34SHongtao Yu     FunctionSamples &FunctionProfile = Item.second;
416acfd0a34SHongtao Yu     FunctionProfile.updateCallsiteSamples();
417acfd0a34SHongtao Yu   }
418acfd0a34SHongtao Yu }
419acfd0a34SHongtao Yu 
420acfd0a34SHongtao Yu void ProfileGeneratorBase::updateFunctionSamples() {
421acfd0a34SHongtao Yu   updateCallsiteSamples();
422acfd0a34SHongtao Yu 
423acfd0a34SHongtao Yu   if (UpdateTotalSamples)
424acfd0a34SHongtao Yu     updateTotalSamples();
425acfd0a34SHongtao Yu }
426acfd0a34SHongtao Yu 
4273f970168SHongtao Yu void ProfileGeneratorBase::collectProfiledFunctions() {
4283f970168SHongtao Yu   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
429aa58b7b1Swlei   if (collectFunctionsFromRawProfile(ProfiledFunctions))
430aa58b7b1Swlei     Binary->setProfiledFunctions(ProfiledFunctions);
431aa58b7b1Swlei   else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
432aa58b7b1Swlei     Binary->setProfiledFunctions(ProfiledFunctions);
433aa58b7b1Swlei   else
434aa58b7b1Swlei     llvm_unreachable("Unsupported input profile");
435aa58b7b1Swlei }
436aa58b7b1Swlei 
437aa58b7b1Swlei bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
438aa58b7b1Swlei     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
439aa58b7b1Swlei   if (!SampleCounters)
440aa58b7b1Swlei     return false;
441937924ebSHongtao Yu   // Go through all the stacks, ranges and branches in sample counters, use
442937924ebSHongtao Yu   // the start of the range to look up the function it belongs and record the
4433f970168SHongtao Yu   // function.
444937924ebSHongtao Yu   for (const auto &CI : *SampleCounters) {
4453f970168SHongtao Yu     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
44646765248Swlei       for (auto StackAddr : CtxKey->Context) {
44746765248Swlei         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
4483f970168SHongtao Yu           ProfiledFunctions.insert(FRange->Func);
4493f970168SHongtao Yu       }
4503f970168SHongtao Yu     }
4513f970168SHongtao Yu 
4523f970168SHongtao Yu     for (auto Item : CI.second.RangeCounter) {
45346765248Swlei       uint64_t StartAddress = Item.first.first;
45446765248Swlei       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
4553f970168SHongtao Yu         ProfiledFunctions.insert(FRange->Func);
4563f970168SHongtao Yu     }
4573f970168SHongtao Yu 
4583f970168SHongtao Yu     for (auto Item : CI.second.BranchCounter) {
45946765248Swlei       uint64_t SourceAddress = Item.first.first;
460c5667778SHongtao Yu       uint64_t TargetAddress = Item.first.second;
46146765248Swlei       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
4623f970168SHongtao Yu         ProfiledFunctions.insert(FRange->Func);
46346765248Swlei       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
4643f970168SHongtao Yu         ProfiledFunctions.insert(FRange->Func);
4653f970168SHongtao Yu     }
4663f970168SHongtao Yu   }
467aa58b7b1Swlei   return true;
468aa58b7b1Swlei }
469aa58b7b1Swlei 
470aa58b7b1Swlei bool ProfileGenerator::collectFunctionsFromLLVMProfile(
471aa58b7b1Swlei     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
472937924ebSHongtao Yu   for (const auto &FS : ProfileMap) {
473ef0e0adcSWilliam Junda Huang     if (auto *Func = Binary->getBinaryFunction(FS.second.getFunction()))
474937924ebSHongtao Yu       ProfiledFunctions.insert(Func);
475937924ebSHongtao Yu   }
476aa58b7b1Swlei   return true;
477937924ebSHongtao Yu }
4783f970168SHongtao Yu 
479aa58b7b1Swlei bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
480aa58b7b1Swlei     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
4817e86b13cSwlei   for (auto *Node : ContextTracker) {
482aa58b7b1Swlei     if (!Node->getFuncName().empty())
483aa58b7b1Swlei       if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
484aa58b7b1Swlei         ProfiledFunctions.insert(Func);
485aa58b7b1Swlei   }
486aa58b7b1Swlei   return true;
4873f970168SHongtao Yu }
4883f970168SHongtao Yu 
489d5f20130Swlei FunctionSamples &
490ef0e0adcSWilliam Junda Huang ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
491d5f20130Swlei   SampleContext Context(FuncName);
492ce03155aSMircea Trofin   return ProfileMap.create(Context);
493d5f20130Swlei }
494d5f20130Swlei 
495d5f20130Swlei void ProfileGenerator::generateProfile() {
4963f970168SHongtao Yu   collectProfiledFunctions();
497937924ebSHongtao Yu 
498937924ebSHongtao Yu   if (Binary->usePseudoProbes())
499937924ebSHongtao Yu     Binary->decodePseudoProbe();
500937924ebSHongtao Yu 
501937924ebSHongtao Yu   if (SampleCounters) {
502d5f20130Swlei     if (Binary->usePseudoProbes()) {
50323391febSHongtao Yu       generateProbeBasedProfile();
504d5f20130Swlei     } else {
505d5f20130Swlei       generateLineNumBasedProfile();
506d5f20130Swlei     }
507937924ebSHongtao Yu   }
508937924ebSHongtao Yu 
509c2e08abaSwlei   postProcessProfiles();
510c2e08abaSwlei }
511c2e08abaSwlei 
512c2e08abaSwlei void ProfileGenerator::postProcessProfiles() {
513aa58b7b1Swlei   computeSummaryAndThreshold(ProfileMap);
51427cb3707Swlei   trimColdProfiles(ProfileMap, ColdCountThreshold);
51524f02517SLei Wang   filterAmbiguousProfile(ProfileMap);
516c2e08abaSwlei   calculateAndShowDensity(ProfileMap);
517d5f20130Swlei }
518d5f20130Swlei 
51927cb3707Swlei void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
52027cb3707Swlei                                         uint64_t ColdCntThreshold) {
52127cb3707Swlei   if (!TrimColdProfile)
52227cb3707Swlei     return;
52327cb3707Swlei 
52427cb3707Swlei   // Move cold profiles into a tmp container.
5257624de5bSWilliam Huang   std::vector<hash_code> ColdProfileHashes;
52627cb3707Swlei   for (const auto &I : ProfileMap) {
52727cb3707Swlei     if (I.second.getTotalSamples() < ColdCntThreshold)
5287624de5bSWilliam Huang       ColdProfileHashes.emplace_back(I.first);
52927cb3707Swlei   }
53027cb3707Swlei 
53127cb3707Swlei   // Remove the cold profile from ProfileMap.
5327624de5bSWilliam Huang   for (const auto &I : ColdProfileHashes)
53327cb3707Swlei     ProfileMap.erase(I);
53427cb3707Swlei }
53527cb3707Swlei 
536d5f20130Swlei void ProfileGenerator::generateLineNumBasedProfile() {
537937924ebSHongtao Yu   assert(SampleCounters->size() == 1 &&
538d5f20130Swlei          "Must have one entry for profile generation.");
539937924ebSHongtao Yu   const SampleCounter &SC = SampleCounters->begin()->second;
540d5f20130Swlei   // Fill in function body samples
541d5f20130Swlei   populateBodySamplesForAllFunctions(SC.RangeCounter);
542d5f20130Swlei   // Fill in boundary sample counts as well as call site samples for calls
543d5f20130Swlei   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
544f5537643Swlei 
545acfd0a34SHongtao Yu   updateFunctionSamples();
546d5f20130Swlei }
547d5f20130Swlei 
54823391febSHongtao Yu void ProfileGenerator::generateProbeBasedProfile() {
549937924ebSHongtao Yu   assert(SampleCounters->size() == 1 &&
55023391febSHongtao Yu          "Must have one entry for profile generation.");
55123391febSHongtao Yu   // Enable pseudo probe functionalities in SampleProf
55223391febSHongtao Yu   FunctionSamples::ProfileIsProbeBased = true;
553937924ebSHongtao Yu   const SampleCounter &SC = SampleCounters->begin()->second;
55423391febSHongtao Yu   // Fill in function body samples
55523391febSHongtao Yu   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
55623391febSHongtao Yu   // Fill in boundary sample counts as well as call site samples for calls
55723391febSHongtao Yu   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
55823391febSHongtao Yu 
559acfd0a34SHongtao Yu   updateFunctionSamples();
56023391febSHongtao Yu }
56123391febSHongtao Yu 
56223391febSHongtao Yu void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
56323391febSHongtao Yu     const RangeSample &RangeCounter) {
56423391febSHongtao Yu   ProbeCounterMap ProbeCounter;
5653f970168SHongtao Yu   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
5663f970168SHongtao Yu   // inside extractProbesFromRange.
5673f970168SHongtao Yu   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
5683f970168SHongtao Yu                          false);
56923391febSHongtao Yu 
57023391febSHongtao Yu   for (const auto &PI : ProbeCounter) {
57123391febSHongtao Yu     const MCDecodedPseudoProbe *Probe = PI.first;
57223391febSHongtao Yu     uint64_t Count = PI.second;
57323391febSHongtao Yu     SampleContextFrameVector FrameVec;
57423391febSHongtao Yu     Binary->getInlineContextForProbe(Probe, FrameVec, true);
5753f970168SHongtao Yu     FunctionSamples &FunctionProfile =
5763f970168SHongtao Yu         getLeafProfileAndAddTotalSamples(FrameVec, Count);
577345fd0c1SHongtao Yu     FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
578345fd0c1SHongtao Yu                                    Count);
57923391febSHongtao Yu     if (Probe->isEntry())
58023391febSHongtao Yu       FunctionProfile.addHeadSamples(Count);
58123391febSHongtao Yu   }
58223391febSHongtao Yu }
58323391febSHongtao Yu 
58423391febSHongtao Yu void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
58523391febSHongtao Yu     const BranchSample &BranchCounters) {
58623391febSHongtao Yu   for (const auto &Entry : BranchCounters) {
58746765248Swlei     uint64_t SourceAddress = Entry.first.first;
58846765248Swlei     uint64_t TargetAddress = Entry.first.second;
58923391febSHongtao Yu     uint64_t Count = Entry.second;
59023391febSHongtao Yu     assert(Count != 0 && "Unexpected zero weight branch");
59123391febSHongtao Yu 
59246765248Swlei     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
59323391febSHongtao Yu     if (CalleeName.size() == 0)
59423391febSHongtao Yu       continue;
59523391febSHongtao Yu 
59623391febSHongtao Yu     const MCDecodedPseudoProbe *CallProbe =
59723391febSHongtao Yu         Binary->getCallProbeForAddr(SourceAddress);
59823391febSHongtao Yu     if (CallProbe == nullptr)
59923391febSHongtao Yu       continue;
60023391febSHongtao Yu 
60123391febSHongtao Yu     // Record called target sample and its count.
60223391febSHongtao Yu     SampleContextFrameVector FrameVec;
60323391febSHongtao Yu     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
60423391febSHongtao Yu 
60523391febSHongtao Yu     if (!FrameVec.empty()) {
60623391febSHongtao Yu       FunctionSamples &FunctionProfile =
60723391febSHongtao Yu           getLeafProfileAndAddTotalSamples(FrameVec, 0);
60823391febSHongtao Yu       FunctionProfile.addCalledTargetSamples(
609345fd0c1SHongtao Yu           FrameVec.back().Location.LineOffset,
610345fd0c1SHongtao Yu           FrameVec.back().Location.Discriminator,
611ef0e0adcSWilliam Junda Huang           FunctionId(CalleeName), Count);
61223391febSHongtao Yu     }
61323391febSHongtao Yu   }
61423391febSHongtao Yu }
61523391febSHongtao Yu 
616484a569eSwlei FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
617484a569eSwlei     const SampleContextFrameVector &FrameVec, uint64_t Count) {
618d5f20130Swlei   // Get top level profile
619d5f20130Swlei   FunctionSamples *FunctionProfile =
620ef0e0adcSWilliam Junda Huang       &getTopLevelFunctionProfile(FrameVec[0].Func);
621484a569eSwlei   FunctionProfile->addTotalSamples(Count);
62223391febSHongtao Yu   if (Binary->usePseudoProbes()) {
6233f970168SHongtao Yu     const auto *FuncDesc = Binary->getFuncDescForGUID(
624ef0e0adcSWilliam Junda Huang         FunctionProfile->getFunction().getHashCode());
62523391febSHongtao Yu     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
62623391febSHongtao Yu   }
627d5f20130Swlei 
628d5f20130Swlei   for (size_t I = 1; I < FrameVec.size(); I++) {
62946cf7d75Swlei     LineLocation Callsite(
63046cf7d75Swlei         FrameVec[I - 1].Location.LineOffset,
63146cf7d75Swlei         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
632d5f20130Swlei     FunctionSamplesMap &SamplesMap =
63346cf7d75Swlei         FunctionProfile->functionSamplesAt(Callsite);
634d5f20130Swlei     auto Ret =
635ef0e0adcSWilliam Junda Huang         SamplesMap.emplace(FrameVec[I].Func, FunctionSamples());
636d5f20130Swlei     if (Ret.second) {
637ef0e0adcSWilliam Junda Huang       SampleContext Context(FrameVec[I].Func);
638d5f20130Swlei       Ret.first->second.setContext(Context);
639d5f20130Swlei     }
640d5f20130Swlei     FunctionProfile = &Ret.first->second;
641484a569eSwlei     FunctionProfile->addTotalSamples(Count);
64223391febSHongtao Yu     if (Binary->usePseudoProbes()) {
6433f970168SHongtao Yu       const auto *FuncDesc = Binary->getFuncDescForGUID(
644ef0e0adcSWilliam Junda Huang           FunctionProfile->getFunction().getHashCode());
64523391febSHongtao Yu       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
64623391febSHongtao Yu     }
647d5f20130Swlei   }
648d5f20130Swlei 
649d5f20130Swlei   return *FunctionProfile;
650d5f20130Swlei }
651d5f20130Swlei 
65228277e9bSwlei RangeSample
65328277e9bSwlei ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
65428277e9bSwlei   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
6553f3103c6Swlei   if (FillZeroForAllFuncs) {
6563f3103c6Swlei     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
6573f3103c6Swlei       for (auto &R : FuncI.second.Ranges) {
6585bf191a3Swlei         Ranges[{R.first, R.second - 1}] += 0;
6593f3103c6Swlei       }
6603f3103c6Swlei     }
6613f3103c6Swlei   } else {
6623f3103c6Swlei     // For each range, we search for all ranges of the function it belongs to
6633f3103c6Swlei     // and initialize it with zero count, so it remains zero if doesn't hit any
66428277e9bSwlei     // samples. This is to be consistent with compiler that interpret zero count
66528277e9bSwlei     // as unexecuted(cold).
66692ba979cSSimon Pilgrim     for (const auto &I : RangeCounter) {
66746765248Swlei       uint64_t StartAddress = I.first.first;
66846765248Swlei       for (const auto &Range : Binary->getRanges(StartAddress))
66940ca4112Swlei         Ranges[{Range.first, Range.second - 1}] += 0;
67028277e9bSwlei     }
6713f3103c6Swlei   }
67228277e9bSwlei   RangeSample DisjointRanges;
67328277e9bSwlei   findDisjointRanges(DisjointRanges, Ranges);
67428277e9bSwlei   return DisjointRanges;
67528277e9bSwlei }
67628277e9bSwlei 
677d5f20130Swlei void ProfileGenerator::populateBodySamplesForAllFunctions(
678d5f20130Swlei     const RangeSample &RangeCounter) {
67992ba979cSSimon Pilgrim   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
68046765248Swlei     uint64_t RangeBegin = Range.first.first;
68146765248Swlei     uint64_t RangeEnd = Range.first.second;
682d5f20130Swlei     uint64_t Count = Range.second;
683d5f20130Swlei 
684d5f20130Swlei     InstructionPointer IP(Binary, RangeBegin, true);
685d5f20130Swlei     // Disjoint ranges may have range in the middle of two instr,
686d5f20130Swlei     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
687d5f20130Swlei     // can be Addr1+1 to Addr2-1. We should ignore such range.
6885bf191a3Swlei     if (IP.Address > RangeEnd)
6895bf191a3Swlei       continue;
6905bf191a3Swlei 
6915bf191a3Swlei     do {
69291cc53d5Swlei       const SampleContextFrameVector FrameVec =
69346765248Swlei           Binary->getFrameLocationStack(IP.Address);
694d5f20130Swlei       if (!FrameVec.empty()) {
695484a569eSwlei         // FIXME: As accumulating total count per instruction caused some
696484a569eSwlei         // regression, we changed to accumulate total count per byte as a
697484a569eSwlei         // workaround. Tuning hotness threshold on the compiler side might be
698484a569eSwlei         // necessary in the future.
699484a569eSwlei         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
70046765248Swlei             FrameVec, Count * Binary->getInstSize(IP.Address));
701d5f20130Swlei         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
702e8c245dcSWenlei He                                             Count);
703d5f20130Swlei       }
7045bf191a3Swlei     } while (IP.advance() && IP.Address <= RangeEnd);
705d5f20130Swlei   }
706d5f20130Swlei }
707d5f20130Swlei 
70846765248Swlei StringRef
70946765248Swlei ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
71040ca4112Swlei   // Get the function range by branch target if it's a call branch.
71146765248Swlei   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
712b1a45c62Swlei 
71340ca4112Swlei   // We won't accumulate sample count for a range whose start is not the real
71440ca4112Swlei   // function entry such as outlined function or inner labels.
71540ca4112Swlei   if (!FRange || !FRange->IsFuncEntry)
716b1a45c62Swlei     return StringRef();
717b1a45c62Swlei 
71840ca4112Swlei   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
719b1a45c62Swlei }
720b1a45c62Swlei 
721d5f20130Swlei void ProfileGenerator::populateBoundarySamplesForAllFunctions(
722d5f20130Swlei     const BranchSample &BranchCounters) {
72392ba979cSSimon Pilgrim   for (const auto &Entry : BranchCounters) {
72446765248Swlei     uint64_t SourceAddress = Entry.first.first;
72546765248Swlei     uint64_t TargetAddress = Entry.first.second;
726d5f20130Swlei     uint64_t Count = Entry.second;
727d5f20130Swlei     assert(Count != 0 && "Unexpected zero weight branch");
728d5f20130Swlei 
72946765248Swlei     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
730d5f20130Swlei     if (CalleeName.size() == 0)
731d5f20130Swlei       continue;
732d5f20130Swlei     // Record called target sample and its count.
733d5f20130Swlei     const SampleContextFrameVector &FrameVec =
73491cc53d5Swlei         Binary->getCachedFrameLocationStack(SourceAddress);
735d5f20130Swlei     if (!FrameVec.empty()) {
736484a569eSwlei       FunctionSamples &FunctionProfile =
737484a569eSwlei           getLeafProfileAndAddTotalSamples(FrameVec, 0);
738d5f20130Swlei       FunctionProfile.addCalledTargetSamples(
739fb29d812Swlei           FrameVec.back().Location.LineOffset,
74046cf7d75Swlei           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
741ef0e0adcSWilliam Junda Huang           FunctionId(CalleeName), Count);
742d5f20130Swlei     }
743d5f20130Swlei     // Add head samples for callee.
744ef0e0adcSWilliam Junda Huang     FunctionSamples &CalleeProfile =
745ef0e0adcSWilliam Junda Huang         getTopLevelFunctionProfile(FunctionId(CalleeName));
746d5f20130Swlei     CalleeProfile.addHeadSamples(Count);
747d5f20130Swlei   }
748d5f20130Swlei }
749d5f20130Swlei 
750b9d40a7aSLei Wang void ProfileGeneratorBase::calculateBodySamplesAndSize(
751b9d40a7aSLei Wang     const FunctionSamples &FSamples, uint64_t &TotalBodySamples,
752b9d40a7aSLei Wang     uint64_t &FuncBodySize) {
753b9d40a7aSLei Wang   // Note that ideally the size should be the number of function instruction.
754b9d40a7aSLei Wang   // However, for probe-based profile, we don't have the accurate instruction
755b9d40a7aSLei Wang   // count for each probe, instead, the probe sample is the samples count for
756b9d40a7aSLei Wang   // the block, which is equivelant to
757b9d40a7aSLei Wang   // total_instruction_samples/num_of_instruction in one block. Hence, we use
758b9d40a7aSLei Wang   // the number of probe as a proxy for the function's size.
759b9d40a7aSLei Wang   FuncBodySize += FSamples.getBodySamples().size();
760b9d40a7aSLei Wang 
761b9d40a7aSLei Wang   // The accumulated body samples re-calculated here could be different from the
762b9d40a7aSLei Wang   // TotalSamples(getTotalSamples) field of FunctionSamples for line-number
763b9d40a7aSLei Wang   // based profile. The reason is that TotalSamples is the sum of all the
764b9d40a7aSLei Wang   // samples of the machine instruction in one source-code line, however, the
765b9d40a7aSLei Wang   // entry of Bodysamples is the only max number of them, so the TotalSamples is
766b9d40a7aSLei Wang   // usually much bigger than the accumulated body samples as one souce-code
767b9d40a7aSLei Wang   // line can emit many machine instructions. We observed a regression when we
768b9d40a7aSLei Wang   // switched to use the accumulated body samples(by using
769b9d40a7aSLei Wang   // -update-total-samples). Hence, it's safer to re-calculate here to avoid
770b9d40a7aSLei Wang   // such discrepancy. There is no problem for probe-based profile, as the
771b9d40a7aSLei Wang   // TotalSamples is exactly the same as the accumulated body samples.
772b9d40a7aSLei Wang   for (const auto &I : FSamples.getBodySamples())
773b9d40a7aSLei Wang     TotalBodySamples += I.second.getSamples();
774b9d40a7aSLei Wang 
775b9d40a7aSLei Wang   for (const auto &CallsiteSamples : FSamples.getCallsiteSamples())
776b9d40a7aSLei Wang     for (const auto &Callee : CallsiteSamples.second) {
777b9d40a7aSLei Wang       // For binary-level density, the inlinees' samples and size should be
778b9d40a7aSLei Wang       // included in the calculation.
779b9d40a7aSLei Wang       calculateBodySamplesAndSize(Callee.second, TotalBodySamples,
780b9d40a7aSLei Wang                                   FuncBodySize);
781b9d40a7aSLei Wang     }
782b9d40a7aSLei Wang }
783b9d40a7aSLei Wang 
784b9d40a7aSLei Wang // Calculate Profile-density:
785b9d40a7aSLei Wang // Calculate the density for each function and sort them in descending order,
786b9d40a7aSLei Wang // keep accumulating their total samples unitl it exceeds the
787b9d40a7aSLei Wang // percentage_threshold(cut-off) of total profile samples, the profile-density
788b9d40a7aSLei Wang // is the last(minimum) function-density of the processed functions, which means
789b9d40a7aSLei Wang // all the functions hot to perf are on good density if the profile-density is
790b9d40a7aSLei Wang // good. The percentage_threshold(--profile-density-cutoff-hot) is configurable
791b9d40a7aSLei Wang // depending on how much regression the system want to tolerate.
792b9d40a7aSLei Wang double
793b9d40a7aSLei Wang ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles) {
794b9d40a7aSLei Wang   double ProfileDensity = 0.0;
795b9d40a7aSLei Wang 
796b9d40a7aSLei Wang   uint64_t TotalProfileSamples = 0;
797b9d40a7aSLei Wang   // A list of the function profile density and its total samples.
798b9d40a7aSLei Wang   std::vector<std::pair<double, uint64_t>> FuncDensityList;
799b9d40a7aSLei Wang   for (const auto &I : Profiles) {
800b9d40a7aSLei Wang     uint64_t TotalBodySamples = 0;
801b9d40a7aSLei Wang     uint64_t FuncBodySize = 0;
802b9d40a7aSLei Wang     calculateBodySamplesAndSize(I.second, TotalBodySamples, FuncBodySize);
803b9d40a7aSLei Wang 
804b9d40a7aSLei Wang     if (FuncBodySize == 0)
805b9d40a7aSLei Wang       continue;
806b9d40a7aSLei Wang 
807b9d40a7aSLei Wang     double FuncDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
808b9d40a7aSLei Wang     TotalProfileSamples += TotalBodySamples;
809b9d40a7aSLei Wang     FuncDensityList.emplace_back(FuncDensity, TotalBodySamples);
810b9d40a7aSLei Wang   }
811b9d40a7aSLei Wang 
812b9d40a7aSLei Wang   // Sorted by the density in descending order.
813b9d40a7aSLei Wang   llvm::stable_sort(FuncDensityList, [&](const std::pair<double, uint64_t> &A,
814b9d40a7aSLei Wang                                          const std::pair<double, uint64_t> &B) {
815b9d40a7aSLei Wang     if (A.first != B.first)
816b9d40a7aSLei Wang       return A.first > B.first;
817b9d40a7aSLei Wang     return A.second < B.second;
818b9d40a7aSLei Wang   });
819b9d40a7aSLei Wang 
820b9d40a7aSLei Wang   uint64_t AccumulatedSamples = 0;
821b9d40a7aSLei Wang   uint32_t I = 0;
822b9d40a7aSLei Wang   assert(ProfileDensityCutOffHot <= 1000000 &&
823b9d40a7aSLei Wang          "The cutoff value is greater than 1000000(100%)");
824b9d40a7aSLei Wang   while (AccumulatedSamples < TotalProfileSamples *
825b9d40a7aSLei Wang                                   static_cast<float>(ProfileDensityCutOffHot) /
826b9d40a7aSLei Wang                                   1000000 &&
827b9d40a7aSLei Wang          I < FuncDensityList.size()) {
828b9d40a7aSLei Wang     AccumulatedSamples += FuncDensityList[I].second;
829b9d40a7aSLei Wang     ProfileDensity = FuncDensityList[I].first;
830b9d40a7aSLei Wang     I++;
831b9d40a7aSLei Wang   }
832b9d40a7aSLei Wang 
833b9d40a7aSLei Wang   return ProfileDensity;
834b9d40a7aSLei Wang }
835b9d40a7aSLei Wang 
836c2e08abaSwlei void ProfileGeneratorBase::calculateAndShowDensity(
837c2e08abaSwlei     const SampleProfileMap &Profiles) {
838b9d40a7aSLei Wang   double Density = calculateDensity(Profiles);
839c2e08abaSwlei   showDensitySuggestion(Density);
840c2e08abaSwlei }
841c2e08abaSwlei 
842eba57492Swlei FunctionSamples *
843eba57492Swlei CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
844eba57492Swlei                                                bool WasLeafInlined) {
845eba57492Swlei   FunctionSamples *FProfile = ContextNode->getFunctionSamples();
846eba57492Swlei   if (!FProfile) {
847eba57492Swlei     FSamplesList.emplace_back();
848eba57492Swlei     FProfile = &FSamplesList.back();
849ef0e0adcSWilliam Junda Huang     FProfile->setFunction(ContextNode->getFuncName());
850eba57492Swlei     ContextNode->setFunctionSamples(FProfile);
851eba57492Swlei   }
8524c2b57aeSHongtao Yu   // Update ContextWasInlined attribute for existing contexts.
8534c2b57aeSHongtao Yu   // The current function can be called in two ways:
8544c2b57aeSHongtao Yu   //  - when processing a probe of the current frame
8554c2b57aeSHongtao Yu   //  - when processing the entry probe of an inlinee's frame, which
8564c2b57aeSHongtao Yu   //    is then used to update the callsite count of the current frame.
8574c2b57aeSHongtao Yu   // The two can happen in any order, hence here we are making sure
8584c2b57aeSHongtao Yu   // `ContextWasInlined` is always set as expected.
8594c2b57aeSHongtao Yu   // TODO: Note that the former does not always happen if no probes of the
8604c2b57aeSHongtao Yu   // current frame has samples, and if the latter happens, we could lose the
8614c2b57aeSHongtao Yu   // attribute. This should be fixed.
8624c2b57aeSHongtao Yu   if (WasLeafInlined)
863eba57492Swlei     FProfile->getContext().setAttribute(ContextWasInlined);
864eba57492Swlei   return FProfile;
8651f05b1a9Swlei }
8664c2b57aeSHongtao Yu 
867eba57492Swlei ContextTrieNode *
868eba57492Swlei CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
869eba57492Swlei                                            bool WasLeafInlined) {
870eba57492Swlei   ContextTrieNode *ContextNode =
871eba57492Swlei       ContextTracker.getOrCreateContextPath(Context, true);
872eba57492Swlei   getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
873eba57492Swlei   return ContextNode;
8741ed69bb8Swlei }
8751f05b1a9Swlei 
876a5d30421SWenlei He void CSProfileGenerator::generateProfile() {
877e36786d1SHongtao Yu   FunctionSamples::ProfileIsCS = true;
878ce40843aSwlei 
8793f970168SHongtao Yu   collectProfiledFunctions();
880ce40843aSwlei 
8815d7950a4SHongtao Yu   if (Binary->usePseudoProbes()) {
882937924ebSHongtao Yu     Binary->decodePseudoProbe();
8835d7950a4SHongtao Yu     if (InferMissingFrames)
8845d7950a4SHongtao Yu       initializeMissingFrameInferrer();
8855d7950a4SHongtao Yu   }
886937924ebSHongtao Yu 
887937924ebSHongtao Yu   if (SampleCounters) {
888d5f20130Swlei     if (Binary->usePseudoProbes()) {
889d5f20130Swlei       generateProbeBasedProfile();
890d5f20130Swlei     } else {
891d5f20130Swlei       generateLineNumBasedProfile();
892d5f20130Swlei     }
893937924ebSHongtao Yu   }
8943f970168SHongtao Yu 
8953f970168SHongtao Yu   if (Binary->getTrackFuncContextSize())
8963f970168SHongtao Yu     computeSizeForProfiledFunctions();
8973f970168SHongtao Yu 
898d5f20130Swlei   postProcessProfiles();
899d5f20130Swlei }
900d5f20130Swlei 
9015d7950a4SHongtao Yu void CSProfileGenerator::initializeMissingFrameInferrer() {
9025d7950a4SHongtao Yu   Binary->getMissingContextInferrer()->initialize(SampleCounters);
9035d7950a4SHongtao Yu }
9045d7950a4SHongtao Yu 
9055d7950a4SHongtao Yu void CSProfileGenerator::inferMissingFrames(
9065d7950a4SHongtao Yu     const SmallVectorImpl<uint64_t> &Context,
9075d7950a4SHongtao Yu     SmallVectorImpl<uint64_t> &NewContext) {
9085d7950a4SHongtao Yu   Binary->inferMissingFrames(Context, NewContext);
9095d7950a4SHongtao Yu }
9105d7950a4SHongtao Yu 
911ce40843aSwlei void CSProfileGenerator::computeSizeForProfiledFunctions() {
9123f970168SHongtao Yu   for (auto *Func : Binary->getProfiledFunctions())
91334e131b0SHongtao Yu     Binary->computeInlinedContextSizeForFunc(Func);
91434e131b0SHongtao Yu 
91534e131b0SHongtao Yu   // Flush the symbolizer to save memory.
91634e131b0SHongtao Yu   Binary->flushSymbolizer();
917ce40843aSwlei }
918ce40843aSwlei 
919eba57492Swlei void CSProfileGenerator::updateFunctionSamples() {
9207e86b13cSwlei   for (auto *Node : ContextTracker) {
921eba57492Swlei     FunctionSamples *FSamples = Node->getFunctionSamples();
922eba57492Swlei     if (FSamples) {
923eba57492Swlei       if (UpdateTotalSamples)
924eba57492Swlei         FSamples->updateTotalSamples();
925eba57492Swlei       FSamples->updateCallsiteSamples();
926eba57492Swlei     }
927eba57492Swlei   }
928eba57492Swlei }
929eba57492Swlei 
930d5f20130Swlei void CSProfileGenerator::generateLineNumBasedProfile() {
931937924ebSHongtao Yu   for (const auto &CI : *SampleCounters) {
93286bbf01dSSimon Pilgrim     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
93386bbf01dSSimon Pilgrim 
934eba57492Swlei     ContextTrieNode *ContextNode = &getRootContext();
935bfcb2c11Swlei     // Sample context will be empty if the jump is an external-to-internal call
936bfcb2c11Swlei     // pattern, the head samples should be added for the internal function.
937bfcb2c11Swlei     if (!CtxKey->Context.empty()) {
938a5d30421SWenlei He       // Get or create function profile for the range
939eba57492Swlei       ContextNode =
940eba57492Swlei           getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
941a5d30421SWenlei He       // Fill in function body samples
942eba57492Swlei       populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
943eba57492Swlei                                      CI.second.RangeCounter);
944bfcb2c11Swlei     }
945a5d30421SWenlei He     // Fill in boundary sample counts as well as call site samples for calls
946eba57492Swlei     populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
947a5d30421SWenlei He   }
948a5d30421SWenlei He   // Fill in call site value sample for inlined calls and also use context to
949a5d30421SWenlei He   // infer missing samples. Since we don't have call count for inlined
950a5d30421SWenlei He   // functions, we estimate it from inlinee's profile using the entry of the
951a5d30421SWenlei He   // body sample.
952eba57492Swlei   populateInferredFunctionSamples(getRootContext());
953f5537643Swlei 
954acfd0a34SHongtao Yu   updateFunctionSamples();
955a5d30421SWenlei He }
956a5d30421SWenlei He 
957d5f20130Swlei void CSProfileGenerator::populateBodySamplesForFunction(
958f812c192Swlei     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
9591f05b1a9Swlei   // Compute disjoint ranges first, so we can use MAX
9601f05b1a9Swlei   // for calculating count for each location.
9611f05b1a9Swlei   RangeSample Ranges;
962414930b9Swlei   findDisjointRanges(Ranges, RangeCounter);
96392ba979cSSimon Pilgrim   for (const auto &Range : Ranges) {
96446765248Swlei     uint64_t RangeBegin = Range.first.first;
96546765248Swlei     uint64_t RangeEnd = Range.first.second;
9661f05b1a9Swlei     uint64_t Count = Range.second;
9671f05b1a9Swlei     // Disjoint ranges have introduce zero-filled gap that
9681f05b1a9Swlei     // doesn't belong to current context, filter them out.
9691f05b1a9Swlei     if (Count == 0)
9701f05b1a9Swlei       continue;
9711f05b1a9Swlei 
9721f05b1a9Swlei     InstructionPointer IP(Binary, RangeBegin, true);
9731f05b1a9Swlei     // Disjoint ranges may have range in the middle of two instr,
9741f05b1a9Swlei     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
9751f05b1a9Swlei     // can be Addr1+1 to Addr2-1. We should ignore such range.
9765bf191a3Swlei     if (IP.Address > RangeEnd)
9775bf191a3Swlei       continue;
9785bf191a3Swlei 
9795bf191a3Swlei     do {
98046765248Swlei       auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
981e0e687a6SKazu Hirata       if (LeafLoc) {
9821f05b1a9Swlei         // Recording body sample for this specific context
983e8c245dcSWenlei He         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
984484a569eSwlei         FunctionProfile.addTotalSamples(Count);
985afd8bd60Swlei       }
9865bf191a3Swlei     } while (IP.advance() && IP.Address <= RangeEnd);
9871f05b1a9Swlei   }
9881f05b1a9Swlei }
9891f05b1a9Swlei 
990d5f20130Swlei void CSProfileGenerator::populateBoundarySamplesForFunction(
991eba57492Swlei     ContextTrieNode *Node, const BranchSample &BranchCounters) {
9921f05b1a9Swlei 
99392ba979cSSimon Pilgrim   for (const auto &Entry : BranchCounters) {
99446765248Swlei     uint64_t SourceAddress = Entry.first.first;
99546765248Swlei     uint64_t TargetAddress = Entry.first.second;
9961f05b1a9Swlei     uint64_t Count = Entry.second;
997d5f20130Swlei     assert(Count != 0 && "Unexpected zero weight branch");
998d5f20130Swlei 
99946765248Swlei     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
10001f05b1a9Swlei     if (CalleeName.size() == 0)
10011f05b1a9Swlei       continue;
10021f05b1a9Swlei 
1003eba57492Swlei     ContextTrieNode *CallerNode = Node;
1004eba57492Swlei     LineLocation CalleeCallSite(0, 0);
1005eba57492Swlei     if (CallerNode != &getRootContext()) {
10061f05b1a9Swlei       // Record called target sample and its count
100746765248Swlei       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
1008e0e687a6SKazu Hirata       if (LeafLoc) {
1009eba57492Swlei         CallerNode->getFunctionSamples()->addCalledTargetSamples(
101046cf7d75Swlei             LeafLoc->Location.LineOffset,
1011ef0e0adcSWilliam Junda Huang             getBaseDiscriminator(LeafLoc->Location.Discriminator),
1012ef0e0adcSWilliam Junda Huang             FunctionId(CalleeName),
101346cf7d75Swlei             Count);
10141f05b1a9Swlei         // Record head sample for called target(callee)
1015eba57492Swlei         CalleeCallSite = LeafLoc->Location;
10161f05b1a9Swlei       }
10171f05b1a9Swlei     }
10181f05b1a9Swlei 
1019eba57492Swlei     ContextTrieNode *CalleeNode =
1020ef0e0adcSWilliam Junda Huang         CallerNode->getOrCreateChildContext(CalleeCallSite,
1021ef0e0adcSWilliam Junda Huang                                             FunctionId(CalleeName));
1022eba57492Swlei     FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
1023eba57492Swlei     CalleeProfile->addHeadSamples(Count);
1024eba57492Swlei   }
10251f05b1a9Swlei }
10261f05b1a9Swlei 
1027eba57492Swlei void CSProfileGenerator::populateInferredFunctionSamples(
1028eba57492Swlei     ContextTrieNode &Node) {
1029eba57492Swlei   // There is no call jmp sample between the inliner and inlinee, we need to use
1030eba57492Swlei   // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
1031eba57492Swlei   // sample depends on child(inlinee)'s sample, so traverse the tree in
1032eba57492Swlei   // post-order.
1033eba57492Swlei   for (auto &It : Node.getAllChildContext())
1034eba57492Swlei     populateInferredFunctionSamples(It.second);
10351f05b1a9Swlei 
1036eba57492Swlei   FunctionSamples *CalleeProfile = Node.getFunctionSamples();
1037eba57492Swlei   if (!CalleeProfile)
1038eba57492Swlei     return;
10391f05b1a9Swlei   // If we already have head sample counts, we must have value profile
10401f05b1a9Swlei   // for call sites added already. Skip to avoid double counting.
1041eba57492Swlei   if (CalleeProfile->getHeadSamples())
1042eba57492Swlei     return;
1043eba57492Swlei   ContextTrieNode *CallerNode = Node.getParentContext();
10441f05b1a9Swlei   // If we don't have context, nothing to do for caller's call site.
10451f05b1a9Swlei   // This could happen for entry point function.
1046eba57492Swlei   if (CallerNode == &getRootContext())
1047eba57492Swlei     return;
10481f05b1a9Swlei 
1049eba57492Swlei   LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
1050eba57492Swlei   FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
10511f05b1a9Swlei   // Since we don't have call count for inlined functions, we
10521f05b1a9Swlei   // estimate it from inlinee's profile using entry body sample.
10537b81a81dSMircea Trofin   uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
10541f05b1a9Swlei   // If we don't have samples with location, use 1 to indicate live.
1055eba57492Swlei   if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
10561f05b1a9Swlei     EstimatedCallCount = 1;
1057eba57492Swlei   CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
1058eba57492Swlei                                        CallerLeafFrameLoc.Discriminator,
1059eba57492Swlei                                        Node.getFuncName(), EstimatedCallCount);
1060eba57492Swlei   CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
1061eba57492Swlei                                CallerLeafFrameLoc.Discriminator,
10621f05b1a9Swlei                                EstimatedCallCount);
1063484a569eSwlei   CallerProfile.addTotalSamples(EstimatedCallCount);
10641f05b1a9Swlei }
1065eba57492Swlei 
1066eba57492Swlei void CSProfileGenerator::convertToProfileMap(
1067eba57492Swlei     ContextTrieNode &Node, SampleContextFrameVector &Context) {
1068eba57492Swlei   FunctionSamples *FProfile = Node.getFunctionSamples();
1069eba57492Swlei   if (FProfile) {
1070eba57492Swlei     Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
1071eba57492Swlei     // Save the new context for future references.
1072eba57492Swlei     SampleContextFrames NewContext = *Contexts.insert(Context).first;
1073eba57492Swlei     auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
1074eba57492Swlei     FunctionSamples &NewProfile = Ret.first->second;
1075eba57492Swlei     NewProfile.getContext().setContext(NewContext);
1076eba57492Swlei     Context.pop_back();
1077eba57492Swlei   }
1078eba57492Swlei 
1079eba57492Swlei   for (auto &It : Node.getAllChildContext()) {
1080eba57492Swlei     ContextTrieNode &ChildNode = It.second;
1081eba57492Swlei     Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
1082eba57492Swlei     convertToProfileMap(ChildNode, Context);
1083eba57492Swlei     Context.pop_back();
1084eba57492Swlei   }
1085eba57492Swlei }
1086eba57492Swlei 
1087eba57492Swlei void CSProfileGenerator::convertToProfileMap() {
1088eba57492Swlei   assert(ProfileMap.empty() &&
1089eba57492Swlei          "ProfileMap should be empty before converting from the trie");
1090eba57492Swlei   assert(IsProfileValidOnTrie &&
1091eba57492Swlei          "Do not convert the trie twice, it's already destroyed");
1092eba57492Swlei 
1093eba57492Swlei   SampleContextFrameVector Context;
1094eba57492Swlei   for (auto &It : getRootContext().getAllChildContext())
1095eba57492Swlei     convertToProfileMap(It.second, Context);
1096eba57492Swlei 
1097eba57492Swlei   IsProfileValidOnTrie = false;
10981f05b1a9Swlei }
10991f05b1a9Swlei 
110030b02323SWenlei He void CSProfileGenerator::postProcessProfiles() {
110130b02323SWenlei He   // Compute hot/cold threshold based on profile. This will be used for cold
110230b02323SWenlei He   // context profile merging/trimming.
110330b02323SWenlei He   computeSummaryAndThreshold();
110430b02323SWenlei He 
110530b02323SWenlei He   // Run global pre-inliner to adjust/merge context profile based on estimated
110630b02323SWenlei He   // inline decisions.
1107eca03d27SWenlei He   if (EnableCSPreInliner) {
11087e86b13cSwlei     ContextTracker.populateFuncToCtxtMap();
11097e86b13cSwlei     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
1110259e4c56SHongtao Yu     // Turn off the profile merger by default unless it is explicitly enabled.
1111259e4c56SHongtao Yu     if (!CSProfMergeColdContext.getNumOccurrences())
1112259e4c56SHongtao Yu       CSProfMergeColdContext = false;
1113eca03d27SWenlei He   }
111430b02323SWenlei He 
11157e86b13cSwlei   convertToProfileMap();
11167e86b13cSwlei 
1117259e4c56SHongtao Yu   // Trim and merge cold context profile using cold threshold above.
111827cb3707Swlei   if (TrimColdProfile || CSProfMergeColdContext) {
111900ef28efSWenlei He     SampleContextTrimmer(ProfileMap)
112000ef28efSWenlei He         .trimAndMergeColdContextProfiles(
112127cb3707Swlei             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1122259e4c56SHongtao Yu             CSProfMaxColdContextDepth, EnableCSPreInliner);
112330b02323SWenlei He   }
1124c2e08abaSwlei 
11255740bb80SHongtao Yu   if (GenCSNestedProfile) {
1126339b8a00Swlei     ProfileConverter CSConverter(ProfileMap);
1127339b8a00Swlei     CSConverter.convertCSProfiles();
1128e36786d1SHongtao Yu     FunctionSamples::ProfileIsCS = false;
11295740bb80SHongtao Yu   }
113024f02517SLei Wang   filterAmbiguousProfile(ProfileMap);
1131b9d40a7aSLei Wang   ProfileGeneratorBase::calculateAndShowDensity(ProfileMap);
1132c2e08abaSwlei }
1133c2e08abaSwlei 
1134aa58b7b1Swlei void ProfileGeneratorBase::computeSummaryAndThreshold(
1135aa58b7b1Swlei     SampleProfileMap &Profiles) {
1136ce6bfe94SWenlei He   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1137aa58b7b1Swlei   Summary = Builder.computeSummaryForProfiles(Profiles);
1138dff83158SWenlei He   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1139dff83158SWenlei He       (Summary->getDetailedSummary()));
1140dff83158SWenlei He   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1141dff83158SWenlei He       (Summary->getDetailedSummary()));
1142e10b73f6Swlei }
1143e10b73f6Swlei 
1144aa58b7b1Swlei void CSProfileGenerator::computeSummaryAndThreshold() {
1145aa58b7b1Swlei   // Always merge and use context-less profile map to compute summary.
1146aa58b7b1Swlei   SampleProfileMap ContextLessProfiles;
1147aa58b7b1Swlei   ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1148aa58b7b1Swlei 
1149aa58b7b1Swlei   // Set the flag below to avoid merging the profile again in
1150aa58b7b1Swlei   // computeSummaryAndThreshold
1151aa58b7b1Swlei   FunctionSamples::ProfileIsCS = false;
1152aa58b7b1Swlei   assert(
1153aa58b7b1Swlei       (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1154aa58b7b1Swlei       "Don't set --profile-summary-contextless to false for profile "
1155aa58b7b1Swlei       "generation");
1156aa58b7b1Swlei   ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1157aa58b7b1Swlei   // Recover the old value.
1158aa58b7b1Swlei   FunctionSamples::ProfileIsCS = true;
1159aa58b7b1Swlei }
1160aa58b7b1Swlei 
116123391febSHongtao Yu void ProfileGeneratorBase::extractProbesFromRange(
116223391febSHongtao Yu     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
116323391febSHongtao Yu     bool FindDisjointRanges) {
116423391febSHongtao Yu   const RangeSample *PRanges = &RangeCounter;
1165c82b24f4Swlei   RangeSample Ranges;
116623391febSHongtao Yu   if (FindDisjointRanges) {
1167c82b24f4Swlei     findDisjointRanges(Ranges, RangeCounter);
116823391febSHongtao Yu     PRanges = &Ranges;
116923391febSHongtao Yu   }
117023391febSHongtao Yu 
117123391febSHongtao Yu   for (const auto &Range : *PRanges) {
117246765248Swlei     uint64_t RangeBegin = Range.first.first;
117346765248Swlei     uint64_t RangeEnd = Range.first.second;
1174c82b24f4Swlei     uint64_t Count = Range.second;
1175c82b24f4Swlei 
1176c82b24f4Swlei     InstructionPointer IP(Binary, RangeBegin, true);
1177c82b24f4Swlei     // Disjoint ranges may have range in the middle of two instr,
1178c82b24f4Swlei     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1179c82b24f4Swlei     // can be Addr1+1 to Addr2-1. We should ignore such range.
1180c82b24f4Swlei     if (IP.Address > RangeEnd)
1181c82b24f4Swlei       continue;
1182c82b24f4Swlei 
11835bf191a3Swlei     do {
1184c82b24f4Swlei       const AddressProbesMap &Address2ProbesMap =
1185c82b24f4Swlei           Binary->getAddress2ProbesMap();
1186*ee09f7d1SAmir Ayupov       for (const MCDecodedPseudoProbe &Probe :
1187*ee09f7d1SAmir Ayupov            Address2ProbesMap.find(IP.Address)) {
1188c82b24f4Swlei         ProbeCounter[&Probe] += Count;
1189c82b24f4Swlei       }
11905bf191a3Swlei     } while (IP.advance() && IP.Address <= RangeEnd);
1191c82b24f4Swlei   }
1192c82b24f4Swlei }
1193c82b24f4Swlei 
119446765248Swlei static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
119546765248Swlei                                       const SmallVectorImpl<uint64_t> &AddrVec,
119623391febSHongtao Yu                                       ProfiledBinary *Binary) {
11973f970168SHongtao Yu   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
119846765248Swlei   for (auto Address : reverse(AddrVec)) {
119946765248Swlei     const MCDecodedPseudoProbe *CallProbe =
120046765248Swlei         Binary->getCallProbeForAddr(Address);
12013f970168SHongtao Yu     // These could be the cases when a probe is not found at a calliste. Cutting
12023f970168SHongtao Yu     // off the context from here since the inliner will not know how to consume
12033f970168SHongtao Yu     // a context with unknown callsites.
12043f970168SHongtao Yu     // 1. for functions that are not sampled when
12053f970168SHongtao Yu     // --decode-probe-for-profiled-functions-only is on.
12063f970168SHongtao Yu     // 2. for a merged callsite. Callsite merging may cause the loss of original
12073f970168SHongtao Yu     // probe IDs.
12083f970168SHongtao Yu     // 3. for an external callsite.
12093f970168SHongtao Yu     if (!CallProbe)
12103f970168SHongtao Yu       break;
12113f970168SHongtao Yu     Probes.push_back(CallProbe);
12123f970168SHongtao Yu   }
12133f970168SHongtao Yu 
12143f970168SHongtao Yu   std::reverse(Probes.begin(), Probes.end());
12153f970168SHongtao Yu 
12163f970168SHongtao Yu   // Extract context stack for reusing, leaf context stack will be added
12173f970168SHongtao Yu   // compressed while looking up function profile.
121823391febSHongtao Yu   for (const auto *P : Probes) {
121923391febSHongtao Yu     Binary->getInlineContextForProbe(P, ContextStack, true);
122023391febSHongtao Yu   }
122123391febSHongtao Yu }
122223391febSHongtao Yu 
122323391febSHongtao Yu void CSProfileGenerator::generateProbeBasedProfile() {
122423391febSHongtao Yu   // Enable pseudo probe functionalities in SampleProf
122523391febSHongtao Yu   FunctionSamples::ProfileIsProbeBased = true;
1226937924ebSHongtao Yu   for (const auto &CI : *SampleCounters) {
12273f970168SHongtao Yu     const AddrBasedCtxKey *CtxKey =
12283f970168SHongtao Yu         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
122923391febSHongtao Yu     // Fill in function body samples from probes, also infer caller's samples
123023391febSHongtao Yu     // from callee's probe
12315d7950a4SHongtao Yu     populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey);
123223391febSHongtao Yu     // Fill in boundary samples for a call probe
12335d7950a4SHongtao Yu     populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey);
123423391febSHongtao Yu   }
123523391febSHongtao Yu }
123623391febSHongtao Yu 
1237d5f20130Swlei void CSProfileGenerator::populateBodySamplesWithProbes(
12385d7950a4SHongtao Yu     const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) {
1239c82b24f4Swlei   ProbeCounterMap ProbeCounter;
1240c82b24f4Swlei   // Extract the top frame probes by looking up each address among the range in
1241c82b24f4Swlei   // the Address2ProbeMap
1242f812c192Swlei   extractProbesFromRange(RangeCounter, ProbeCounter);
1243a8a38ef3Swlei   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1244a8a38ef3Swlei                      std::unordered_set<FunctionSamples *>>
1245ee7d20e8Sjamesluox       FrameSamples;
124692ba979cSSimon Pilgrim   for (const auto &PI : ProbeCounter) {
1247ee7d20e8Sjamesluox     const MCDecodedPseudoProbe *Probe = PI.first;
1248c82b24f4Swlei     uint64_t Count = PI.second;
124923391febSHongtao Yu     // Disjoint ranges have introduce zero-filled gap that
125023391febSHongtao Yu     // doesn't belong to current context, filter them out.
125123391febSHongtao Yu     if (!Probe->isBlock() || Count == 0)
125223391febSHongtao Yu       continue;
1253eba57492Swlei 
12545d7950a4SHongtao Yu     ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe);
1255eba57492Swlei     FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
12561a719089SHongtao Yu     // Record the current frame and FunctionProfile whenever samples are
12571a719089SHongtao Yu     // collected for non-danglie probes. This is for reporting all of the
1258bd524955SHongtao Yu     // zero count probes of the frame later.
1259a8a38ef3Swlei     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1260345fd0c1SHongtao Yu     FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
1261345fd0c1SHongtao Yu                                    Count);
1262c82b24f4Swlei     FunctionProfile.addTotalSamples(Count);
1263c82b24f4Swlei     if (Probe->isEntry()) {
1264c82b24f4Swlei       FunctionProfile.addHeadSamples(Count);
1265c82b24f4Swlei       // Look up for the caller's function profile
1266c82b24f4Swlei       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1267eba57492Swlei       ContextTrieNode *CallerNode = ContextNode->getParentContext();
1268eba57492Swlei       if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1269c82b24f4Swlei         // Since the context id will be compressed, we have to use callee's
1270c82b24f4Swlei         // context id to infer caller's context id to ensure they share the
1271c82b24f4Swlei         // same context prefix.
1272eba57492Swlei         uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1273345fd0c1SHongtao Yu         uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
1274c82b24f4Swlei         assert(CallerIndex &&
1275c82b24f4Swlei                "Inferred caller's location index shouldn't be zero!");
1276345fd0c1SHongtao Yu         assert(!CallerDiscriminator &&
1277345fd0c1SHongtao Yu                "Callsite probe should not have a discriminator!");
1278c82b24f4Swlei         FunctionSamples &CallerProfile =
1279eba57492Swlei             *getOrCreateFunctionSamples(CallerNode);
1280c82b24f4Swlei         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1281345fd0c1SHongtao Yu         CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count);
1282c82b24f4Swlei         CallerProfile.addTotalSamples(Count);
1283345fd0c1SHongtao Yu         CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator,
1284eba57492Swlei                                              ContextNode->getFuncName(), Count);
1285c82b24f4Swlei       }
1286c82b24f4Swlei     }
1287a8a38ef3Swlei   }
12881a719089SHongtao Yu 
1289cef9b96bSHongtao Yu   // Assign zero count for remaining probes without sample hits to
1290cef9b96bSHongtao Yu   // differentiate from probes optimized away, of which the counts are unknown
1291cef9b96bSHongtao Yu   // and will be inferred by the compiler.
12921a719089SHongtao Yu   for (auto &I : FrameSamples) {
1293a8a38ef3Swlei     for (auto *FunctionProfile : I.second) {
129404ebd190SAmir Ayupov       for (const MCDecodedPseudoProbe &Probe : I.first->getProbes()) {
129504ebd190SAmir Ayupov         FunctionProfile->addBodySamples(Probe.getIndex(),
129604ebd190SAmir Ayupov                                         Probe.getDiscriminator(), 0);
12971a719089SHongtao Yu       }
12981a719089SHongtao Yu     }
1299c82b24f4Swlei   }
1300c82b24f4Swlei }
1301c82b24f4Swlei 
1302d5f20130Swlei void CSProfileGenerator::populateBoundarySamplesWithProbes(
13035d7950a4SHongtao Yu     const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) {
130492ba979cSSimon Pilgrim   for (const auto &BI : BranchCounter) {
130546765248Swlei     uint64_t SourceAddress = BI.first.first;
130646765248Swlei     uint64_t TargetAddress = BI.first.second;
1307c82b24f4Swlei     uint64_t Count = BI.second;
1308ee7d20e8Sjamesluox     const MCDecodedPseudoProbe *CallProbe =
1309ee7d20e8Sjamesluox         Binary->getCallProbeForAddr(SourceAddress);
1310c82b24f4Swlei     if (CallProbe == nullptr)
1311c82b24f4Swlei       continue;
1312c82b24f4Swlei     FunctionSamples &FunctionProfile =
13135d7950a4SHongtao Yu         getFunctionProfileForLeafProbe(CtxKey, CallProbe);
1314ee7d20e8Sjamesluox     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1315c82b24f4Swlei     FunctionProfile.addTotalSamples(Count);
131646765248Swlei     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1317c82b24f4Swlei     if (CalleeName.size() == 0)
1318c82b24f4Swlei       continue;
1319345fd0c1SHongtao Yu     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
1320345fd0c1SHongtao Yu                                            CallProbe->getDiscriminator(),
1321ef0e0adcSWilliam Junda Huang                                            FunctionId(CalleeName), Count);
1322c82b24f4Swlei   }
1323c82b24f4Swlei }
1324c82b24f4Swlei 
1325eba57492Swlei ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
13265d7950a4SHongtao Yu     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
13275d7950a4SHongtao Yu 
13285d7950a4SHongtao Yu   const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context;
13295d7950a4SHongtao Yu   SmallVector<uint64_t, 16> NewContext;
13305d7950a4SHongtao Yu 
13315d7950a4SHongtao Yu   if (InferMissingFrames) {
13325d7950a4SHongtao Yu     SmallVector<uint64_t, 16> Context = CtxKey->Context;
13335d7950a4SHongtao Yu     // Append leaf frame for a complete inference.
13345d7950a4SHongtao Yu     Context.push_back(LeafProbe->getAddress());
13355d7950a4SHongtao Yu     inferMissingFrames(Context, NewContext);
13365d7950a4SHongtao Yu     // Pop out the leaf probe that was pushed in above.
13375d7950a4SHongtao Yu     NewContext.pop_back();
13385d7950a4SHongtao Yu     PContext = &NewContext;
13395d7950a4SHongtao Yu   }
13405d7950a4SHongtao Yu 
13415d7950a4SHongtao Yu   SampleContextFrameVector ContextStack;
13425d7950a4SHongtao Yu   extractPrefixContextStack(ContextStack, *PContext, Binary);
13436bccdcdbSwlei 
1344b9db7036SHongtao Yu   // Explicitly copy the context for appending the leaf context
1345b9db7036SHongtao Yu   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1346b9db7036SHongtao Yu                                            ContextStack.end());
1347b9db7036SHongtao Yu   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1348c82b24f4Swlei   // For leaf inlined context with the top frame, we should strip off the top
1349c82b24f4Swlei   // frame's probe id, like:
1350c82b24f4Swlei   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1351b9db7036SHongtao Yu   auto LeafFrame = NewContextStack.back();
1352fb29d812Swlei   LeafFrame.Location = LineLocation(0, 0);
1353b9db7036SHongtao Yu   NewContextStack.pop_back();
1354b9db7036SHongtao Yu   // Compress the context string except for the leaf frame
1355b9db7036SHongtao Yu   CSProfileGenerator::compressRecursionContext(NewContextStack);
1356b9db7036SHongtao Yu   CSProfileGenerator::trimContext(NewContextStack);
1357b9db7036SHongtao Yu   NewContextStack.push_back(LeafFrame);
1358c82b24f4Swlei 
1359ee7d20e8Sjamesluox   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1360ee7d20e8Sjamesluox   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1361eba57492Swlei   ContextTrieNode *ContextNode =
1362eba57492Swlei       getOrCreateContextNode(NewContextStack, WasLeafInlined);
1363eba57492Swlei   ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1364eba57492Swlei   return ContextNode;
1365eba57492Swlei }
1366eba57492Swlei 
1367eba57492Swlei FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
13685d7950a4SHongtao Yu     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
13695d7950a4SHongtao Yu   return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples();
1370c82b24f4Swlei }
1371c82b24f4Swlei 
13721f05b1a9Swlei } // end namespace sampleprof
13731f05b1a9Swlei } // end namespace llvm
1374