xref: /llvm-project/llvm/tools/llvm-profgen/ProfileGenerator.cpp (revision 1a53b5c367b5ebf7d7f34afaa653ea337982f1d6)
1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "MissingFrameInferrer.h"
11 #include "PerfReader.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14 #include "llvm/ProfileData/ProfileCommon.h"
15 #include <algorithm>
16 #include <float.h>
17 #include <unordered_set>
18 #include <utility>
19 
20 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
21                                     cl::Required,
22                                     cl::desc("Output profile file"));
23 static cl::alias OutputA("o", cl::desc("Alias for --output"),
24                          cl::aliasopt(OutputFilename));
25 
26 static cl::opt<SampleProfileFormat> OutputFormat(
27     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
28     cl::values(
29         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
30         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31         clEnumValN(SPF_Text, "text", "Text encoding"),
32         clEnumValN(SPF_GCC, "gcc",
33                    "GCC encoding (only meaningful for -sample)")));
34 
35 static cl::opt<bool> UseMD5(
36     "use-md5", cl::Hidden,
37     cl::desc("Use md5 to represent function names in the output profile (only "
38              "meaningful for -extbinary)"));
39 
40 static cl::opt<bool> PopulateProfileSymbolList(
41     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43 
44 static cl::opt<bool> FillZeroForAllFuncs(
45     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46     cl::desc("Attribute all functions' range with zero count "
47              "even it's not hit by any samples."));
48 
49 static cl::opt<int32_t, true> RecursionCompression(
50     "compress-recursion",
51     cl::desc("Compressing recursion by deduplicating adjacent frame "
52              "sequences up to the specified size. -1 means no size limit."),
53     cl::Hidden,
54     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55 
56 static cl::opt<bool>
57     TrimColdProfile("trim-cold-profile",
58                     cl::desc("If the total count of the profile is smaller "
59                              "than threshold, it will be trimmed."));
60 
61 static cl::opt<bool> CSProfMergeColdContext(
62     "csprof-merge-cold-context", cl::init(true),
63     cl::desc("If the total count of context profile is smaller than "
64              "the threshold, it will be merged into context-less base "
65              "profile."));
66 
67 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68     "csprof-max-cold-context-depth", cl::init(1),
69     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70              "context-less base profile"));
71 
72 static cl::opt<int, true> CSProfMaxContextDepth(
73     "csprof-max-context-depth",
74     cl::desc("Keep the last K contexts while merging profile. -1 means no "
75              "depth limit."),
76     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77 
78 static cl::opt<double> HotFunctionDensityThreshold(
79     "hot-function-density-threshold", llvm::cl::init(1000),
80     llvm::cl::desc(
81         "specify density threshold for hot functions (default: 1000)"),
82     llvm::cl::Optional);
83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84                                  llvm::cl::desc("show profile density details"),
85                                  llvm::cl::Optional);
86 
87 static cl::opt<bool> UpdateTotalSamples(
88     "update-total-samples", llvm::cl::init(false),
89     llvm::cl::desc(
90         "Update total samples by accumulating all its body samples."),
91     llvm::cl::Optional);
92 
93 static cl::opt<bool> GenCSNestedProfile(
94     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
95     cl::desc("Generate nested function profiles for CSSPGO"));
96 
97 cl::opt<bool> InferMissingFrames(
98     "infer-missing-frames", llvm::cl::init(true),
99     llvm::cl::desc(
100         "Infer missing call frames due to compiler tail call elimination."),
101     llvm::cl::Optional);
102 
103 using namespace llvm;
104 using namespace sampleprof;
105 
106 namespace llvm {
107 extern cl::opt<int> ProfileSummaryCutoffHot;
108 extern cl::opt<bool> UseContextLessSummary;
109 
110 namespace sampleprof {
111 
112 // Initialize the MaxCompressionSize to -1 which means no size limit
113 int32_t CSProfileGenerator::MaxCompressionSize = -1;
114 
115 int CSProfileGenerator::MaxContextDepth = -1;
116 
117 bool ProfileGeneratorBase::UseFSDiscriminator = false;
118 
119 std::unique_ptr<ProfileGeneratorBase>
120 ProfileGeneratorBase::create(ProfiledBinary *Binary,
121                              const ContextSampleCounterMap *SampleCounters,
122                              bool ProfileIsCS) {
123   std::unique_ptr<ProfileGeneratorBase> Generator;
124   if (ProfileIsCS) {
125     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
126   } else {
127     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
128   }
129   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
130   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
131 
132   return Generator;
133 }
134 
135 std::unique_ptr<ProfileGeneratorBase>
136 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
137                              bool ProfileIsCS) {
138   std::unique_ptr<ProfileGeneratorBase> Generator;
139   if (ProfileIsCS) {
140     Generator.reset(new CSProfileGenerator(Binary, Profiles));
141   } else {
142     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
143   }
144   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
145   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
146 
147   return Generator;
148 }
149 
150 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
151                                  SampleProfileMap &ProfileMap) {
152   // Populate profile symbol list if extended binary format is used.
153   ProfileSymbolList SymbolList;
154 
155   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
156     Binary->populateSymbolListFromDWARF(SymbolList);
157     Writer->setProfileSymbolList(&SymbolList);
158   }
159 
160   if (std::error_code EC = Writer->write(ProfileMap))
161     exitWithError(std::move(EC));
162 }
163 
164 void ProfileGeneratorBase::write() {
165   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
166   if (std::error_code EC = WriterOrErr.getError())
167     exitWithError(EC, OutputFilename);
168 
169   if (UseMD5) {
170     if (OutputFormat != SPF_Ext_Binary)
171       WithColor::warning() << "-use-md5 is ignored. Specify "
172                               "--format=extbinary to enable it\n";
173     else
174       WriterOrErr.get()->setUseMD5();
175   }
176 
177   write(std::move(WriterOrErr.get()), ProfileMap);
178 }
179 
180 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
181   if (Density == 0.0)
182     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
183                             "set too low. Please check your command.\n";
184   else if (Density < HotFunctionDensityThreshold)
185     WithColor::warning()
186         << "Sample PGO is estimated to optimize better with "
187         << format("%.1f", HotFunctionDensityThreshold / Density)
188         << "x more samples. Please consider increasing sampling rate or "
189            "profiling for longer duration to get more samples.\n";
190 
191   if (ShowDensity)
192     outs() << "Minimum profile density for hot functions with top "
193            << format("%.2f",
194                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
195                          10000)
196            << "% total samples: " << format("%.1f", Density) << "\n";
197 }
198 
199 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
200                                               uint64_t HotCntThreshold) {
201   double Density = DBL_MAX;
202   std::vector<const FunctionSamples *> HotFuncs;
203   for (auto &I : Profiles) {
204     auto &FuncSamples = I.second;
205     if (FuncSamples.getTotalSamples() < HotCntThreshold)
206       continue;
207     HotFuncs.emplace_back(&FuncSamples);
208   }
209 
210   for (auto *FuncSamples : HotFuncs) {
211     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
212     if (!Func)
213       continue;
214     uint64_t FuncSize = Func->getFuncSize();
215     if (FuncSize == 0)
216       continue;
217     Density =
218         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
219                               FuncSize);
220   }
221 
222   return Density == DBL_MAX ? 0.0 : Density;
223 }
224 
225 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
226                                               const RangeSample &Ranges) {
227 
228   /*
229   Regions may overlap with each other. Using the boundary info, find all
230   disjoint ranges and their sample count. BoundaryPoint contains the count
231   multiple samples begin/end at this points.
232 
233   |<--100-->|           Sample1
234   |<------200------>|   Sample2
235   A         B       C
236 
237   In the example above,
238   Sample1 begins at A, ends at B, its value is 100.
239   Sample2 beings at A, ends at C, its value is 200.
240   For A, BeginCount is the sum of sample begins at A, which is 300 and no
241   samples ends at A, so EndCount is 0.
242   Then boundary points A, B, and C with begin/end counts are:
243   A: (300, 0)
244   B: (0, 100)
245   C: (0, 200)
246   */
247   struct BoundaryPoint {
248     // Sum of sample counts beginning at this point
249     uint64_t BeginCount = UINT64_MAX;
250     // Sum of sample counts ending at this point
251     uint64_t EndCount = UINT64_MAX;
252     // Is the begin point of a zero range.
253     bool IsZeroRangeBegin = false;
254     // Is the end point of a zero range.
255     bool IsZeroRangeEnd = false;
256 
257     void addBeginCount(uint64_t Count) {
258       if (BeginCount == UINT64_MAX)
259         BeginCount = 0;
260       BeginCount += Count;
261     }
262 
263     void addEndCount(uint64_t Count) {
264       if (EndCount == UINT64_MAX)
265         EndCount = 0;
266       EndCount += Count;
267     }
268   };
269 
270   /*
271   For the above example. With boundary points, follwing logic finds two
272   disjoint region of
273 
274   [A,B]:   300
275   [B+1,C]: 200
276 
277   If there is a boundary point that both begin and end, the point itself
278   becomes a separate disjoint region. For example, if we have original
279   ranges of
280 
281   |<--- 100 --->|
282                 |<--- 200 --->|
283   A             B             C
284 
285   there are three boundary points with their begin/end counts of
286 
287   A: (100, 0)
288   B: (200, 100)
289   C: (0, 200)
290 
291   the disjoint ranges would be
292 
293   [A, B-1]: 100
294   [B, B]:   300
295   [B+1, C]: 200.
296 
297   Example for zero value range:
298 
299     |<--- 100 --->|
300                        |<--- 200 --->|
301   |<---------------  0 ----------------->|
302   A  B            C    D             E   F
303 
304   [A, B-1]  : 0
305   [B, C]    : 100
306   [C+1, D-1]: 0
307   [D, E]    : 200
308   [E+1, F]  : 0
309   */
310   std::map<uint64_t, BoundaryPoint> Boundaries;
311 
312   for (const auto &Item : Ranges) {
313     assert(Item.first.first <= Item.first.second &&
314            "Invalid instruction range");
315     auto &BeginPoint = Boundaries[Item.first.first];
316     auto &EndPoint = Boundaries[Item.first.second];
317     uint64_t Count = Item.second;
318 
319     BeginPoint.addBeginCount(Count);
320     EndPoint.addEndCount(Count);
321     if (Count == 0) {
322       BeginPoint.IsZeroRangeBegin = true;
323       EndPoint.IsZeroRangeEnd = true;
324     }
325   }
326 
327   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
328   // and the next valid address
329   uint64_t BeginAddress = UINT64_MAX;
330   int ZeroRangeDepth = 0;
331   uint64_t Count = 0;
332   for (const auto &Item : Boundaries) {
333     uint64_t Address = Item.first;
334     const BoundaryPoint &Point = Item.second;
335     if (Point.BeginCount != UINT64_MAX) {
336       if (BeginAddress != UINT64_MAX)
337         DisjointRanges[{BeginAddress, Address - 1}] = Count;
338       Count += Point.BeginCount;
339       BeginAddress = Address;
340       ZeroRangeDepth += Point.IsZeroRangeBegin;
341     }
342     if (Point.EndCount != UINT64_MAX) {
343       assert((BeginAddress != UINT64_MAX) &&
344              "First boundary point cannot be 'end' point");
345       DisjointRanges[{BeginAddress, Address}] = Count;
346       assert(Count >= Point.EndCount && "Mismatched live ranges");
347       Count -= Point.EndCount;
348       BeginAddress = Address + 1;
349       ZeroRangeDepth -= Point.IsZeroRangeEnd;
350       // If the remaining count is zero and it's no longer in a zero range, this
351       // means we consume all the ranges before, thus mark BeginAddress as
352       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
353       //  [<---- 10 ---->]
354       //                       [<---- 20 ---->]
355       //   A             B     C              D
356       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
357       // have the [B+1, C-1] zero range.
358       if (Count == 0 && ZeroRangeDepth == 0)
359         BeginAddress = UINT64_MAX;
360     }
361   }
362 }
363 
364 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
365     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
366     uint64_t Count) {
367   // Use the maximum count of samples with same line location
368   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
369 
370   // Use duplication factor to compensated for loop unroll/vectorization.
371   // Note that this is only needed when we're taking MAX of the counts at
372   // the location instead of SUM.
373   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
374 
375   ErrorOr<uint64_t> R =
376       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
377 
378   uint64_t PreviousCount = R ? R.get() : 0;
379   if (PreviousCount <= Count) {
380     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
381                                    Count - PreviousCount);
382   }
383 }
384 
385 void ProfileGeneratorBase::updateTotalSamples() {
386   for (auto &Item : ProfileMap) {
387     FunctionSamples &FunctionProfile = Item.second;
388     FunctionProfile.updateTotalSamples();
389   }
390 }
391 
392 void ProfileGeneratorBase::updateCallsiteSamples() {
393   for (auto &Item : ProfileMap) {
394     FunctionSamples &FunctionProfile = Item.second;
395     FunctionProfile.updateCallsiteSamples();
396   }
397 }
398 
399 void ProfileGeneratorBase::updateFunctionSamples() {
400   updateCallsiteSamples();
401 
402   if (UpdateTotalSamples)
403     updateTotalSamples();
404 }
405 
406 void ProfileGeneratorBase::collectProfiledFunctions() {
407   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
408   if (collectFunctionsFromRawProfile(ProfiledFunctions))
409     Binary->setProfiledFunctions(ProfiledFunctions);
410   else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
411     Binary->setProfiledFunctions(ProfiledFunctions);
412   else
413     llvm_unreachable("Unsupported input profile");
414 }
415 
416 bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
417     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
418   if (!SampleCounters)
419     return false;
420   // Go through all the stacks, ranges and branches in sample counters, use
421   // the start of the range to look up the function it belongs and record the
422   // function.
423   for (const auto &CI : *SampleCounters) {
424     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
425       for (auto StackAddr : CtxKey->Context) {
426         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
427           ProfiledFunctions.insert(FRange->Func);
428       }
429     }
430 
431     for (auto Item : CI.second.RangeCounter) {
432       uint64_t StartAddress = Item.first.first;
433       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
434         ProfiledFunctions.insert(FRange->Func);
435     }
436 
437     for (auto Item : CI.second.BranchCounter) {
438       uint64_t SourceAddress = Item.first.first;
439       uint64_t TargetAddress = Item.first.second;
440       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
441         ProfiledFunctions.insert(FRange->Func);
442       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
443         ProfiledFunctions.insert(FRange->Func);
444     }
445   }
446   return true;
447 }
448 
449 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
450     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
451   for (const auto &FS : ProfileMap) {
452     if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
453       ProfiledFunctions.insert(Func);
454   }
455   return true;
456 }
457 
458 bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
459     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
460   for (auto *Node : ContextTracker) {
461     if (!Node->getFuncName().empty())
462       if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
463         ProfiledFunctions.insert(Func);
464   }
465   return true;
466 }
467 
468 FunctionSamples &
469 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
470   SampleContext Context(FuncName);
471   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
472   if (Ret.second) {
473     FunctionSamples &FProfile = Ret.first->second;
474     FProfile.setContext(Context);
475   }
476   return Ret.first->second;
477 }
478 
479 void ProfileGenerator::generateProfile() {
480   collectProfiledFunctions();
481 
482   if (Binary->usePseudoProbes())
483     Binary->decodePseudoProbe();
484 
485   if (SampleCounters) {
486     if (Binary->usePseudoProbes()) {
487       generateProbeBasedProfile();
488     } else {
489       generateLineNumBasedProfile();
490     }
491   }
492 
493   postProcessProfiles();
494 }
495 
496 void ProfileGenerator::postProcessProfiles() {
497   computeSummaryAndThreshold(ProfileMap);
498   trimColdProfiles(ProfileMap, ColdCountThreshold);
499   calculateAndShowDensity(ProfileMap);
500 }
501 
502 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
503                                         uint64_t ColdCntThreshold) {
504   if (!TrimColdProfile)
505     return;
506 
507   // Move cold profiles into a tmp container.
508   std::vector<SampleContext> ColdProfiles;
509   for (const auto &I : ProfileMap) {
510     if (I.second.getTotalSamples() < ColdCntThreshold)
511       ColdProfiles.emplace_back(I.first);
512   }
513 
514   // Remove the cold profile from ProfileMap.
515   for (const auto &I : ColdProfiles)
516     ProfileMap.erase(I);
517 }
518 
519 void ProfileGenerator::generateLineNumBasedProfile() {
520   assert(SampleCounters->size() == 1 &&
521          "Must have one entry for profile generation.");
522   const SampleCounter &SC = SampleCounters->begin()->second;
523   // Fill in function body samples
524   populateBodySamplesForAllFunctions(SC.RangeCounter);
525   // Fill in boundary sample counts as well as call site samples for calls
526   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
527 
528   updateFunctionSamples();
529 }
530 
531 void ProfileGenerator::generateProbeBasedProfile() {
532   assert(SampleCounters->size() == 1 &&
533          "Must have one entry for profile generation.");
534   // Enable pseudo probe functionalities in SampleProf
535   FunctionSamples::ProfileIsProbeBased = true;
536   const SampleCounter &SC = SampleCounters->begin()->second;
537   // Fill in function body samples
538   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
539   // Fill in boundary sample counts as well as call site samples for calls
540   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
541 
542   updateFunctionSamples();
543 }
544 
545 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
546     const RangeSample &RangeCounter) {
547   ProbeCounterMap ProbeCounter;
548   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
549   // inside extractProbesFromRange.
550   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
551                          false);
552 
553   for (const auto &PI : ProbeCounter) {
554     const MCDecodedPseudoProbe *Probe = PI.first;
555     uint64_t Count = PI.second;
556     SampleContextFrameVector FrameVec;
557     Binary->getInlineContextForProbe(Probe, FrameVec, true);
558     FunctionSamples &FunctionProfile =
559         getLeafProfileAndAddTotalSamples(FrameVec, Count);
560     FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
561                                    Count);
562     if (Probe->isEntry())
563       FunctionProfile.addHeadSamples(Count);
564   }
565 }
566 
567 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
568     const BranchSample &BranchCounters) {
569   for (const auto &Entry : BranchCounters) {
570     uint64_t SourceAddress = Entry.first.first;
571     uint64_t TargetAddress = Entry.first.second;
572     uint64_t Count = Entry.second;
573     assert(Count != 0 && "Unexpected zero weight branch");
574 
575     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
576     if (CalleeName.size() == 0)
577       continue;
578 
579     const MCDecodedPseudoProbe *CallProbe =
580         Binary->getCallProbeForAddr(SourceAddress);
581     if (CallProbe == nullptr)
582       continue;
583 
584     // Record called target sample and its count.
585     SampleContextFrameVector FrameVec;
586     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
587 
588     if (!FrameVec.empty()) {
589       FunctionSamples &FunctionProfile =
590           getLeafProfileAndAddTotalSamples(FrameVec, 0);
591       FunctionProfile.addCalledTargetSamples(
592           FrameVec.back().Location.LineOffset,
593           FrameVec.back().Location.Discriminator,
594           CalleeName, Count);
595     }
596   }
597 }
598 
599 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
600     const SampleContextFrameVector &FrameVec, uint64_t Count) {
601   // Get top level profile
602   FunctionSamples *FunctionProfile =
603       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
604   FunctionProfile->addTotalSamples(Count);
605   if (Binary->usePseudoProbes()) {
606     const auto *FuncDesc = Binary->getFuncDescForGUID(
607         Function::getGUID(FunctionProfile->getName()));
608     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
609   }
610 
611   for (size_t I = 1; I < FrameVec.size(); I++) {
612     LineLocation Callsite(
613         FrameVec[I - 1].Location.LineOffset,
614         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
615     FunctionSamplesMap &SamplesMap =
616         FunctionProfile->functionSamplesAt(Callsite);
617     auto Ret =
618         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
619     if (Ret.second) {
620       SampleContext Context(FrameVec[I].FuncName);
621       Ret.first->second.setContext(Context);
622     }
623     FunctionProfile = &Ret.first->second;
624     FunctionProfile->addTotalSamples(Count);
625     if (Binary->usePseudoProbes()) {
626       const auto *FuncDesc = Binary->getFuncDescForGUID(
627           Function::getGUID(FunctionProfile->getName()));
628       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
629     }
630   }
631 
632   return *FunctionProfile;
633 }
634 
635 RangeSample
636 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
637   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
638   if (FillZeroForAllFuncs) {
639     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
640       for (auto &R : FuncI.second.Ranges) {
641         Ranges[{R.first, R.second - 1}] += 0;
642       }
643     }
644   } else {
645     // For each range, we search for all ranges of the function it belongs to
646     // and initialize it with zero count, so it remains zero if doesn't hit any
647     // samples. This is to be consistent with compiler that interpret zero count
648     // as unexecuted(cold).
649     for (const auto &I : RangeCounter) {
650       uint64_t StartAddress = I.first.first;
651       for (const auto &Range : Binary->getRanges(StartAddress))
652         Ranges[{Range.first, Range.second - 1}] += 0;
653     }
654   }
655   RangeSample DisjointRanges;
656   findDisjointRanges(DisjointRanges, Ranges);
657   return DisjointRanges;
658 }
659 
660 void ProfileGenerator::populateBodySamplesForAllFunctions(
661     const RangeSample &RangeCounter) {
662   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
663     uint64_t RangeBegin = Range.first.first;
664     uint64_t RangeEnd = Range.first.second;
665     uint64_t Count = Range.second;
666 
667     InstructionPointer IP(Binary, RangeBegin, true);
668     // Disjoint ranges may have range in the middle of two instr,
669     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
670     // can be Addr1+1 to Addr2-1. We should ignore such range.
671     if (IP.Address > RangeEnd)
672       continue;
673 
674     do {
675       const SampleContextFrameVector FrameVec =
676           Binary->getFrameLocationStack(IP.Address);
677       if (!FrameVec.empty()) {
678         // FIXME: As accumulating total count per instruction caused some
679         // regression, we changed to accumulate total count per byte as a
680         // workaround. Tuning hotness threshold on the compiler side might be
681         // necessary in the future.
682         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
683             FrameVec, Count * Binary->getInstSize(IP.Address));
684         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
685                                             Count);
686       }
687     } while (IP.advance() && IP.Address <= RangeEnd);
688   }
689 }
690 
691 StringRef
692 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
693   // Get the function range by branch target if it's a call branch.
694   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
695 
696   // We won't accumulate sample count for a range whose start is not the real
697   // function entry such as outlined function or inner labels.
698   if (!FRange || !FRange->IsFuncEntry)
699     return StringRef();
700 
701   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
702 }
703 
704 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
705     const BranchSample &BranchCounters) {
706   for (const auto &Entry : BranchCounters) {
707     uint64_t SourceAddress = Entry.first.first;
708     uint64_t TargetAddress = Entry.first.second;
709     uint64_t Count = Entry.second;
710     assert(Count != 0 && "Unexpected zero weight branch");
711 
712     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
713     if (CalleeName.size() == 0)
714       continue;
715     // Record called target sample and its count.
716     const SampleContextFrameVector &FrameVec =
717         Binary->getCachedFrameLocationStack(SourceAddress);
718     if (!FrameVec.empty()) {
719       FunctionSamples &FunctionProfile =
720           getLeafProfileAndAddTotalSamples(FrameVec, 0);
721       FunctionProfile.addCalledTargetSamples(
722           FrameVec.back().Location.LineOffset,
723           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
724           CalleeName, Count);
725     }
726     // Add head samples for callee.
727     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
728     CalleeProfile.addHeadSamples(Count);
729   }
730 }
731 
732 void ProfileGeneratorBase::calculateAndShowDensity(
733     const SampleProfileMap &Profiles) {
734   double Density = calculateDensity(Profiles, HotCountThreshold);
735   showDensitySuggestion(Density);
736 }
737 
738 FunctionSamples *
739 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
740                                                bool WasLeafInlined) {
741   FunctionSamples *FProfile = ContextNode->getFunctionSamples();
742   if (!FProfile) {
743     FSamplesList.emplace_back();
744     FProfile = &FSamplesList.back();
745     FProfile->setName(ContextNode->getFuncName());
746     ContextNode->setFunctionSamples(FProfile);
747   }
748   // Update ContextWasInlined attribute for existing contexts.
749   // The current function can be called in two ways:
750   //  - when processing a probe of the current frame
751   //  - when processing the entry probe of an inlinee's frame, which
752   //    is then used to update the callsite count of the current frame.
753   // The two can happen in any order, hence here we are making sure
754   // `ContextWasInlined` is always set as expected.
755   // TODO: Note that the former does not always happen if no probes of the
756   // current frame has samples, and if the latter happens, we could lose the
757   // attribute. This should be fixed.
758   if (WasLeafInlined)
759     FProfile->getContext().setAttribute(ContextWasInlined);
760   return FProfile;
761 }
762 
763 ContextTrieNode *
764 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
765                                            bool WasLeafInlined) {
766   ContextTrieNode *ContextNode =
767       ContextTracker.getOrCreateContextPath(Context, true);
768   getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
769   return ContextNode;
770 }
771 
772 void CSProfileGenerator::generateProfile() {
773   FunctionSamples::ProfileIsCS = true;
774 
775   collectProfiledFunctions();
776 
777   if (Binary->usePseudoProbes()) {
778     Binary->decodePseudoProbe();
779     if (InferMissingFrames)
780       initializeMissingFrameInferrer();
781   }
782 
783   if (SampleCounters) {
784     if (Binary->usePseudoProbes()) {
785       generateProbeBasedProfile();
786     } else {
787       generateLineNumBasedProfile();
788     }
789   }
790 
791   if (Binary->getTrackFuncContextSize())
792     computeSizeForProfiledFunctions();
793 
794   postProcessProfiles();
795 }
796 
797 void CSProfileGenerator::initializeMissingFrameInferrer() {
798   Binary->getMissingContextInferrer()->initialize(SampleCounters);
799 }
800 
801 void CSProfileGenerator::inferMissingFrames(
802     const SmallVectorImpl<uint64_t> &Context,
803     SmallVectorImpl<uint64_t> &NewContext) {
804   Binary->inferMissingFrames(Context, NewContext);
805 }
806 
807 void CSProfileGenerator::computeSizeForProfiledFunctions() {
808   for (auto *Func : Binary->getProfiledFunctions())
809     Binary->computeInlinedContextSizeForFunc(Func);
810 
811   // Flush the symbolizer to save memory.
812   Binary->flushSymbolizer();
813 }
814 
815 void CSProfileGenerator::updateFunctionSamples() {
816   for (auto *Node : ContextTracker) {
817     FunctionSamples *FSamples = Node->getFunctionSamples();
818     if (FSamples) {
819       if (UpdateTotalSamples)
820         FSamples->updateTotalSamples();
821       FSamples->updateCallsiteSamples();
822     }
823   }
824 }
825 
826 void CSProfileGenerator::generateLineNumBasedProfile() {
827   for (const auto &CI : *SampleCounters) {
828     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
829 
830     ContextTrieNode *ContextNode = &getRootContext();
831     // Sample context will be empty if the jump is an external-to-internal call
832     // pattern, the head samples should be added for the internal function.
833     if (!CtxKey->Context.empty()) {
834       // Get or create function profile for the range
835       ContextNode =
836           getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
837       // Fill in function body samples
838       populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
839                                      CI.second.RangeCounter);
840     }
841     // Fill in boundary sample counts as well as call site samples for calls
842     populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
843   }
844   // Fill in call site value sample for inlined calls and also use context to
845   // infer missing samples. Since we don't have call count for inlined
846   // functions, we estimate it from inlinee's profile using the entry of the
847   // body sample.
848   populateInferredFunctionSamples(getRootContext());
849 
850   updateFunctionSamples();
851 }
852 
853 void CSProfileGenerator::populateBodySamplesForFunction(
854     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
855   // Compute disjoint ranges first, so we can use MAX
856   // for calculating count for each location.
857   RangeSample Ranges;
858   findDisjointRanges(Ranges, RangeCounter);
859   for (const auto &Range : Ranges) {
860     uint64_t RangeBegin = Range.first.first;
861     uint64_t RangeEnd = Range.first.second;
862     uint64_t Count = Range.second;
863     // Disjoint ranges have introduce zero-filled gap that
864     // doesn't belong to current context, filter them out.
865     if (Count == 0)
866       continue;
867 
868     InstructionPointer IP(Binary, RangeBegin, true);
869     // Disjoint ranges may have range in the middle of two instr,
870     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
871     // can be Addr1+1 to Addr2-1. We should ignore such range.
872     if (IP.Address > RangeEnd)
873       continue;
874 
875     do {
876       auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
877       if (LeafLoc) {
878         // Recording body sample for this specific context
879         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
880         FunctionProfile.addTotalSamples(Count);
881       }
882     } while (IP.advance() && IP.Address <= RangeEnd);
883   }
884 }
885 
886 void CSProfileGenerator::populateBoundarySamplesForFunction(
887     ContextTrieNode *Node, const BranchSample &BranchCounters) {
888 
889   for (const auto &Entry : BranchCounters) {
890     uint64_t SourceAddress = Entry.first.first;
891     uint64_t TargetAddress = Entry.first.second;
892     uint64_t Count = Entry.second;
893     assert(Count != 0 && "Unexpected zero weight branch");
894 
895     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
896     if (CalleeName.size() == 0)
897       continue;
898 
899     ContextTrieNode *CallerNode = Node;
900     LineLocation CalleeCallSite(0, 0);
901     if (CallerNode != &getRootContext()) {
902       // Record called target sample and its count
903       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
904       if (LeafLoc) {
905         CallerNode->getFunctionSamples()->addCalledTargetSamples(
906             LeafLoc->Location.LineOffset,
907             getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
908             Count);
909         // Record head sample for called target(callee)
910         CalleeCallSite = LeafLoc->Location;
911       }
912     }
913 
914     ContextTrieNode *CalleeNode =
915         CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
916     FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
917     CalleeProfile->addHeadSamples(Count);
918   }
919 }
920 
921 void CSProfileGenerator::populateInferredFunctionSamples(
922     ContextTrieNode &Node) {
923   // There is no call jmp sample between the inliner and inlinee, we need to use
924   // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
925   // sample depends on child(inlinee)'s sample, so traverse the tree in
926   // post-order.
927   for (auto &It : Node.getAllChildContext())
928     populateInferredFunctionSamples(It.second);
929 
930   FunctionSamples *CalleeProfile = Node.getFunctionSamples();
931   if (!CalleeProfile)
932     return;
933   // If we already have head sample counts, we must have value profile
934   // for call sites added already. Skip to avoid double counting.
935   if (CalleeProfile->getHeadSamples())
936     return;
937   ContextTrieNode *CallerNode = Node.getParentContext();
938   // If we don't have context, nothing to do for caller's call site.
939   // This could happen for entry point function.
940   if (CallerNode == &getRootContext())
941     return;
942 
943   LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
944   FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
945   // Since we don't have call count for inlined functions, we
946   // estimate it from inlinee's profile using entry body sample.
947   uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
948   // If we don't have samples with location, use 1 to indicate live.
949   if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
950     EstimatedCallCount = 1;
951   CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
952                                        CallerLeafFrameLoc.Discriminator,
953                                        Node.getFuncName(), EstimatedCallCount);
954   CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
955                                CallerLeafFrameLoc.Discriminator,
956                                EstimatedCallCount);
957   CallerProfile.addTotalSamples(EstimatedCallCount);
958 }
959 
960 void CSProfileGenerator::convertToProfileMap(
961     ContextTrieNode &Node, SampleContextFrameVector &Context) {
962   FunctionSamples *FProfile = Node.getFunctionSamples();
963   if (FProfile) {
964     Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
965     // Save the new context for future references.
966     SampleContextFrames NewContext = *Contexts.insert(Context).first;
967     auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
968     FunctionSamples &NewProfile = Ret.first->second;
969     NewProfile.getContext().setContext(NewContext);
970     Context.pop_back();
971   }
972 
973   for (auto &It : Node.getAllChildContext()) {
974     ContextTrieNode &ChildNode = It.second;
975     Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
976     convertToProfileMap(ChildNode, Context);
977     Context.pop_back();
978   }
979 }
980 
981 void CSProfileGenerator::convertToProfileMap() {
982   assert(ProfileMap.empty() &&
983          "ProfileMap should be empty before converting from the trie");
984   assert(IsProfileValidOnTrie &&
985          "Do not convert the trie twice, it's already destroyed");
986 
987   SampleContextFrameVector Context;
988   for (auto &It : getRootContext().getAllChildContext())
989     convertToProfileMap(It.second, Context);
990 
991   IsProfileValidOnTrie = false;
992 }
993 
994 void CSProfileGenerator::postProcessProfiles() {
995   // Compute hot/cold threshold based on profile. This will be used for cold
996   // context profile merging/trimming.
997   computeSummaryAndThreshold();
998 
999   // Run global pre-inliner to adjust/merge context profile based on estimated
1000   // inline decisions.
1001   if (EnableCSPreInliner) {
1002     ContextTracker.populateFuncToCtxtMap();
1003     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
1004     // Turn off the profile merger by default unless it is explicitly enabled.
1005     if (!CSProfMergeColdContext.getNumOccurrences())
1006       CSProfMergeColdContext = false;
1007   }
1008 
1009   convertToProfileMap();
1010 
1011   // Trim and merge cold context profile using cold threshold above.
1012   if (TrimColdProfile || CSProfMergeColdContext) {
1013     SampleContextTrimmer(ProfileMap)
1014         .trimAndMergeColdContextProfiles(
1015             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1016             CSProfMaxColdContextDepth, EnableCSPreInliner);
1017   }
1018 
1019   // Merge function samples of CS profile to calculate profile density.
1020   sampleprof::SampleProfileMap ContextLessProfiles;
1021   for (const auto &I : ProfileMap) {
1022     ContextLessProfiles[I.second.getName()].merge(I.second);
1023   }
1024 
1025   calculateAndShowDensity(ContextLessProfiles);
1026   if (GenCSNestedProfile) {
1027     ProfileConverter CSConverter(ProfileMap);
1028     CSConverter.convertCSProfiles();
1029     FunctionSamples::ProfileIsCS = false;
1030   }
1031 }
1032 
1033 void ProfileGeneratorBase::computeSummaryAndThreshold(
1034     SampleProfileMap &Profiles) {
1035   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1036   Summary = Builder.computeSummaryForProfiles(Profiles);
1037   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1038       (Summary->getDetailedSummary()));
1039   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1040       (Summary->getDetailedSummary()));
1041 }
1042 
1043 void CSProfileGenerator::computeSummaryAndThreshold() {
1044   // Always merge and use context-less profile map to compute summary.
1045   SampleProfileMap ContextLessProfiles;
1046   ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1047 
1048   // Set the flag below to avoid merging the profile again in
1049   // computeSummaryAndThreshold
1050   FunctionSamples::ProfileIsCS = false;
1051   assert(
1052       (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1053       "Don't set --profile-summary-contextless to false for profile "
1054       "generation");
1055   ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1056   // Recover the old value.
1057   FunctionSamples::ProfileIsCS = true;
1058 }
1059 
1060 void ProfileGeneratorBase::extractProbesFromRange(
1061     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1062     bool FindDisjointRanges) {
1063   const RangeSample *PRanges = &RangeCounter;
1064   RangeSample Ranges;
1065   if (FindDisjointRanges) {
1066     findDisjointRanges(Ranges, RangeCounter);
1067     PRanges = &Ranges;
1068   }
1069 
1070   for (const auto &Range : *PRanges) {
1071     uint64_t RangeBegin = Range.first.first;
1072     uint64_t RangeEnd = Range.first.second;
1073     uint64_t Count = Range.second;
1074 
1075     InstructionPointer IP(Binary, RangeBegin, true);
1076     // Disjoint ranges may have range in the middle of two instr,
1077     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1078     // can be Addr1+1 to Addr2-1. We should ignore such range.
1079     if (IP.Address > RangeEnd)
1080       continue;
1081 
1082     do {
1083       const AddressProbesMap &Address2ProbesMap =
1084           Binary->getAddress2ProbesMap();
1085       auto It = Address2ProbesMap.find(IP.Address);
1086       if (It != Address2ProbesMap.end()) {
1087         for (const auto &Probe : It->second) {
1088           ProbeCounter[&Probe] += Count;
1089         }
1090       }
1091     } while (IP.advance() && IP.Address <= RangeEnd);
1092   }
1093 }
1094 
1095 static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1096                                       const SmallVectorImpl<uint64_t> &AddrVec,
1097                                       ProfiledBinary *Binary) {
1098   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1099   for (auto Address : reverse(AddrVec)) {
1100     const MCDecodedPseudoProbe *CallProbe =
1101         Binary->getCallProbeForAddr(Address);
1102     // These could be the cases when a probe is not found at a calliste. Cutting
1103     // off the context from here since the inliner will not know how to consume
1104     // a context with unknown callsites.
1105     // 1. for functions that are not sampled when
1106     // --decode-probe-for-profiled-functions-only is on.
1107     // 2. for a merged callsite. Callsite merging may cause the loss of original
1108     // probe IDs.
1109     // 3. for an external callsite.
1110     if (!CallProbe)
1111       break;
1112     Probes.push_back(CallProbe);
1113   }
1114 
1115   std::reverse(Probes.begin(), Probes.end());
1116 
1117   // Extract context stack for reusing, leaf context stack will be added
1118   // compressed while looking up function profile.
1119   for (const auto *P : Probes) {
1120     Binary->getInlineContextForProbe(P, ContextStack, true);
1121   }
1122 }
1123 
1124 void CSProfileGenerator::generateProbeBasedProfile() {
1125   // Enable pseudo probe functionalities in SampleProf
1126   FunctionSamples::ProfileIsProbeBased = true;
1127   for (const auto &CI : *SampleCounters) {
1128     const AddrBasedCtxKey *CtxKey =
1129         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1130     // Fill in function body samples from probes, also infer caller's samples
1131     // from callee's probe
1132     populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey);
1133     // Fill in boundary samples for a call probe
1134     populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey);
1135   }
1136 }
1137 
1138 void CSProfileGenerator::populateBodySamplesWithProbes(
1139     const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) {
1140   ProbeCounterMap ProbeCounter;
1141   // Extract the top frame probes by looking up each address among the range in
1142   // the Address2ProbeMap
1143   extractProbesFromRange(RangeCounter, ProbeCounter);
1144   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1145                      std::unordered_set<FunctionSamples *>>
1146       FrameSamples;
1147   for (const auto &PI : ProbeCounter) {
1148     const MCDecodedPseudoProbe *Probe = PI.first;
1149     uint64_t Count = PI.second;
1150     // Disjoint ranges have introduce zero-filled gap that
1151     // doesn't belong to current context, filter them out.
1152     if (!Probe->isBlock() || Count == 0)
1153       continue;
1154 
1155     ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe);
1156     FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1157     // Record the current frame and FunctionProfile whenever samples are
1158     // collected for non-danglie probes. This is for reporting all of the
1159     // zero count probes of the frame later.
1160     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1161     FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
1162                                    Count);
1163     FunctionProfile.addTotalSamples(Count);
1164     if (Probe->isEntry()) {
1165       FunctionProfile.addHeadSamples(Count);
1166       // Look up for the caller's function profile
1167       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1168       ContextTrieNode *CallerNode = ContextNode->getParentContext();
1169       if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1170         // Since the context id will be compressed, we have to use callee's
1171         // context id to infer caller's context id to ensure they share the
1172         // same context prefix.
1173         uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1174         uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
1175         assert(CallerIndex &&
1176                "Inferred caller's location index shouldn't be zero!");
1177         assert(!CallerDiscriminator &&
1178                "Callsite probe should not have a discriminator!");
1179         FunctionSamples &CallerProfile =
1180             *getOrCreateFunctionSamples(CallerNode);
1181         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1182         CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count);
1183         CallerProfile.addTotalSamples(Count);
1184         CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator,
1185                                              ContextNode->getFuncName(), Count);
1186       }
1187     }
1188   }
1189 
1190   // Assign zero count for remaining probes without sample hits to
1191   // differentiate from probes optimized away, of which the counts are unknown
1192   // and will be inferred by the compiler.
1193   for (auto &I : FrameSamples) {
1194     for (auto *FunctionProfile : I.second) {
1195       for (auto *Probe : I.first->getProbes()) {
1196         FunctionProfile->addBodySamples(Probe->getIndex(),
1197                                         Probe->getDiscriminator(), 0);
1198       }
1199     }
1200   }
1201 }
1202 
1203 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1204     const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) {
1205   for (const auto &BI : BranchCounter) {
1206     uint64_t SourceAddress = BI.first.first;
1207     uint64_t TargetAddress = BI.first.second;
1208     uint64_t Count = BI.second;
1209     const MCDecodedPseudoProbe *CallProbe =
1210         Binary->getCallProbeForAddr(SourceAddress);
1211     if (CallProbe == nullptr)
1212       continue;
1213     FunctionSamples &FunctionProfile =
1214         getFunctionProfileForLeafProbe(CtxKey, CallProbe);
1215     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1216     FunctionProfile.addTotalSamples(Count);
1217     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1218     if (CalleeName.size() == 0)
1219       continue;
1220     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
1221                                            CallProbe->getDiscriminator(),
1222                                            CalleeName, Count);
1223   }
1224 }
1225 
1226 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1227     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1228 
1229   const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context;
1230   SmallVector<uint64_t, 16> NewContext;
1231 
1232   if (InferMissingFrames) {
1233     SmallVector<uint64_t, 16> Context = CtxKey->Context;
1234     // Append leaf frame for a complete inference.
1235     Context.push_back(LeafProbe->getAddress());
1236     inferMissingFrames(Context, NewContext);
1237     // Pop out the leaf probe that was pushed in above.
1238     NewContext.pop_back();
1239     PContext = &NewContext;
1240   }
1241 
1242   SampleContextFrameVector ContextStack;
1243   extractPrefixContextStack(ContextStack, *PContext, Binary);
1244 
1245   // Explicitly copy the context for appending the leaf context
1246   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1247                                            ContextStack.end());
1248   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1249   // For leaf inlined context with the top frame, we should strip off the top
1250   // frame's probe id, like:
1251   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1252   auto LeafFrame = NewContextStack.back();
1253   LeafFrame.Location = LineLocation(0, 0);
1254   NewContextStack.pop_back();
1255   // Compress the context string except for the leaf frame
1256   CSProfileGenerator::compressRecursionContext(NewContextStack);
1257   CSProfileGenerator::trimContext(NewContextStack);
1258   NewContextStack.push_back(LeafFrame);
1259 
1260   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1261   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1262   ContextTrieNode *ContextNode =
1263       getOrCreateContextNode(NewContextStack, WasLeafInlined);
1264   ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1265   return ContextNode;
1266 }
1267 
1268 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1269     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1270   return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples();
1271 }
1272 
1273 } // end namespace sampleprof
1274 } // end namespace llvm
1275