xref: /llvm-project/llvm/tools/llvm-profgen/ProfileGenerator.cpp (revision d5a963ab8b40fcf7a99acd834e5f10a1a30cc2e5)
1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "PerfReader.h"
11 #include "ProfiledBinary.h"
12 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
13 #include "llvm/ProfileData/ProfileCommon.h"
14 #include <algorithm>
15 #include <float.h>
16 #include <unordered_set>
17 #include <utility>
18 
19 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
20                                     cl::Required,
21                                     cl::desc("Output profile file"));
22 static cl::alias OutputA("o", cl::desc("Alias for --output"),
23                          cl::aliasopt(OutputFilename));
24 
25 static cl::opt<SampleProfileFormat> OutputFormat(
26     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
27     cl::values(
28         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
29         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
30         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31         clEnumValN(SPF_Text, "text", "Text encoding"),
32         clEnumValN(SPF_GCC, "gcc",
33                    "GCC encoding (only meaningful for -sample)")));
34 
35 cl::opt<bool> UseMD5(
36     "use-md5", cl::init(false), cl::Hidden,
37     cl::desc("Use md5 to represent function names in the output profile (only "
38              "meaningful for -extbinary)"));
39 
40 static cl::opt<bool> PopulateProfileSymbolList(
41     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43 
44 static cl::opt<bool> FillZeroForAllFuncs(
45     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46     cl::desc("Attribute all functions' range with zero count "
47              "even it's not hit by any samples."));
48 
49 static cl::opt<int32_t, true> RecursionCompression(
50     "compress-recursion",
51     cl::desc("Compressing recursion by deduplicating adjacent frame "
52              "sequences up to the specified size. -1 means no size limit."),
53     cl::Hidden,
54     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55 
56 static cl::opt<bool>
57     TrimColdProfile("trim-cold-profile",
58                     cl::desc("If the total count of the profile is smaller "
59                              "than threshold, it will be trimmed."));
60 
61 static cl::opt<bool> CSProfMergeColdContext(
62     "csprof-merge-cold-context", cl::init(true),
63     cl::desc("If the total count of context profile is smaller than "
64              "the threshold, it will be merged into context-less base "
65              "profile."));
66 
67 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68     "csprof-max-cold-context-depth", cl::init(1),
69     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70              "context-less base profile"));
71 
72 static cl::opt<int, true> CSProfMaxContextDepth(
73     "csprof-max-context-depth",
74     cl::desc("Keep the last K contexts while merging profile. -1 means no "
75              "depth limit."),
76     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77 
78 static cl::opt<double> HotFunctionDensityThreshold(
79     "hot-function-density-threshold", llvm::cl::init(1000),
80     llvm::cl::desc(
81         "specify density threshold for hot functions (default: 1000)"),
82     llvm::cl::Optional);
83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84                                  llvm::cl::desc("show profile density details"),
85                                  llvm::cl::Optional);
86 
87 static cl::opt<bool> UpdateTotalSamples(
88     "update-total-samples", llvm::cl::init(false),
89     llvm::cl::desc(
90         "Update total samples by accumulating all its body samples."),
91     llvm::cl::Optional);
92 
93 extern cl::opt<int> ProfileSummaryCutoffHot;
94 extern cl::opt<bool> UseContextLessSummary;
95 
96 static cl::opt<bool> GenCSNestedProfile(
97     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
98     cl::desc("Generate nested function profiles for CSSPGO"));
99 
100 using namespace llvm;
101 using namespace sampleprof;
102 
103 namespace llvm {
104 namespace sampleprof {
105 
106 // Initialize the MaxCompressionSize to -1 which means no size limit
107 int32_t CSProfileGenerator::MaxCompressionSize = -1;
108 
109 int CSProfileGenerator::MaxContextDepth = -1;
110 
111 bool ProfileGeneratorBase::UseFSDiscriminator = false;
112 
113 std::unique_ptr<ProfileGeneratorBase>
114 ProfileGeneratorBase::create(ProfiledBinary *Binary,
115                              const ContextSampleCounterMap *SampleCounters,
116                              bool ProfileIsCS) {
117   std::unique_ptr<ProfileGeneratorBase> Generator;
118   if (ProfileIsCS) {
119     if (Binary->useFSDiscriminator())
120       exitWithError("FS discriminator is not supported in CS profile.");
121     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
122   } else {
123     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
124   }
125   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
126   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
127 
128   return Generator;
129 }
130 
131 std::unique_ptr<ProfileGeneratorBase>
132 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
133                              bool ProfileIsCS) {
134   std::unique_ptr<ProfileGeneratorBase> Generator;
135   if (ProfileIsCS) {
136     if (Binary->useFSDiscriminator())
137       exitWithError("FS discriminator is not supported in CS profile.");
138     Generator.reset(new CSProfileGenerator(Binary, Profiles));
139   } else {
140     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
141   }
142   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
143   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
144 
145   return Generator;
146 }
147 
148 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
149                                  SampleProfileMap &ProfileMap) {
150   // Populate profile symbol list if extended binary format is used.
151   ProfileSymbolList SymbolList;
152 
153   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
154     Binary->populateSymbolListFromDWARF(SymbolList);
155     Writer->setProfileSymbolList(&SymbolList);
156   }
157 
158   if (std::error_code EC = Writer->write(ProfileMap))
159     exitWithError(std::move(EC));
160 }
161 
162 void ProfileGeneratorBase::write() {
163   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
164   if (std::error_code EC = WriterOrErr.getError())
165     exitWithError(EC, OutputFilename);
166 
167   if (UseMD5) {
168     if (OutputFormat != SPF_Ext_Binary)
169       WithColor::warning() << "-use-md5 is ignored. Specify "
170                               "--format=extbinary to enable it\n";
171     else
172       WriterOrErr.get()->setUseMD5();
173   }
174 
175   write(std::move(WriterOrErr.get()), ProfileMap);
176 }
177 
178 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
179   if (Density == 0.0)
180     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
181                             "set too low. Please check your command.\n";
182   else if (Density < HotFunctionDensityThreshold)
183     WithColor::warning()
184         << "AutoFDO is estimated to optimize better with "
185         << format("%.1f", HotFunctionDensityThreshold / Density)
186         << "x more samples. Please consider increasing sampling rate or "
187            "profiling for longer duration to get more samples.\n";
188 
189   if (ShowDensity)
190     outs() << "Minimum profile density for hot functions with top "
191            << format("%.2f",
192                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
193                          10000)
194            << "% total samples: " << format("%.1f", Density) << "\n";
195 }
196 
197 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
198                                               uint64_t HotCntThreshold) {
199   double Density = DBL_MAX;
200   std::vector<const FunctionSamples *> HotFuncs;
201   for (auto &I : Profiles) {
202     auto &FuncSamples = I.second;
203     if (FuncSamples.getTotalSamples() < HotCntThreshold)
204       continue;
205     HotFuncs.emplace_back(&FuncSamples);
206   }
207 
208   for (auto *FuncSamples : HotFuncs) {
209     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
210     if (!Func)
211       continue;
212     uint64_t FuncSize = Func->getFuncSize();
213     if (FuncSize == 0)
214       continue;
215     Density =
216         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
217                               FuncSize);
218   }
219 
220   return Density == DBL_MAX ? 0.0 : Density;
221 }
222 
223 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
224                                               const RangeSample &Ranges) {
225 
226   /*
227   Regions may overlap with each other. Using the boundary info, find all
228   disjoint ranges and their sample count. BoundaryPoint contains the count
229   multiple samples begin/end at this points.
230 
231   |<--100-->|           Sample1
232   |<------200------>|   Sample2
233   A         B       C
234 
235   In the example above,
236   Sample1 begins at A, ends at B, its value is 100.
237   Sample2 beings at A, ends at C, its value is 200.
238   For A, BeginCount is the sum of sample begins at A, which is 300 and no
239   samples ends at A, so EndCount is 0.
240   Then boundary points A, B, and C with begin/end counts are:
241   A: (300, 0)
242   B: (0, 100)
243   C: (0, 200)
244   */
245   struct BoundaryPoint {
246     // Sum of sample counts beginning at this point
247     uint64_t BeginCount = UINT64_MAX;
248     // Sum of sample counts ending at this point
249     uint64_t EndCount = UINT64_MAX;
250     // Is the begin point of a zero range.
251     bool IsZeroRangeBegin = false;
252     // Is the end point of a zero range.
253     bool IsZeroRangeEnd = false;
254 
255     void addBeginCount(uint64_t Count) {
256       if (BeginCount == UINT64_MAX)
257         BeginCount = 0;
258       BeginCount += Count;
259     }
260 
261     void addEndCount(uint64_t Count) {
262       if (EndCount == UINT64_MAX)
263         EndCount = 0;
264       EndCount += Count;
265     }
266   };
267 
268   /*
269   For the above example. With boundary points, follwing logic finds two
270   disjoint region of
271 
272   [A,B]:   300
273   [B+1,C]: 200
274 
275   If there is a boundary point that both begin and end, the point itself
276   becomes a separate disjoint region. For example, if we have original
277   ranges of
278 
279   |<--- 100 --->|
280                 |<--- 200 --->|
281   A             B             C
282 
283   there are three boundary points with their begin/end counts of
284 
285   A: (100, 0)
286   B: (200, 100)
287   C: (0, 200)
288 
289   the disjoint ranges would be
290 
291   [A, B-1]: 100
292   [B, B]:   300
293   [B+1, C]: 200.
294 
295   Example for zero value range:
296 
297     |<--- 100 --->|
298                        |<--- 200 --->|
299   |<---------------  0 ----------------->|
300   A  B            C    D             E   F
301 
302   [A, B-1]  : 0
303   [B, C]    : 100
304   [C+1, D-1]: 0
305   [D, E]    : 200
306   [E+1, F]  : 0
307   */
308   std::map<uint64_t, BoundaryPoint> Boundaries;
309 
310   for (const auto &Item : Ranges) {
311     assert(Item.first.first <= Item.first.second &&
312            "Invalid instruction range");
313     auto &BeginPoint = Boundaries[Item.first.first];
314     auto &EndPoint = Boundaries[Item.first.second];
315     uint64_t Count = Item.second;
316 
317     BeginPoint.addBeginCount(Count);
318     EndPoint.addEndCount(Count);
319     if (Count == 0) {
320       BeginPoint.IsZeroRangeBegin = true;
321       EndPoint.IsZeroRangeEnd = true;
322     }
323   }
324 
325   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
326   // and the next valid address
327   uint64_t BeginAddress = UINT64_MAX;
328   int ZeroRangeDepth = 0;
329   uint64_t Count = 0;
330   for (const auto &Item : Boundaries) {
331     uint64_t Address = Item.first;
332     const BoundaryPoint &Point = Item.second;
333     if (Point.BeginCount != UINT64_MAX) {
334       if (BeginAddress != UINT64_MAX)
335         DisjointRanges[{BeginAddress, Address - 1}] = Count;
336       Count += Point.BeginCount;
337       BeginAddress = Address;
338       ZeroRangeDepth += Point.IsZeroRangeBegin;
339     }
340     if (Point.EndCount != UINT64_MAX) {
341       assert((BeginAddress != UINT64_MAX) &&
342              "First boundary point cannot be 'end' point");
343       DisjointRanges[{BeginAddress, Address}] = Count;
344       assert(Count >= Point.EndCount && "Mismatched live ranges");
345       Count -= Point.EndCount;
346       BeginAddress = Address + 1;
347       ZeroRangeDepth -= Point.IsZeroRangeEnd;
348       // If the remaining count is zero and it's no longer in a zero range, this
349       // means we consume all the ranges before, thus mark BeginAddress as
350       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
351       //  [<---- 10 ---->]
352       //                       [<---- 20 ---->]
353       //   A             B     C              D
354       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
355       // have the [B+1, C-1] zero range.
356       if (Count == 0 && ZeroRangeDepth == 0)
357         BeginAddress = UINT64_MAX;
358     }
359   }
360 }
361 
362 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
363     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
364     uint64_t Count) {
365   // Use the maximum count of samples with same line location
366   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
367 
368   // Use duplication factor to compensated for loop unroll/vectorization.
369   // Note that this is only needed when we're taking MAX of the counts at
370   // the location instead of SUM.
371   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
372 
373   ErrorOr<uint64_t> R =
374       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
375 
376   uint64_t PreviousCount = R ? R.get() : 0;
377   if (PreviousCount <= Count) {
378     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
379                                    Count - PreviousCount);
380   }
381 }
382 
383 void ProfileGeneratorBase::updateTotalSamples() {
384   for (auto &Item : ProfileMap) {
385     FunctionSamples &FunctionProfile = Item.second;
386     FunctionProfile.updateTotalSamples();
387   }
388 }
389 
390 void ProfileGeneratorBase::updateCallsiteSamples() {
391   for (auto &Item : ProfileMap) {
392     FunctionSamples &FunctionProfile = Item.second;
393     FunctionProfile.updateCallsiteSamples();
394   }
395 }
396 
397 void ProfileGeneratorBase::updateFunctionSamples() {
398   updateCallsiteSamples();
399 
400   if (UpdateTotalSamples)
401     updateTotalSamples();
402 }
403 
404 void ProfileGeneratorBase::collectProfiledFunctions() {
405   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
406   if (collectFunctionsFromRawProfile(ProfiledFunctions))
407     Binary->setProfiledFunctions(ProfiledFunctions);
408   else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
409     Binary->setProfiledFunctions(ProfiledFunctions);
410   else
411     llvm_unreachable("Unsupported input profile");
412 }
413 
414 bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
415     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
416   if (!SampleCounters)
417     return false;
418   // Go through all the stacks, ranges and branches in sample counters, use
419   // the start of the range to look up the function it belongs and record the
420   // function.
421   for (const auto &CI : *SampleCounters) {
422     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
423       for (auto StackAddr : CtxKey->Context) {
424         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
425           ProfiledFunctions.insert(FRange->Func);
426       }
427     }
428 
429     for (auto Item : CI.second.RangeCounter) {
430       uint64_t StartAddress = Item.first.first;
431       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
432         ProfiledFunctions.insert(FRange->Func);
433     }
434 
435     for (auto Item : CI.second.BranchCounter) {
436       uint64_t SourceAddress = Item.first.first;
437       uint64_t TargetAddress = Item.first.first;
438       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
439         ProfiledFunctions.insert(FRange->Func);
440       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
441         ProfiledFunctions.insert(FRange->Func);
442     }
443   }
444   return true;
445 }
446 
447 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
448     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
449   for (const auto &FS : ProfileMap) {
450     if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
451       ProfiledFunctions.insert(Func);
452   }
453   return true;
454 }
455 
456 bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
457     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
458   for (auto *Node : ContextTracker) {
459     if (!Node->getFuncName().empty())
460       if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
461         ProfiledFunctions.insert(Func);
462   }
463   return true;
464 }
465 
466 FunctionSamples &
467 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
468   SampleContext Context(FuncName);
469   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
470   if (Ret.second) {
471     FunctionSamples &FProfile = Ret.first->second;
472     FProfile.setContext(Context);
473   }
474   return Ret.first->second;
475 }
476 
477 void ProfileGenerator::generateProfile() {
478   collectProfiledFunctions();
479 
480   if (Binary->usePseudoProbes())
481     Binary->decodePseudoProbe();
482 
483   if (SampleCounters) {
484     if (Binary->usePseudoProbes()) {
485       generateProbeBasedProfile();
486     } else {
487       generateLineNumBasedProfile();
488     }
489   }
490 
491   postProcessProfiles();
492 }
493 
494 void ProfileGenerator::postProcessProfiles() {
495   computeSummaryAndThreshold(ProfileMap);
496   trimColdProfiles(ProfileMap, ColdCountThreshold);
497   calculateAndShowDensity(ProfileMap);
498 }
499 
500 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
501                                         uint64_t ColdCntThreshold) {
502   if (!TrimColdProfile)
503     return;
504 
505   // Move cold profiles into a tmp container.
506   std::vector<SampleContext> ColdProfiles;
507   for (const auto &I : ProfileMap) {
508     if (I.second.getTotalSamples() < ColdCntThreshold)
509       ColdProfiles.emplace_back(I.first);
510   }
511 
512   // Remove the cold profile from ProfileMap.
513   for (const auto &I : ColdProfiles)
514     ProfileMap.erase(I);
515 }
516 
517 void ProfileGenerator::generateLineNumBasedProfile() {
518   assert(SampleCounters->size() == 1 &&
519          "Must have one entry for profile generation.");
520   const SampleCounter &SC = SampleCounters->begin()->second;
521   // Fill in function body samples
522   populateBodySamplesForAllFunctions(SC.RangeCounter);
523   // Fill in boundary sample counts as well as call site samples for calls
524   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
525 
526   updateFunctionSamples();
527 }
528 
529 void ProfileGenerator::generateProbeBasedProfile() {
530   assert(SampleCounters->size() == 1 &&
531          "Must have one entry for profile generation.");
532   // Enable pseudo probe functionalities in SampleProf
533   FunctionSamples::ProfileIsProbeBased = true;
534   const SampleCounter &SC = SampleCounters->begin()->second;
535   // Fill in function body samples
536   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
537   // Fill in boundary sample counts as well as call site samples for calls
538   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
539 
540   updateFunctionSamples();
541 }
542 
543 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
544     const RangeSample &RangeCounter) {
545   ProbeCounterMap ProbeCounter;
546   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
547   // inside extractProbesFromRange.
548   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
549                          false);
550 
551   for (const auto &PI : ProbeCounter) {
552     const MCDecodedPseudoProbe *Probe = PI.first;
553     uint64_t Count = PI.second;
554     SampleContextFrameVector FrameVec;
555     Binary->getInlineContextForProbe(Probe, FrameVec, true);
556     FunctionSamples &FunctionProfile =
557         getLeafProfileAndAddTotalSamples(FrameVec, Count);
558     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
559     if (Probe->isEntry())
560       FunctionProfile.addHeadSamples(Count);
561   }
562 }
563 
564 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
565     const BranchSample &BranchCounters) {
566   for (const auto &Entry : BranchCounters) {
567     uint64_t SourceAddress = Entry.first.first;
568     uint64_t TargetAddress = Entry.first.second;
569     uint64_t Count = Entry.second;
570     assert(Count != 0 && "Unexpected zero weight branch");
571 
572     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
573     if (CalleeName.size() == 0)
574       continue;
575 
576     const MCDecodedPseudoProbe *CallProbe =
577         Binary->getCallProbeForAddr(SourceAddress);
578     if (CallProbe == nullptr)
579       continue;
580 
581     // Record called target sample and its count.
582     SampleContextFrameVector FrameVec;
583     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
584 
585     if (!FrameVec.empty()) {
586       FunctionSamples &FunctionProfile =
587           getLeafProfileAndAddTotalSamples(FrameVec, 0);
588       FunctionProfile.addCalledTargetSamples(
589           FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
590     }
591   }
592 }
593 
594 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
595     const SampleContextFrameVector &FrameVec, uint64_t Count) {
596   // Get top level profile
597   FunctionSamples *FunctionProfile =
598       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
599   FunctionProfile->addTotalSamples(Count);
600   if (Binary->usePseudoProbes()) {
601     const auto *FuncDesc = Binary->getFuncDescForGUID(
602         Function::getGUID(FunctionProfile->getName()));
603     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
604   }
605 
606   for (size_t I = 1; I < FrameVec.size(); I++) {
607     LineLocation Callsite(
608         FrameVec[I - 1].Location.LineOffset,
609         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
610     FunctionSamplesMap &SamplesMap =
611         FunctionProfile->functionSamplesAt(Callsite);
612     auto Ret =
613         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
614     if (Ret.second) {
615       SampleContext Context(FrameVec[I].FuncName);
616       Ret.first->second.setContext(Context);
617     }
618     FunctionProfile = &Ret.first->second;
619     FunctionProfile->addTotalSamples(Count);
620     if (Binary->usePseudoProbes()) {
621       const auto *FuncDesc = Binary->getFuncDescForGUID(
622           Function::getGUID(FunctionProfile->getName()));
623       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
624     }
625   }
626 
627   return *FunctionProfile;
628 }
629 
630 RangeSample
631 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
632   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
633   if (FillZeroForAllFuncs) {
634     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
635       for (auto &R : FuncI.second.Ranges) {
636         Ranges[{R.first, R.second - 1}] += 0;
637       }
638     }
639   } else {
640     // For each range, we search for all ranges of the function it belongs to
641     // and initialize it with zero count, so it remains zero if doesn't hit any
642     // samples. This is to be consistent with compiler that interpret zero count
643     // as unexecuted(cold).
644     for (const auto &I : RangeCounter) {
645       uint64_t StartAddress = I.first.first;
646       for (const auto &Range : Binary->getRanges(StartAddress))
647         Ranges[{Range.first, Range.second - 1}] += 0;
648     }
649   }
650   RangeSample DisjointRanges;
651   findDisjointRanges(DisjointRanges, Ranges);
652   return DisjointRanges;
653 }
654 
655 void ProfileGenerator::populateBodySamplesForAllFunctions(
656     const RangeSample &RangeCounter) {
657   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
658     uint64_t RangeBegin = Range.first.first;
659     uint64_t RangeEnd = Range.first.second;
660     uint64_t Count = Range.second;
661 
662     InstructionPointer IP(Binary, RangeBegin, true);
663     // Disjoint ranges may have range in the middle of two instr,
664     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
665     // can be Addr1+1 to Addr2-1. We should ignore such range.
666     if (IP.Address > RangeEnd)
667       continue;
668 
669     do {
670       const SampleContextFrameVector FrameVec =
671           Binary->getFrameLocationStack(IP.Address);
672       if (!FrameVec.empty()) {
673         // FIXME: As accumulating total count per instruction caused some
674         // regression, we changed to accumulate total count per byte as a
675         // workaround. Tuning hotness threshold on the compiler side might be
676         // necessary in the future.
677         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
678             FrameVec, Count * Binary->getInstSize(IP.Address));
679         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
680                                             Count);
681       }
682     } while (IP.advance() && IP.Address <= RangeEnd);
683   }
684 }
685 
686 StringRef
687 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
688   // Get the function range by branch target if it's a call branch.
689   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
690 
691   // We won't accumulate sample count for a range whose start is not the real
692   // function entry such as outlined function or inner labels.
693   if (!FRange || !FRange->IsFuncEntry)
694     return StringRef();
695 
696   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
697 }
698 
699 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
700     const BranchSample &BranchCounters) {
701   for (const auto &Entry : BranchCounters) {
702     uint64_t SourceAddress = Entry.first.first;
703     uint64_t TargetAddress = Entry.first.second;
704     uint64_t Count = Entry.second;
705     assert(Count != 0 && "Unexpected zero weight branch");
706 
707     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
708     if (CalleeName.size() == 0)
709       continue;
710     // Record called target sample and its count.
711     const SampleContextFrameVector &FrameVec =
712         Binary->getCachedFrameLocationStack(SourceAddress);
713     if (!FrameVec.empty()) {
714       FunctionSamples &FunctionProfile =
715           getLeafProfileAndAddTotalSamples(FrameVec, 0);
716       FunctionProfile.addCalledTargetSamples(
717           FrameVec.back().Location.LineOffset,
718           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
719           CalleeName, Count);
720     }
721     // Add head samples for callee.
722     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
723     CalleeProfile.addHeadSamples(Count);
724   }
725 }
726 
727 void ProfileGeneratorBase::calculateAndShowDensity(
728     const SampleProfileMap &Profiles) {
729   double Density = calculateDensity(Profiles, HotCountThreshold);
730   showDensitySuggestion(Density);
731 }
732 
733 FunctionSamples *
734 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
735                                                bool WasLeafInlined) {
736   FunctionSamples *FProfile = ContextNode->getFunctionSamples();
737   if (!FProfile) {
738     FSamplesList.emplace_back();
739     FProfile = &FSamplesList.back();
740     FProfile->setName(ContextNode->getFuncName());
741     ContextNode->setFunctionSamples(FProfile);
742   }
743   // Update ContextWasInlined attribute for existing contexts.
744   // The current function can be called in two ways:
745   //  - when processing a probe of the current frame
746   //  - when processing the entry probe of an inlinee's frame, which
747   //    is then used to update the callsite count of the current frame.
748   // The two can happen in any order, hence here we are making sure
749   // `ContextWasInlined` is always set as expected.
750   // TODO: Note that the former does not always happen if no probes of the
751   // current frame has samples, and if the latter happens, we could lose the
752   // attribute. This should be fixed.
753   if (WasLeafInlined)
754     FProfile->getContext().setAttribute(ContextWasInlined);
755   return FProfile;
756 }
757 
758 ContextTrieNode *
759 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
760                                            bool WasLeafInlined) {
761   ContextTrieNode *ContextNode =
762       ContextTracker.getOrCreateContextPath(Context, true);
763   getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
764   return ContextNode;
765 }
766 
767 void CSProfileGenerator::generateProfile() {
768   FunctionSamples::ProfileIsCS = true;
769 
770   collectProfiledFunctions();
771 
772   if (Binary->usePseudoProbes())
773     Binary->decodePseudoProbe();
774 
775   if (SampleCounters) {
776     if (Binary->usePseudoProbes()) {
777       generateProbeBasedProfile();
778     } else {
779       generateLineNumBasedProfile();
780     }
781   }
782 
783   if (Binary->getTrackFuncContextSize())
784     computeSizeForProfiledFunctions();
785 
786   postProcessProfiles();
787 }
788 
789 void CSProfileGenerator::computeSizeForProfiledFunctions() {
790   for (auto *Func : Binary->getProfiledFunctions())
791     Binary->computeInlinedContextSizeForFunc(Func);
792 
793   // Flush the symbolizer to save memory.
794   Binary->flushSymbolizer();
795 }
796 
797 void CSProfileGenerator::updateFunctionSamples() {
798   for (auto *Node : ContextTracker) {
799     FunctionSamples *FSamples = Node->getFunctionSamples();
800     if (FSamples) {
801       if (UpdateTotalSamples)
802         FSamples->updateTotalSamples();
803       FSamples->updateCallsiteSamples();
804     }
805   }
806 }
807 
808 void CSProfileGenerator::generateLineNumBasedProfile() {
809   for (const auto &CI : *SampleCounters) {
810     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
811 
812     ContextTrieNode *ContextNode = &getRootContext();
813     // Sample context will be empty if the jump is an external-to-internal call
814     // pattern, the head samples should be added for the internal function.
815     if (!CtxKey->Context.empty()) {
816       // Get or create function profile for the range
817       ContextNode =
818           getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
819       // Fill in function body samples
820       populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
821                                      CI.second.RangeCounter);
822     }
823     // Fill in boundary sample counts as well as call site samples for calls
824     populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
825   }
826   // Fill in call site value sample for inlined calls and also use context to
827   // infer missing samples. Since we don't have call count for inlined
828   // functions, we estimate it from inlinee's profile using the entry of the
829   // body sample.
830   populateInferredFunctionSamples(getRootContext());
831 
832   updateFunctionSamples();
833 }
834 
835 void CSProfileGenerator::populateBodySamplesForFunction(
836     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
837   // Compute disjoint ranges first, so we can use MAX
838   // for calculating count for each location.
839   RangeSample Ranges;
840   findDisjointRanges(Ranges, RangeCounter);
841   for (const auto &Range : Ranges) {
842     uint64_t RangeBegin = Range.first.first;
843     uint64_t RangeEnd = Range.first.second;
844     uint64_t Count = Range.second;
845     // Disjoint ranges have introduce zero-filled gap that
846     // doesn't belong to current context, filter them out.
847     if (Count == 0)
848       continue;
849 
850     InstructionPointer IP(Binary, RangeBegin, true);
851     // Disjoint ranges may have range in the middle of two instr,
852     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
853     // can be Addr1+1 to Addr2-1. We should ignore such range.
854     if (IP.Address > RangeEnd)
855       continue;
856 
857     do {
858       auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
859       if (LeafLoc) {
860         // Recording body sample for this specific context
861         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
862         FunctionProfile.addTotalSamples(Count);
863       }
864     } while (IP.advance() && IP.Address <= RangeEnd);
865   }
866 }
867 
868 void CSProfileGenerator::populateBoundarySamplesForFunction(
869     ContextTrieNode *Node, const BranchSample &BranchCounters) {
870 
871   for (const auto &Entry : BranchCounters) {
872     uint64_t SourceAddress = Entry.first.first;
873     uint64_t TargetAddress = Entry.first.second;
874     uint64_t Count = Entry.second;
875     assert(Count != 0 && "Unexpected zero weight branch");
876 
877     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
878     if (CalleeName.size() == 0)
879       continue;
880 
881     ContextTrieNode *CallerNode = Node;
882     LineLocation CalleeCallSite(0, 0);
883     if (CallerNode != &getRootContext()) {
884       // Record called target sample and its count
885       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
886       if (LeafLoc) {
887         CallerNode->getFunctionSamples()->addCalledTargetSamples(
888             LeafLoc->Location.LineOffset,
889             getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
890             Count);
891         // Record head sample for called target(callee)
892         CalleeCallSite = LeafLoc->Location;
893       }
894     }
895 
896     ContextTrieNode *CalleeNode =
897         CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
898     FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
899     CalleeProfile->addHeadSamples(Count);
900   }
901 }
902 
903 void CSProfileGenerator::populateInferredFunctionSamples(
904     ContextTrieNode &Node) {
905   // There is no call jmp sample between the inliner and inlinee, we need to use
906   // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
907   // sample depends on child(inlinee)'s sample, so traverse the tree in
908   // post-order.
909   for (auto &It : Node.getAllChildContext())
910     populateInferredFunctionSamples(It.second);
911 
912   FunctionSamples *CalleeProfile = Node.getFunctionSamples();
913   if (!CalleeProfile)
914     return;
915   // If we already have head sample counts, we must have value profile
916   // for call sites added already. Skip to avoid double counting.
917   if (CalleeProfile->getHeadSamples())
918     return;
919   ContextTrieNode *CallerNode = Node.getParentContext();
920   // If we don't have context, nothing to do for caller's call site.
921   // This could happen for entry point function.
922   if (CallerNode == &getRootContext())
923     return;
924 
925   LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
926   FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
927   // Since we don't have call count for inlined functions, we
928   // estimate it from inlinee's profile using entry body sample.
929   uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
930   // If we don't have samples with location, use 1 to indicate live.
931   if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
932     EstimatedCallCount = 1;
933   CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
934                                        CallerLeafFrameLoc.Discriminator,
935                                        Node.getFuncName(), EstimatedCallCount);
936   CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
937                                CallerLeafFrameLoc.Discriminator,
938                                EstimatedCallCount);
939   CallerProfile.addTotalSamples(EstimatedCallCount);
940 }
941 
942 void CSProfileGenerator::convertToProfileMap(
943     ContextTrieNode &Node, SampleContextFrameVector &Context) {
944   FunctionSamples *FProfile = Node.getFunctionSamples();
945   if (FProfile) {
946     Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
947     // Save the new context for future references.
948     SampleContextFrames NewContext = *Contexts.insert(Context).first;
949     auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
950     FunctionSamples &NewProfile = Ret.first->second;
951     NewProfile.getContext().setContext(NewContext);
952     Context.pop_back();
953   }
954 
955   for (auto &It : Node.getAllChildContext()) {
956     ContextTrieNode &ChildNode = It.second;
957     Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
958     convertToProfileMap(ChildNode, Context);
959     Context.pop_back();
960   }
961 }
962 
963 void CSProfileGenerator::convertToProfileMap() {
964   assert(ProfileMap.empty() &&
965          "ProfileMap should be empty before converting from the trie");
966   assert(IsProfileValidOnTrie &&
967          "Do not convert the trie twice, it's already destroyed");
968 
969   SampleContextFrameVector Context;
970   for (auto &It : getRootContext().getAllChildContext())
971     convertToProfileMap(It.second, Context);
972 
973   IsProfileValidOnTrie = false;
974 }
975 
976 void CSProfileGenerator::postProcessProfiles() {
977   // Compute hot/cold threshold based on profile. This will be used for cold
978   // context profile merging/trimming.
979   computeSummaryAndThreshold();
980 
981   // Run global pre-inliner to adjust/merge context profile based on estimated
982   // inline decisions.
983   if (EnableCSPreInliner) {
984     ContextTracker.populateFuncToCtxtMap();
985     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
986     // Turn off the profile merger by default unless it is explicitly enabled.
987     if (!CSProfMergeColdContext.getNumOccurrences())
988       CSProfMergeColdContext = false;
989   }
990 
991   convertToProfileMap();
992 
993   // Trim and merge cold context profile using cold threshold above.
994   if (TrimColdProfile || CSProfMergeColdContext) {
995     SampleContextTrimmer(ProfileMap)
996         .trimAndMergeColdContextProfiles(
997             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
998             CSProfMaxColdContextDepth, EnableCSPreInliner);
999   }
1000 
1001   // Merge function samples of CS profile to calculate profile density.
1002   sampleprof::SampleProfileMap ContextLessProfiles;
1003   for (const auto &I : ProfileMap) {
1004     ContextLessProfiles[I.second.getName()].merge(I.second);
1005   }
1006 
1007   calculateAndShowDensity(ContextLessProfiles);
1008   if (GenCSNestedProfile) {
1009     CSProfileConverter CSConverter(ProfileMap);
1010     CSConverter.convertProfiles();
1011     FunctionSamples::ProfileIsCS = false;
1012   }
1013 }
1014 
1015 void ProfileGeneratorBase::computeSummaryAndThreshold(
1016     SampleProfileMap &Profiles) {
1017   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1018   Summary = Builder.computeSummaryForProfiles(Profiles);
1019   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1020       (Summary->getDetailedSummary()));
1021   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1022       (Summary->getDetailedSummary()));
1023 }
1024 
1025 void CSProfileGenerator::computeSummaryAndThreshold() {
1026   // Always merge and use context-less profile map to compute summary.
1027   SampleProfileMap ContextLessProfiles;
1028   ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1029 
1030   // Set the flag below to avoid merging the profile again in
1031   // computeSummaryAndThreshold
1032   FunctionSamples::ProfileIsCS = false;
1033   assert(
1034       (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1035       "Don't set --profile-summary-contextless to false for profile "
1036       "generation");
1037   ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1038   // Recover the old value.
1039   FunctionSamples::ProfileIsCS = true;
1040 }
1041 
1042 void ProfileGeneratorBase::extractProbesFromRange(
1043     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1044     bool FindDisjointRanges) {
1045   const RangeSample *PRanges = &RangeCounter;
1046   RangeSample Ranges;
1047   if (FindDisjointRanges) {
1048     findDisjointRanges(Ranges, RangeCounter);
1049     PRanges = &Ranges;
1050   }
1051 
1052   for (const auto &Range : *PRanges) {
1053     uint64_t RangeBegin = Range.first.first;
1054     uint64_t RangeEnd = Range.first.second;
1055     uint64_t Count = Range.second;
1056 
1057     InstructionPointer IP(Binary, RangeBegin, true);
1058     // Disjoint ranges may have range in the middle of two instr,
1059     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1060     // can be Addr1+1 to Addr2-1. We should ignore such range.
1061     if (IP.Address > RangeEnd)
1062       continue;
1063 
1064     do {
1065       const AddressProbesMap &Address2ProbesMap =
1066           Binary->getAddress2ProbesMap();
1067       auto It = Address2ProbesMap.find(IP.Address);
1068       if (It != Address2ProbesMap.end()) {
1069         for (const auto &Probe : It->second) {
1070           ProbeCounter[&Probe] += Count;
1071         }
1072       }
1073     } while (IP.advance() && IP.Address <= RangeEnd);
1074   }
1075 }
1076 
1077 static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1078                                       const SmallVectorImpl<uint64_t> &AddrVec,
1079                                       ProfiledBinary *Binary) {
1080   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1081   for (auto Address : reverse(AddrVec)) {
1082     const MCDecodedPseudoProbe *CallProbe =
1083         Binary->getCallProbeForAddr(Address);
1084     // These could be the cases when a probe is not found at a calliste. Cutting
1085     // off the context from here since the inliner will not know how to consume
1086     // a context with unknown callsites.
1087     // 1. for functions that are not sampled when
1088     // --decode-probe-for-profiled-functions-only is on.
1089     // 2. for a merged callsite. Callsite merging may cause the loss of original
1090     // probe IDs.
1091     // 3. for an external callsite.
1092     if (!CallProbe)
1093       break;
1094     Probes.push_back(CallProbe);
1095   }
1096 
1097   std::reverse(Probes.begin(), Probes.end());
1098 
1099   // Extract context stack for reusing, leaf context stack will be added
1100   // compressed while looking up function profile.
1101   for (const auto *P : Probes) {
1102     Binary->getInlineContextForProbe(P, ContextStack, true);
1103   }
1104 }
1105 
1106 void CSProfileGenerator::generateProbeBasedProfile() {
1107   // Enable pseudo probe functionalities in SampleProf
1108   FunctionSamples::ProfileIsProbeBased = true;
1109   for (const auto &CI : *SampleCounters) {
1110     const AddrBasedCtxKey *CtxKey =
1111         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1112     SampleContextFrameVector ContextStack;
1113     extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
1114     // Fill in function body samples from probes, also infer caller's samples
1115     // from callee's probe
1116     populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
1117     // Fill in boundary samples for a call probe
1118     populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
1119   }
1120 }
1121 
1122 void CSProfileGenerator::populateBodySamplesWithProbes(
1123     const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
1124   ProbeCounterMap ProbeCounter;
1125   // Extract the top frame probes by looking up each address among the range in
1126   // the Address2ProbeMap
1127   extractProbesFromRange(RangeCounter, ProbeCounter);
1128   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1129                      std::unordered_set<FunctionSamples *>>
1130       FrameSamples;
1131   for (const auto &PI : ProbeCounter) {
1132     const MCDecodedPseudoProbe *Probe = PI.first;
1133     uint64_t Count = PI.second;
1134     // Disjoint ranges have introduce zero-filled gap that
1135     // doesn't belong to current context, filter them out.
1136     if (!Probe->isBlock() || Count == 0)
1137       continue;
1138 
1139     ContextTrieNode *ContextNode =
1140         getContextNodeForLeafProbe(ContextStack, Probe);
1141     FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1142     // Record the current frame and FunctionProfile whenever samples are
1143     // collected for non-danglie probes. This is for reporting all of the
1144     // zero count probes of the frame later.
1145     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1146     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1147     FunctionProfile.addTotalSamples(Count);
1148     if (Probe->isEntry()) {
1149       FunctionProfile.addHeadSamples(Count);
1150       // Look up for the caller's function profile
1151       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1152       ContextTrieNode *CallerNode = ContextNode->getParentContext();
1153       if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1154         // Since the context id will be compressed, we have to use callee's
1155         // context id to infer caller's context id to ensure they share the
1156         // same context prefix.
1157         uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1158         assert(CallerIndex &&
1159                "Inferred caller's location index shouldn't be zero!");
1160         FunctionSamples &CallerProfile =
1161             *getOrCreateFunctionSamples(CallerNode);
1162         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1163         CallerProfile.addBodySamples(CallerIndex, 0, Count);
1164         CallerProfile.addTotalSamples(Count);
1165         CallerProfile.addCalledTargetSamples(CallerIndex, 0,
1166                                              ContextNode->getFuncName(), Count);
1167       }
1168     }
1169   }
1170 
1171   // Assign zero count for remaining probes without sample hits to
1172   // differentiate from probes optimized away, of which the counts are unknown
1173   // and will be inferred by the compiler.
1174   for (auto &I : FrameSamples) {
1175     for (auto *FunctionProfile : I.second) {
1176       for (auto *Probe : I.first->getProbes()) {
1177         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1178       }
1179     }
1180   }
1181 }
1182 
1183 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1184     const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
1185   for (const auto &BI : BranchCounter) {
1186     uint64_t SourceAddress = BI.first.first;
1187     uint64_t TargetAddress = BI.first.second;
1188     uint64_t Count = BI.second;
1189     const MCDecodedPseudoProbe *CallProbe =
1190         Binary->getCallProbeForAddr(SourceAddress);
1191     if (CallProbe == nullptr)
1192       continue;
1193     FunctionSamples &FunctionProfile =
1194         getFunctionProfileForLeafProbe(ContextStack, CallProbe);
1195     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1196     FunctionProfile.addTotalSamples(Count);
1197     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1198     if (CalleeName.size() == 0)
1199       continue;
1200     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1201                                            Count);
1202   }
1203 }
1204 
1205 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1206     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1207 
1208   // Explicitly copy the context for appending the leaf context
1209   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1210                                            ContextStack.end());
1211   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1212   // For leaf inlined context with the top frame, we should strip off the top
1213   // frame's probe id, like:
1214   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1215   auto LeafFrame = NewContextStack.back();
1216   LeafFrame.Location = LineLocation(0, 0);
1217   NewContextStack.pop_back();
1218   // Compress the context string except for the leaf frame
1219   CSProfileGenerator::compressRecursionContext(NewContextStack);
1220   CSProfileGenerator::trimContext(NewContextStack);
1221   NewContextStack.push_back(LeafFrame);
1222 
1223   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1224   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1225   ContextTrieNode *ContextNode =
1226       getOrCreateContextNode(NewContextStack, WasLeafInlined);
1227   ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1228   return ContextNode;
1229 }
1230 
1231 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1232     SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
1233   return *getContextNodeForLeafProbe(ContextStack, LeafProbe)
1234               ->getFunctionSamples();
1235 }
1236 
1237 } // end namespace sampleprof
1238 } // end namespace llvm
1239