xref: /llvm-project/llvm/tools/llvm-profgen/ProfileGenerator.cpp (revision 31af18bccea95fe1ae8aa2c51cf7c8e92a1c208e)
1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "MissingFrameInferrer.h"
11 #include "PerfReader.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14 #include "llvm/ProfileData/ProfileCommon.h"
15 #include <algorithm>
16 #include <float.h>
17 #include <unordered_set>
18 #include <utility>
19 
20 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
21                                     cl::Required,
22                                     cl::desc("Output profile file"));
23 static cl::alias OutputA("o", cl::desc("Alias for --output"),
24                          cl::aliasopt(OutputFilename));
25 
26 static cl::opt<SampleProfileFormat> OutputFormat(
27     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
28     cl::values(
29         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
30         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
31         clEnumValN(SPF_Text, "text", "Text encoding"),
32         clEnumValN(SPF_GCC, "gcc",
33                    "GCC encoding (only meaningful for -sample)")));
34 
35 static cl::opt<bool> UseMD5(
36     "use-md5", cl::Hidden,
37     cl::desc("Use md5 to represent function names in the output profile (only "
38              "meaningful for -extbinary)"));
39 
40 static cl::opt<bool> PopulateProfileSymbolList(
41     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
42     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
43 
44 static cl::opt<bool> FillZeroForAllFuncs(
45     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
46     cl::desc("Attribute all functions' range with zero count "
47              "even it's not hit by any samples."));
48 
49 static cl::opt<int32_t, true> RecursionCompression(
50     "compress-recursion",
51     cl::desc("Compressing recursion by deduplicating adjacent frame "
52              "sequences up to the specified size. -1 means no size limit."),
53     cl::Hidden,
54     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
55 
56 static cl::opt<bool>
57     TrimColdProfile("trim-cold-profile",
58                     cl::desc("If the total count of the profile is smaller "
59                              "than threshold, it will be trimmed."));
60 
61 static cl::opt<bool> CSProfMergeColdContext(
62     "csprof-merge-cold-context", cl::init(true),
63     cl::desc("If the total count of context profile is smaller than "
64              "the threshold, it will be merged into context-less base "
65              "profile."));
66 
67 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
68     "csprof-max-cold-context-depth", cl::init(1),
69     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
70              "context-less base profile"));
71 
72 static cl::opt<int, true> CSProfMaxContextDepth(
73     "csprof-max-context-depth",
74     cl::desc("Keep the last K contexts while merging profile. -1 means no "
75              "depth limit."),
76     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
77 
78 static cl::opt<double> HotFunctionDensityThreshold(
79     "hot-function-density-threshold", llvm::cl::init(1000),
80     llvm::cl::desc(
81         "specify density threshold for hot functions (default: 1000)"),
82     llvm::cl::Optional);
83 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
84                                  llvm::cl::desc("show profile density details"),
85                                  llvm::cl::Optional);
86 
87 static cl::opt<bool> UpdateTotalSamples(
88     "update-total-samples", llvm::cl::init(false),
89     llvm::cl::desc(
90         "Update total samples by accumulating all its body samples."),
91     llvm::cl::Optional);
92 
93 static cl::opt<bool> GenCSNestedProfile(
94     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
95     cl::desc("Generate nested function profiles for CSSPGO"));
96 
97 cl::opt<bool> InferMissingFrames(
98     "infer-missing-frames", llvm::cl::init(true),
99     llvm::cl::desc(
100         "Infer missing call frames due to compiler tail call elimination."),
101     llvm::cl::Optional);
102 
103 using namespace llvm;
104 using namespace sampleprof;
105 
106 namespace llvm {
107 extern cl::opt<int> ProfileSummaryCutoffHot;
108 extern cl::opt<bool> UseContextLessSummary;
109 
110 namespace sampleprof {
111 
112 // Initialize the MaxCompressionSize to -1 which means no size limit
113 int32_t CSProfileGenerator::MaxCompressionSize = -1;
114 
115 int CSProfileGenerator::MaxContextDepth = -1;
116 
117 bool ProfileGeneratorBase::UseFSDiscriminator = false;
118 
119 std::unique_ptr<ProfileGeneratorBase>
120 ProfileGeneratorBase::create(ProfiledBinary *Binary,
121                              const ContextSampleCounterMap *SampleCounters,
122                              bool ProfileIsCS) {
123   std::unique_ptr<ProfileGeneratorBase> Generator;
124   if (ProfileIsCS) {
125     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
126   } else {
127     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
128   }
129   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
130   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
131 
132   return Generator;
133 }
134 
135 std::unique_ptr<ProfileGeneratorBase>
136 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
137                              bool ProfileIsCS) {
138   std::unique_ptr<ProfileGeneratorBase> Generator;
139   if (ProfileIsCS) {
140     Generator.reset(new CSProfileGenerator(Binary, Profiles));
141   } else {
142     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
143   }
144   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
145   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
146 
147   return Generator;
148 }
149 
150 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
151                                  SampleProfileMap &ProfileMap) {
152   // Populate profile symbol list if extended binary format is used.
153   ProfileSymbolList SymbolList;
154 
155   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
156     Binary->populateSymbolListFromDWARF(SymbolList);
157     Writer->setProfileSymbolList(&SymbolList);
158   }
159 
160   if (std::error_code EC = Writer->write(ProfileMap))
161     exitWithError(std::move(EC));
162 }
163 
164 void ProfileGeneratorBase::write() {
165   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
166   if (std::error_code EC = WriterOrErr.getError())
167     exitWithError(EC, OutputFilename);
168 
169   if (UseMD5) {
170     if (OutputFormat != SPF_Ext_Binary)
171       WithColor::warning() << "-use-md5 is ignored. Specify "
172                               "--format=extbinary to enable it\n";
173     else
174       WriterOrErr.get()->setUseMD5();
175   }
176 
177   write(std::move(WriterOrErr.get()), ProfileMap);
178 }
179 
180 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
181   if (Density == 0.0)
182     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
183                             "set too low. Please check your command.\n";
184   else if (Density < HotFunctionDensityThreshold)
185     WithColor::warning()
186         << "AutoFDO is estimated to optimize better with "
187         << format("%.1f", HotFunctionDensityThreshold / Density)
188         << "x more samples. Please consider increasing sampling rate or "
189            "profiling for longer duration to get more samples.\n";
190 
191   if (ShowDensity)
192     outs() << "Minimum profile density for hot functions with top "
193            << format("%.2f",
194                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
195                          10000)
196            << "% total samples: " << format("%.1f", Density) << "\n";
197 }
198 
199 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
200                                               uint64_t HotCntThreshold) {
201   double Density = DBL_MAX;
202   std::vector<const FunctionSamples *> HotFuncs;
203   for (auto &I : Profiles) {
204     auto &FuncSamples = I.second;
205     if (FuncSamples.getTotalSamples() < HotCntThreshold)
206       continue;
207     HotFuncs.emplace_back(&FuncSamples);
208   }
209 
210   for (auto *FuncSamples : HotFuncs) {
211     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
212     if (!Func)
213       continue;
214     uint64_t FuncSize = Func->getFuncSize();
215     if (FuncSize == 0)
216       continue;
217     Density =
218         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
219                               FuncSize);
220   }
221 
222   return Density == DBL_MAX ? 0.0 : Density;
223 }
224 
225 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
226                                               const RangeSample &Ranges) {
227 
228   /*
229   Regions may overlap with each other. Using the boundary info, find all
230   disjoint ranges and their sample count. BoundaryPoint contains the count
231   multiple samples begin/end at this points.
232 
233   |<--100-->|           Sample1
234   |<------200------>|   Sample2
235   A         B       C
236 
237   In the example above,
238   Sample1 begins at A, ends at B, its value is 100.
239   Sample2 beings at A, ends at C, its value is 200.
240   For A, BeginCount is the sum of sample begins at A, which is 300 and no
241   samples ends at A, so EndCount is 0.
242   Then boundary points A, B, and C with begin/end counts are:
243   A: (300, 0)
244   B: (0, 100)
245   C: (0, 200)
246   */
247   struct BoundaryPoint {
248     // Sum of sample counts beginning at this point
249     uint64_t BeginCount = UINT64_MAX;
250     // Sum of sample counts ending at this point
251     uint64_t EndCount = UINT64_MAX;
252     // Is the begin point of a zero range.
253     bool IsZeroRangeBegin = false;
254     // Is the end point of a zero range.
255     bool IsZeroRangeEnd = false;
256 
257     void addBeginCount(uint64_t Count) {
258       if (BeginCount == UINT64_MAX)
259         BeginCount = 0;
260       BeginCount += Count;
261     }
262 
263     void addEndCount(uint64_t Count) {
264       if (EndCount == UINT64_MAX)
265         EndCount = 0;
266       EndCount += Count;
267     }
268   };
269 
270   /*
271   For the above example. With boundary points, follwing logic finds two
272   disjoint region of
273 
274   [A,B]:   300
275   [B+1,C]: 200
276 
277   If there is a boundary point that both begin and end, the point itself
278   becomes a separate disjoint region. For example, if we have original
279   ranges of
280 
281   |<--- 100 --->|
282                 |<--- 200 --->|
283   A             B             C
284 
285   there are three boundary points with their begin/end counts of
286 
287   A: (100, 0)
288   B: (200, 100)
289   C: (0, 200)
290 
291   the disjoint ranges would be
292 
293   [A, B-1]: 100
294   [B, B]:   300
295   [B+1, C]: 200.
296 
297   Example for zero value range:
298 
299     |<--- 100 --->|
300                        |<--- 200 --->|
301   |<---------------  0 ----------------->|
302   A  B            C    D             E   F
303 
304   [A, B-1]  : 0
305   [B, C]    : 100
306   [C+1, D-1]: 0
307   [D, E]    : 200
308   [E+1, F]  : 0
309   */
310   std::map<uint64_t, BoundaryPoint> Boundaries;
311 
312   for (const auto &Item : Ranges) {
313     assert(Item.first.first <= Item.first.second &&
314            "Invalid instruction range");
315     auto &BeginPoint = Boundaries[Item.first.first];
316     auto &EndPoint = Boundaries[Item.first.second];
317     uint64_t Count = Item.second;
318 
319     BeginPoint.addBeginCount(Count);
320     EndPoint.addEndCount(Count);
321     if (Count == 0) {
322       BeginPoint.IsZeroRangeBegin = true;
323       EndPoint.IsZeroRangeEnd = true;
324     }
325   }
326 
327   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
328   // and the next valid address
329   uint64_t BeginAddress = UINT64_MAX;
330   int ZeroRangeDepth = 0;
331   uint64_t Count = 0;
332   for (const auto &Item : Boundaries) {
333     uint64_t Address = Item.first;
334     const BoundaryPoint &Point = Item.second;
335     if (Point.BeginCount != UINT64_MAX) {
336       if (BeginAddress != UINT64_MAX)
337         DisjointRanges[{BeginAddress, Address - 1}] = Count;
338       Count += Point.BeginCount;
339       BeginAddress = Address;
340       ZeroRangeDepth += Point.IsZeroRangeBegin;
341     }
342     if (Point.EndCount != UINT64_MAX) {
343       assert((BeginAddress != UINT64_MAX) &&
344              "First boundary point cannot be 'end' point");
345       DisjointRanges[{BeginAddress, Address}] = Count;
346       assert(Count >= Point.EndCount && "Mismatched live ranges");
347       Count -= Point.EndCount;
348       BeginAddress = Address + 1;
349       ZeroRangeDepth -= Point.IsZeroRangeEnd;
350       // If the remaining count is zero and it's no longer in a zero range, this
351       // means we consume all the ranges before, thus mark BeginAddress as
352       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
353       //  [<---- 10 ---->]
354       //                       [<---- 20 ---->]
355       //   A             B     C              D
356       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
357       // have the [B+1, C-1] zero range.
358       if (Count == 0 && ZeroRangeDepth == 0)
359         BeginAddress = UINT64_MAX;
360     }
361   }
362 }
363 
364 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
365     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
366     uint64_t Count) {
367   // Use the maximum count of samples with same line location
368   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
369 
370   // Use duplication factor to compensated for loop unroll/vectorization.
371   // Note that this is only needed when we're taking MAX of the counts at
372   // the location instead of SUM.
373   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
374 
375   ErrorOr<uint64_t> R =
376       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
377 
378   uint64_t PreviousCount = R ? R.get() : 0;
379   if (PreviousCount <= Count) {
380     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
381                                    Count - PreviousCount);
382   }
383 }
384 
385 void ProfileGeneratorBase::updateTotalSamples() {
386   for (auto &Item : ProfileMap) {
387     FunctionSamples &FunctionProfile = Item.second;
388     FunctionProfile.updateTotalSamples();
389   }
390 }
391 
392 void ProfileGeneratorBase::updateCallsiteSamples() {
393   for (auto &Item : ProfileMap) {
394     FunctionSamples &FunctionProfile = Item.second;
395     FunctionProfile.updateCallsiteSamples();
396   }
397 }
398 
399 void ProfileGeneratorBase::updateFunctionSamples() {
400   updateCallsiteSamples();
401 
402   if (UpdateTotalSamples)
403     updateTotalSamples();
404 }
405 
406 void ProfileGeneratorBase::collectProfiledFunctions() {
407   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
408   if (collectFunctionsFromRawProfile(ProfiledFunctions))
409     Binary->setProfiledFunctions(ProfiledFunctions);
410   else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
411     Binary->setProfiledFunctions(ProfiledFunctions);
412   else
413     llvm_unreachable("Unsupported input profile");
414 }
415 
416 bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
417     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
418   if (!SampleCounters)
419     return false;
420   // Go through all the stacks, ranges and branches in sample counters, use
421   // the start of the range to look up the function it belongs and record the
422   // function.
423   for (const auto &CI : *SampleCounters) {
424     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
425       for (auto StackAddr : CtxKey->Context) {
426         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
427           ProfiledFunctions.insert(FRange->Func);
428       }
429     }
430 
431     for (auto Item : CI.second.RangeCounter) {
432       uint64_t StartAddress = Item.first.first;
433       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
434         ProfiledFunctions.insert(FRange->Func);
435     }
436 
437     for (auto Item : CI.second.BranchCounter) {
438       uint64_t SourceAddress = Item.first.first;
439       uint64_t TargetAddress = Item.first.second;
440       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
441         ProfiledFunctions.insert(FRange->Func);
442       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
443         ProfiledFunctions.insert(FRange->Func);
444     }
445   }
446   return true;
447 }
448 
449 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
450     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
451   for (const auto &FS : ProfileMap) {
452     if (auto *Func = Binary->getBinaryFunction(FS.second.getName()))
453       ProfiledFunctions.insert(Func);
454   }
455   return true;
456 }
457 
458 bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
459     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
460   for (auto *Node : ContextTracker) {
461     if (!Node->getFuncName().empty())
462       if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
463         ProfiledFunctions.insert(Func);
464   }
465   return true;
466 }
467 
468 FunctionSamples &
469 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
470   SampleContext Context(FuncName);
471   return ProfileMap.Create(Context);
472 }
473 
474 void ProfileGenerator::generateProfile() {
475   collectProfiledFunctions();
476 
477   if (Binary->usePseudoProbes())
478     Binary->decodePseudoProbe();
479 
480   if (SampleCounters) {
481     if (Binary->usePseudoProbes()) {
482       generateProbeBasedProfile();
483     } else {
484       generateLineNumBasedProfile();
485     }
486   }
487 
488   postProcessProfiles();
489 }
490 
491 void ProfileGenerator::postProcessProfiles() {
492   computeSummaryAndThreshold(ProfileMap);
493   trimColdProfiles(ProfileMap, ColdCountThreshold);
494   calculateAndShowDensity(ProfileMap);
495 }
496 
497 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
498                                         uint64_t ColdCntThreshold) {
499   if (!TrimColdProfile)
500     return;
501 
502   // Move cold profiles into a tmp container.
503   std::vector<hash_code> ColdProfileHashes;
504   for (const auto &I : ProfileMap) {
505     if (I.second.getTotalSamples() < ColdCntThreshold)
506       ColdProfileHashes.emplace_back(I.first);
507   }
508 
509   // Remove the cold profile from ProfileMap.
510   for (const auto &I : ColdProfileHashes)
511     ProfileMap.erase(I);
512 }
513 
514 void ProfileGenerator::generateLineNumBasedProfile() {
515   assert(SampleCounters->size() == 1 &&
516          "Must have one entry for profile generation.");
517   const SampleCounter &SC = SampleCounters->begin()->second;
518   // Fill in function body samples
519   populateBodySamplesForAllFunctions(SC.RangeCounter);
520   // Fill in boundary sample counts as well as call site samples for calls
521   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
522 
523   updateFunctionSamples();
524 }
525 
526 void ProfileGenerator::generateProbeBasedProfile() {
527   assert(SampleCounters->size() == 1 &&
528          "Must have one entry for profile generation.");
529   // Enable pseudo probe functionalities in SampleProf
530   FunctionSamples::ProfileIsProbeBased = true;
531   const SampleCounter &SC = SampleCounters->begin()->second;
532   // Fill in function body samples
533   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
534   // Fill in boundary sample counts as well as call site samples for calls
535   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
536 
537   updateFunctionSamples();
538 }
539 
540 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
541     const RangeSample &RangeCounter) {
542   ProbeCounterMap ProbeCounter;
543   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
544   // inside extractProbesFromRange.
545   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
546                          false);
547 
548   for (const auto &PI : ProbeCounter) {
549     const MCDecodedPseudoProbe *Probe = PI.first;
550     uint64_t Count = PI.second;
551     SampleContextFrameVector FrameVec;
552     Binary->getInlineContextForProbe(Probe, FrameVec, true);
553     FunctionSamples &FunctionProfile =
554         getLeafProfileAndAddTotalSamples(FrameVec, Count);
555     FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
556                                    Count);
557     if (Probe->isEntry())
558       FunctionProfile.addHeadSamples(Count);
559   }
560 }
561 
562 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
563     const BranchSample &BranchCounters) {
564   for (const auto &Entry : BranchCounters) {
565     uint64_t SourceAddress = Entry.first.first;
566     uint64_t TargetAddress = Entry.first.second;
567     uint64_t Count = Entry.second;
568     assert(Count != 0 && "Unexpected zero weight branch");
569 
570     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
571     if (CalleeName.size() == 0)
572       continue;
573 
574     const MCDecodedPseudoProbe *CallProbe =
575         Binary->getCallProbeForAddr(SourceAddress);
576     if (CallProbe == nullptr)
577       continue;
578 
579     // Record called target sample and its count.
580     SampleContextFrameVector FrameVec;
581     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
582 
583     if (!FrameVec.empty()) {
584       FunctionSamples &FunctionProfile =
585           getLeafProfileAndAddTotalSamples(FrameVec, 0);
586       FunctionProfile.addCalledTargetSamples(
587           FrameVec.back().Location.LineOffset,
588           FrameVec.back().Location.Discriminator,
589           CalleeName, Count);
590     }
591   }
592 }
593 
594 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
595     const SampleContextFrameVector &FrameVec, uint64_t Count) {
596   // Get top level profile
597   FunctionSamples *FunctionProfile =
598       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
599   FunctionProfile->addTotalSamples(Count);
600   if (Binary->usePseudoProbes()) {
601     const auto *FuncDesc = Binary->getFuncDescForGUID(
602         Function::getGUID(FunctionProfile->getName()));
603     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
604   }
605 
606   for (size_t I = 1; I < FrameVec.size(); I++) {
607     LineLocation Callsite(
608         FrameVec[I - 1].Location.LineOffset,
609         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
610     FunctionSamplesMap &SamplesMap =
611         FunctionProfile->functionSamplesAt(Callsite);
612     auto Ret =
613         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
614     if (Ret.second) {
615       SampleContext Context(FrameVec[I].FuncName);
616       Ret.first->second.setContext(Context);
617     }
618     FunctionProfile = &Ret.first->second;
619     FunctionProfile->addTotalSamples(Count);
620     if (Binary->usePseudoProbes()) {
621       const auto *FuncDesc = Binary->getFuncDescForGUID(
622           Function::getGUID(FunctionProfile->getName()));
623       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
624     }
625   }
626 
627   return *FunctionProfile;
628 }
629 
630 RangeSample
631 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
632   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
633   if (FillZeroForAllFuncs) {
634     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
635       for (auto &R : FuncI.second.Ranges) {
636         Ranges[{R.first, R.second - 1}] += 0;
637       }
638     }
639   } else {
640     // For each range, we search for all ranges of the function it belongs to
641     // and initialize it with zero count, so it remains zero if doesn't hit any
642     // samples. This is to be consistent with compiler that interpret zero count
643     // as unexecuted(cold).
644     for (const auto &I : RangeCounter) {
645       uint64_t StartAddress = I.first.first;
646       for (const auto &Range : Binary->getRanges(StartAddress))
647         Ranges[{Range.first, Range.second - 1}] += 0;
648     }
649   }
650   RangeSample DisjointRanges;
651   findDisjointRanges(DisjointRanges, Ranges);
652   return DisjointRanges;
653 }
654 
655 void ProfileGenerator::populateBodySamplesForAllFunctions(
656     const RangeSample &RangeCounter) {
657   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
658     uint64_t RangeBegin = Range.first.first;
659     uint64_t RangeEnd = Range.first.second;
660     uint64_t Count = Range.second;
661 
662     InstructionPointer IP(Binary, RangeBegin, true);
663     // Disjoint ranges may have range in the middle of two instr,
664     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
665     // can be Addr1+1 to Addr2-1. We should ignore such range.
666     if (IP.Address > RangeEnd)
667       continue;
668 
669     do {
670       const SampleContextFrameVector FrameVec =
671           Binary->getFrameLocationStack(IP.Address);
672       if (!FrameVec.empty()) {
673         // FIXME: As accumulating total count per instruction caused some
674         // regression, we changed to accumulate total count per byte as a
675         // workaround. Tuning hotness threshold on the compiler side might be
676         // necessary in the future.
677         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
678             FrameVec, Count * Binary->getInstSize(IP.Address));
679         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
680                                             Count);
681       }
682     } while (IP.advance() && IP.Address <= RangeEnd);
683   }
684 }
685 
686 StringRef
687 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
688   // Get the function range by branch target if it's a call branch.
689   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
690 
691   // We won't accumulate sample count for a range whose start is not the real
692   // function entry such as outlined function or inner labels.
693   if (!FRange || !FRange->IsFuncEntry)
694     return StringRef();
695 
696   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
697 }
698 
699 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
700     const BranchSample &BranchCounters) {
701   for (const auto &Entry : BranchCounters) {
702     uint64_t SourceAddress = Entry.first.first;
703     uint64_t TargetAddress = Entry.first.second;
704     uint64_t Count = Entry.second;
705     assert(Count != 0 && "Unexpected zero weight branch");
706 
707     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
708     if (CalleeName.size() == 0)
709       continue;
710     // Record called target sample and its count.
711     const SampleContextFrameVector &FrameVec =
712         Binary->getCachedFrameLocationStack(SourceAddress);
713     if (!FrameVec.empty()) {
714       FunctionSamples &FunctionProfile =
715           getLeafProfileAndAddTotalSamples(FrameVec, 0);
716       FunctionProfile.addCalledTargetSamples(
717           FrameVec.back().Location.LineOffset,
718           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
719           CalleeName, Count);
720     }
721     // Add head samples for callee.
722     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
723     CalleeProfile.addHeadSamples(Count);
724   }
725 }
726 
727 void ProfileGeneratorBase::calculateAndShowDensity(
728     const SampleProfileMap &Profiles) {
729   double Density = calculateDensity(Profiles, HotCountThreshold);
730   showDensitySuggestion(Density);
731 }
732 
733 FunctionSamples *
734 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
735                                                bool WasLeafInlined) {
736   FunctionSamples *FProfile = ContextNode->getFunctionSamples();
737   if (!FProfile) {
738     FSamplesList.emplace_back();
739     FProfile = &FSamplesList.back();
740     FProfile->setName(ContextNode->getFuncName());
741     ContextNode->setFunctionSamples(FProfile);
742   }
743   // Update ContextWasInlined attribute for existing contexts.
744   // The current function can be called in two ways:
745   //  - when processing a probe of the current frame
746   //  - when processing the entry probe of an inlinee's frame, which
747   //    is then used to update the callsite count of the current frame.
748   // The two can happen in any order, hence here we are making sure
749   // `ContextWasInlined` is always set as expected.
750   // TODO: Note that the former does not always happen if no probes of the
751   // current frame has samples, and if the latter happens, we could lose the
752   // attribute. This should be fixed.
753   if (WasLeafInlined)
754     FProfile->getContext().setAttribute(ContextWasInlined);
755   return FProfile;
756 }
757 
758 ContextTrieNode *
759 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
760                                            bool WasLeafInlined) {
761   ContextTrieNode *ContextNode =
762       ContextTracker.getOrCreateContextPath(Context, true);
763   getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
764   return ContextNode;
765 }
766 
767 void CSProfileGenerator::generateProfile() {
768   FunctionSamples::ProfileIsCS = true;
769 
770   collectProfiledFunctions();
771 
772   if (Binary->usePseudoProbes()) {
773     Binary->decodePseudoProbe();
774     if (InferMissingFrames)
775       initializeMissingFrameInferrer();
776   }
777 
778   if (SampleCounters) {
779     if (Binary->usePseudoProbes()) {
780       generateProbeBasedProfile();
781     } else {
782       generateLineNumBasedProfile();
783     }
784   }
785 
786   if (Binary->getTrackFuncContextSize())
787     computeSizeForProfiledFunctions();
788 
789   postProcessProfiles();
790 }
791 
792 void CSProfileGenerator::initializeMissingFrameInferrer() {
793   Binary->getMissingContextInferrer()->initialize(SampleCounters);
794 }
795 
796 void CSProfileGenerator::inferMissingFrames(
797     const SmallVectorImpl<uint64_t> &Context,
798     SmallVectorImpl<uint64_t> &NewContext) {
799   Binary->inferMissingFrames(Context, NewContext);
800 }
801 
802 void CSProfileGenerator::computeSizeForProfiledFunctions() {
803   for (auto *Func : Binary->getProfiledFunctions())
804     Binary->computeInlinedContextSizeForFunc(Func);
805 
806   // Flush the symbolizer to save memory.
807   Binary->flushSymbolizer();
808 }
809 
810 void CSProfileGenerator::updateFunctionSamples() {
811   for (auto *Node : ContextTracker) {
812     FunctionSamples *FSamples = Node->getFunctionSamples();
813     if (FSamples) {
814       if (UpdateTotalSamples)
815         FSamples->updateTotalSamples();
816       FSamples->updateCallsiteSamples();
817     }
818   }
819 }
820 
821 void CSProfileGenerator::generateLineNumBasedProfile() {
822   for (const auto &CI : *SampleCounters) {
823     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
824 
825     ContextTrieNode *ContextNode = &getRootContext();
826     // Sample context will be empty if the jump is an external-to-internal call
827     // pattern, the head samples should be added for the internal function.
828     if (!CtxKey->Context.empty()) {
829       // Get or create function profile for the range
830       ContextNode =
831           getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
832       // Fill in function body samples
833       populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
834                                      CI.second.RangeCounter);
835     }
836     // Fill in boundary sample counts as well as call site samples for calls
837     populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
838   }
839   // Fill in call site value sample for inlined calls and also use context to
840   // infer missing samples. Since we don't have call count for inlined
841   // functions, we estimate it from inlinee's profile using the entry of the
842   // body sample.
843   populateInferredFunctionSamples(getRootContext());
844 
845   updateFunctionSamples();
846 }
847 
848 void CSProfileGenerator::populateBodySamplesForFunction(
849     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
850   // Compute disjoint ranges first, so we can use MAX
851   // for calculating count for each location.
852   RangeSample Ranges;
853   findDisjointRanges(Ranges, RangeCounter);
854   for (const auto &Range : Ranges) {
855     uint64_t RangeBegin = Range.first.first;
856     uint64_t RangeEnd = Range.first.second;
857     uint64_t Count = Range.second;
858     // Disjoint ranges have introduce zero-filled gap that
859     // doesn't belong to current context, filter them out.
860     if (Count == 0)
861       continue;
862 
863     InstructionPointer IP(Binary, RangeBegin, true);
864     // Disjoint ranges may have range in the middle of two instr,
865     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
866     // can be Addr1+1 to Addr2-1. We should ignore such range.
867     if (IP.Address > RangeEnd)
868       continue;
869 
870     do {
871       auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
872       if (LeafLoc) {
873         // Recording body sample for this specific context
874         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
875         FunctionProfile.addTotalSamples(Count);
876       }
877     } while (IP.advance() && IP.Address <= RangeEnd);
878   }
879 }
880 
881 void CSProfileGenerator::populateBoundarySamplesForFunction(
882     ContextTrieNode *Node, const BranchSample &BranchCounters) {
883 
884   for (const auto &Entry : BranchCounters) {
885     uint64_t SourceAddress = Entry.first.first;
886     uint64_t TargetAddress = Entry.first.second;
887     uint64_t Count = Entry.second;
888     assert(Count != 0 && "Unexpected zero weight branch");
889 
890     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
891     if (CalleeName.size() == 0)
892       continue;
893 
894     ContextTrieNode *CallerNode = Node;
895     LineLocation CalleeCallSite(0, 0);
896     if (CallerNode != &getRootContext()) {
897       // Record called target sample and its count
898       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
899       if (LeafLoc) {
900         CallerNode->getFunctionSamples()->addCalledTargetSamples(
901             LeafLoc->Location.LineOffset,
902             getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
903             Count);
904         // Record head sample for called target(callee)
905         CalleeCallSite = LeafLoc->Location;
906       }
907     }
908 
909     ContextTrieNode *CalleeNode =
910         CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
911     FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
912     CalleeProfile->addHeadSamples(Count);
913   }
914 }
915 
916 void CSProfileGenerator::populateInferredFunctionSamples(
917     ContextTrieNode &Node) {
918   // There is no call jmp sample between the inliner and inlinee, we need to use
919   // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
920   // sample depends on child(inlinee)'s sample, so traverse the tree in
921   // post-order.
922   for (auto &It : Node.getAllChildContext())
923     populateInferredFunctionSamples(It.second);
924 
925   FunctionSamples *CalleeProfile = Node.getFunctionSamples();
926   if (!CalleeProfile)
927     return;
928   // If we already have head sample counts, we must have value profile
929   // for call sites added already. Skip to avoid double counting.
930   if (CalleeProfile->getHeadSamples())
931     return;
932   ContextTrieNode *CallerNode = Node.getParentContext();
933   // If we don't have context, nothing to do for caller's call site.
934   // This could happen for entry point function.
935   if (CallerNode == &getRootContext())
936     return;
937 
938   LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
939   FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
940   // Since we don't have call count for inlined functions, we
941   // estimate it from inlinee's profile using entry body sample.
942   uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
943   // If we don't have samples with location, use 1 to indicate live.
944   if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
945     EstimatedCallCount = 1;
946   CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
947                                        CallerLeafFrameLoc.Discriminator,
948                                        Node.getFuncName(), EstimatedCallCount);
949   CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
950                                CallerLeafFrameLoc.Discriminator,
951                                EstimatedCallCount);
952   CallerProfile.addTotalSamples(EstimatedCallCount);
953 }
954 
955 void CSProfileGenerator::convertToProfileMap(
956     ContextTrieNode &Node, SampleContextFrameVector &Context) {
957   FunctionSamples *FProfile = Node.getFunctionSamples();
958   if (FProfile) {
959     Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
960     // Save the new context for future references.
961     SampleContextFrames NewContext = *Contexts.insert(Context).first;
962     auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
963     FunctionSamples &NewProfile = Ret.first->second;
964     NewProfile.getContext().setContext(NewContext);
965     Context.pop_back();
966   }
967 
968   for (auto &It : Node.getAllChildContext()) {
969     ContextTrieNode &ChildNode = It.second;
970     Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
971     convertToProfileMap(ChildNode, Context);
972     Context.pop_back();
973   }
974 }
975 
976 void CSProfileGenerator::convertToProfileMap() {
977   assert(ProfileMap.empty() &&
978          "ProfileMap should be empty before converting from the trie");
979   assert(IsProfileValidOnTrie &&
980          "Do not convert the trie twice, it's already destroyed");
981 
982   SampleContextFrameVector Context;
983   for (auto &It : getRootContext().getAllChildContext())
984     convertToProfileMap(It.second, Context);
985 
986   IsProfileValidOnTrie = false;
987 }
988 
989 void CSProfileGenerator::postProcessProfiles() {
990   // Compute hot/cold threshold based on profile. This will be used for cold
991   // context profile merging/trimming.
992   computeSummaryAndThreshold();
993 
994   // Run global pre-inliner to adjust/merge context profile based on estimated
995   // inline decisions.
996   if (EnableCSPreInliner) {
997     ContextTracker.populateFuncToCtxtMap();
998     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
999     // Turn off the profile merger by default unless it is explicitly enabled.
1000     if (!CSProfMergeColdContext.getNumOccurrences())
1001       CSProfMergeColdContext = false;
1002   }
1003 
1004   convertToProfileMap();
1005 
1006   // Trim and merge cold context profile using cold threshold above.
1007   if (TrimColdProfile || CSProfMergeColdContext) {
1008     SampleContextTrimmer(ProfileMap)
1009         .trimAndMergeColdContextProfiles(
1010             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1011             CSProfMaxColdContextDepth, EnableCSPreInliner);
1012   }
1013 
1014   // Merge function samples of CS profile to calculate profile density.
1015   sampleprof::SampleProfileMap ContextLessProfiles;
1016   ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
1017 
1018   calculateAndShowDensity(ContextLessProfiles);
1019   if (GenCSNestedProfile) {
1020     ProfileConverter CSConverter(ProfileMap);
1021     CSConverter.convertCSProfiles();
1022     FunctionSamples::ProfileIsCS = false;
1023   }
1024 }
1025 
1026 void ProfileGeneratorBase::computeSummaryAndThreshold(
1027     SampleProfileMap &Profiles) {
1028   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1029   Summary = Builder.computeSummaryForProfiles(Profiles);
1030   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1031       (Summary->getDetailedSummary()));
1032   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1033       (Summary->getDetailedSummary()));
1034 }
1035 
1036 void CSProfileGenerator::computeSummaryAndThreshold() {
1037   // Always merge and use context-less profile map to compute summary.
1038   SampleProfileMap ContextLessProfiles;
1039   ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1040 
1041   // Set the flag below to avoid merging the profile again in
1042   // computeSummaryAndThreshold
1043   FunctionSamples::ProfileIsCS = false;
1044   assert(
1045       (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1046       "Don't set --profile-summary-contextless to false for profile "
1047       "generation");
1048   ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1049   // Recover the old value.
1050   FunctionSamples::ProfileIsCS = true;
1051 }
1052 
1053 void ProfileGeneratorBase::extractProbesFromRange(
1054     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1055     bool FindDisjointRanges) {
1056   const RangeSample *PRanges = &RangeCounter;
1057   RangeSample Ranges;
1058   if (FindDisjointRanges) {
1059     findDisjointRanges(Ranges, RangeCounter);
1060     PRanges = &Ranges;
1061   }
1062 
1063   for (const auto &Range : *PRanges) {
1064     uint64_t RangeBegin = Range.first.first;
1065     uint64_t RangeEnd = Range.first.second;
1066     uint64_t Count = Range.second;
1067 
1068     InstructionPointer IP(Binary, RangeBegin, true);
1069     // Disjoint ranges may have range in the middle of two instr,
1070     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1071     // can be Addr1+1 to Addr2-1. We should ignore such range.
1072     if (IP.Address > RangeEnd)
1073       continue;
1074 
1075     do {
1076       const AddressProbesMap &Address2ProbesMap =
1077           Binary->getAddress2ProbesMap();
1078       auto It = Address2ProbesMap.find(IP.Address);
1079       if (It != Address2ProbesMap.end()) {
1080         for (const auto &Probe : It->second) {
1081           ProbeCounter[&Probe] += Count;
1082         }
1083       }
1084     } while (IP.advance() && IP.Address <= RangeEnd);
1085   }
1086 }
1087 
1088 static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1089                                       const SmallVectorImpl<uint64_t> &AddrVec,
1090                                       ProfiledBinary *Binary) {
1091   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1092   for (auto Address : reverse(AddrVec)) {
1093     const MCDecodedPseudoProbe *CallProbe =
1094         Binary->getCallProbeForAddr(Address);
1095     // These could be the cases when a probe is not found at a calliste. Cutting
1096     // off the context from here since the inliner will not know how to consume
1097     // a context with unknown callsites.
1098     // 1. for functions that are not sampled when
1099     // --decode-probe-for-profiled-functions-only is on.
1100     // 2. for a merged callsite. Callsite merging may cause the loss of original
1101     // probe IDs.
1102     // 3. for an external callsite.
1103     if (!CallProbe)
1104       break;
1105     Probes.push_back(CallProbe);
1106   }
1107 
1108   std::reverse(Probes.begin(), Probes.end());
1109 
1110   // Extract context stack for reusing, leaf context stack will be added
1111   // compressed while looking up function profile.
1112   for (const auto *P : Probes) {
1113     Binary->getInlineContextForProbe(P, ContextStack, true);
1114   }
1115 }
1116 
1117 void CSProfileGenerator::generateProbeBasedProfile() {
1118   // Enable pseudo probe functionalities in SampleProf
1119   FunctionSamples::ProfileIsProbeBased = true;
1120   for (const auto &CI : *SampleCounters) {
1121     const AddrBasedCtxKey *CtxKey =
1122         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1123     // Fill in function body samples from probes, also infer caller's samples
1124     // from callee's probe
1125     populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey);
1126     // Fill in boundary samples for a call probe
1127     populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey);
1128   }
1129 }
1130 
1131 void CSProfileGenerator::populateBodySamplesWithProbes(
1132     const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) {
1133   ProbeCounterMap ProbeCounter;
1134   // Extract the top frame probes by looking up each address among the range in
1135   // the Address2ProbeMap
1136   extractProbesFromRange(RangeCounter, ProbeCounter);
1137   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1138                      std::unordered_set<FunctionSamples *>>
1139       FrameSamples;
1140   for (const auto &PI : ProbeCounter) {
1141     const MCDecodedPseudoProbe *Probe = PI.first;
1142     uint64_t Count = PI.second;
1143     // Disjoint ranges have introduce zero-filled gap that
1144     // doesn't belong to current context, filter them out.
1145     if (!Probe->isBlock() || Count == 0)
1146       continue;
1147 
1148     ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe);
1149     FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1150     // Record the current frame and FunctionProfile whenever samples are
1151     // collected for non-danglie probes. This is for reporting all of the
1152     // zero count probes of the frame later.
1153     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1154     FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(),
1155                                    Count);
1156     FunctionProfile.addTotalSamples(Count);
1157     if (Probe->isEntry()) {
1158       FunctionProfile.addHeadSamples(Count);
1159       // Look up for the caller's function profile
1160       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1161       ContextTrieNode *CallerNode = ContextNode->getParentContext();
1162       if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1163         // Since the context id will be compressed, we have to use callee's
1164         // context id to infer caller's context id to ensure they share the
1165         // same context prefix.
1166         uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1167         uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator;
1168         assert(CallerIndex &&
1169                "Inferred caller's location index shouldn't be zero!");
1170         assert(!CallerDiscriminator &&
1171                "Callsite probe should not have a discriminator!");
1172         FunctionSamples &CallerProfile =
1173             *getOrCreateFunctionSamples(CallerNode);
1174         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1175         CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count);
1176         CallerProfile.addTotalSamples(Count);
1177         CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator,
1178                                              ContextNode->getFuncName(), Count);
1179       }
1180     }
1181   }
1182 
1183   // Assign zero count for remaining probes without sample hits to
1184   // differentiate from probes optimized away, of which the counts are unknown
1185   // and will be inferred by the compiler.
1186   for (auto &I : FrameSamples) {
1187     for (auto *FunctionProfile : I.second) {
1188       for (auto *Probe : I.first->getProbes()) {
1189         FunctionProfile->addBodySamples(Probe->getIndex(),
1190                                         Probe->getDiscriminator(), 0);
1191       }
1192     }
1193   }
1194 }
1195 
1196 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1197     const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) {
1198   for (const auto &BI : BranchCounter) {
1199     uint64_t SourceAddress = BI.first.first;
1200     uint64_t TargetAddress = BI.first.second;
1201     uint64_t Count = BI.second;
1202     const MCDecodedPseudoProbe *CallProbe =
1203         Binary->getCallProbeForAddr(SourceAddress);
1204     if (CallProbe == nullptr)
1205       continue;
1206     FunctionSamples &FunctionProfile =
1207         getFunctionProfileForLeafProbe(CtxKey, CallProbe);
1208     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1209     FunctionProfile.addTotalSamples(Count);
1210     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1211     if (CalleeName.size() == 0)
1212       continue;
1213     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
1214                                            CallProbe->getDiscriminator(),
1215                                            CalleeName, Count);
1216   }
1217 }
1218 
1219 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1220     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1221 
1222   const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context;
1223   SmallVector<uint64_t, 16> NewContext;
1224 
1225   if (InferMissingFrames) {
1226     SmallVector<uint64_t, 16> Context = CtxKey->Context;
1227     // Append leaf frame for a complete inference.
1228     Context.push_back(LeafProbe->getAddress());
1229     inferMissingFrames(Context, NewContext);
1230     // Pop out the leaf probe that was pushed in above.
1231     NewContext.pop_back();
1232     PContext = &NewContext;
1233   }
1234 
1235   SampleContextFrameVector ContextStack;
1236   extractPrefixContextStack(ContextStack, *PContext, Binary);
1237 
1238   // Explicitly copy the context for appending the leaf context
1239   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1240                                            ContextStack.end());
1241   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1242   // For leaf inlined context with the top frame, we should strip off the top
1243   // frame's probe id, like:
1244   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1245   auto LeafFrame = NewContextStack.back();
1246   LeafFrame.Location = LineLocation(0, 0);
1247   NewContextStack.pop_back();
1248   // Compress the context string except for the leaf frame
1249   CSProfileGenerator::compressRecursionContext(NewContextStack);
1250   CSProfileGenerator::trimContext(NewContextStack);
1251   NewContextStack.push_back(LeafFrame);
1252 
1253   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1254   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1255   ContextTrieNode *ContextNode =
1256       getOrCreateContextNode(NewContextStack, WasLeafInlined);
1257   ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1258   return ContextNode;
1259 }
1260 
1261 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1262     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1263   return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples();
1264 }
1265 
1266 } // end namespace sampleprof
1267 } // end namespace llvm
1268