xref: /netbsd-src/external/apache2/llvm/dist/llvm/tools/llvm-profgen/ProfileGenerator.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "llvm/ProfileData/ProfileCommon.h"
11 
// Path of the profile file to produce. The option is mandatory (cl::Required)
// and is read by ProfileGenerator::write() when the writer is created.
12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
13                                            cl::Required,
14                                            cl::desc("Output profile file"));
// Short spelling: -o is an alias that forwards to --output.
15 static cl::alias OutputA("o", cl::desc("Alias for --output"),
16                          cl::aliasopt(OutputFilename));
17 
18 static cl::opt<SampleProfileFormat> OutputFormat(
19     "format", cl::desc("Format of output profile"), cl::init(SPF_Text),
20     cl::values(
21         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24         clEnumValN(SPF_Text, "text", "Text encoding"),
25         clEnumValN(SPF_GCC, "gcc",
26                    "GCC encoding (only meaningful for -sample)")));
27 
// Hidden option limiting the size of adjacent frame sequences that recursion
// compression will deduplicate. The parsed value is not held by the option
// object: cl::location stores it directly in
// CSProfileGenerator::MaxCompressionSize.
28 static cl::opt<int32_t, true> RecursionCompression(
29     "compress-recursion",
30     cl::desc("Compressing recursion by deduplicating adjacent frame "
31              "sequences up to the specified size. -1 means no size limit."),
32     cl::Hidden,
33     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34 
// Explicit cold threshold for context profiles. Within this file the value is
// only consulted by CSProfileGenerator::computeSummaryAndThreshold(), and only
// when the option actually appears on the command line; otherwise the
// threshold derived from the profile summary is used.
35 static cl::opt<uint64_t> CSProfColdThreshold(
36     "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
37     cl::desc("Specify the total samples threshold for a context profile to "
38              "be considered cold, any cold profiles will be merged into "
39              "context-less base profiles"));
40 
// Switch (on by default) forwarded by CSProfileGenerator::postProcessProfiles()
// to SampleContextTrimmer::trimAndMergeColdContextProfiles() as its last
// argument.
41 static cl::opt<bool> CSProfMergeColdContext(
42     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
43     cl::desc("This works together with --csprof-cold-thres. If the total count "
44              "of context profile is smaller than the threshold, it will be "
45              "merged into context-less base profile."));
46 
// Switch (on by default) forwarded by CSProfileGenerator::postProcessProfiles()
// to SampleContextTrimmer::trimAndMergeColdContextProfiles() as its second
// argument.
47 static cl::opt<bool> CSProfTrimColdContext(
48     "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
49     cl::desc("This works together with --csprof-cold-thres. If the total count "
50              "of the profile after all merge is done is still smaller than "
51              "threshold, it will be trimmed."));
52 
53 using namespace llvm;
54 using namespace sampleprof;
55 
56 namespace llvm {
57 namespace sampleprof {
58 
// Out-of-line definition of the static member that --compress-recursion
// writes into through cl::location.
59 // Initialize the MaxCompressionSize to -1 which means no size limit
60 int32_t CSProfileGenerator::MaxCompressionSize = -1;
61 
62 static bool
usePseudoProbes(const BinarySampleCounterMap & BinarySampleCounters)63 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
64   return BinarySampleCounters.size() &&
65          BinarySampleCounters.begin()->first->usePseudoProbes();
66 }
67 
68 std::unique_ptr<ProfileGenerator>
create(const BinarySampleCounterMap & BinarySampleCounters,enum PerfScriptType SampleType)69 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
70                          enum PerfScriptType SampleType) {
71   std::unique_ptr<ProfileGenerator> ProfileGenerator;
72   if (SampleType == PERF_LBR_STACK) {
73     if (usePseudoProbes(BinarySampleCounters)) {
74       ProfileGenerator.reset(
75           new PseudoProbeCSProfileGenerator(BinarySampleCounters));
76     } else {
77       ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
78     }
79   } else {
80     // TODO:
81     llvm_unreachable("Unsupported perfscript!");
82   }
83 
84   return ProfileGenerator;
85 }
86 
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)87 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
88                              StringMap<FunctionSamples> &ProfileMap) {
89   if (std::error_code EC = Writer->write(ProfileMap))
90     exitWithError(std::move(EC));
91 }
92 
write()93 void ProfileGenerator::write() {
94   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
95   if (std::error_code EC = WriterOrErr.getError())
96     exitWithError(EC, OutputFilename);
97   write(std::move(WriterOrErr.get()), ProfileMap);
98 }
99 
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)100 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
101                                           const RangeSample &Ranges) {
102 
103   /*
104   Regions may overlap with each other. Using the boundary info, find all
105   disjoint ranges and their sample count. BoundaryPoint contains the count
106   multiple samples begin/end at this points.
107 
108   |<--100-->|           Sample1
109   |<------200------>|   Sample2
110   A         B       C
111 
112   In the example above,
113   Sample1 begins at A, ends at B, its value is 100.
114   Sample2 beings at A, ends at C, its value is 200.
115   For A, BeginCount is the sum of sample begins at A, which is 300 and no
116   samples ends at A, so EndCount is 0.
117   Then boundary points A, B, and C with begin/end counts are:
118   A: (300, 0)
119   B: (0, 100)
120   C: (0, 200)
121   */
122   struct BoundaryPoint {
123     // Sum of sample counts beginning at this point
124     uint64_t BeginCount;
125     // Sum of sample counts ending at this point
126     uint64_t EndCount;
127 
128     BoundaryPoint() : BeginCount(0), EndCount(0){};
129 
130     void addBeginCount(uint64_t Count) { BeginCount += Count; }
131 
132     void addEndCount(uint64_t Count) { EndCount += Count; }
133   };
134 
135   /*
136   For the above example. With boundary points, follwing logic finds two
137   disjoint region of
138 
139   [A,B]:   300
140   [B+1,C]: 200
141 
142   If there is a boundary point that both begin and end, the point itself
143   becomes a separate disjoint region. For example, if we have original
144   ranges of
145 
146   |<--- 100 --->|
147                 |<--- 200 --->|
148   A             B             C
149 
150   there are three boundary points with their begin/end counts of
151 
152   A: (100, 0)
153   B: (200, 100)
154   C: (0, 200)
155 
156   the disjoint ranges would be
157 
158   [A, B-1]: 100
159   [B, B]:   300
160   [B+1, C]: 200.
161   */
162   std::map<uint64_t, BoundaryPoint> Boundaries;
163 
164   for (auto Item : Ranges) {
165     uint64_t Begin = Item.first.first;
166     uint64_t End = Item.first.second;
167     uint64_t Count = Item.second;
168     if (Boundaries.find(Begin) == Boundaries.end())
169       Boundaries[Begin] = BoundaryPoint();
170     Boundaries[Begin].addBeginCount(Count);
171 
172     if (Boundaries.find(End) == Boundaries.end())
173       Boundaries[End] = BoundaryPoint();
174     Boundaries[End].addEndCount(Count);
175   }
176 
177   uint64_t BeginAddress = 0;
178   int Count = 0;
179   for (auto Item : Boundaries) {
180     uint64_t Address = Item.first;
181     BoundaryPoint &Point = Item.second;
182     if (Point.BeginCount) {
183       if (BeginAddress)
184         DisjointRanges[{BeginAddress, Address - 1}] = Count;
185       Count += Point.BeginCount;
186       BeginAddress = Address;
187     }
188     if (Point.EndCount) {
189       assert(BeginAddress && "First boundary point cannot be 'end' point");
190       DisjointRanges[{BeginAddress, Address}] = Count;
191       Count -= Point.EndCount;
192       BeginAddress = Address + 1;
193     }
194   }
195 }
196 
197 FunctionSamples &
getFunctionProfileForContext(StringRef ContextStr,bool WasLeafInlined)198 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
199                                                  bool WasLeafInlined) {
200   auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
201   if (Ret.second) {
202     // Make a copy of the underlying context string in string table
203     // before StringRef wrapper is used for context.
204     auto It = ContextStrings.insert(ContextStr.str());
205     SampleContext FContext(*It.first, RawContext);
206     if (WasLeafInlined)
207       FContext.setAttribute(ContextWasInlined);
208     FunctionSamples &FProfile = Ret.first->second;
209     FProfile.setContext(FContext);
210     FProfile.setName(FContext.getNameWithoutContext());
211   }
212   return Ret.first->second;
213 }
214 
generateProfile()215 void CSProfileGenerator::generateProfile() {
216   FunctionSamples::ProfileIsCS = true;
217   for (const auto &BI : BinarySampleCounters) {
218     ProfiledBinary *Binary = BI.first;
219     for (const auto &CI : BI.second) {
220       const StringBasedCtxKey *CtxKey =
221           dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
222       StringRef ContextId(CtxKey->Context);
223       // Get or create function profile for the range
224       FunctionSamples &FunctionProfile =
225           getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
226 
227       // Fill in function body samples
228       populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
229                                   Binary);
230       // Fill in boundary sample counts as well as call site samples for calls
231       populateFunctionBoundarySamples(ContextId, FunctionProfile,
232                                       CI.second.BranchCounter, Binary);
233     }
234   }
235   // Fill in call site value sample for inlined calls and also use context to
236   // infer missing samples. Since we don't have call count for inlined
237   // functions, we estimate it from inlinee's profile using the entry of the
238   // body sample.
239   populateInferredFunctionSamples();
240 
241   postProcessProfiles();
242 }
243 
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const FrameLocation & LeafLoc,uint64_t Count)244 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
245     FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
246     uint64_t Count) {
247   // Filter out invalid negative(int type) lineOffset
248   if (LeafLoc.second.LineOffset & 0x80000000)
249     return;
250   // Use the maximum count of samples with same line location
251   ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
252       LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
253   uint64_t PreviousCount = R ? R.get() : 0;
254   if (PreviousCount < Count) {
255     FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
256                                    LeafLoc.second.Discriminator,
257                                    Count - PreviousCount);
258   }
259 }
260 
populateFunctionBodySamples(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter,ProfiledBinary * Binary)261 void CSProfileGenerator::populateFunctionBodySamples(
262     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
263     ProfiledBinary *Binary) {
264   // Compute disjoint ranges first, so we can use MAX
265   // for calculating count for each location.
266   RangeSample Ranges;
267   findDisjointRanges(Ranges, RangeCounter);
268   for (auto Range : Ranges) {
269     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
270     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
271     uint64_t Count = Range.second;
272     // Disjoint ranges have introduce zero-filled gap that
273     // doesn't belong to current context, filter them out.
274     if (Count == 0)
275       continue;
276 
277     InstructionPointer IP(Binary, RangeBegin, true);
278 
279     // Disjoint ranges may have range in the middle of two instr,
280     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
281     // can be Addr1+1 to Addr2-1. We should ignore such range.
282     if (IP.Address > RangeEnd)
283       continue;
284 
285     while (IP.Address <= RangeEnd) {
286       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
287       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
288       if (LeafLoc.hasValue()) {
289         // Recording body sample for this specific context
290         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
291       }
292       // Accumulate total sample count even it's a line with invalid debug info
293       FunctionProfile.addTotalSamples(Count);
294       // Move to next IP within the range
295       IP.advance();
296     }
297   }
298 }
299 
populateFunctionBoundarySamples(StringRef ContextId,FunctionSamples & FunctionProfile,const BranchSample & BranchCounters,ProfiledBinary * Binary)300 void CSProfileGenerator::populateFunctionBoundarySamples(
301     StringRef ContextId, FunctionSamples &FunctionProfile,
302     const BranchSample &BranchCounters, ProfiledBinary *Binary) {
303 
304   for (auto Entry : BranchCounters) {
305     uint64_t SourceOffset = Entry.first.first;
306     uint64_t TargetOffset = Entry.first.second;
307     uint64_t Count = Entry.second;
308     // Get the callee name by branch target if it's a call branch
309     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
310         Binary->getFuncFromStartOffset(TargetOffset));
311     if (CalleeName.size() == 0)
312       continue;
313 
314     // Record called target sample and its count
315     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
316     if (!LeafLoc.hasValue())
317       continue;
318     FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
319                                            LeafLoc->second.Discriminator,
320                                            CalleeName, Count);
321 
322     // Record head sample for called target(callee)
323     std::ostringstream OCalleeCtxStr;
324     if (ContextId.find(" @ ") != StringRef::npos) {
325       OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
326       OCalleeCtxStr << " @ ";
327     }
328     OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
329 
330     FunctionSamples &CalleeProfile =
331         getFunctionProfileForContext(OCalleeCtxStr.str());
332     assert(Count != 0 && "Unexpected zero weight branch");
333     CalleeProfile.addHeadSamples(Count);
334   }
335 }
336 
getCallerContext(StringRef CalleeContext,StringRef & CallerNameWithContext)337 static FrameLocation getCallerContext(StringRef CalleeContext,
338                                       StringRef &CallerNameWithContext) {
339   StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
340   CallerNameWithContext = CallerContext.rsplit(':').first;
341   auto ContextSplit = CallerContext.rsplit(" @ ");
342   StringRef CallerFrameStr = ContextSplit.second.size() == 0
343                                  ? ContextSplit.first
344                                  : ContextSplit.second;
345   FrameLocation LeafFrameLoc = {"", {0, 0}};
346   StringRef Funcname;
347   SampleContext::decodeContextString(CallerFrameStr, Funcname,
348                                      LeafFrameLoc.second);
349   LeafFrameLoc.first = Funcname.str();
350   return LeafFrameLoc;
351 }
352 
populateInferredFunctionSamples()353 void CSProfileGenerator::populateInferredFunctionSamples() {
354   for (const auto &Item : ProfileMap) {
355     const StringRef CalleeContext = Item.first();
356     const FunctionSamples &CalleeProfile = Item.second;
357 
358     // If we already have head sample counts, we must have value profile
359     // for call sites added already. Skip to avoid double counting.
360     if (CalleeProfile.getHeadSamples())
361       continue;
362     // If we don't have context, nothing to do for caller's call site.
363     // This could happen for entry point function.
364     if (CalleeContext.find(" @ ") == StringRef::npos)
365       continue;
366 
367     // Infer Caller's frame loc and context ID through string splitting
368     StringRef CallerContextId;
369     FrameLocation &&CallerLeafFrameLoc =
370         getCallerContext(CalleeContext, CallerContextId);
371 
372     // It's possible that we haven't seen any sample directly in the caller,
373     // in which case CallerProfile will not exist. But we can't modify
374     // ProfileMap while iterating it.
375     // TODO: created function profile for those callers too
376     if (ProfileMap.find(CallerContextId) == ProfileMap.end())
377       continue;
378     FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
379 
380     // Since we don't have call count for inlined functions, we
381     // estimate it from inlinee's profile using entry body sample.
382     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
383     // If we don't have samples with location, use 1 to indicate live.
384     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
385       EstimatedCallCount = 1;
386     CallerProfile.addCalledTargetSamples(
387         CallerLeafFrameLoc.second.LineOffset,
388         CallerLeafFrameLoc.second.Discriminator,
389         CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
390     CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
391                                  CallerLeafFrameLoc.second.Discriminator,
392                                  EstimatedCallCount);
393     CallerProfile.addTotalSamples(EstimatedCallCount);
394   }
395 }
396 
postProcessProfiles()397 void CSProfileGenerator::postProcessProfiles() {
398   // Compute hot/cold threshold based on profile. This will be used for cold
399   // context profile merging/trimming.
400   computeSummaryAndThreshold();
401 
402   // Run global pre-inliner to adjust/merge context profile based on estimated
403   // inline decisions.
404   CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
405 
406   // Trim and merge cold context profile using cold threshold above;
407   SampleContextTrimmer(ProfileMap)
408       .trimAndMergeColdContextProfiles(
409           ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
410 }
411 
computeSummaryAndThreshold()412 void CSProfileGenerator::computeSummaryAndThreshold() {
413   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
414   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
415   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
416       (Summary->getDetailedSummary()));
417   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
418       (Summary->getDetailedSummary()));
419 
420   // Use threshold calculated from profile summary unless specified.
421   if (CSProfColdThreshold.getNumOccurrences()) {
422     ColdCountThreshold = CSProfColdThreshold;
423   }
424 }
425 
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)426 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
427                                StringMap<FunctionSamples> &ProfileMap) {
428   if (std::error_code EC = Writer->write(ProfileMap))
429     exitWithError(std::move(EC));
430 }
431 
432 // Helper function to extract context prefix string stack
433 // Extract context stack for reusing, leaf context stack will
434 // be added compressed while looking up function profile
435 static void
extractPrefixContextStack(SmallVectorImpl<std::string> & ContextStrStack,const SmallVectorImpl<const PseudoProbe * > & Probes,ProfiledBinary * Binary)436 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
437                           const SmallVectorImpl<const PseudoProbe *> &Probes,
438                           ProfiledBinary *Binary) {
439   for (const auto *P : Probes) {
440     Binary->getInlineContextForProbe(P, ContextStrStack, true);
441   }
442 }
443 
generateProfile()444 void PseudoProbeCSProfileGenerator::generateProfile() {
445   // Enable pseudo probe functionalities in SampleProf
446   FunctionSamples::ProfileIsProbeBased = true;
447   FunctionSamples::ProfileIsCS = true;
448   for (const auto &BI : BinarySampleCounters) {
449     ProfiledBinary *Binary = BI.first;
450     for (const auto &CI : BI.second) {
451       const ProbeBasedCtxKey *CtxKey =
452           dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
453       SmallVector<std::string, 16> ContextStrStack;
454       extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
455       // Fill in function body samples from probes, also infer caller's samples
456       // from callee's probe
457       populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
458                                     Binary);
459       // Fill in boundary samples for a call probe
460       populateBoundarySamplesWithProbes(CI.second.BranchCounter,
461                                         ContextStrStack, Binary);
462     }
463   }
464 
465   postProcessProfiles();
466 }
467 
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,ProfiledBinary * Binary)468 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
469     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
470     ProfiledBinary *Binary) {
471   RangeSample Ranges;
472   findDisjointRanges(Ranges, RangeCounter);
473   for (const auto &Range : Ranges) {
474     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
475     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
476     uint64_t Count = Range.second;
477     // Disjoint ranges have introduce zero-filled gap that
478     // doesn't belong to current context, filter them out.
479     if (Count == 0)
480       continue;
481 
482     InstructionPointer IP(Binary, RangeBegin, true);
483 
484     // Disjoint ranges may have range in the middle of two instr,
485     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
486     // can be Addr1+1 to Addr2-1. We should ignore such range.
487     if (IP.Address > RangeEnd)
488       continue;
489 
490     while (IP.Address <= RangeEnd) {
491       const AddressProbesMap &Address2ProbesMap =
492           Binary->getAddress2ProbesMap();
493       auto It = Address2ProbesMap.find(IP.Address);
494       if (It != Address2ProbesMap.end()) {
495         for (const auto &Probe : It->second) {
496           if (!Probe.isBlock())
497             continue;
498           ProbeCounter[&Probe] += Count;
499         }
500       }
501 
502       IP.advance();
503     }
504   }
505 }
506 
populateBodySamplesWithProbes(const RangeSample & RangeCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)507 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
508     const RangeSample &RangeCounter,
509     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
510   ProbeCounterMap ProbeCounter;
511   // Extract the top frame probes by looking up each address among the range in
512   // the Address2ProbeMap
513   extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
514   std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
515   for (auto PI : ProbeCounter) {
516     const PseudoProbe *Probe = PI.first;
517     uint64_t Count = PI.second;
518     // Ignore dangling probes since they will be reported later if needed.
519     if (Probe->isDangling())
520       continue;
521     FunctionSamples &FunctionProfile =
522         getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
523     // Record the current frame and FunctionProfile whenever samples are
524     // collected for non-danglie probes. This is for reporting all of the
525     // dangling probes of the frame later.
526     FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
527     FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
528     FunctionProfile.addTotalSamples(Count);
529     if (Probe->isEntry()) {
530       FunctionProfile.addHeadSamples(Count);
531       // Look up for the caller's function profile
532       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
533       if (InlinerDesc != nullptr) {
534         // Since the context id will be compressed, we have to use callee's
535         // context id to infer caller's context id to ensure they share the
536         // same context prefix.
537         StringRef CalleeContextId =
538             FunctionProfile.getContext().getNameWithContext();
539         StringRef CallerContextId;
540         FrameLocation &&CallerLeafFrameLoc =
541             getCallerContext(CalleeContextId, CallerContextId);
542         uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
543         assert(CallerIndex &&
544                "Inferred caller's location index shouldn't be zero!");
545         FunctionSamples &CallerProfile =
546             getFunctionProfileForContext(CallerContextId);
547         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
548         CallerProfile.addBodySamples(CallerIndex, 0, Count);
549         CallerProfile.addTotalSamples(Count);
550         CallerProfile.addCalledTargetSamples(
551             CallerIndex, 0,
552             FunctionProfile.getContext().getNameWithoutContext(), Count);
553       }
554     }
555 
556     // Report dangling probes for frames that have real samples collected.
557     // Dangling probes are the probes associated to an empty block. With this
558     // place holder, sample count on a dangling probe will not be trusted by the
559     // compiler and we will rely on the counts inference algorithm to get the
560     // probe a reasonable count. Use InvalidProbeCount to mark sample count for
561     // a dangling probe.
562     for (auto &I : FrameSamples) {
563       auto *FunctionProfile = I.second;
564       for (auto *Probe : I.first->getProbes()) {
565         if (Probe->isDangling()) {
566           FunctionProfile->addBodySamplesForProbe(
567               Probe->Index, FunctionSamples::InvalidProbeCount);
568         }
569       }
570     }
571   }
572 }
573 
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)574 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
575     const BranchSample &BranchCounter,
576     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
577   for (auto BI : BranchCounter) {
578     uint64_t SourceOffset = BI.first.first;
579     uint64_t TargetOffset = BI.first.second;
580     uint64_t Count = BI.second;
581     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
582     const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
583     if (CallProbe == nullptr)
584       continue;
585     FunctionSamples &FunctionProfile =
586         getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
587     FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
588     FunctionProfile.addTotalSamples(Count);
589     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
590         Binary->getFuncFromStartOffset(TargetOffset));
591     if (CalleeName.size() == 0)
592       continue;
593     FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
594                                            Count);
595   }
596 }
597 
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbeFuncDesc * LeafFuncDesc,bool WasLeafInlined)598 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
599     SmallVectorImpl<std::string> &ContextStrStack,
600     const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
601   assert(ContextStrStack.size() && "Profile context must have the leaf frame");
602   // Compress the context string except for the leaf frame
603   std::string LeafFrame = ContextStrStack.back();
604   ContextStrStack.pop_back();
605   CSProfileGenerator::compressRecursionContext(ContextStrStack);
606 
607   std::ostringstream OContextStr;
608   for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
609     if (OContextStr.str().size())
610       OContextStr << " @ ";
611     OContextStr << ContextStrStack[I];
612   }
613   // For leaf inlined context with the top frame, we should strip off the top
614   // frame's probe id, like:
615   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
616   if (OContextStr.str().size())
617     OContextStr << " @ ";
618   OContextStr << StringRef(LeafFrame).split(":").first.str();
619 
620   FunctionSamples &FunctionProile =
621       getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
622   FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
623   return FunctionProile;
624 }
625 
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbe * LeafProbe,ProfiledBinary * Binary)626 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
627     SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
628     ProfiledBinary *Binary) {
629   // Explicitly copy the context for appending the leaf context
630   SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
631                                                    ContextStrStack.end());
632   Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
633   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
634   bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
635   return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
636                                         WasLeafInlined);
637 }
638 
639 } // end namespace sampleprof
640 } // end namespace llvm
641