1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ProfileGenerator.h"
10 #include "llvm/ProfileData/ProfileCommon.h"
11
12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
13 cl::Required,
14 cl::desc("Output profile file"));
15 static cl::alias OutputA("o", cl::desc("Alias for --output"),
16 cl::aliasopt(OutputFilename));
17
18 static cl::opt<SampleProfileFormat> OutputFormat(
19 "format", cl::desc("Format of output profile"), cl::init(SPF_Text),
20 cl::values(
21 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24 clEnumValN(SPF_Text, "text", "Text encoding"),
25 clEnumValN(SPF_GCC, "gcc",
26 "GCC encoding (only meaningful for -sample)")));
27
28 static cl::opt<int32_t, true> RecursionCompression(
29 "compress-recursion",
30 cl::desc("Compressing recursion by deduplicating adjacent frame "
31 "sequences up to the specified size. -1 means no size limit."),
32 cl::Hidden,
33 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34
35 static cl::opt<uint64_t> CSProfColdThreshold(
36 "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
37 cl::desc("Specify the total samples threshold for a context profile to "
38 "be considered cold, any cold profiles will be merged into "
39 "context-less base profiles"));
40
41 static cl::opt<bool> CSProfMergeColdContext(
42 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
43 cl::desc("This works together with --csprof-cold-thres. If the total count "
44 "of context profile is smaller than the threshold, it will be "
45 "merged into context-less base profile."));
46
47 static cl::opt<bool> CSProfTrimColdContext(
48 "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
49 cl::desc("This works together with --csprof-cold-thres. If the total count "
50 "of the profile after all merge is done is still smaller than "
51 "threshold, it will be trimmed."));
52
53 using namespace llvm;
54 using namespace sampleprof;
55
56 namespace llvm {
57 namespace sampleprof {
58
59 // Initialize the MaxCompressionSize to -1 which means no size limit
60 int32_t CSProfileGenerator::MaxCompressionSize = -1;
61
62 static bool
usePseudoProbes(const BinarySampleCounterMap & BinarySampleCounters)63 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
64 return BinarySampleCounters.size() &&
65 BinarySampleCounters.begin()->first->usePseudoProbes();
66 }
67
68 std::unique_ptr<ProfileGenerator>
create(const BinarySampleCounterMap & BinarySampleCounters,enum PerfScriptType SampleType)69 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
70 enum PerfScriptType SampleType) {
71 std::unique_ptr<ProfileGenerator> ProfileGenerator;
72 if (SampleType == PERF_LBR_STACK) {
73 if (usePseudoProbes(BinarySampleCounters)) {
74 ProfileGenerator.reset(
75 new PseudoProbeCSProfileGenerator(BinarySampleCounters));
76 } else {
77 ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
78 }
79 } else {
80 // TODO:
81 llvm_unreachable("Unsupported perfscript!");
82 }
83
84 return ProfileGenerator;
85 }
86
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)87 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
88 StringMap<FunctionSamples> &ProfileMap) {
89 if (std::error_code EC = Writer->write(ProfileMap))
90 exitWithError(std::move(EC));
91 }
92
write()93 void ProfileGenerator::write() {
94 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
95 if (std::error_code EC = WriterOrErr.getError())
96 exitWithError(EC, OutputFilename);
97 write(std::move(WriterOrErr.get()), ProfileMap);
98 }
99
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)100 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
101 const RangeSample &Ranges) {
102
103 /*
104 Regions may overlap with each other. Using the boundary info, find all
105 disjoint ranges and their sample count. BoundaryPoint contains the count
106 multiple samples begin/end at this points.
107
108 |<--100-->| Sample1
109 |<------200------>| Sample2
110 A B C
111
112 In the example above,
113 Sample1 begins at A, ends at B, its value is 100.
114 Sample2 beings at A, ends at C, its value is 200.
115 For A, BeginCount is the sum of sample begins at A, which is 300 and no
116 samples ends at A, so EndCount is 0.
117 Then boundary points A, B, and C with begin/end counts are:
118 A: (300, 0)
119 B: (0, 100)
120 C: (0, 200)
121 */
122 struct BoundaryPoint {
123 // Sum of sample counts beginning at this point
124 uint64_t BeginCount;
125 // Sum of sample counts ending at this point
126 uint64_t EndCount;
127
128 BoundaryPoint() : BeginCount(0), EndCount(0){};
129
130 void addBeginCount(uint64_t Count) { BeginCount += Count; }
131
132 void addEndCount(uint64_t Count) { EndCount += Count; }
133 };
134
135 /*
136 For the above example. With boundary points, follwing logic finds two
137 disjoint region of
138
139 [A,B]: 300
140 [B+1,C]: 200
141
142 If there is a boundary point that both begin and end, the point itself
143 becomes a separate disjoint region. For example, if we have original
144 ranges of
145
146 |<--- 100 --->|
147 |<--- 200 --->|
148 A B C
149
150 there are three boundary points with their begin/end counts of
151
152 A: (100, 0)
153 B: (200, 100)
154 C: (0, 200)
155
156 the disjoint ranges would be
157
158 [A, B-1]: 100
159 [B, B]: 300
160 [B+1, C]: 200.
161 */
162 std::map<uint64_t, BoundaryPoint> Boundaries;
163
164 for (auto Item : Ranges) {
165 uint64_t Begin = Item.first.first;
166 uint64_t End = Item.first.second;
167 uint64_t Count = Item.second;
168 if (Boundaries.find(Begin) == Boundaries.end())
169 Boundaries[Begin] = BoundaryPoint();
170 Boundaries[Begin].addBeginCount(Count);
171
172 if (Boundaries.find(End) == Boundaries.end())
173 Boundaries[End] = BoundaryPoint();
174 Boundaries[End].addEndCount(Count);
175 }
176
177 uint64_t BeginAddress = 0;
178 int Count = 0;
179 for (auto Item : Boundaries) {
180 uint64_t Address = Item.first;
181 BoundaryPoint &Point = Item.second;
182 if (Point.BeginCount) {
183 if (BeginAddress)
184 DisjointRanges[{BeginAddress, Address - 1}] = Count;
185 Count += Point.BeginCount;
186 BeginAddress = Address;
187 }
188 if (Point.EndCount) {
189 assert(BeginAddress && "First boundary point cannot be 'end' point");
190 DisjointRanges[{BeginAddress, Address}] = Count;
191 Count -= Point.EndCount;
192 BeginAddress = Address + 1;
193 }
194 }
195 }
196
197 FunctionSamples &
getFunctionProfileForContext(StringRef ContextStr,bool WasLeafInlined)198 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
199 bool WasLeafInlined) {
200 auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
201 if (Ret.second) {
202 // Make a copy of the underlying context string in string table
203 // before StringRef wrapper is used for context.
204 auto It = ContextStrings.insert(ContextStr.str());
205 SampleContext FContext(*It.first, RawContext);
206 if (WasLeafInlined)
207 FContext.setAttribute(ContextWasInlined);
208 FunctionSamples &FProfile = Ret.first->second;
209 FProfile.setContext(FContext);
210 FProfile.setName(FContext.getNameWithoutContext());
211 }
212 return Ret.first->second;
213 }
214
generateProfile()215 void CSProfileGenerator::generateProfile() {
216 FunctionSamples::ProfileIsCS = true;
217 for (const auto &BI : BinarySampleCounters) {
218 ProfiledBinary *Binary = BI.first;
219 for (const auto &CI : BI.second) {
220 const StringBasedCtxKey *CtxKey =
221 dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
222 StringRef ContextId(CtxKey->Context);
223 // Get or create function profile for the range
224 FunctionSamples &FunctionProfile =
225 getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
226
227 // Fill in function body samples
228 populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
229 Binary);
230 // Fill in boundary sample counts as well as call site samples for calls
231 populateFunctionBoundarySamples(ContextId, FunctionProfile,
232 CI.second.BranchCounter, Binary);
233 }
234 }
235 // Fill in call site value sample for inlined calls and also use context to
236 // infer missing samples. Since we don't have call count for inlined
237 // functions, we estimate it from inlinee's profile using the entry of the
238 // body sample.
239 populateInferredFunctionSamples();
240
241 postProcessProfiles();
242 }
243
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const FrameLocation & LeafLoc,uint64_t Count)244 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
245 FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
246 uint64_t Count) {
247 // Filter out invalid negative(int type) lineOffset
248 if (LeafLoc.second.LineOffset & 0x80000000)
249 return;
250 // Use the maximum count of samples with same line location
251 ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
252 LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
253 uint64_t PreviousCount = R ? R.get() : 0;
254 if (PreviousCount < Count) {
255 FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
256 LeafLoc.second.Discriminator,
257 Count - PreviousCount);
258 }
259 }
260
populateFunctionBodySamples(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter,ProfiledBinary * Binary)261 void CSProfileGenerator::populateFunctionBodySamples(
262 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
263 ProfiledBinary *Binary) {
264 // Compute disjoint ranges first, so we can use MAX
265 // for calculating count for each location.
266 RangeSample Ranges;
267 findDisjointRanges(Ranges, RangeCounter);
268 for (auto Range : Ranges) {
269 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
270 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
271 uint64_t Count = Range.second;
272 // Disjoint ranges have introduce zero-filled gap that
273 // doesn't belong to current context, filter them out.
274 if (Count == 0)
275 continue;
276
277 InstructionPointer IP(Binary, RangeBegin, true);
278
279 // Disjoint ranges may have range in the middle of two instr,
280 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
281 // can be Addr1+1 to Addr2-1. We should ignore such range.
282 if (IP.Address > RangeEnd)
283 continue;
284
285 while (IP.Address <= RangeEnd) {
286 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
287 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
288 if (LeafLoc.hasValue()) {
289 // Recording body sample for this specific context
290 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
291 }
292 // Accumulate total sample count even it's a line with invalid debug info
293 FunctionProfile.addTotalSamples(Count);
294 // Move to next IP within the range
295 IP.advance();
296 }
297 }
298 }
299
populateFunctionBoundarySamples(StringRef ContextId,FunctionSamples & FunctionProfile,const BranchSample & BranchCounters,ProfiledBinary * Binary)300 void CSProfileGenerator::populateFunctionBoundarySamples(
301 StringRef ContextId, FunctionSamples &FunctionProfile,
302 const BranchSample &BranchCounters, ProfiledBinary *Binary) {
303
304 for (auto Entry : BranchCounters) {
305 uint64_t SourceOffset = Entry.first.first;
306 uint64_t TargetOffset = Entry.first.second;
307 uint64_t Count = Entry.second;
308 // Get the callee name by branch target if it's a call branch
309 StringRef CalleeName = FunctionSamples::getCanonicalFnName(
310 Binary->getFuncFromStartOffset(TargetOffset));
311 if (CalleeName.size() == 0)
312 continue;
313
314 // Record called target sample and its count
315 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
316 if (!LeafLoc.hasValue())
317 continue;
318 FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
319 LeafLoc->second.Discriminator,
320 CalleeName, Count);
321
322 // Record head sample for called target(callee)
323 std::ostringstream OCalleeCtxStr;
324 if (ContextId.find(" @ ") != StringRef::npos) {
325 OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
326 OCalleeCtxStr << " @ ";
327 }
328 OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
329
330 FunctionSamples &CalleeProfile =
331 getFunctionProfileForContext(OCalleeCtxStr.str());
332 assert(Count != 0 && "Unexpected zero weight branch");
333 CalleeProfile.addHeadSamples(Count);
334 }
335 }
336
getCallerContext(StringRef CalleeContext,StringRef & CallerNameWithContext)337 static FrameLocation getCallerContext(StringRef CalleeContext,
338 StringRef &CallerNameWithContext) {
339 StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
340 CallerNameWithContext = CallerContext.rsplit(':').first;
341 auto ContextSplit = CallerContext.rsplit(" @ ");
342 StringRef CallerFrameStr = ContextSplit.second.size() == 0
343 ? ContextSplit.first
344 : ContextSplit.second;
345 FrameLocation LeafFrameLoc = {"", {0, 0}};
346 StringRef Funcname;
347 SampleContext::decodeContextString(CallerFrameStr, Funcname,
348 LeafFrameLoc.second);
349 LeafFrameLoc.first = Funcname.str();
350 return LeafFrameLoc;
351 }
352
populateInferredFunctionSamples()353 void CSProfileGenerator::populateInferredFunctionSamples() {
354 for (const auto &Item : ProfileMap) {
355 const StringRef CalleeContext = Item.first();
356 const FunctionSamples &CalleeProfile = Item.second;
357
358 // If we already have head sample counts, we must have value profile
359 // for call sites added already. Skip to avoid double counting.
360 if (CalleeProfile.getHeadSamples())
361 continue;
362 // If we don't have context, nothing to do for caller's call site.
363 // This could happen for entry point function.
364 if (CalleeContext.find(" @ ") == StringRef::npos)
365 continue;
366
367 // Infer Caller's frame loc and context ID through string splitting
368 StringRef CallerContextId;
369 FrameLocation &&CallerLeafFrameLoc =
370 getCallerContext(CalleeContext, CallerContextId);
371
372 // It's possible that we haven't seen any sample directly in the caller,
373 // in which case CallerProfile will not exist. But we can't modify
374 // ProfileMap while iterating it.
375 // TODO: created function profile for those callers too
376 if (ProfileMap.find(CallerContextId) == ProfileMap.end())
377 continue;
378 FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
379
380 // Since we don't have call count for inlined functions, we
381 // estimate it from inlinee's profile using entry body sample.
382 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
383 // If we don't have samples with location, use 1 to indicate live.
384 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
385 EstimatedCallCount = 1;
386 CallerProfile.addCalledTargetSamples(
387 CallerLeafFrameLoc.second.LineOffset,
388 CallerLeafFrameLoc.second.Discriminator,
389 CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
390 CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
391 CallerLeafFrameLoc.second.Discriminator,
392 EstimatedCallCount);
393 CallerProfile.addTotalSamples(EstimatedCallCount);
394 }
395 }
396
postProcessProfiles()397 void CSProfileGenerator::postProcessProfiles() {
398 // Compute hot/cold threshold based on profile. This will be used for cold
399 // context profile merging/trimming.
400 computeSummaryAndThreshold();
401
402 // Run global pre-inliner to adjust/merge context profile based on estimated
403 // inline decisions.
404 CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
405
406 // Trim and merge cold context profile using cold threshold above;
407 SampleContextTrimmer(ProfileMap)
408 .trimAndMergeColdContextProfiles(
409 ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
410 }
411
computeSummaryAndThreshold()412 void CSProfileGenerator::computeSummaryAndThreshold() {
413 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
414 auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
415 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
416 (Summary->getDetailedSummary()));
417 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
418 (Summary->getDetailedSummary()));
419
420 // Use threshold calculated from profile summary unless specified.
421 if (CSProfColdThreshold.getNumOccurrences()) {
422 ColdCountThreshold = CSProfColdThreshold;
423 }
424 }
425
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)426 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
427 StringMap<FunctionSamples> &ProfileMap) {
428 if (std::error_code EC = Writer->write(ProfileMap))
429 exitWithError(std::move(EC));
430 }
431
432 // Helper function to extract context prefix string stack
433 // Extract context stack for reusing, leaf context stack will
434 // be added compressed while looking up function profile
435 static void
extractPrefixContextStack(SmallVectorImpl<std::string> & ContextStrStack,const SmallVectorImpl<const PseudoProbe * > & Probes,ProfiledBinary * Binary)436 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
437 const SmallVectorImpl<const PseudoProbe *> &Probes,
438 ProfiledBinary *Binary) {
439 for (const auto *P : Probes) {
440 Binary->getInlineContextForProbe(P, ContextStrStack, true);
441 }
442 }
443
generateProfile()444 void PseudoProbeCSProfileGenerator::generateProfile() {
445 // Enable pseudo probe functionalities in SampleProf
446 FunctionSamples::ProfileIsProbeBased = true;
447 FunctionSamples::ProfileIsCS = true;
448 for (const auto &BI : BinarySampleCounters) {
449 ProfiledBinary *Binary = BI.first;
450 for (const auto &CI : BI.second) {
451 const ProbeBasedCtxKey *CtxKey =
452 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
453 SmallVector<std::string, 16> ContextStrStack;
454 extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
455 // Fill in function body samples from probes, also infer caller's samples
456 // from callee's probe
457 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
458 Binary);
459 // Fill in boundary samples for a call probe
460 populateBoundarySamplesWithProbes(CI.second.BranchCounter,
461 ContextStrStack, Binary);
462 }
463 }
464
465 postProcessProfiles();
466 }
467
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,ProfiledBinary * Binary)468 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
469 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
470 ProfiledBinary *Binary) {
471 RangeSample Ranges;
472 findDisjointRanges(Ranges, RangeCounter);
473 for (const auto &Range : Ranges) {
474 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
475 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
476 uint64_t Count = Range.second;
477 // Disjoint ranges have introduce zero-filled gap that
478 // doesn't belong to current context, filter them out.
479 if (Count == 0)
480 continue;
481
482 InstructionPointer IP(Binary, RangeBegin, true);
483
484 // Disjoint ranges may have range in the middle of two instr,
485 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
486 // can be Addr1+1 to Addr2-1. We should ignore such range.
487 if (IP.Address > RangeEnd)
488 continue;
489
490 while (IP.Address <= RangeEnd) {
491 const AddressProbesMap &Address2ProbesMap =
492 Binary->getAddress2ProbesMap();
493 auto It = Address2ProbesMap.find(IP.Address);
494 if (It != Address2ProbesMap.end()) {
495 for (const auto &Probe : It->second) {
496 if (!Probe.isBlock())
497 continue;
498 ProbeCounter[&Probe] += Count;
499 }
500 }
501
502 IP.advance();
503 }
504 }
505 }
506
populateBodySamplesWithProbes(const RangeSample & RangeCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)507 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
508 const RangeSample &RangeCounter,
509 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
510 ProbeCounterMap ProbeCounter;
511 // Extract the top frame probes by looking up each address among the range in
512 // the Address2ProbeMap
513 extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
514 std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
515 for (auto PI : ProbeCounter) {
516 const PseudoProbe *Probe = PI.first;
517 uint64_t Count = PI.second;
518 // Ignore dangling probes since they will be reported later if needed.
519 if (Probe->isDangling())
520 continue;
521 FunctionSamples &FunctionProfile =
522 getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
523 // Record the current frame and FunctionProfile whenever samples are
524 // collected for non-danglie probes. This is for reporting all of the
525 // dangling probes of the frame later.
526 FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
527 FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
528 FunctionProfile.addTotalSamples(Count);
529 if (Probe->isEntry()) {
530 FunctionProfile.addHeadSamples(Count);
531 // Look up for the caller's function profile
532 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
533 if (InlinerDesc != nullptr) {
534 // Since the context id will be compressed, we have to use callee's
535 // context id to infer caller's context id to ensure they share the
536 // same context prefix.
537 StringRef CalleeContextId =
538 FunctionProfile.getContext().getNameWithContext();
539 StringRef CallerContextId;
540 FrameLocation &&CallerLeafFrameLoc =
541 getCallerContext(CalleeContextId, CallerContextId);
542 uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
543 assert(CallerIndex &&
544 "Inferred caller's location index shouldn't be zero!");
545 FunctionSamples &CallerProfile =
546 getFunctionProfileForContext(CallerContextId);
547 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
548 CallerProfile.addBodySamples(CallerIndex, 0, Count);
549 CallerProfile.addTotalSamples(Count);
550 CallerProfile.addCalledTargetSamples(
551 CallerIndex, 0,
552 FunctionProfile.getContext().getNameWithoutContext(), Count);
553 }
554 }
555
556 // Report dangling probes for frames that have real samples collected.
557 // Dangling probes are the probes associated to an empty block. With this
558 // place holder, sample count on a dangling probe will not be trusted by the
559 // compiler and we will rely on the counts inference algorithm to get the
560 // probe a reasonable count. Use InvalidProbeCount to mark sample count for
561 // a dangling probe.
562 for (auto &I : FrameSamples) {
563 auto *FunctionProfile = I.second;
564 for (auto *Probe : I.first->getProbes()) {
565 if (Probe->isDangling()) {
566 FunctionProfile->addBodySamplesForProbe(
567 Probe->Index, FunctionSamples::InvalidProbeCount);
568 }
569 }
570 }
571 }
572 }
573
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)574 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
575 const BranchSample &BranchCounter,
576 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
577 for (auto BI : BranchCounter) {
578 uint64_t SourceOffset = BI.first.first;
579 uint64_t TargetOffset = BI.first.second;
580 uint64_t Count = BI.second;
581 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
582 const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
583 if (CallProbe == nullptr)
584 continue;
585 FunctionSamples &FunctionProfile =
586 getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
587 FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
588 FunctionProfile.addTotalSamples(Count);
589 StringRef CalleeName = FunctionSamples::getCanonicalFnName(
590 Binary->getFuncFromStartOffset(TargetOffset));
591 if (CalleeName.size() == 0)
592 continue;
593 FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
594 Count);
595 }
596 }
597
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbeFuncDesc * LeafFuncDesc,bool WasLeafInlined)598 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
599 SmallVectorImpl<std::string> &ContextStrStack,
600 const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
601 assert(ContextStrStack.size() && "Profile context must have the leaf frame");
602 // Compress the context string except for the leaf frame
603 std::string LeafFrame = ContextStrStack.back();
604 ContextStrStack.pop_back();
605 CSProfileGenerator::compressRecursionContext(ContextStrStack);
606
607 std::ostringstream OContextStr;
608 for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
609 if (OContextStr.str().size())
610 OContextStr << " @ ";
611 OContextStr << ContextStrStack[I];
612 }
613 // For leaf inlined context with the top frame, we should strip off the top
614 // frame's probe id, like:
615 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
616 if (OContextStr.str().size())
617 OContextStr << " @ ";
618 OContextStr << StringRef(LeafFrame).split(":").first.str();
619
620 FunctionSamples &FunctionProile =
621 getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
622 FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
623 return FunctionProile;
624 }
625
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbe * LeafProbe,ProfiledBinary * Binary)626 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
627 SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
628 ProfiledBinary *Binary) {
629 // Explicitly copy the context for appending the leaf context
630 SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
631 ContextStrStack.end());
632 Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
633 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
634 bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
635 return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
636 WasLeafInlined);
637 }
638
639 } // end namespace sampleprof
640 } // end namespace llvm
641