xref: /netbsd-src/external/apache2/llvm/dist/llvm/tools/llvm-profgen/ProfileGenerator.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1*82d56013Sjoerg //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2*82d56013Sjoerg //
3*82d56013Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*82d56013Sjoerg // See https://llvm.org/LICENSE.txt for license information.
5*82d56013Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*82d56013Sjoerg //
7*82d56013Sjoerg //===----------------------------------------------------------------------===//
8*82d56013Sjoerg 
9*82d56013Sjoerg #include "ProfileGenerator.h"
10*82d56013Sjoerg #include "llvm/ProfileData/ProfileCommon.h"
11*82d56013Sjoerg 
12*82d56013Sjoerg static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
13*82d56013Sjoerg                                            cl::Required,
14*82d56013Sjoerg                                            cl::desc("Output profile file"));
15*82d56013Sjoerg static cl::alias OutputA("o", cl::desc("Alias for --output"),
16*82d56013Sjoerg                          cl::aliasopt(OutputFilename));
17*82d56013Sjoerg 
18*82d56013Sjoerg static cl::opt<SampleProfileFormat> OutputFormat(
19*82d56013Sjoerg     "format", cl::desc("Format of output profile"), cl::init(SPF_Text),
20*82d56013Sjoerg     cl::values(
21*82d56013Sjoerg         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22*82d56013Sjoerg         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23*82d56013Sjoerg         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24*82d56013Sjoerg         clEnumValN(SPF_Text, "text", "Text encoding"),
25*82d56013Sjoerg         clEnumValN(SPF_GCC, "gcc",
26*82d56013Sjoerg                    "GCC encoding (only meaningful for -sample)")));
27*82d56013Sjoerg 
28*82d56013Sjoerg static cl::opt<int32_t, true> RecursionCompression(
29*82d56013Sjoerg     "compress-recursion",
30*82d56013Sjoerg     cl::desc("Compressing recursion by deduplicating adjacent frame "
31*82d56013Sjoerg              "sequences up to the specified size. -1 means no size limit."),
32*82d56013Sjoerg     cl::Hidden,
33*82d56013Sjoerg     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34*82d56013Sjoerg 
35*82d56013Sjoerg static cl::opt<uint64_t> CSProfColdThreshold(
36*82d56013Sjoerg     "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
37*82d56013Sjoerg     cl::desc("Specify the total samples threshold for a context profile to "
38*82d56013Sjoerg              "be considered cold, any cold profiles will be merged into "
39*82d56013Sjoerg              "context-less base profiles"));
40*82d56013Sjoerg 
41*82d56013Sjoerg static cl::opt<bool> CSProfMergeColdContext(
42*82d56013Sjoerg     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
43*82d56013Sjoerg     cl::desc("This works together with --csprof-cold-thres. If the total count "
44*82d56013Sjoerg              "of context profile is smaller than the threshold, it will be "
45*82d56013Sjoerg              "merged into context-less base profile."));
46*82d56013Sjoerg 
47*82d56013Sjoerg static cl::opt<bool> CSProfTrimColdContext(
48*82d56013Sjoerg     "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
49*82d56013Sjoerg     cl::desc("This works together with --csprof-cold-thres. If the total count "
50*82d56013Sjoerg              "of the profile after all merge is done is still smaller than "
51*82d56013Sjoerg              "threshold, it will be trimmed."));
52*82d56013Sjoerg 
53*82d56013Sjoerg using namespace llvm;
54*82d56013Sjoerg using namespace sampleprof;
55*82d56013Sjoerg 
56*82d56013Sjoerg namespace llvm {
57*82d56013Sjoerg namespace sampleprof {
58*82d56013Sjoerg 
59*82d56013Sjoerg // Initialize the MaxCompressionSize to -1 which means no size limit
60*82d56013Sjoerg int32_t CSProfileGenerator::MaxCompressionSize = -1;
61*82d56013Sjoerg 
62*82d56013Sjoerg static bool
usePseudoProbes(const BinarySampleCounterMap & BinarySampleCounters)63*82d56013Sjoerg usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
64*82d56013Sjoerg   return BinarySampleCounters.size() &&
65*82d56013Sjoerg          BinarySampleCounters.begin()->first->usePseudoProbes();
66*82d56013Sjoerg }
67*82d56013Sjoerg 
68*82d56013Sjoerg std::unique_ptr<ProfileGenerator>
create(const BinarySampleCounterMap & BinarySampleCounters,enum PerfScriptType SampleType)69*82d56013Sjoerg ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
70*82d56013Sjoerg                          enum PerfScriptType SampleType) {
71*82d56013Sjoerg   std::unique_ptr<ProfileGenerator> ProfileGenerator;
72*82d56013Sjoerg   if (SampleType == PERF_LBR_STACK) {
73*82d56013Sjoerg     if (usePseudoProbes(BinarySampleCounters)) {
74*82d56013Sjoerg       ProfileGenerator.reset(
75*82d56013Sjoerg           new PseudoProbeCSProfileGenerator(BinarySampleCounters));
76*82d56013Sjoerg     } else {
77*82d56013Sjoerg       ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
78*82d56013Sjoerg     }
79*82d56013Sjoerg   } else {
80*82d56013Sjoerg     // TODO:
81*82d56013Sjoerg     llvm_unreachable("Unsupported perfscript!");
82*82d56013Sjoerg   }
83*82d56013Sjoerg 
84*82d56013Sjoerg   return ProfileGenerator;
85*82d56013Sjoerg }
86*82d56013Sjoerg 
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)87*82d56013Sjoerg void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
88*82d56013Sjoerg                              StringMap<FunctionSamples> &ProfileMap) {
89*82d56013Sjoerg   if (std::error_code EC = Writer->write(ProfileMap))
90*82d56013Sjoerg     exitWithError(std::move(EC));
91*82d56013Sjoerg }
92*82d56013Sjoerg 
write()93*82d56013Sjoerg void ProfileGenerator::write() {
94*82d56013Sjoerg   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
95*82d56013Sjoerg   if (std::error_code EC = WriterOrErr.getError())
96*82d56013Sjoerg     exitWithError(EC, OutputFilename);
97*82d56013Sjoerg   write(std::move(WriterOrErr.get()), ProfileMap);
98*82d56013Sjoerg }
99*82d56013Sjoerg 
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)100*82d56013Sjoerg void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
101*82d56013Sjoerg                                           const RangeSample &Ranges) {
102*82d56013Sjoerg 
103*82d56013Sjoerg   /*
104*82d56013Sjoerg   Regions may overlap with each other. Using the boundary info, find all
105*82d56013Sjoerg   disjoint ranges and their sample count. BoundaryPoint contains the count
106*82d56013Sjoerg   multiple samples begin/end at this points.
107*82d56013Sjoerg 
108*82d56013Sjoerg   |<--100-->|           Sample1
109*82d56013Sjoerg   |<------200------>|   Sample2
110*82d56013Sjoerg   A         B       C
111*82d56013Sjoerg 
112*82d56013Sjoerg   In the example above,
113*82d56013Sjoerg   Sample1 begins at A, ends at B, its value is 100.
114*82d56013Sjoerg   Sample2 beings at A, ends at C, its value is 200.
115*82d56013Sjoerg   For A, BeginCount is the sum of sample begins at A, which is 300 and no
116*82d56013Sjoerg   samples ends at A, so EndCount is 0.
117*82d56013Sjoerg   Then boundary points A, B, and C with begin/end counts are:
118*82d56013Sjoerg   A: (300, 0)
119*82d56013Sjoerg   B: (0, 100)
120*82d56013Sjoerg   C: (0, 200)
121*82d56013Sjoerg   */
122*82d56013Sjoerg   struct BoundaryPoint {
123*82d56013Sjoerg     // Sum of sample counts beginning at this point
124*82d56013Sjoerg     uint64_t BeginCount;
125*82d56013Sjoerg     // Sum of sample counts ending at this point
126*82d56013Sjoerg     uint64_t EndCount;
127*82d56013Sjoerg 
128*82d56013Sjoerg     BoundaryPoint() : BeginCount(0), EndCount(0){};
129*82d56013Sjoerg 
130*82d56013Sjoerg     void addBeginCount(uint64_t Count) { BeginCount += Count; }
131*82d56013Sjoerg 
132*82d56013Sjoerg     void addEndCount(uint64_t Count) { EndCount += Count; }
133*82d56013Sjoerg   };
134*82d56013Sjoerg 
135*82d56013Sjoerg   /*
136*82d56013Sjoerg   For the above example. With boundary points, follwing logic finds two
137*82d56013Sjoerg   disjoint region of
138*82d56013Sjoerg 
139*82d56013Sjoerg   [A,B]:   300
140*82d56013Sjoerg   [B+1,C]: 200
141*82d56013Sjoerg 
142*82d56013Sjoerg   If there is a boundary point that both begin and end, the point itself
143*82d56013Sjoerg   becomes a separate disjoint region. For example, if we have original
144*82d56013Sjoerg   ranges of
145*82d56013Sjoerg 
146*82d56013Sjoerg   |<--- 100 --->|
147*82d56013Sjoerg                 |<--- 200 --->|
148*82d56013Sjoerg   A             B             C
149*82d56013Sjoerg 
150*82d56013Sjoerg   there are three boundary points with their begin/end counts of
151*82d56013Sjoerg 
152*82d56013Sjoerg   A: (100, 0)
153*82d56013Sjoerg   B: (200, 100)
154*82d56013Sjoerg   C: (0, 200)
155*82d56013Sjoerg 
156*82d56013Sjoerg   the disjoint ranges would be
157*82d56013Sjoerg 
158*82d56013Sjoerg   [A, B-1]: 100
159*82d56013Sjoerg   [B, B]:   300
160*82d56013Sjoerg   [B+1, C]: 200.
161*82d56013Sjoerg   */
162*82d56013Sjoerg   std::map<uint64_t, BoundaryPoint> Boundaries;
163*82d56013Sjoerg 
164*82d56013Sjoerg   for (auto Item : Ranges) {
165*82d56013Sjoerg     uint64_t Begin = Item.first.first;
166*82d56013Sjoerg     uint64_t End = Item.first.second;
167*82d56013Sjoerg     uint64_t Count = Item.second;
168*82d56013Sjoerg     if (Boundaries.find(Begin) == Boundaries.end())
169*82d56013Sjoerg       Boundaries[Begin] = BoundaryPoint();
170*82d56013Sjoerg     Boundaries[Begin].addBeginCount(Count);
171*82d56013Sjoerg 
172*82d56013Sjoerg     if (Boundaries.find(End) == Boundaries.end())
173*82d56013Sjoerg       Boundaries[End] = BoundaryPoint();
174*82d56013Sjoerg     Boundaries[End].addEndCount(Count);
175*82d56013Sjoerg   }
176*82d56013Sjoerg 
177*82d56013Sjoerg   uint64_t BeginAddress = 0;
178*82d56013Sjoerg   int Count = 0;
179*82d56013Sjoerg   for (auto Item : Boundaries) {
180*82d56013Sjoerg     uint64_t Address = Item.first;
181*82d56013Sjoerg     BoundaryPoint &Point = Item.second;
182*82d56013Sjoerg     if (Point.BeginCount) {
183*82d56013Sjoerg       if (BeginAddress)
184*82d56013Sjoerg         DisjointRanges[{BeginAddress, Address - 1}] = Count;
185*82d56013Sjoerg       Count += Point.BeginCount;
186*82d56013Sjoerg       BeginAddress = Address;
187*82d56013Sjoerg     }
188*82d56013Sjoerg     if (Point.EndCount) {
189*82d56013Sjoerg       assert(BeginAddress && "First boundary point cannot be 'end' point");
190*82d56013Sjoerg       DisjointRanges[{BeginAddress, Address}] = Count;
191*82d56013Sjoerg       Count -= Point.EndCount;
192*82d56013Sjoerg       BeginAddress = Address + 1;
193*82d56013Sjoerg     }
194*82d56013Sjoerg   }
195*82d56013Sjoerg }
196*82d56013Sjoerg 
197*82d56013Sjoerg FunctionSamples &
getFunctionProfileForContext(StringRef ContextStr,bool WasLeafInlined)198*82d56013Sjoerg CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
199*82d56013Sjoerg                                                  bool WasLeafInlined) {
200*82d56013Sjoerg   auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
201*82d56013Sjoerg   if (Ret.second) {
202*82d56013Sjoerg     // Make a copy of the underlying context string in string table
203*82d56013Sjoerg     // before StringRef wrapper is used for context.
204*82d56013Sjoerg     auto It = ContextStrings.insert(ContextStr.str());
205*82d56013Sjoerg     SampleContext FContext(*It.first, RawContext);
206*82d56013Sjoerg     if (WasLeafInlined)
207*82d56013Sjoerg       FContext.setAttribute(ContextWasInlined);
208*82d56013Sjoerg     FunctionSamples &FProfile = Ret.first->second;
209*82d56013Sjoerg     FProfile.setContext(FContext);
210*82d56013Sjoerg     FProfile.setName(FContext.getNameWithoutContext());
211*82d56013Sjoerg   }
212*82d56013Sjoerg   return Ret.first->second;
213*82d56013Sjoerg }
214*82d56013Sjoerg 
generateProfile()215*82d56013Sjoerg void CSProfileGenerator::generateProfile() {
216*82d56013Sjoerg   FunctionSamples::ProfileIsCS = true;
217*82d56013Sjoerg   for (const auto &BI : BinarySampleCounters) {
218*82d56013Sjoerg     ProfiledBinary *Binary = BI.first;
219*82d56013Sjoerg     for (const auto &CI : BI.second) {
220*82d56013Sjoerg       const StringBasedCtxKey *CtxKey =
221*82d56013Sjoerg           dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
222*82d56013Sjoerg       StringRef ContextId(CtxKey->Context);
223*82d56013Sjoerg       // Get or create function profile for the range
224*82d56013Sjoerg       FunctionSamples &FunctionProfile =
225*82d56013Sjoerg           getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
226*82d56013Sjoerg 
227*82d56013Sjoerg       // Fill in function body samples
228*82d56013Sjoerg       populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
229*82d56013Sjoerg                                   Binary);
230*82d56013Sjoerg       // Fill in boundary sample counts as well as call site samples for calls
231*82d56013Sjoerg       populateFunctionBoundarySamples(ContextId, FunctionProfile,
232*82d56013Sjoerg                                       CI.second.BranchCounter, Binary);
233*82d56013Sjoerg     }
234*82d56013Sjoerg   }
235*82d56013Sjoerg   // Fill in call site value sample for inlined calls and also use context to
236*82d56013Sjoerg   // infer missing samples. Since we don't have call count for inlined
237*82d56013Sjoerg   // functions, we estimate it from inlinee's profile using the entry of the
238*82d56013Sjoerg   // body sample.
239*82d56013Sjoerg   populateInferredFunctionSamples();
240*82d56013Sjoerg 
241*82d56013Sjoerg   postProcessProfiles();
242*82d56013Sjoerg }
243*82d56013Sjoerg 
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const FrameLocation & LeafLoc,uint64_t Count)244*82d56013Sjoerg void CSProfileGenerator::updateBodySamplesforFunctionProfile(
245*82d56013Sjoerg     FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
246*82d56013Sjoerg     uint64_t Count) {
247*82d56013Sjoerg   // Filter out invalid negative(int type) lineOffset
248*82d56013Sjoerg   if (LeafLoc.second.LineOffset & 0x80000000)
249*82d56013Sjoerg     return;
250*82d56013Sjoerg   // Use the maximum count of samples with same line location
251*82d56013Sjoerg   ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
252*82d56013Sjoerg       LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
253*82d56013Sjoerg   uint64_t PreviousCount = R ? R.get() : 0;
254*82d56013Sjoerg   if (PreviousCount < Count) {
255*82d56013Sjoerg     FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
256*82d56013Sjoerg                                    LeafLoc.second.Discriminator,
257*82d56013Sjoerg                                    Count - PreviousCount);
258*82d56013Sjoerg   }
259*82d56013Sjoerg }
260*82d56013Sjoerg 
populateFunctionBodySamples(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter,ProfiledBinary * Binary)261*82d56013Sjoerg void CSProfileGenerator::populateFunctionBodySamples(
262*82d56013Sjoerg     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
263*82d56013Sjoerg     ProfiledBinary *Binary) {
264*82d56013Sjoerg   // Compute disjoint ranges first, so we can use MAX
265*82d56013Sjoerg   // for calculating count for each location.
266*82d56013Sjoerg   RangeSample Ranges;
267*82d56013Sjoerg   findDisjointRanges(Ranges, RangeCounter);
268*82d56013Sjoerg   for (auto Range : Ranges) {
269*82d56013Sjoerg     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
270*82d56013Sjoerg     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
271*82d56013Sjoerg     uint64_t Count = Range.second;
272*82d56013Sjoerg     // Disjoint ranges have introduce zero-filled gap that
273*82d56013Sjoerg     // doesn't belong to current context, filter them out.
274*82d56013Sjoerg     if (Count == 0)
275*82d56013Sjoerg       continue;
276*82d56013Sjoerg 
277*82d56013Sjoerg     InstructionPointer IP(Binary, RangeBegin, true);
278*82d56013Sjoerg 
279*82d56013Sjoerg     // Disjoint ranges may have range in the middle of two instr,
280*82d56013Sjoerg     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
281*82d56013Sjoerg     // can be Addr1+1 to Addr2-1. We should ignore such range.
282*82d56013Sjoerg     if (IP.Address > RangeEnd)
283*82d56013Sjoerg       continue;
284*82d56013Sjoerg 
285*82d56013Sjoerg     while (IP.Address <= RangeEnd) {
286*82d56013Sjoerg       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
287*82d56013Sjoerg       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
288*82d56013Sjoerg       if (LeafLoc.hasValue()) {
289*82d56013Sjoerg         // Recording body sample for this specific context
290*82d56013Sjoerg         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
291*82d56013Sjoerg       }
292*82d56013Sjoerg       // Accumulate total sample count even it's a line with invalid debug info
293*82d56013Sjoerg       FunctionProfile.addTotalSamples(Count);
294*82d56013Sjoerg       // Move to next IP within the range
295*82d56013Sjoerg       IP.advance();
296*82d56013Sjoerg     }
297*82d56013Sjoerg   }
298*82d56013Sjoerg }
299*82d56013Sjoerg 
populateFunctionBoundarySamples(StringRef ContextId,FunctionSamples & FunctionProfile,const BranchSample & BranchCounters,ProfiledBinary * Binary)300*82d56013Sjoerg void CSProfileGenerator::populateFunctionBoundarySamples(
301*82d56013Sjoerg     StringRef ContextId, FunctionSamples &FunctionProfile,
302*82d56013Sjoerg     const BranchSample &BranchCounters, ProfiledBinary *Binary) {
303*82d56013Sjoerg 
304*82d56013Sjoerg   for (auto Entry : BranchCounters) {
305*82d56013Sjoerg     uint64_t SourceOffset = Entry.first.first;
306*82d56013Sjoerg     uint64_t TargetOffset = Entry.first.second;
307*82d56013Sjoerg     uint64_t Count = Entry.second;
308*82d56013Sjoerg     // Get the callee name by branch target if it's a call branch
309*82d56013Sjoerg     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
310*82d56013Sjoerg         Binary->getFuncFromStartOffset(TargetOffset));
311*82d56013Sjoerg     if (CalleeName.size() == 0)
312*82d56013Sjoerg       continue;
313*82d56013Sjoerg 
314*82d56013Sjoerg     // Record called target sample and its count
315*82d56013Sjoerg     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
316*82d56013Sjoerg     if (!LeafLoc.hasValue())
317*82d56013Sjoerg       continue;
318*82d56013Sjoerg     FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
319*82d56013Sjoerg                                            LeafLoc->second.Discriminator,
320*82d56013Sjoerg                                            CalleeName, Count);
321*82d56013Sjoerg 
322*82d56013Sjoerg     // Record head sample for called target(callee)
323*82d56013Sjoerg     std::ostringstream OCalleeCtxStr;
324*82d56013Sjoerg     if (ContextId.find(" @ ") != StringRef::npos) {
325*82d56013Sjoerg       OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
326*82d56013Sjoerg       OCalleeCtxStr << " @ ";
327*82d56013Sjoerg     }
328*82d56013Sjoerg     OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
329*82d56013Sjoerg 
330*82d56013Sjoerg     FunctionSamples &CalleeProfile =
331*82d56013Sjoerg         getFunctionProfileForContext(OCalleeCtxStr.str());
332*82d56013Sjoerg     assert(Count != 0 && "Unexpected zero weight branch");
333*82d56013Sjoerg     CalleeProfile.addHeadSamples(Count);
334*82d56013Sjoerg   }
335*82d56013Sjoerg }
336*82d56013Sjoerg 
getCallerContext(StringRef CalleeContext,StringRef & CallerNameWithContext)337*82d56013Sjoerg static FrameLocation getCallerContext(StringRef CalleeContext,
338*82d56013Sjoerg                                       StringRef &CallerNameWithContext) {
339*82d56013Sjoerg   StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
340*82d56013Sjoerg   CallerNameWithContext = CallerContext.rsplit(':').first;
341*82d56013Sjoerg   auto ContextSplit = CallerContext.rsplit(" @ ");
342*82d56013Sjoerg   StringRef CallerFrameStr = ContextSplit.second.size() == 0
343*82d56013Sjoerg                                  ? ContextSplit.first
344*82d56013Sjoerg                                  : ContextSplit.second;
345*82d56013Sjoerg   FrameLocation LeafFrameLoc = {"", {0, 0}};
346*82d56013Sjoerg   StringRef Funcname;
347*82d56013Sjoerg   SampleContext::decodeContextString(CallerFrameStr, Funcname,
348*82d56013Sjoerg                                      LeafFrameLoc.second);
349*82d56013Sjoerg   LeafFrameLoc.first = Funcname.str();
350*82d56013Sjoerg   return LeafFrameLoc;
351*82d56013Sjoerg }
352*82d56013Sjoerg 
populateInferredFunctionSamples()353*82d56013Sjoerg void CSProfileGenerator::populateInferredFunctionSamples() {
354*82d56013Sjoerg   for (const auto &Item : ProfileMap) {
355*82d56013Sjoerg     const StringRef CalleeContext = Item.first();
356*82d56013Sjoerg     const FunctionSamples &CalleeProfile = Item.second;
357*82d56013Sjoerg 
358*82d56013Sjoerg     // If we already have head sample counts, we must have value profile
359*82d56013Sjoerg     // for call sites added already. Skip to avoid double counting.
360*82d56013Sjoerg     if (CalleeProfile.getHeadSamples())
361*82d56013Sjoerg       continue;
362*82d56013Sjoerg     // If we don't have context, nothing to do for caller's call site.
363*82d56013Sjoerg     // This could happen for entry point function.
364*82d56013Sjoerg     if (CalleeContext.find(" @ ") == StringRef::npos)
365*82d56013Sjoerg       continue;
366*82d56013Sjoerg 
367*82d56013Sjoerg     // Infer Caller's frame loc and context ID through string splitting
368*82d56013Sjoerg     StringRef CallerContextId;
369*82d56013Sjoerg     FrameLocation &&CallerLeafFrameLoc =
370*82d56013Sjoerg         getCallerContext(CalleeContext, CallerContextId);
371*82d56013Sjoerg 
372*82d56013Sjoerg     // It's possible that we haven't seen any sample directly in the caller,
373*82d56013Sjoerg     // in which case CallerProfile will not exist. But we can't modify
374*82d56013Sjoerg     // ProfileMap while iterating it.
375*82d56013Sjoerg     // TODO: created function profile for those callers too
376*82d56013Sjoerg     if (ProfileMap.find(CallerContextId) == ProfileMap.end())
377*82d56013Sjoerg       continue;
378*82d56013Sjoerg     FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
379*82d56013Sjoerg 
380*82d56013Sjoerg     // Since we don't have call count for inlined functions, we
381*82d56013Sjoerg     // estimate it from inlinee's profile using entry body sample.
382*82d56013Sjoerg     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
383*82d56013Sjoerg     // If we don't have samples with location, use 1 to indicate live.
384*82d56013Sjoerg     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
385*82d56013Sjoerg       EstimatedCallCount = 1;
386*82d56013Sjoerg     CallerProfile.addCalledTargetSamples(
387*82d56013Sjoerg         CallerLeafFrameLoc.second.LineOffset,
388*82d56013Sjoerg         CallerLeafFrameLoc.second.Discriminator,
389*82d56013Sjoerg         CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
390*82d56013Sjoerg     CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
391*82d56013Sjoerg                                  CallerLeafFrameLoc.second.Discriminator,
392*82d56013Sjoerg                                  EstimatedCallCount);
393*82d56013Sjoerg     CallerProfile.addTotalSamples(EstimatedCallCount);
394*82d56013Sjoerg   }
395*82d56013Sjoerg }
396*82d56013Sjoerg 
postProcessProfiles()397*82d56013Sjoerg void CSProfileGenerator::postProcessProfiles() {
398*82d56013Sjoerg   // Compute hot/cold threshold based on profile. This will be used for cold
399*82d56013Sjoerg   // context profile merging/trimming.
400*82d56013Sjoerg   computeSummaryAndThreshold();
401*82d56013Sjoerg 
402*82d56013Sjoerg   // Run global pre-inliner to adjust/merge context profile based on estimated
403*82d56013Sjoerg   // inline decisions.
404*82d56013Sjoerg   CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
405*82d56013Sjoerg 
406*82d56013Sjoerg   // Trim and merge cold context profile using cold threshold above;
407*82d56013Sjoerg   SampleContextTrimmer(ProfileMap)
408*82d56013Sjoerg       .trimAndMergeColdContextProfiles(
409*82d56013Sjoerg           ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
410*82d56013Sjoerg }
411*82d56013Sjoerg 
computeSummaryAndThreshold()412*82d56013Sjoerg void CSProfileGenerator::computeSummaryAndThreshold() {
413*82d56013Sjoerg   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
414*82d56013Sjoerg   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
415*82d56013Sjoerg   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
416*82d56013Sjoerg       (Summary->getDetailedSummary()));
417*82d56013Sjoerg   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
418*82d56013Sjoerg       (Summary->getDetailedSummary()));
419*82d56013Sjoerg 
420*82d56013Sjoerg   // Use threshold calculated from profile summary unless specified.
421*82d56013Sjoerg   if (CSProfColdThreshold.getNumOccurrences()) {
422*82d56013Sjoerg     ColdCountThreshold = CSProfColdThreshold;
423*82d56013Sjoerg   }
424*82d56013Sjoerg }
425*82d56013Sjoerg 
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)426*82d56013Sjoerg void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
427*82d56013Sjoerg                                StringMap<FunctionSamples> &ProfileMap) {
428*82d56013Sjoerg   if (std::error_code EC = Writer->write(ProfileMap))
429*82d56013Sjoerg     exitWithError(std::move(EC));
430*82d56013Sjoerg }
431*82d56013Sjoerg 
432*82d56013Sjoerg // Helper function to extract context prefix string stack
433*82d56013Sjoerg // Extract context stack for reusing, leaf context stack will
434*82d56013Sjoerg // be added compressed while looking up function profile
435*82d56013Sjoerg static void
extractPrefixContextStack(SmallVectorImpl<std::string> & ContextStrStack,const SmallVectorImpl<const PseudoProbe * > & Probes,ProfiledBinary * Binary)436*82d56013Sjoerg extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
437*82d56013Sjoerg                           const SmallVectorImpl<const PseudoProbe *> &Probes,
438*82d56013Sjoerg                           ProfiledBinary *Binary) {
439*82d56013Sjoerg   for (const auto *P : Probes) {
440*82d56013Sjoerg     Binary->getInlineContextForProbe(P, ContextStrStack, true);
441*82d56013Sjoerg   }
442*82d56013Sjoerg }
443*82d56013Sjoerg 
generateProfile()444*82d56013Sjoerg void PseudoProbeCSProfileGenerator::generateProfile() {
445*82d56013Sjoerg   // Enable pseudo probe functionalities in SampleProf
446*82d56013Sjoerg   FunctionSamples::ProfileIsProbeBased = true;
447*82d56013Sjoerg   FunctionSamples::ProfileIsCS = true;
448*82d56013Sjoerg   for (const auto &BI : BinarySampleCounters) {
449*82d56013Sjoerg     ProfiledBinary *Binary = BI.first;
450*82d56013Sjoerg     for (const auto &CI : BI.second) {
451*82d56013Sjoerg       const ProbeBasedCtxKey *CtxKey =
452*82d56013Sjoerg           dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
453*82d56013Sjoerg       SmallVector<std::string, 16> ContextStrStack;
454*82d56013Sjoerg       extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
455*82d56013Sjoerg       // Fill in function body samples from probes, also infer caller's samples
456*82d56013Sjoerg       // from callee's probe
457*82d56013Sjoerg       populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
458*82d56013Sjoerg                                     Binary);
459*82d56013Sjoerg       // Fill in boundary samples for a call probe
460*82d56013Sjoerg       populateBoundarySamplesWithProbes(CI.second.BranchCounter,
461*82d56013Sjoerg                                         ContextStrStack, Binary);
462*82d56013Sjoerg     }
463*82d56013Sjoerg   }
464*82d56013Sjoerg 
465*82d56013Sjoerg   postProcessProfiles();
466*82d56013Sjoerg }
467*82d56013Sjoerg 
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,ProfiledBinary * Binary)468*82d56013Sjoerg void PseudoProbeCSProfileGenerator::extractProbesFromRange(
469*82d56013Sjoerg     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
470*82d56013Sjoerg     ProfiledBinary *Binary) {
471*82d56013Sjoerg   RangeSample Ranges;
472*82d56013Sjoerg   findDisjointRanges(Ranges, RangeCounter);
473*82d56013Sjoerg   for (const auto &Range : Ranges) {
474*82d56013Sjoerg     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
475*82d56013Sjoerg     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
476*82d56013Sjoerg     uint64_t Count = Range.second;
477*82d56013Sjoerg     // Disjoint ranges have introduce zero-filled gap that
478*82d56013Sjoerg     // doesn't belong to current context, filter them out.
479*82d56013Sjoerg     if (Count == 0)
480*82d56013Sjoerg       continue;
481*82d56013Sjoerg 
482*82d56013Sjoerg     InstructionPointer IP(Binary, RangeBegin, true);
483*82d56013Sjoerg 
484*82d56013Sjoerg     // Disjoint ranges may have range in the middle of two instr,
485*82d56013Sjoerg     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
486*82d56013Sjoerg     // can be Addr1+1 to Addr2-1. We should ignore such range.
487*82d56013Sjoerg     if (IP.Address > RangeEnd)
488*82d56013Sjoerg       continue;
489*82d56013Sjoerg 
490*82d56013Sjoerg     while (IP.Address <= RangeEnd) {
491*82d56013Sjoerg       const AddressProbesMap &Address2ProbesMap =
492*82d56013Sjoerg           Binary->getAddress2ProbesMap();
493*82d56013Sjoerg       auto It = Address2ProbesMap.find(IP.Address);
494*82d56013Sjoerg       if (It != Address2ProbesMap.end()) {
495*82d56013Sjoerg         for (const auto &Probe : It->second) {
496*82d56013Sjoerg           if (!Probe.isBlock())
497*82d56013Sjoerg             continue;
498*82d56013Sjoerg           ProbeCounter[&Probe] += Count;
499*82d56013Sjoerg         }
500*82d56013Sjoerg       }
501*82d56013Sjoerg 
502*82d56013Sjoerg       IP.advance();
503*82d56013Sjoerg     }
504*82d56013Sjoerg   }
505*82d56013Sjoerg }
506*82d56013Sjoerg 
populateBodySamplesWithProbes(const RangeSample & RangeCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)507*82d56013Sjoerg void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
508*82d56013Sjoerg     const RangeSample &RangeCounter,
509*82d56013Sjoerg     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
510*82d56013Sjoerg   ProbeCounterMap ProbeCounter;
511*82d56013Sjoerg   // Extract the top frame probes by looking up each address among the range in
512*82d56013Sjoerg   // the Address2ProbeMap
513*82d56013Sjoerg   extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
514*82d56013Sjoerg   std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
515*82d56013Sjoerg   for (auto PI : ProbeCounter) {
516*82d56013Sjoerg     const PseudoProbe *Probe = PI.first;
517*82d56013Sjoerg     uint64_t Count = PI.second;
518*82d56013Sjoerg     // Ignore dangling probes since they will be reported later if needed.
519*82d56013Sjoerg     if (Probe->isDangling())
520*82d56013Sjoerg       continue;
521*82d56013Sjoerg     FunctionSamples &FunctionProfile =
522*82d56013Sjoerg         getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
523*82d56013Sjoerg     // Record the current frame and FunctionProfile whenever samples are
524*82d56013Sjoerg     // collected for non-danglie probes. This is for reporting all of the
525*82d56013Sjoerg     // dangling probes of the frame later.
526*82d56013Sjoerg     FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
527*82d56013Sjoerg     FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
528*82d56013Sjoerg     FunctionProfile.addTotalSamples(Count);
529*82d56013Sjoerg     if (Probe->isEntry()) {
530*82d56013Sjoerg       FunctionProfile.addHeadSamples(Count);
531*82d56013Sjoerg       // Look up for the caller's function profile
532*82d56013Sjoerg       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
533*82d56013Sjoerg       if (InlinerDesc != nullptr) {
534*82d56013Sjoerg         // Since the context id will be compressed, we have to use callee's
535*82d56013Sjoerg         // context id to infer caller's context id to ensure they share the
536*82d56013Sjoerg         // same context prefix.
537*82d56013Sjoerg         StringRef CalleeContextId =
538*82d56013Sjoerg             FunctionProfile.getContext().getNameWithContext();
539*82d56013Sjoerg         StringRef CallerContextId;
540*82d56013Sjoerg         FrameLocation &&CallerLeafFrameLoc =
541*82d56013Sjoerg             getCallerContext(CalleeContextId, CallerContextId);
542*82d56013Sjoerg         uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
543*82d56013Sjoerg         assert(CallerIndex &&
544*82d56013Sjoerg                "Inferred caller's location index shouldn't be zero!");
545*82d56013Sjoerg         FunctionSamples &CallerProfile =
546*82d56013Sjoerg             getFunctionProfileForContext(CallerContextId);
547*82d56013Sjoerg         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
548*82d56013Sjoerg         CallerProfile.addBodySamples(CallerIndex, 0, Count);
549*82d56013Sjoerg         CallerProfile.addTotalSamples(Count);
550*82d56013Sjoerg         CallerProfile.addCalledTargetSamples(
551*82d56013Sjoerg             CallerIndex, 0,
552*82d56013Sjoerg             FunctionProfile.getContext().getNameWithoutContext(), Count);
553*82d56013Sjoerg       }
554*82d56013Sjoerg     }
555*82d56013Sjoerg 
556*82d56013Sjoerg     // Report dangling probes for frames that have real samples collected.
557*82d56013Sjoerg     // Dangling probes are the probes associated to an empty block. With this
558*82d56013Sjoerg     // place holder, sample count on a dangling probe will not be trusted by the
559*82d56013Sjoerg     // compiler and we will rely on the counts inference algorithm to get the
560*82d56013Sjoerg     // probe a reasonable count. Use InvalidProbeCount to mark sample count for
561*82d56013Sjoerg     // a dangling probe.
562*82d56013Sjoerg     for (auto &I : FrameSamples) {
563*82d56013Sjoerg       auto *FunctionProfile = I.second;
564*82d56013Sjoerg       for (auto *Probe : I.first->getProbes()) {
565*82d56013Sjoerg         if (Probe->isDangling()) {
566*82d56013Sjoerg           FunctionProfile->addBodySamplesForProbe(
567*82d56013Sjoerg               Probe->Index, FunctionSamples::InvalidProbeCount);
568*82d56013Sjoerg         }
569*82d56013Sjoerg       }
570*82d56013Sjoerg     }
571*82d56013Sjoerg   }
572*82d56013Sjoerg }
573*82d56013Sjoerg 
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)574*82d56013Sjoerg void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
575*82d56013Sjoerg     const BranchSample &BranchCounter,
576*82d56013Sjoerg     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
577*82d56013Sjoerg   for (auto BI : BranchCounter) {
578*82d56013Sjoerg     uint64_t SourceOffset = BI.first.first;
579*82d56013Sjoerg     uint64_t TargetOffset = BI.first.second;
580*82d56013Sjoerg     uint64_t Count = BI.second;
581*82d56013Sjoerg     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
582*82d56013Sjoerg     const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
583*82d56013Sjoerg     if (CallProbe == nullptr)
584*82d56013Sjoerg       continue;
585*82d56013Sjoerg     FunctionSamples &FunctionProfile =
586*82d56013Sjoerg         getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
587*82d56013Sjoerg     FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
588*82d56013Sjoerg     FunctionProfile.addTotalSamples(Count);
589*82d56013Sjoerg     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
590*82d56013Sjoerg         Binary->getFuncFromStartOffset(TargetOffset));
591*82d56013Sjoerg     if (CalleeName.size() == 0)
592*82d56013Sjoerg       continue;
593*82d56013Sjoerg     FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
594*82d56013Sjoerg                                            Count);
595*82d56013Sjoerg   }
596*82d56013Sjoerg }
597*82d56013Sjoerg 
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbeFuncDesc * LeafFuncDesc,bool WasLeafInlined)598*82d56013Sjoerg FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
599*82d56013Sjoerg     SmallVectorImpl<std::string> &ContextStrStack,
600*82d56013Sjoerg     const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
601*82d56013Sjoerg   assert(ContextStrStack.size() && "Profile context must have the leaf frame");
602*82d56013Sjoerg   // Compress the context string except for the leaf frame
603*82d56013Sjoerg   std::string LeafFrame = ContextStrStack.back();
604*82d56013Sjoerg   ContextStrStack.pop_back();
605*82d56013Sjoerg   CSProfileGenerator::compressRecursionContext(ContextStrStack);
606*82d56013Sjoerg 
607*82d56013Sjoerg   std::ostringstream OContextStr;
608*82d56013Sjoerg   for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
609*82d56013Sjoerg     if (OContextStr.str().size())
610*82d56013Sjoerg       OContextStr << " @ ";
611*82d56013Sjoerg     OContextStr << ContextStrStack[I];
612*82d56013Sjoerg   }
613*82d56013Sjoerg   // For leaf inlined context with the top frame, we should strip off the top
614*82d56013Sjoerg   // frame's probe id, like:
615*82d56013Sjoerg   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
616*82d56013Sjoerg   if (OContextStr.str().size())
617*82d56013Sjoerg     OContextStr << " @ ";
618*82d56013Sjoerg   OContextStr << StringRef(LeafFrame).split(":").first.str();
619*82d56013Sjoerg 
620*82d56013Sjoerg   FunctionSamples &FunctionProile =
621*82d56013Sjoerg       getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
622*82d56013Sjoerg   FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
623*82d56013Sjoerg   return FunctionProile;
624*82d56013Sjoerg }
625*82d56013Sjoerg 
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbe * LeafProbe,ProfiledBinary * Binary)626*82d56013Sjoerg FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
627*82d56013Sjoerg     SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
628*82d56013Sjoerg     ProfiledBinary *Binary) {
629*82d56013Sjoerg   // Explicitly copy the context for appending the leaf context
630*82d56013Sjoerg   SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
631*82d56013Sjoerg                                                    ContextStrStack.end());
632*82d56013Sjoerg   Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
633*82d56013Sjoerg   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
634*82d56013Sjoerg   bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
635*82d56013Sjoerg   return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
636*82d56013Sjoerg                                         WasLeafInlined);
637*82d56013Sjoerg }
638*82d56013Sjoerg 
639*82d56013Sjoerg } // end namespace sampleprof
640*82d56013Sjoerg } // end namespace llvm
641