1*82d56013Sjoerg //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2*82d56013Sjoerg //
3*82d56013Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*82d56013Sjoerg // See https://llvm.org/LICENSE.txt for license information.
5*82d56013Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*82d56013Sjoerg //
7*82d56013Sjoerg //===----------------------------------------------------------------------===//
8*82d56013Sjoerg
9*82d56013Sjoerg #include "ProfileGenerator.h"
10*82d56013Sjoerg #include "llvm/ProfileData/ProfileCommon.h"
11*82d56013Sjoerg
12*82d56013Sjoerg static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
13*82d56013Sjoerg cl::Required,
14*82d56013Sjoerg cl::desc("Output profile file"));
15*82d56013Sjoerg static cl::alias OutputA("o", cl::desc("Alias for --output"),
16*82d56013Sjoerg cl::aliasopt(OutputFilename));
17*82d56013Sjoerg
18*82d56013Sjoerg static cl::opt<SampleProfileFormat> OutputFormat(
19*82d56013Sjoerg "format", cl::desc("Format of output profile"), cl::init(SPF_Text),
20*82d56013Sjoerg cl::values(
21*82d56013Sjoerg clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22*82d56013Sjoerg clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23*82d56013Sjoerg clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24*82d56013Sjoerg clEnumValN(SPF_Text, "text", "Text encoding"),
25*82d56013Sjoerg clEnumValN(SPF_GCC, "gcc",
26*82d56013Sjoerg "GCC encoding (only meaningful for -sample)")));
27*82d56013Sjoerg
28*82d56013Sjoerg static cl::opt<int32_t, true> RecursionCompression(
29*82d56013Sjoerg "compress-recursion",
30*82d56013Sjoerg cl::desc("Compressing recursion by deduplicating adjacent frame "
31*82d56013Sjoerg "sequences up to the specified size. -1 means no size limit."),
32*82d56013Sjoerg cl::Hidden,
33*82d56013Sjoerg cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34*82d56013Sjoerg
35*82d56013Sjoerg static cl::opt<uint64_t> CSProfColdThreshold(
36*82d56013Sjoerg "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
37*82d56013Sjoerg cl::desc("Specify the total samples threshold for a context profile to "
38*82d56013Sjoerg "be considered cold, any cold profiles will be merged into "
39*82d56013Sjoerg "context-less base profiles"));
40*82d56013Sjoerg
41*82d56013Sjoerg static cl::opt<bool> CSProfMergeColdContext(
42*82d56013Sjoerg "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
43*82d56013Sjoerg cl::desc("This works together with --csprof-cold-thres. If the total count "
44*82d56013Sjoerg "of context profile is smaller than the threshold, it will be "
45*82d56013Sjoerg "merged into context-less base profile."));
46*82d56013Sjoerg
47*82d56013Sjoerg static cl::opt<bool> CSProfTrimColdContext(
48*82d56013Sjoerg "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
49*82d56013Sjoerg cl::desc("This works together with --csprof-cold-thres. If the total count "
50*82d56013Sjoerg "of the profile after all merge is done is still smaller than "
51*82d56013Sjoerg "threshold, it will be trimmed."));
52*82d56013Sjoerg
53*82d56013Sjoerg using namespace llvm;
54*82d56013Sjoerg using namespace sampleprof;
55*82d56013Sjoerg
56*82d56013Sjoerg namespace llvm {
57*82d56013Sjoerg namespace sampleprof {
58*82d56013Sjoerg
59*82d56013Sjoerg // Initialize the MaxCompressionSize to -1 which means no size limit
60*82d56013Sjoerg int32_t CSProfileGenerator::MaxCompressionSize = -1;
61*82d56013Sjoerg
62*82d56013Sjoerg static bool
usePseudoProbes(const BinarySampleCounterMap & BinarySampleCounters)63*82d56013Sjoerg usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
64*82d56013Sjoerg return BinarySampleCounters.size() &&
65*82d56013Sjoerg BinarySampleCounters.begin()->first->usePseudoProbes();
66*82d56013Sjoerg }
67*82d56013Sjoerg
68*82d56013Sjoerg std::unique_ptr<ProfileGenerator>
create(const BinarySampleCounterMap & BinarySampleCounters,enum PerfScriptType SampleType)69*82d56013Sjoerg ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
70*82d56013Sjoerg enum PerfScriptType SampleType) {
71*82d56013Sjoerg std::unique_ptr<ProfileGenerator> ProfileGenerator;
72*82d56013Sjoerg if (SampleType == PERF_LBR_STACK) {
73*82d56013Sjoerg if (usePseudoProbes(BinarySampleCounters)) {
74*82d56013Sjoerg ProfileGenerator.reset(
75*82d56013Sjoerg new PseudoProbeCSProfileGenerator(BinarySampleCounters));
76*82d56013Sjoerg } else {
77*82d56013Sjoerg ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
78*82d56013Sjoerg }
79*82d56013Sjoerg } else {
80*82d56013Sjoerg // TODO:
81*82d56013Sjoerg llvm_unreachable("Unsupported perfscript!");
82*82d56013Sjoerg }
83*82d56013Sjoerg
84*82d56013Sjoerg return ProfileGenerator;
85*82d56013Sjoerg }
86*82d56013Sjoerg
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)87*82d56013Sjoerg void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
88*82d56013Sjoerg StringMap<FunctionSamples> &ProfileMap) {
89*82d56013Sjoerg if (std::error_code EC = Writer->write(ProfileMap))
90*82d56013Sjoerg exitWithError(std::move(EC));
91*82d56013Sjoerg }
92*82d56013Sjoerg
write()93*82d56013Sjoerg void ProfileGenerator::write() {
94*82d56013Sjoerg auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
95*82d56013Sjoerg if (std::error_code EC = WriterOrErr.getError())
96*82d56013Sjoerg exitWithError(EC, OutputFilename);
97*82d56013Sjoerg write(std::move(WriterOrErr.get()), ProfileMap);
98*82d56013Sjoerg }
99*82d56013Sjoerg
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)100*82d56013Sjoerg void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
101*82d56013Sjoerg const RangeSample &Ranges) {
102*82d56013Sjoerg
103*82d56013Sjoerg /*
104*82d56013Sjoerg Regions may overlap with each other. Using the boundary info, find all
105*82d56013Sjoerg disjoint ranges and their sample count. BoundaryPoint contains the count
106*82d56013Sjoerg multiple samples begin/end at this points.
107*82d56013Sjoerg
108*82d56013Sjoerg |<--100-->| Sample1
109*82d56013Sjoerg |<------200------>| Sample2
110*82d56013Sjoerg A B C
111*82d56013Sjoerg
112*82d56013Sjoerg In the example above,
113*82d56013Sjoerg Sample1 begins at A, ends at B, its value is 100.
114*82d56013Sjoerg Sample2 beings at A, ends at C, its value is 200.
115*82d56013Sjoerg For A, BeginCount is the sum of sample begins at A, which is 300 and no
116*82d56013Sjoerg samples ends at A, so EndCount is 0.
117*82d56013Sjoerg Then boundary points A, B, and C with begin/end counts are:
118*82d56013Sjoerg A: (300, 0)
119*82d56013Sjoerg B: (0, 100)
120*82d56013Sjoerg C: (0, 200)
121*82d56013Sjoerg */
122*82d56013Sjoerg struct BoundaryPoint {
123*82d56013Sjoerg // Sum of sample counts beginning at this point
124*82d56013Sjoerg uint64_t BeginCount;
125*82d56013Sjoerg // Sum of sample counts ending at this point
126*82d56013Sjoerg uint64_t EndCount;
127*82d56013Sjoerg
128*82d56013Sjoerg BoundaryPoint() : BeginCount(0), EndCount(0){};
129*82d56013Sjoerg
130*82d56013Sjoerg void addBeginCount(uint64_t Count) { BeginCount += Count; }
131*82d56013Sjoerg
132*82d56013Sjoerg void addEndCount(uint64_t Count) { EndCount += Count; }
133*82d56013Sjoerg };
134*82d56013Sjoerg
135*82d56013Sjoerg /*
136*82d56013Sjoerg For the above example. With boundary points, follwing logic finds two
137*82d56013Sjoerg disjoint region of
138*82d56013Sjoerg
139*82d56013Sjoerg [A,B]: 300
140*82d56013Sjoerg [B+1,C]: 200
141*82d56013Sjoerg
142*82d56013Sjoerg If there is a boundary point that both begin and end, the point itself
143*82d56013Sjoerg becomes a separate disjoint region. For example, if we have original
144*82d56013Sjoerg ranges of
145*82d56013Sjoerg
146*82d56013Sjoerg |<--- 100 --->|
147*82d56013Sjoerg |<--- 200 --->|
148*82d56013Sjoerg A B C
149*82d56013Sjoerg
150*82d56013Sjoerg there are three boundary points with their begin/end counts of
151*82d56013Sjoerg
152*82d56013Sjoerg A: (100, 0)
153*82d56013Sjoerg B: (200, 100)
154*82d56013Sjoerg C: (0, 200)
155*82d56013Sjoerg
156*82d56013Sjoerg the disjoint ranges would be
157*82d56013Sjoerg
158*82d56013Sjoerg [A, B-1]: 100
159*82d56013Sjoerg [B, B]: 300
160*82d56013Sjoerg [B+1, C]: 200.
161*82d56013Sjoerg */
162*82d56013Sjoerg std::map<uint64_t, BoundaryPoint> Boundaries;
163*82d56013Sjoerg
164*82d56013Sjoerg for (auto Item : Ranges) {
165*82d56013Sjoerg uint64_t Begin = Item.first.first;
166*82d56013Sjoerg uint64_t End = Item.first.second;
167*82d56013Sjoerg uint64_t Count = Item.second;
168*82d56013Sjoerg if (Boundaries.find(Begin) == Boundaries.end())
169*82d56013Sjoerg Boundaries[Begin] = BoundaryPoint();
170*82d56013Sjoerg Boundaries[Begin].addBeginCount(Count);
171*82d56013Sjoerg
172*82d56013Sjoerg if (Boundaries.find(End) == Boundaries.end())
173*82d56013Sjoerg Boundaries[End] = BoundaryPoint();
174*82d56013Sjoerg Boundaries[End].addEndCount(Count);
175*82d56013Sjoerg }
176*82d56013Sjoerg
177*82d56013Sjoerg uint64_t BeginAddress = 0;
178*82d56013Sjoerg int Count = 0;
179*82d56013Sjoerg for (auto Item : Boundaries) {
180*82d56013Sjoerg uint64_t Address = Item.first;
181*82d56013Sjoerg BoundaryPoint &Point = Item.second;
182*82d56013Sjoerg if (Point.BeginCount) {
183*82d56013Sjoerg if (BeginAddress)
184*82d56013Sjoerg DisjointRanges[{BeginAddress, Address - 1}] = Count;
185*82d56013Sjoerg Count += Point.BeginCount;
186*82d56013Sjoerg BeginAddress = Address;
187*82d56013Sjoerg }
188*82d56013Sjoerg if (Point.EndCount) {
189*82d56013Sjoerg assert(BeginAddress && "First boundary point cannot be 'end' point");
190*82d56013Sjoerg DisjointRanges[{BeginAddress, Address}] = Count;
191*82d56013Sjoerg Count -= Point.EndCount;
192*82d56013Sjoerg BeginAddress = Address + 1;
193*82d56013Sjoerg }
194*82d56013Sjoerg }
195*82d56013Sjoerg }
196*82d56013Sjoerg
197*82d56013Sjoerg FunctionSamples &
getFunctionProfileForContext(StringRef ContextStr,bool WasLeafInlined)198*82d56013Sjoerg CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
199*82d56013Sjoerg bool WasLeafInlined) {
200*82d56013Sjoerg auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
201*82d56013Sjoerg if (Ret.second) {
202*82d56013Sjoerg // Make a copy of the underlying context string in string table
203*82d56013Sjoerg // before StringRef wrapper is used for context.
204*82d56013Sjoerg auto It = ContextStrings.insert(ContextStr.str());
205*82d56013Sjoerg SampleContext FContext(*It.first, RawContext);
206*82d56013Sjoerg if (WasLeafInlined)
207*82d56013Sjoerg FContext.setAttribute(ContextWasInlined);
208*82d56013Sjoerg FunctionSamples &FProfile = Ret.first->second;
209*82d56013Sjoerg FProfile.setContext(FContext);
210*82d56013Sjoerg FProfile.setName(FContext.getNameWithoutContext());
211*82d56013Sjoerg }
212*82d56013Sjoerg return Ret.first->second;
213*82d56013Sjoerg }
214*82d56013Sjoerg
generateProfile()215*82d56013Sjoerg void CSProfileGenerator::generateProfile() {
216*82d56013Sjoerg FunctionSamples::ProfileIsCS = true;
217*82d56013Sjoerg for (const auto &BI : BinarySampleCounters) {
218*82d56013Sjoerg ProfiledBinary *Binary = BI.first;
219*82d56013Sjoerg for (const auto &CI : BI.second) {
220*82d56013Sjoerg const StringBasedCtxKey *CtxKey =
221*82d56013Sjoerg dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
222*82d56013Sjoerg StringRef ContextId(CtxKey->Context);
223*82d56013Sjoerg // Get or create function profile for the range
224*82d56013Sjoerg FunctionSamples &FunctionProfile =
225*82d56013Sjoerg getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
226*82d56013Sjoerg
227*82d56013Sjoerg // Fill in function body samples
228*82d56013Sjoerg populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
229*82d56013Sjoerg Binary);
230*82d56013Sjoerg // Fill in boundary sample counts as well as call site samples for calls
231*82d56013Sjoerg populateFunctionBoundarySamples(ContextId, FunctionProfile,
232*82d56013Sjoerg CI.second.BranchCounter, Binary);
233*82d56013Sjoerg }
234*82d56013Sjoerg }
235*82d56013Sjoerg // Fill in call site value sample for inlined calls and also use context to
236*82d56013Sjoerg // infer missing samples. Since we don't have call count for inlined
237*82d56013Sjoerg // functions, we estimate it from inlinee's profile using the entry of the
238*82d56013Sjoerg // body sample.
239*82d56013Sjoerg populateInferredFunctionSamples();
240*82d56013Sjoerg
241*82d56013Sjoerg postProcessProfiles();
242*82d56013Sjoerg }
243*82d56013Sjoerg
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const FrameLocation & LeafLoc,uint64_t Count)244*82d56013Sjoerg void CSProfileGenerator::updateBodySamplesforFunctionProfile(
245*82d56013Sjoerg FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
246*82d56013Sjoerg uint64_t Count) {
247*82d56013Sjoerg // Filter out invalid negative(int type) lineOffset
248*82d56013Sjoerg if (LeafLoc.second.LineOffset & 0x80000000)
249*82d56013Sjoerg return;
250*82d56013Sjoerg // Use the maximum count of samples with same line location
251*82d56013Sjoerg ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
252*82d56013Sjoerg LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
253*82d56013Sjoerg uint64_t PreviousCount = R ? R.get() : 0;
254*82d56013Sjoerg if (PreviousCount < Count) {
255*82d56013Sjoerg FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
256*82d56013Sjoerg LeafLoc.second.Discriminator,
257*82d56013Sjoerg Count - PreviousCount);
258*82d56013Sjoerg }
259*82d56013Sjoerg }
260*82d56013Sjoerg
populateFunctionBodySamples(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter,ProfiledBinary * Binary)261*82d56013Sjoerg void CSProfileGenerator::populateFunctionBodySamples(
262*82d56013Sjoerg FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
263*82d56013Sjoerg ProfiledBinary *Binary) {
264*82d56013Sjoerg // Compute disjoint ranges first, so we can use MAX
265*82d56013Sjoerg // for calculating count for each location.
266*82d56013Sjoerg RangeSample Ranges;
267*82d56013Sjoerg findDisjointRanges(Ranges, RangeCounter);
268*82d56013Sjoerg for (auto Range : Ranges) {
269*82d56013Sjoerg uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
270*82d56013Sjoerg uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
271*82d56013Sjoerg uint64_t Count = Range.second;
272*82d56013Sjoerg // Disjoint ranges have introduce zero-filled gap that
273*82d56013Sjoerg // doesn't belong to current context, filter them out.
274*82d56013Sjoerg if (Count == 0)
275*82d56013Sjoerg continue;
276*82d56013Sjoerg
277*82d56013Sjoerg InstructionPointer IP(Binary, RangeBegin, true);
278*82d56013Sjoerg
279*82d56013Sjoerg // Disjoint ranges may have range in the middle of two instr,
280*82d56013Sjoerg // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
281*82d56013Sjoerg // can be Addr1+1 to Addr2-1. We should ignore such range.
282*82d56013Sjoerg if (IP.Address > RangeEnd)
283*82d56013Sjoerg continue;
284*82d56013Sjoerg
285*82d56013Sjoerg while (IP.Address <= RangeEnd) {
286*82d56013Sjoerg uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
287*82d56013Sjoerg auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
288*82d56013Sjoerg if (LeafLoc.hasValue()) {
289*82d56013Sjoerg // Recording body sample for this specific context
290*82d56013Sjoerg updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
291*82d56013Sjoerg }
292*82d56013Sjoerg // Accumulate total sample count even it's a line with invalid debug info
293*82d56013Sjoerg FunctionProfile.addTotalSamples(Count);
294*82d56013Sjoerg // Move to next IP within the range
295*82d56013Sjoerg IP.advance();
296*82d56013Sjoerg }
297*82d56013Sjoerg }
298*82d56013Sjoerg }
299*82d56013Sjoerg
populateFunctionBoundarySamples(StringRef ContextId,FunctionSamples & FunctionProfile,const BranchSample & BranchCounters,ProfiledBinary * Binary)300*82d56013Sjoerg void CSProfileGenerator::populateFunctionBoundarySamples(
301*82d56013Sjoerg StringRef ContextId, FunctionSamples &FunctionProfile,
302*82d56013Sjoerg const BranchSample &BranchCounters, ProfiledBinary *Binary) {
303*82d56013Sjoerg
304*82d56013Sjoerg for (auto Entry : BranchCounters) {
305*82d56013Sjoerg uint64_t SourceOffset = Entry.first.first;
306*82d56013Sjoerg uint64_t TargetOffset = Entry.first.second;
307*82d56013Sjoerg uint64_t Count = Entry.second;
308*82d56013Sjoerg // Get the callee name by branch target if it's a call branch
309*82d56013Sjoerg StringRef CalleeName = FunctionSamples::getCanonicalFnName(
310*82d56013Sjoerg Binary->getFuncFromStartOffset(TargetOffset));
311*82d56013Sjoerg if (CalleeName.size() == 0)
312*82d56013Sjoerg continue;
313*82d56013Sjoerg
314*82d56013Sjoerg // Record called target sample and its count
315*82d56013Sjoerg auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
316*82d56013Sjoerg if (!LeafLoc.hasValue())
317*82d56013Sjoerg continue;
318*82d56013Sjoerg FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
319*82d56013Sjoerg LeafLoc->second.Discriminator,
320*82d56013Sjoerg CalleeName, Count);
321*82d56013Sjoerg
322*82d56013Sjoerg // Record head sample for called target(callee)
323*82d56013Sjoerg std::ostringstream OCalleeCtxStr;
324*82d56013Sjoerg if (ContextId.find(" @ ") != StringRef::npos) {
325*82d56013Sjoerg OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
326*82d56013Sjoerg OCalleeCtxStr << " @ ";
327*82d56013Sjoerg }
328*82d56013Sjoerg OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
329*82d56013Sjoerg
330*82d56013Sjoerg FunctionSamples &CalleeProfile =
331*82d56013Sjoerg getFunctionProfileForContext(OCalleeCtxStr.str());
332*82d56013Sjoerg assert(Count != 0 && "Unexpected zero weight branch");
333*82d56013Sjoerg CalleeProfile.addHeadSamples(Count);
334*82d56013Sjoerg }
335*82d56013Sjoerg }
336*82d56013Sjoerg
getCallerContext(StringRef CalleeContext,StringRef & CallerNameWithContext)337*82d56013Sjoerg static FrameLocation getCallerContext(StringRef CalleeContext,
338*82d56013Sjoerg StringRef &CallerNameWithContext) {
339*82d56013Sjoerg StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
340*82d56013Sjoerg CallerNameWithContext = CallerContext.rsplit(':').first;
341*82d56013Sjoerg auto ContextSplit = CallerContext.rsplit(" @ ");
342*82d56013Sjoerg StringRef CallerFrameStr = ContextSplit.second.size() == 0
343*82d56013Sjoerg ? ContextSplit.first
344*82d56013Sjoerg : ContextSplit.second;
345*82d56013Sjoerg FrameLocation LeafFrameLoc = {"", {0, 0}};
346*82d56013Sjoerg StringRef Funcname;
347*82d56013Sjoerg SampleContext::decodeContextString(CallerFrameStr, Funcname,
348*82d56013Sjoerg LeafFrameLoc.second);
349*82d56013Sjoerg LeafFrameLoc.first = Funcname.str();
350*82d56013Sjoerg return LeafFrameLoc;
351*82d56013Sjoerg }
352*82d56013Sjoerg
populateInferredFunctionSamples()353*82d56013Sjoerg void CSProfileGenerator::populateInferredFunctionSamples() {
354*82d56013Sjoerg for (const auto &Item : ProfileMap) {
355*82d56013Sjoerg const StringRef CalleeContext = Item.first();
356*82d56013Sjoerg const FunctionSamples &CalleeProfile = Item.second;
357*82d56013Sjoerg
358*82d56013Sjoerg // If we already have head sample counts, we must have value profile
359*82d56013Sjoerg // for call sites added already. Skip to avoid double counting.
360*82d56013Sjoerg if (CalleeProfile.getHeadSamples())
361*82d56013Sjoerg continue;
362*82d56013Sjoerg // If we don't have context, nothing to do for caller's call site.
363*82d56013Sjoerg // This could happen for entry point function.
364*82d56013Sjoerg if (CalleeContext.find(" @ ") == StringRef::npos)
365*82d56013Sjoerg continue;
366*82d56013Sjoerg
367*82d56013Sjoerg // Infer Caller's frame loc and context ID through string splitting
368*82d56013Sjoerg StringRef CallerContextId;
369*82d56013Sjoerg FrameLocation &&CallerLeafFrameLoc =
370*82d56013Sjoerg getCallerContext(CalleeContext, CallerContextId);
371*82d56013Sjoerg
372*82d56013Sjoerg // It's possible that we haven't seen any sample directly in the caller,
373*82d56013Sjoerg // in which case CallerProfile will not exist. But we can't modify
374*82d56013Sjoerg // ProfileMap while iterating it.
375*82d56013Sjoerg // TODO: created function profile for those callers too
376*82d56013Sjoerg if (ProfileMap.find(CallerContextId) == ProfileMap.end())
377*82d56013Sjoerg continue;
378*82d56013Sjoerg FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
379*82d56013Sjoerg
380*82d56013Sjoerg // Since we don't have call count for inlined functions, we
381*82d56013Sjoerg // estimate it from inlinee's profile using entry body sample.
382*82d56013Sjoerg uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
383*82d56013Sjoerg // If we don't have samples with location, use 1 to indicate live.
384*82d56013Sjoerg if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
385*82d56013Sjoerg EstimatedCallCount = 1;
386*82d56013Sjoerg CallerProfile.addCalledTargetSamples(
387*82d56013Sjoerg CallerLeafFrameLoc.second.LineOffset,
388*82d56013Sjoerg CallerLeafFrameLoc.second.Discriminator,
389*82d56013Sjoerg CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
390*82d56013Sjoerg CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
391*82d56013Sjoerg CallerLeafFrameLoc.second.Discriminator,
392*82d56013Sjoerg EstimatedCallCount);
393*82d56013Sjoerg CallerProfile.addTotalSamples(EstimatedCallCount);
394*82d56013Sjoerg }
395*82d56013Sjoerg }
396*82d56013Sjoerg
postProcessProfiles()397*82d56013Sjoerg void CSProfileGenerator::postProcessProfiles() {
398*82d56013Sjoerg // Compute hot/cold threshold based on profile. This will be used for cold
399*82d56013Sjoerg // context profile merging/trimming.
400*82d56013Sjoerg computeSummaryAndThreshold();
401*82d56013Sjoerg
402*82d56013Sjoerg // Run global pre-inliner to adjust/merge context profile based on estimated
403*82d56013Sjoerg // inline decisions.
404*82d56013Sjoerg CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
405*82d56013Sjoerg
406*82d56013Sjoerg // Trim and merge cold context profile using cold threshold above;
407*82d56013Sjoerg SampleContextTrimmer(ProfileMap)
408*82d56013Sjoerg .trimAndMergeColdContextProfiles(
409*82d56013Sjoerg ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
410*82d56013Sjoerg }
411*82d56013Sjoerg
computeSummaryAndThreshold()412*82d56013Sjoerg void CSProfileGenerator::computeSummaryAndThreshold() {
413*82d56013Sjoerg SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
414*82d56013Sjoerg auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
415*82d56013Sjoerg HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
416*82d56013Sjoerg (Summary->getDetailedSummary()));
417*82d56013Sjoerg ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
418*82d56013Sjoerg (Summary->getDetailedSummary()));
419*82d56013Sjoerg
420*82d56013Sjoerg // Use threshold calculated from profile summary unless specified.
421*82d56013Sjoerg if (CSProfColdThreshold.getNumOccurrences()) {
422*82d56013Sjoerg ColdCountThreshold = CSProfColdThreshold;
423*82d56013Sjoerg }
424*82d56013Sjoerg }
425*82d56013Sjoerg
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)426*82d56013Sjoerg void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
427*82d56013Sjoerg StringMap<FunctionSamples> &ProfileMap) {
428*82d56013Sjoerg if (std::error_code EC = Writer->write(ProfileMap))
429*82d56013Sjoerg exitWithError(std::move(EC));
430*82d56013Sjoerg }
431*82d56013Sjoerg
432*82d56013Sjoerg // Helper function to extract context prefix string stack
433*82d56013Sjoerg // Extract context stack for reusing, leaf context stack will
434*82d56013Sjoerg // be added compressed while looking up function profile
435*82d56013Sjoerg static void
extractPrefixContextStack(SmallVectorImpl<std::string> & ContextStrStack,const SmallVectorImpl<const PseudoProbe * > & Probes,ProfiledBinary * Binary)436*82d56013Sjoerg extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
437*82d56013Sjoerg const SmallVectorImpl<const PseudoProbe *> &Probes,
438*82d56013Sjoerg ProfiledBinary *Binary) {
439*82d56013Sjoerg for (const auto *P : Probes) {
440*82d56013Sjoerg Binary->getInlineContextForProbe(P, ContextStrStack, true);
441*82d56013Sjoerg }
442*82d56013Sjoerg }
443*82d56013Sjoerg
generateProfile()444*82d56013Sjoerg void PseudoProbeCSProfileGenerator::generateProfile() {
445*82d56013Sjoerg // Enable pseudo probe functionalities in SampleProf
446*82d56013Sjoerg FunctionSamples::ProfileIsProbeBased = true;
447*82d56013Sjoerg FunctionSamples::ProfileIsCS = true;
448*82d56013Sjoerg for (const auto &BI : BinarySampleCounters) {
449*82d56013Sjoerg ProfiledBinary *Binary = BI.first;
450*82d56013Sjoerg for (const auto &CI : BI.second) {
451*82d56013Sjoerg const ProbeBasedCtxKey *CtxKey =
452*82d56013Sjoerg dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
453*82d56013Sjoerg SmallVector<std::string, 16> ContextStrStack;
454*82d56013Sjoerg extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
455*82d56013Sjoerg // Fill in function body samples from probes, also infer caller's samples
456*82d56013Sjoerg // from callee's probe
457*82d56013Sjoerg populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
458*82d56013Sjoerg Binary);
459*82d56013Sjoerg // Fill in boundary samples for a call probe
460*82d56013Sjoerg populateBoundarySamplesWithProbes(CI.second.BranchCounter,
461*82d56013Sjoerg ContextStrStack, Binary);
462*82d56013Sjoerg }
463*82d56013Sjoerg }
464*82d56013Sjoerg
465*82d56013Sjoerg postProcessProfiles();
466*82d56013Sjoerg }
467*82d56013Sjoerg
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,ProfiledBinary * Binary)468*82d56013Sjoerg void PseudoProbeCSProfileGenerator::extractProbesFromRange(
469*82d56013Sjoerg const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
470*82d56013Sjoerg ProfiledBinary *Binary) {
471*82d56013Sjoerg RangeSample Ranges;
472*82d56013Sjoerg findDisjointRanges(Ranges, RangeCounter);
473*82d56013Sjoerg for (const auto &Range : Ranges) {
474*82d56013Sjoerg uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
475*82d56013Sjoerg uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
476*82d56013Sjoerg uint64_t Count = Range.second;
477*82d56013Sjoerg // Disjoint ranges have introduce zero-filled gap that
478*82d56013Sjoerg // doesn't belong to current context, filter them out.
479*82d56013Sjoerg if (Count == 0)
480*82d56013Sjoerg continue;
481*82d56013Sjoerg
482*82d56013Sjoerg InstructionPointer IP(Binary, RangeBegin, true);
483*82d56013Sjoerg
484*82d56013Sjoerg // Disjoint ranges may have range in the middle of two instr,
485*82d56013Sjoerg // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
486*82d56013Sjoerg // can be Addr1+1 to Addr2-1. We should ignore such range.
487*82d56013Sjoerg if (IP.Address > RangeEnd)
488*82d56013Sjoerg continue;
489*82d56013Sjoerg
490*82d56013Sjoerg while (IP.Address <= RangeEnd) {
491*82d56013Sjoerg const AddressProbesMap &Address2ProbesMap =
492*82d56013Sjoerg Binary->getAddress2ProbesMap();
493*82d56013Sjoerg auto It = Address2ProbesMap.find(IP.Address);
494*82d56013Sjoerg if (It != Address2ProbesMap.end()) {
495*82d56013Sjoerg for (const auto &Probe : It->second) {
496*82d56013Sjoerg if (!Probe.isBlock())
497*82d56013Sjoerg continue;
498*82d56013Sjoerg ProbeCounter[&Probe] += Count;
499*82d56013Sjoerg }
500*82d56013Sjoerg }
501*82d56013Sjoerg
502*82d56013Sjoerg IP.advance();
503*82d56013Sjoerg }
504*82d56013Sjoerg }
505*82d56013Sjoerg }
506*82d56013Sjoerg
populateBodySamplesWithProbes(const RangeSample & RangeCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)507*82d56013Sjoerg void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
508*82d56013Sjoerg const RangeSample &RangeCounter,
509*82d56013Sjoerg SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
510*82d56013Sjoerg ProbeCounterMap ProbeCounter;
511*82d56013Sjoerg // Extract the top frame probes by looking up each address among the range in
512*82d56013Sjoerg // the Address2ProbeMap
513*82d56013Sjoerg extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
514*82d56013Sjoerg std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
515*82d56013Sjoerg for (auto PI : ProbeCounter) {
516*82d56013Sjoerg const PseudoProbe *Probe = PI.first;
517*82d56013Sjoerg uint64_t Count = PI.second;
518*82d56013Sjoerg // Ignore dangling probes since they will be reported later if needed.
519*82d56013Sjoerg if (Probe->isDangling())
520*82d56013Sjoerg continue;
521*82d56013Sjoerg FunctionSamples &FunctionProfile =
522*82d56013Sjoerg getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
523*82d56013Sjoerg // Record the current frame and FunctionProfile whenever samples are
524*82d56013Sjoerg // collected for non-danglie probes. This is for reporting all of the
525*82d56013Sjoerg // dangling probes of the frame later.
526*82d56013Sjoerg FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
527*82d56013Sjoerg FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
528*82d56013Sjoerg FunctionProfile.addTotalSamples(Count);
529*82d56013Sjoerg if (Probe->isEntry()) {
530*82d56013Sjoerg FunctionProfile.addHeadSamples(Count);
531*82d56013Sjoerg // Look up for the caller's function profile
532*82d56013Sjoerg const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
533*82d56013Sjoerg if (InlinerDesc != nullptr) {
534*82d56013Sjoerg // Since the context id will be compressed, we have to use callee's
535*82d56013Sjoerg // context id to infer caller's context id to ensure they share the
536*82d56013Sjoerg // same context prefix.
537*82d56013Sjoerg StringRef CalleeContextId =
538*82d56013Sjoerg FunctionProfile.getContext().getNameWithContext();
539*82d56013Sjoerg StringRef CallerContextId;
540*82d56013Sjoerg FrameLocation &&CallerLeafFrameLoc =
541*82d56013Sjoerg getCallerContext(CalleeContextId, CallerContextId);
542*82d56013Sjoerg uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
543*82d56013Sjoerg assert(CallerIndex &&
544*82d56013Sjoerg "Inferred caller's location index shouldn't be zero!");
545*82d56013Sjoerg FunctionSamples &CallerProfile =
546*82d56013Sjoerg getFunctionProfileForContext(CallerContextId);
547*82d56013Sjoerg CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
548*82d56013Sjoerg CallerProfile.addBodySamples(CallerIndex, 0, Count);
549*82d56013Sjoerg CallerProfile.addTotalSamples(Count);
550*82d56013Sjoerg CallerProfile.addCalledTargetSamples(
551*82d56013Sjoerg CallerIndex, 0,
552*82d56013Sjoerg FunctionProfile.getContext().getNameWithoutContext(), Count);
553*82d56013Sjoerg }
554*82d56013Sjoerg }
555*82d56013Sjoerg
556*82d56013Sjoerg // Report dangling probes for frames that have real samples collected.
557*82d56013Sjoerg // Dangling probes are the probes associated to an empty block. With this
558*82d56013Sjoerg // place holder, sample count on a dangling probe will not be trusted by the
559*82d56013Sjoerg // compiler and we will rely on the counts inference algorithm to get the
560*82d56013Sjoerg // probe a reasonable count. Use InvalidProbeCount to mark sample count for
561*82d56013Sjoerg // a dangling probe.
562*82d56013Sjoerg for (auto &I : FrameSamples) {
563*82d56013Sjoerg auto *FunctionProfile = I.second;
564*82d56013Sjoerg for (auto *Probe : I.first->getProbes()) {
565*82d56013Sjoerg if (Probe->isDangling()) {
566*82d56013Sjoerg FunctionProfile->addBodySamplesForProbe(
567*82d56013Sjoerg Probe->Index, FunctionSamples::InvalidProbeCount);
568*82d56013Sjoerg }
569*82d56013Sjoerg }
570*82d56013Sjoerg }
571*82d56013Sjoerg }
572*82d56013Sjoerg }
573*82d56013Sjoerg
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)574*82d56013Sjoerg void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
575*82d56013Sjoerg const BranchSample &BranchCounter,
576*82d56013Sjoerg SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
577*82d56013Sjoerg for (auto BI : BranchCounter) {
578*82d56013Sjoerg uint64_t SourceOffset = BI.first.first;
579*82d56013Sjoerg uint64_t TargetOffset = BI.first.second;
580*82d56013Sjoerg uint64_t Count = BI.second;
581*82d56013Sjoerg uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
582*82d56013Sjoerg const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
583*82d56013Sjoerg if (CallProbe == nullptr)
584*82d56013Sjoerg continue;
585*82d56013Sjoerg FunctionSamples &FunctionProfile =
586*82d56013Sjoerg getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
587*82d56013Sjoerg FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
588*82d56013Sjoerg FunctionProfile.addTotalSamples(Count);
589*82d56013Sjoerg StringRef CalleeName = FunctionSamples::getCanonicalFnName(
590*82d56013Sjoerg Binary->getFuncFromStartOffset(TargetOffset));
591*82d56013Sjoerg if (CalleeName.size() == 0)
592*82d56013Sjoerg continue;
593*82d56013Sjoerg FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
594*82d56013Sjoerg Count);
595*82d56013Sjoerg }
596*82d56013Sjoerg }
597*82d56013Sjoerg
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbeFuncDesc * LeafFuncDesc,bool WasLeafInlined)598*82d56013Sjoerg FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
599*82d56013Sjoerg SmallVectorImpl<std::string> &ContextStrStack,
600*82d56013Sjoerg const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
601*82d56013Sjoerg assert(ContextStrStack.size() && "Profile context must have the leaf frame");
602*82d56013Sjoerg // Compress the context string except for the leaf frame
603*82d56013Sjoerg std::string LeafFrame = ContextStrStack.back();
604*82d56013Sjoerg ContextStrStack.pop_back();
605*82d56013Sjoerg CSProfileGenerator::compressRecursionContext(ContextStrStack);
606*82d56013Sjoerg
607*82d56013Sjoerg std::ostringstream OContextStr;
608*82d56013Sjoerg for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
609*82d56013Sjoerg if (OContextStr.str().size())
610*82d56013Sjoerg OContextStr << " @ ";
611*82d56013Sjoerg OContextStr << ContextStrStack[I];
612*82d56013Sjoerg }
613*82d56013Sjoerg // For leaf inlined context with the top frame, we should strip off the top
614*82d56013Sjoerg // frame's probe id, like:
615*82d56013Sjoerg // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
616*82d56013Sjoerg if (OContextStr.str().size())
617*82d56013Sjoerg OContextStr << " @ ";
618*82d56013Sjoerg OContextStr << StringRef(LeafFrame).split(":").first.str();
619*82d56013Sjoerg
620*82d56013Sjoerg FunctionSamples &FunctionProile =
621*82d56013Sjoerg getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
622*82d56013Sjoerg FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
623*82d56013Sjoerg return FunctionProile;
624*82d56013Sjoerg }
625*82d56013Sjoerg
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbe * LeafProbe,ProfiledBinary * Binary)626*82d56013Sjoerg FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
627*82d56013Sjoerg SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
628*82d56013Sjoerg ProfiledBinary *Binary) {
629*82d56013Sjoerg // Explicitly copy the context for appending the leaf context
630*82d56013Sjoerg SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
631*82d56013Sjoerg ContextStrStack.end());
632*82d56013Sjoerg Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
633*82d56013Sjoerg const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
634*82d56013Sjoerg bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
635*82d56013Sjoerg return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
636*82d56013Sjoerg WasLeafInlined);
637*82d56013Sjoerg }
638*82d56013Sjoerg
639*82d56013Sjoerg } // end namespace sampleprof
640*82d56013Sjoerg } // end namespace llvm
641