xref: /llvm-project/bolt/lib/Core/BinaryFunctionProfile.cpp (revision cbd302410e9f27013223a96edcd78dfb597979e1)
1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements BinaryFunction member functions related to processing
10 // the execution profile.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #undef  DEBUG_TYPE
21 #define DEBUG_TYPE "bolt-prof"
22 
23 using namespace llvm;
24 using namespace bolt;
25 
namespace opts {

extern cl::OptionCategory BoltOptCategory;

// Selects which kinds of indirect control transfers are eligible for
// indirect-call promotion; consulted below to decide whether per-entry
// jump-table counts need to be populated.
cl::opt<IndirectCallPromotionType> ICP(
    "indirect-call-promotion", cl::init(ICP_NONE),
    cl::desc("indirect call promotion"),
    cl::values(
        clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
        clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
        clEnumValN(ICP_JUMP_TABLES, "jump-tables",
                   "perform ICP on jump tables"),
        clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
    cl::ZeroOrMore, cl::cat(BoltOptCategory));

// Short-form spelling for --indirect-call-promotion.
static cl::alias ICPAlias("icp",
                          cl::desc("Alias for --indirect-call-promotion"),
                          cl::aliasopt(ICP));

extern cl::opt<JumpTableSupportLevel> JumpTables;

// When set, a function with branch data but a zero execution count is
// treated as executed (count forced to at least 1).
static cl::opt<bool> FixFuncCounts(
    "fix-func-counts",
    cl::desc("adjust function counts based on basic blocks execution count"),
    cl::Hidden, cl::cat(BoltOptCategory));

// On by default: raises block counts to be consistent with the counts of
// their incoming/outgoing edges and contained calls.
static cl::opt<bool> FixBlockCounts(
    "fix-block-counts",
    cl::desc("adjust block counts based on outgoing branch counts"),
    cl::init(true), cl::Hidden, cl::cat(BoltOptCategory));

// Enables inferFallThroughCounts(), which estimates counts for
// fall-through edges the LBR profile cannot observe directly.
static cl::opt<bool>
    InferFallThroughs("infer-fall-throughs",
                      cl::desc("infer execution count for fall-through blocks"),
                      cl::Hidden, cl::cat(BoltOptCategory));

} // namespace opts
63 
64 namespace llvm {
65 namespace bolt {
66 
/// Post-process the execution profile attached to this function: derive basic
/// block counts from edge counts, apply fixups controlled by command-line
/// options, and propagate branch counts into jump-table profile data. Only
/// LBR-based profiles are processed; invalid profiles are cleared.
void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  // Non-LBR (sample-only) profiles carry no edge counts to post-process.
  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0)
    ExecutionCount = 1;

  // Compute preliminary execution count for each basic block.
  // Entry points and landing pads keep their recorded counts (their incoming
  // control flow is not fully visible as CFG edges); every other block is
  // zeroed so it can be recomputed from incoming edge counts below.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
  // Accumulate each block's count as the sum of its incoming edge counts.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles.
  // A block consisting of a single NOP with a known count but a zero-count
  // fall-through edge gets its count copied onto that edge (and added to the
  // successor), since old profiles did not record such fall-throughs.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;

    if (BB->getKnownExecutionCount() == 0)
      continue;

    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;

    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that execution count of a block is at least the branch count
      // of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that execution count of a block is at least the number of
      // function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instruction
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt)
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    // The aggregate count of all branches through this block's jump table.
    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    // Per-entry counts are only needed by ICP on jump tables or aggressive
    // jump table splitting; skip the extra work otherwise.
    if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    // JTAddress may point into the middle of the table object; Delta is the
    // index of the first entry belonging to this particular jump table.
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}
195 
/// Merge this function's profile data into \p BF, which is assumed to have an
/// identical CFG (asserted via matching block/successor indices). Function,
/// block, edge, and jump-table counts are accumulated into BF; this function
/// is left unmodified.
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  // Walk both functions' block lists in lock-step; the asserts verify the
  // CFGs line up block-for-block and edge-for-edge.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
      (void)BBMergeSI;

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info.
  // Again a lock-step walk; destination Counts vectors are sized lazily
  // since an unprofiled table may still have an empty Counts array.
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}
274 
275 void BinaryFunction::inferFallThroughCounts() {
276   // Work on a basic block at a time, propagating frequency information
277   // forwards.
278   // It is important to walk in the layout order.
279   for (BinaryBasicBlock *BB : BasicBlocks) {
280     const uint64_t BBExecCount = BB->getExecutionCount();
281 
282     // Propagate this information to successors, filling in fall-through edges
283     // with frequency information
284     if (BB->succ_size() == 0)
285       continue;
286 
287     // Calculate frequency of outgoing branches from this node according to
288     // LBR data.
289     uint64_t ReportedBranches = 0;
290     for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
291       if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
292         ReportedBranches += SuccBI.Count;
293 
294     // Get taken count of conditional tail call if the block ends with one.
295     uint64_t CTCTakenCount = 0;
296     const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
297     if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
298       CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
299           *CTCInstr, "CTCTakenCount");
300     }
301 
302     // Calculate frequency of throws from this node according to LBR data
303     // for branching into associated landing pads. Since it is possible
304     // for a landing pad to be associated with more than one basic blocks,
305     // we may overestimate the frequency of throws for such blocks.
306     uint64_t ReportedThrows = 0;
307     for (const BinaryBasicBlock *LP : BB->landing_pads())
308       ReportedThrows += LP->getExecutionCount();
309 
310     const uint64_t TotalReportedJumps =
311         ReportedBranches + CTCTakenCount + ReportedThrows;
312 
313     // Infer the frequency of the fall-through edge, representing not taking the
314     // branch.
315     uint64_t Inferred = 0;
316     if (BBExecCount > TotalReportedJumps)
317       Inferred = BBExecCount - TotalReportedJumps;
318 
319     LLVM_DEBUG(
320         if (BBExecCount < TotalReportedJumps) dbgs()
321             << "Fall-through inference is slightly inconsistent. "
322                "exec frequency is less than the outgoing edges frequency ("
323             << BBExecCount << " < " << ReportedBranches
324             << ") for  BB at offset 0x"
325             << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);
326 
327     if (BB->succ_size() <= 2) {
328       // Skip if the last instruction is an unconditional jump.
329       const MCInst *LastInstr = BB->getLastNonPseudoInstr();
330       if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
331                         BC.MIB->isIndirectBranch(*LastInstr)))
332         continue;
333       // If there is an FT it will be the last successor.
334       auto &SuccBI = *BB->branch_info_rbegin();
335       auto &Succ = *BB->succ_rbegin();
336       if (SuccBI.Count == 0) {
337         SuccBI.Count = Inferred;
338         SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
339         Succ->ExecutionCount =
340             std::max(Succ->getKnownExecutionCount(), Inferred);
341       }
342     }
343   }
344 }
345 
346 void BinaryFunction::clearProfile() {
347   // Keep function execution profile the same. Only clear basic block and edge
348   // counts.
349   for (BinaryBasicBlock *BB : BasicBlocks) {
350     BB->ExecutionCount = 0;
351     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
352       BI.Count = 0;
353       BI.MispredictedCount = 0;
354     }
355   }
356 }
357 
358 } // namespace bolt
359 } // namespace llvm
360