//===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BinaryFunction member functions related to processing
// the execution profile.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#undef  DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

cl::opt<IndirectCallPromotionType> ICP(
    "indirect-call-promotion", cl::init(ICP_NONE),
    cl::desc("indirect call promotion"),
    cl::values(
        clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
        clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
        clEnumValN(ICP_JUMP_TABLES, "jump-tables",
                   "perform ICP on jump tables"),
        clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
    cl::ZeroOrMore, cl::cat(BoltOptCategory));

extern cl::opt<JumpTableSupportLevel> JumpTables;

static cl::opt<bool> FixFuncCounts(
    "fix-func-counts",
    cl::desc("adjust function counts based on basic block execution counts"),
    cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> FixBlockCounts(
    "fix-block-counts",
    cl::desc("adjust block counts based on outgoing branch counts"),
    cl::init(true), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool>
    InferFallThroughs("infer-fall-throughs",
                      cl::desc("infer execution count for fall-through blocks"),
                      cl::Hidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function, indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0)
    ExecutionCount = 1;

  // Compute preliminary execution count for each basic block.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
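  // Accumulate the counts of incoming branch edges into the execution count
  // of each successor block.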
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles: a block consisting of a single NOP with one
  // successor may be missing the count on its fall-through edge. Propagate
  // the block's execution count to that edge and to its successor.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;

    if (BB->getKnownExecutionCount() == 0)
      continue;

    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;

    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that the execution count of a block is at least the branch
      // count of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that the execution count of a block is at least the number
      // of function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instructions.
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt)
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

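    // The block ends with an indirect branch through this jump table, so every
    // recorded outgoing branch contributes to the table's total count.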
    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
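    // JTAddress may point into the middle of a larger jump table object;
    // Delta is the index of the first entry used by this branch. Walk the
    // entries until the start of the next contained jump table.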
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}

void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());

  // Since we are merging a valid profile, the new profile should be valid too.
  // It was either already valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info.
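  // Jump tables of the two functions are expected to match in number and
  // order; merge their per-entry counts in lockstep.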
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}

void BinaryFunction::inferFallThroughCounts() {
  // Work on one basic block at a time, propagating frequency information
  // forwards.
  // It is important to walk in the layout order.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const uint64_t BBExecCount = BB->getExecutionCount();

    // Propagate this information to successors, filling in fall-through edges
    // with frequency information.
    if (BB->succ_size() == 0)
      continue;

    // Calculate frequency of outgoing branches from this node according to
    // LBR data.
    uint64_t ReportedBranches = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
      if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        ReportedBranches += SuccBI.Count;

    // Get taken count of conditional tail call if the block ends with one.
    uint64_t CTCTakenCount = 0;
    const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
    if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
    }

    // Calculate frequency of throws from this node according to LBR data
    // for branching into associated landing pads. Since it is possible
    // for a landing pad to be associated with more than one basic block,
    // we may overestimate the frequency of throws for such blocks.
    uint64_t ReportedThrows = 0;
    for (const BinaryBasicBlock *LP : BB->landing_pads())
      ReportedThrows += LP->getExecutionCount();

    const uint64_t TotalReportedJumps =
        ReportedBranches + CTCTakenCount + ReportedThrows;

    // Infer the frequency of the fall-through edge, representing not taking the
    // branch.
    uint64_t Inferred = 0;
    if (BBExecCount > TotalReportedJumps)
      Inferred = BBExecCount - TotalReportedJumps;

    LLVM_DEBUG(
        if (BBExecCount < TotalReportedJumps) dbgs()
            << "Fall-through inference is slightly inconsistent. "
               "exec frequency is less than the outgoing edges frequency ("
            << BBExecCount << " < " << TotalReportedJumps
            << ") for BB at offset 0x"
            << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);

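    // A fall-through edge can only be present when the block has at most two
    // successors, and if it exists it is the last successor.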
    if (BB->succ_size() <= 2) {
      // Skip if the last instruction is an unconditional jump.
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
                        BC.MIB->isIndirectBranch(*LastInstr)))
        continue;
      // If there is an FT, it will be the last successor.
      auto &SuccBI = *BB->branch_info_rbegin();
      auto &Succ = *BB->succ_rbegin();
      if (SuccBI.Count == 0) {
        SuccBI.Count = Inferred;
        SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
        Succ->ExecutionCount += Inferred;
      }
    }
  }
}

void BinaryFunction::clearProfile() {
  // Keep the function execution count the same. Only clear basic block and
  // edge counts.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BB->ExecutionCount = 0;
    for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
      BI.Count = 0;
      BI.MispredictedCount = 0;
    }
  }
}

} // namespace bolt
} // namespace llvm