xref: /llvm-project/bolt/lib/Core/BinaryFunctionProfile.cpp (revision ccb99dd1261a9b40a3141fc7a63437dab5770cb2)
1 //===--- BinaryFunctionProfile.cpp                                 --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
//
// This file implements profile post-processing for BinaryFunction: basic
// block and edge count adjustment, profile merging for identical functions,
// and fall-through edge count inference.
//
//===----------------------------------------------------------------------===//
10 
11 #include "bolt/Core/BinaryBasicBlock.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/Debug.h"
15 #include "llvm/Support/raw_ostream.h"
16 
17 #undef  DEBUG_TYPE
18 #define DEBUG_TYPE "bolt-prof"
19 
20 using namespace llvm;
21 using namespace bolt;
22 
23 namespace opts {
24 
25 extern cl::OptionCategory BoltOptCategory;
26 
27 cl::opt<IndirectCallPromotionType>
28 IndirectCallPromotion("indirect-call-promotion",
29   cl::init(ICP_NONE),
30   cl::desc("indirect call promotion"),
31   cl::values(
32     clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
33     clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
34     clEnumValN(ICP_JUMP_TABLES, "jump-tables", "perform ICP on jump tables"),
35     clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
36   cl::ZeroOrMore,
37   cl::cat(BoltOptCategory));
38 
39 extern cl::opt<JumpTableSupportLevel> JumpTables;
40 
41 static cl::opt<bool>
42 FixFuncCounts("fix-func-counts",
43   cl::desc("adjust function counts based on basic blocks execution count"),
44   cl::init(false),
45   cl::ZeroOrMore,
46   cl::Hidden,
47   cl::cat(BoltOptCategory));
48 
49 static cl::opt<bool>
50 FixBlockCounts("fix-block-counts",
51   cl::desc("adjust block counts based on outgoing branch counts"),
52   cl::init(true),
53   cl::ZeroOrMore,
54   cl::Hidden,
55   cl::cat(BoltOptCategory));
56 
57 static cl::opt<bool>
58 InferFallThroughs("infer-fall-throughs",
59   cl::desc("infer execution count for fall-through blocks"),
60   cl::init(false),
61   cl::ZeroOrMore,
62   cl::Hidden,
63   cl::cat(BoltOptCategory));
64 
65 } // namespace opts
66 
67 namespace llvm {
68 namespace bolt {
69 
// Post-process raw profile data attached to this function's CFG: re-derive
// block counts from edge counts, apply optional user-controlled fixups, and
// propagate branch data onto jump-table entries.
void BinaryFunction::postProcessProfile() {
  // An invalid profile is discarded entirely so later passes see consistent
  // (zeroed) block and edge counts.
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  // Everything below assumes branch (LBR) data; sample-only profiles are
  // left untouched.
  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0) {
    ExecutionCount = 1;
  }

  // Compute preliminary execution count for each basic block.
  // First, reset counts that will be re-derived from incoming edges: all
  // non-entry, non-landing-pad blocks, plus any block with no profile.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
  // Then accumulate each CFG edge's count into its destination block.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles: a hot single-NOP block with one successor implies
  // a fall-through edge whose count the old format failed to record; credit
  // the block's count to that edge and its successor.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;

    if (BB->getKnownExecutionCount() == 0)
      continue;

    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;

    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    // Only fill in the edge count if it is currently zero; a recorded count
    // takes precedence.
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that execution count of a block is at least the branch count
      // of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that execution count of a block is at least the number of
      // function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instruction
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt) {
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
        }
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    // The jump table's total count is the sum of all outgoing edge counts of
    // the block that branches through it.
    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    // Per-entry counts are only needed by ICP on jump tables or aggressive
    // jump-table splitting.
    if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
        opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    // Start at the entry corresponding to JTAddress (which may point into
    // the middle of the table's storage).
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}
201 
// Add this function's profile (function, block, edge, and jump-table counts)
// into BF. Both functions are expected to have identical CFGs; the merge
// walks the two CFGs in lock-step and asserts they stay in sync.
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
  }

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    // The two functions must have the same block layout.
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc; // only used in the assert below
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    // Both blocks must have the same number of successors.
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  // Both functions must have the same number of blocks.
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info. Tables are assumed to appear in the same
  // order in both functions.
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    // Tables must have matching entry counts.
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}
280 
281 void BinaryFunction::inferFallThroughCounts() {
282   // Work on a basic block at a time, propagating frequency information
283   // forwards.
284   // It is important to walk in the layout order.
285   for (BinaryBasicBlock *BB : BasicBlocks) {
286     const uint64_t BBExecCount = BB->getExecutionCount();
287 
288     // Propagate this information to successors, filling in fall-through edges
289     // with frequency information
290     if (BB->succ_size() == 0)
291       continue;
292 
293     // Calculate frequency of outgoing branches from this node according to
294     // LBR data.
295     uint64_t ReportedBranches = 0;
296     for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) {
297       if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
298         ReportedBranches += SuccBI.Count;
299     }
300 
301     // Get taken count of conditional tail call if the block ends with one.
302     uint64_t CTCTakenCount = 0;
303     const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
304     if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
305       CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
306           *CTCInstr, "CTCTakenCount");
307     }
308 
309     // Calculate frequency of throws from this node according to LBR data
310     // for branching into associated landing pads. Since it is possible
311     // for a landing pad to be associated with more than one basic blocks,
312     // we may overestimate the frequency of throws for such blocks.
313     uint64_t ReportedThrows = 0;
314     for (const BinaryBasicBlock *LP : BB->landing_pads()) {
315       ReportedThrows += LP->getExecutionCount();
316     }
317 
318     const uint64_t TotalReportedJumps =
319         ReportedBranches + CTCTakenCount + ReportedThrows;
320 
321     // Infer the frequency of the fall-through edge, representing not taking the
322     // branch.
323     uint64_t Inferred = 0;
324     if (BBExecCount > TotalReportedJumps)
325       Inferred = BBExecCount - TotalReportedJumps;
326 
327     LLVM_DEBUG(
328         if (BBExecCount < TotalReportedJumps) dbgs()
329             << "Fall-through inference is slightly inconsistent. "
330                "exec frequency is less than the outgoing edges frequency ("
331             << BBExecCount << " < " << ReportedBranches
332             << ") for  BB at offset 0x"
333             << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);
334 
335     if (BB->succ_size() <= 2) {
336       // Skip if the last instruction is an unconditional jump.
337       const MCInst *LastInstr = BB->getLastNonPseudoInstr();
338       if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
339                         BC.MIB->isIndirectBranch(*LastInstr)))
340         continue;
341       // If there is an FT it will be the last successor.
342       auto &SuccBI = *BB->branch_info_rbegin();
343       auto &Succ = *BB->succ_rbegin();
344       if (SuccBI.Count == 0) {
345         SuccBI.Count = Inferred;
346         SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
347         Succ->ExecutionCount += Inferred;
348       }
349     }
350   }
351 
352   return;
353 }
354 
355 void BinaryFunction::clearProfile() {
356   // Keep function execution profile the same. Only clear basic block and edge
357   // counts.
358   for (BinaryBasicBlock *BB : BasicBlocks) {
359     BB->ExecutionCount = 0;
360     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
361       BI.Count = 0;
362       BI.MispredictedCount = 0;
363     }
364   }
365 }
366 
367 } // namespace bolt
368 } // namespace llvm
369