//===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BinaryFunction member functions related to processing
// the execution profile.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

cl::opt<IndirectCallPromotionType>
IndirectCallPromotion("indirect-call-promotion",
  cl::init(ICP_NONE),
  cl::desc("indirect call promotion"),
  cl::values(
    clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
    clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
    clEnumValN(ICP_JUMP_TABLES, "jump-tables", "perform ICP on jump tables"),
    clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
  cl::ZeroOrMore,
  cl::cat(BoltOptCategory));

extern cl::opt<JumpTableSupportLevel> JumpTables;

static cl::opt<bool>
FixFuncCounts("fix-func-counts",
  cl::desc("adjust function counts based on basic block execution counts"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
FixBlockCounts("fix-block-counts",
  cl::desc("adjust block counts based on outgoing branch counts"),
  cl::init(true),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

static cl::opt<bool>
InferFallThroughs("infer-fall-throughs",
  cl::desc("infer execution count for fall-through blocks"),
  cl::init(false),
  cl::ZeroOrMore,
  cl::Hidden,
  cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function, indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0) {
    ExecutionCount = 1;
  }

  // Compute preliminary execution count for each basic block.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
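  // Accumulate incoming branch counts into each successor's execution count.
  // The primary entry block keeps its preliminary count, since all of its
  // incoming edges have already been accounted for.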
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles: if a block contains only a single NOP and has a
  // single fall-through successor, but the edge to that successor carries no
  // count, propagate the block's execution count to the edge and successor.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;

    if (BB->getKnownExecutionCount() == 0)
      continue;

    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;

    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

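  // Optionally tighten block counts: a block must execute at least as many
  // times as any single branch leaving it or any annotated call inside it,
  // and a successor at least as many times as the branch leading into it.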
  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that the execution count of a block is at least the branch
      // count of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that the execution count of a block is at least the number
      // of function calls made from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instructions.
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt) {
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
        }
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
        opts::JumpTables < JTS_AGGRESSIVE)
      continue;

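    // Attribute branch counts to individual jump table entries, starting at
    // the entry corresponding to the address used by this branch and stopping
    // at the label that marks the start of the next jump table.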
    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}

void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
  }

  // Since we are merging a valid profile, the new profile should be valid too.
  // It was either already valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info.
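  // The two functions are expected to have identical jump tables, so their
  // per-entry counts are merged pairwise, in order.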
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}

void BinaryFunction::inferFallThroughCounts() {
  // Work on one basic block at a time, propagating frequency information
  // forward. It is important to walk in layout order.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const uint64_t BBExecCount = BB->getExecutionCount();

    // Propagate this information to successors, filling in fall-through edges
    // with frequency information.
    if (BB->succ_size() == 0)
      continue;

    // Calculate frequency of outgoing branches from this node according to
    // LBR data.
    uint64_t ReportedBranches = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) {
      if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        ReportedBranches += SuccBI.Count;
    }

    // Get taken count of conditional tail call if the block ends with one.
    uint64_t CTCTakenCount = 0;
    const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
    if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
    }

    // Calculate frequency of throws from this node according to LBR data
    // for branching into associated landing pads. Since it is possible
    // for a landing pad to be associated with more than one basic block,
    // we may overestimate the frequency of throws for such blocks.
    uint64_t ReportedThrows = 0;
    for (const BinaryBasicBlock *LP : BB->landing_pads()) {
      ReportedThrows += LP->getExecutionCount();
    }

    const uint64_t TotalReportedJumps =
        ReportedBranches + CTCTakenCount + ReportedThrows;

    // Infer the frequency of the fall-through edge, representing not taking the
    // branch.
    uint64_t Inferred = 0;
    if (BBExecCount > TotalReportedJumps)
      Inferred = BBExecCount - TotalReportedJumps;

    LLVM_DEBUG(
        if (BBExecCount < TotalReportedJumps) dbgs()
            << "Fall-through inference is slightly inconsistent. "
               "Exec frequency is less than the frequency of outgoing edges ("
            << BBExecCount << " < " << TotalReportedJumps
            << ") for BB at offset 0x"
            << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);

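    // Only a block that ends with a conditional branch (or with no branch at
    // all) can have a fall-through; such a block has at most two successors.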
    if (BB->succ_size() <= 2) {
      // Skip if the last instruction is an unconditional or indirect jump, as
      // such a block has no fall-through.
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
                        BC.MIB->isIndirectBranch(*LastInstr)))
        continue;
      // If there is a fall-through, it will be the last successor.
      auto &SuccBI = *BB->branch_info_rbegin();
      auto &Succ = *BB->succ_rbegin();
      if (SuccBI.Count == 0) {
        SuccBI.Count = Inferred;
        SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
        Succ->ExecutionCount += Inferred;
      }
    }
  }
}

void BinaryFunction::clearProfile() {
  // Keep the function execution count intact. Only clear basic block and edge
  // counts.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BB->ExecutionCount = 0;
    for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
      BI.Count = 0;
      BI.MispredictedCount = 0;
    }
  }
}

} // namespace bolt
} // namespace llvm