xref: /llvm-project/bolt/lib/Core/DynoStats.cpp (revision 40c2e0fafe5675306f4ad43910bf6e2fd2025ff3)
1 //===--- DynoStats.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Core/DynoStats.h"
12 #include "bolt/Core/BinaryBasicBlock.h"
13 #include "bolt/Core/BinaryFunction.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/raw_ostream.h"
18 #include <algorithm>
19 #include <string>
20 
21 #undef  DEBUG_TYPE
22 #define DEBUG_TYPE "bolt"
23 
24 using namespace llvm;
25 using namespace bolt;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltCategory;
30 
31 static cl::opt<uint32_t>
32 DynoStatsScale("dyno-stats-scale",
33   cl::desc("scale to be applied while reporting dyno stats"),
34   cl::Optional,
35   cl::init(1),
36   cl::Hidden,
37   cl::cat(BoltCategory));
38 
39 static cl::opt<uint32_t>
40 PrintDynoOpcodeStat("print-dyno-opcode-stats",
41   cl::desc("print per instruction opcode dyno stats and the function"
42               "names:BB offsets of the nth highest execution counts"),
43   cl::init(0),
44   cl::Hidden,
45   cl::cat(BoltCategory));
46 
47 } // namespace opts
48 
49 namespace llvm {
50 namespace bolt {
51 
52 constexpr const char *DynoStats::Desc[];
53 
54 bool DynoStats::operator<(const DynoStats &Other) const {
55   return std::lexicographical_compare(
56       &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
57       &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
58 }
59 
60 bool DynoStats::operator==(const DynoStats &Other) const {
61   return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
62                     &Other.Stats[FIRST_DYNO_STAT]);
63 }
64 
65 bool DynoStats::lessThan(const DynoStats &Other,
66                          ArrayRef<Category> Keys) const {
67   return std::lexicographical_compare(
68       Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
69       [this, &Other](const Category A, const Category) {
70         return Stats[A] < Other.Stats[A];
71       });
72 }
73 
74 void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
75                       MCInstPrinter *Printer) const {
76   auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
77                                 uint64_t OtherStat) {
78     OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
79     if (Other) {
80       if (Stat != OtherStat) {
81         OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
82         OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
83                                        (float)(OtherStat));
84       } else {
85         OS << " (=)";
86       }
87     }
88     OS << '\n';
89   };
90 
91   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
92        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
93 
94     if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
95       continue;
96 
97     printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
98   }
99   if (opts::PrintDynoOpcodeStat && Printer) {
100     outs() << "\nProgram-wide opcode histogram:\n";
101     OS << "              Opcode,   Execution Count,     Max Exec Count, "
102           "Function Name:Offset ...\n";
103     std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
104     for (const OpcodeStatTy &Stat : OpcodeHistogram)
105       SortedHistogram.emplace_back(Stat.second.first, Stat.first);
106 
107     // Sort using lexicographic ordering
108     std::sort(SortedHistogram.begin(), SortedHistogram.end());
109 
110     // Dump in ascending order: Start with Opcode with Highest execution
111     // count.
112     for (auto Stat = SortedHistogram.rbegin(); Stat != SortedHistogram.rend();
113          ++Stat) {
114       OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat->second).data(),
115                    Stat->first * opts::DynoStatsScale);
116 
117       MaxOpcodeHistogramTy MaxMultiMap =
118           OpcodeHistogram.at(Stat->second).second;
119       // Start with function name:BB offset with highest execution count.
120       for (auto Max = MaxMultiMap.rbegin(); Max != MaxMultiMap.rend(); ++Max) {
121         OS << format(", %'18lld, ", Max->first * opts::DynoStatsScale)
122            << Max->second.first.str() << ':' << Max->second.second;
123       }
124       OS << '\n';
125     }
126   }
127 }
128 
129 void DynoStats::operator+=(const DynoStats &Other) {
130   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
131        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
132     Stats[Stat] += Other[Stat];
133   }
134   for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
135     auto I = OpcodeHistogram.find(Stat.first);
136     if (I == OpcodeHistogram.end()) {
137       OpcodeHistogram.emplace(Stat);
138     } else {
139       // Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th
140       // maximum counts.
141       I->second.first += Stat.second.first;
142       auto &MMap = I->second.second;
143       auto &OtherMMap = Stat.second.second;
144       auto Size = MMap.size();
145       assert(Size <= opts::PrintDynoOpcodeStat);
146       for (auto Iter = OtherMMap.rbegin(); Iter != OtherMMap.rend(); ++Iter) {
147         if (Size++ >= opts::PrintDynoOpcodeStat) {
148           auto First = MMap.begin();
149           if (Iter->first <= First->first)
150             break;
151           MMap.erase(First);
152         }
153         MMap.emplace(*Iter);
154       }
155     }
156   }
157 }
158 
159 DynoStats getDynoStats(const BinaryFunction &BF) {
160   auto &BC = BF.getBinaryContext();
161 
162   DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
163 
164   // Return empty-stats about the function we don't completely understand.
165   if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
166     return Stats;
167 
168   // Update enumeration of basic blocks for correct detection of branch'
169   // direction.
170   BF.updateLayoutIndices();
171 
172   for (BinaryBasicBlock *const &BB : BF.layout()) {
173     // The basic block execution count equals to the sum of incoming branch
174     // frequencies. This may deviate from the sum of outgoing branches of the
175     // basic block especially since the block may contain a function that
176     // does not return or a function that throws an exception.
177     const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
178 
179     // Ignore empty blocks and blocks that were not executed.
180     if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
181       continue;
182 
183     // Count AArch64 linker-inserted veneers
184     if (BF.isAArch64Veneer())
185       Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
186 
187     // Count various instruction types by iterating through all instructions.
188     // When -print-dyno-opcode-stats is on, count per each opcode and record
189     // maximum execution counts.
190     for (const MCInst &Instr : *BB) {
191       if (opts::PrintDynoOpcodeStat) {
192         unsigned Opcode = Instr.getOpcode();
193         auto I = Stats.OpcodeHistogram.find(Opcode);
194         if (I == Stats.OpcodeHistogram.end()) {
195           DynoStats::MaxOpcodeHistogramTy MMap;
196           MMap.emplace(BBExecutionCount,
197                        std::make_pair(BF.getOneName(), BB->getOffset()));
198           Stats.OpcodeHistogram.emplace(Opcode,
199                                         std::make_pair(BBExecutionCount, MMap));
200         } else {
201           I->second.first += BBExecutionCount;
202           bool Insert = true;
203           if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
204             auto First = I->second.second.begin();
205             if (First->first < BBExecutionCount)
206               I->second.second.erase(First);
207             else
208               Insert = false;
209           }
210           if (Insert) {
211             I->second.second.emplace(
212                 BBExecutionCount,
213                 std::make_pair(BF.getOneName(), BB->getOffset()));
214           }
215         }
216       }
217 
218       if (BC.MIB->isStore(Instr)) {
219         Stats[DynoStats::STORES] += BBExecutionCount;
220       }
221       if (BC.MIB->isLoad(Instr)) {
222         Stats[DynoStats::LOADS] += BBExecutionCount;
223       }
224       if (!BC.MIB->isCall(Instr))
225         continue;
226 
227       uint64_t CallFreq = BBExecutionCount;
228       if (BC.MIB->getConditionalTailCall(Instr)) {
229         CallFreq =
230             BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
231       }
232       Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
233       if (BC.MIB->isIndirectCall(Instr)) {
234         Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
235       } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
236         const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
237         if (BF && BF->isPLTFunction()) {
238           Stats[DynoStats::PLT_CALLS] += CallFreq;
239 
240           // We don't process PLT functions and hence have to adjust relevant
241           // dynostats here for:
242           //
243           //   jmp *GOT_ENTRY(%rip)
244           //
245           // NOTE: this is arch-specific.
246           Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
247           Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
248           Stats[DynoStats::LOADS] += CallFreq;
249           Stats[DynoStats::INSTRUCTIONS] += CallFreq;
250         }
251       }
252     }
253 
254     Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
255 
256     // Jump tables.
257     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
258     if (BC.MIB->getJumpTable(*LastInstr)) {
259       Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
260       LLVM_DEBUG(
261         static uint64_t MostFrequentJT;
262         if (BBExecutionCount > MostFrequentJT) {
263           MostFrequentJT = BBExecutionCount;
264           dbgs() << "BOLT-INFO: most frequently executed jump table is in "
265                  << "function " << BF << " in basic block " << BB->getName()
266                  << " executed totally " << BBExecutionCount << " times.\n";
267         }
268       );
269       continue;
270     }
271 
272     if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
273       Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
274       continue;
275     }
276 
277     // Update stats for branches.
278     const MCSymbol *TBB = nullptr;
279     const MCSymbol *FBB = nullptr;
280     MCInst *CondBranch = nullptr;
281     MCInst *UncondBranch = nullptr;
282     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) {
283       continue;
284     }
285 
286     if (!CondBranch && !UncondBranch) {
287       continue;
288     }
289 
290     // Simple unconditional branch.
291     if (!CondBranch) {
292       Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
293       continue;
294     }
295 
296     // CTCs: instruction annotations could be stripped, hence check the number
297     // of successors to identify conditional tail calls.
298     if (BB->succ_size() == 1) {
299       if (BB->branch_info_begin() != BB->branch_info_end())
300         Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
301       continue;
302     }
303 
304     // Conditional branch that could be followed by an unconditional branch.
305     uint64_t TakenCount = BB->getTakenBranchInfo().Count;
306     if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
307       TakenCount = 0;
308 
309     uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
310     if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
311       NonTakenCount = 0;
312 
313     if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
314       Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
315       Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
316     } else {
317       Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
318       Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
319     }
320 
321     if (UncondBranch) {
322       Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
323     }
324   }
325 
326   return Stats;
327 }
328 
329 } // namespace bolt
330 } // namespace llvm
331