xref: /llvm-project/bolt/lib/Core/DynoStats.cpp (revision 2f09f445b2d6b3ef197aecd8d1e06d08140380f3)
1 //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the DynoStats class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/DynoStats.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include <algorithm>
21 #include <string>
22 
23 #undef  DEBUG_TYPE
24 #define DEBUG_TYPE "bolt"
25 
26 using namespace llvm;
27 using namespace bolt;
28 
29 namespace opts {
30 
31 extern cl::OptionCategory BoltCategory;
32 
33 static cl::opt<uint32_t>
34 DynoStatsScale("dyno-stats-scale",
35   cl::desc("scale to be applied while reporting dyno stats"),
36   cl::Optional,
37   cl::init(1),
38   cl::Hidden,
39   cl::cat(BoltCategory));
40 
41 static cl::opt<uint32_t>
42 PrintDynoOpcodeStat("print-dyno-opcode-stats",
43   cl::desc("print per instruction opcode dyno stats and the function"
44               "names:BB offsets of the nth highest execution counts"),
45   cl::init(0),
46   cl::Hidden,
47   cl::cat(BoltCategory));
48 
49 } // namespace opts
50 
51 namespace llvm {
52 namespace bolt {
53 
54 constexpr const char *DynoStats::Desc[];
55 
56 bool DynoStats::operator<(const DynoStats &Other) const {
57   return std::lexicographical_compare(
58       &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
59       &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
60 }
61 
62 bool DynoStats::operator==(const DynoStats &Other) const {
63   return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
64                     &Other.Stats[FIRST_DYNO_STAT]);
65 }
66 
67 bool DynoStats::lessThan(const DynoStats &Other,
68                          ArrayRef<Category> Keys) const {
69   return std::lexicographical_compare(
70       Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
71       [this, &Other](const Category A, const Category) {
72         return Stats[A] < Other.Stats[A];
73       });
74 }
75 
76 void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
77                       MCInstPrinter *Printer) const {
78   auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
79                                 uint64_t OtherStat) {
80     OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
81     if (Other) {
82       if (Stat != OtherStat) {
83         OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
84         OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
85                                        (float)(OtherStat));
86       } else {
87         OS << " (=)";
88       }
89     }
90     OS << '\n';
91   };
92 
93   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
94        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
95 
96     if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
97       continue;
98 
99     printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
100   }
101   if (opts::PrintDynoOpcodeStat && Printer) {
102     outs() << "\nProgram-wide opcode histogram:\n";
103     OS << "              Opcode,   Execution Count,     Max Exec Count, "
104           "Function Name:Offset ...\n";
105     std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
106     for (const OpcodeStatTy &Stat : OpcodeHistogram)
107       SortedHistogram.emplace_back(Stat.second.first, Stat.first);
108 
109     // Sort using lexicographic ordering
110     std::sort(SortedHistogram.begin(), SortedHistogram.end());
111 
112     // Dump in ascending order: Start with Opcode with Highest execution
113     // count.
114     for (auto Stat = SortedHistogram.rbegin(); Stat != SortedHistogram.rend();
115          ++Stat) {
116       OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat->second).data(),
117                    Stat->first * opts::DynoStatsScale);
118 
119       MaxOpcodeHistogramTy MaxMultiMap =
120           OpcodeHistogram.at(Stat->second).second;
121       // Start with function name:BB offset with highest execution count.
122       for (auto Max = MaxMultiMap.rbegin(); Max != MaxMultiMap.rend(); ++Max) {
123         OS << format(", %'18lld, ", Max->first * opts::DynoStatsScale)
124            << Max->second.first.str() << ':' << Max->second.second;
125       }
126       OS << '\n';
127     }
128   }
129 }
130 
131 void DynoStats::operator+=(const DynoStats &Other) {
132   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
133        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
134     Stats[Stat] += Other[Stat];
135   }
136   for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
137     auto I = OpcodeHistogram.find(Stat.first);
138     if (I == OpcodeHistogram.end()) {
139       OpcodeHistogram.emplace(Stat);
140     } else {
141       // Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th
142       // maximum counts.
143       I->second.first += Stat.second.first;
144       auto &MMap = I->second.second;
145       auto &OtherMMap = Stat.second.second;
146       auto Size = MMap.size();
147       assert(Size <= opts::PrintDynoOpcodeStat);
148       for (auto Iter = OtherMMap.rbegin(); Iter != OtherMMap.rend(); ++Iter) {
149         if (Size++ >= opts::PrintDynoOpcodeStat) {
150           auto First = MMap.begin();
151           if (Iter->first <= First->first)
152             break;
153           MMap.erase(First);
154         }
155         MMap.emplace(*Iter);
156       }
157     }
158   }
159 }
160 
161 DynoStats getDynoStats(const BinaryFunction &BF) {
162   auto &BC = BF.getBinaryContext();
163 
164   DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
165 
166   // Return empty-stats about the function we don't completely understand.
167   if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
168     return Stats;
169 
170   // Update enumeration of basic blocks for correct detection of branch'
171   // direction.
172   BF.updateLayoutIndices();
173 
174   for (BinaryBasicBlock *const &BB : BF.layout()) {
175     // The basic block execution count equals to the sum of incoming branch
176     // frequencies. This may deviate from the sum of outgoing branches of the
177     // basic block especially since the block may contain a function that
178     // does not return or a function that throws an exception.
179     const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
180 
181     // Ignore empty blocks and blocks that were not executed.
182     if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
183       continue;
184 
185     // Count AArch64 linker-inserted veneers
186     if (BF.isAArch64Veneer())
187       Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
188 
189     // Count various instruction types by iterating through all instructions.
190     // When -print-dyno-opcode-stats is on, count per each opcode and record
191     // maximum execution counts.
192     for (const MCInst &Instr : *BB) {
193       if (opts::PrintDynoOpcodeStat) {
194         unsigned Opcode = Instr.getOpcode();
195         auto I = Stats.OpcodeHistogram.find(Opcode);
196         if (I == Stats.OpcodeHistogram.end()) {
197           DynoStats::MaxOpcodeHistogramTy MMap;
198           MMap.emplace(BBExecutionCount,
199                        std::make_pair(BF.getOneName(), BB->getOffset()));
200           Stats.OpcodeHistogram.emplace(Opcode,
201                                         std::make_pair(BBExecutionCount, MMap));
202         } else {
203           I->second.first += BBExecutionCount;
204           bool Insert = true;
205           if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
206             auto First = I->second.second.begin();
207             if (First->first < BBExecutionCount)
208               I->second.second.erase(First);
209             else
210               Insert = false;
211           }
212           if (Insert) {
213             I->second.second.emplace(
214                 BBExecutionCount,
215                 std::make_pair(BF.getOneName(), BB->getOffset()));
216           }
217         }
218       }
219 
220       if (BC.MIB->isStore(Instr)) {
221         Stats[DynoStats::STORES] += BBExecutionCount;
222       }
223       if (BC.MIB->isLoad(Instr)) {
224         Stats[DynoStats::LOADS] += BBExecutionCount;
225       }
226       if (!BC.MIB->isCall(Instr))
227         continue;
228 
229       uint64_t CallFreq = BBExecutionCount;
230       if (BC.MIB->getConditionalTailCall(Instr)) {
231         CallFreq =
232             BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
233       }
234       Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
235       if (BC.MIB->isIndirectCall(Instr)) {
236         Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
237       } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
238         const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
239         if (BF && BF->isPLTFunction()) {
240           Stats[DynoStats::PLT_CALLS] += CallFreq;
241 
242           // We don't process PLT functions and hence have to adjust relevant
243           // dynostats here for:
244           //
245           //   jmp *GOT_ENTRY(%rip)
246           //
247           // NOTE: this is arch-specific.
248           Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
249           Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
250           Stats[DynoStats::LOADS] += CallFreq;
251           Stats[DynoStats::INSTRUCTIONS] += CallFreq;
252         }
253       }
254     }
255 
256     Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
257 
258     // Jump tables.
259     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
260     if (BC.MIB->getJumpTable(*LastInstr)) {
261       Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
262       LLVM_DEBUG(
263         static uint64_t MostFrequentJT;
264         if (BBExecutionCount > MostFrequentJT) {
265           MostFrequentJT = BBExecutionCount;
266           dbgs() << "BOLT-INFO: most frequently executed jump table is in "
267                  << "function " << BF << " in basic block " << BB->getName()
268                  << " executed totally " << BBExecutionCount << " times.\n";
269         }
270       );
271       continue;
272     }
273 
274     if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
275       Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
276       continue;
277     }
278 
279     // Update stats for branches.
280     const MCSymbol *TBB = nullptr;
281     const MCSymbol *FBB = nullptr;
282     MCInst *CondBranch = nullptr;
283     MCInst *UncondBranch = nullptr;
284     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) {
285       continue;
286     }
287 
288     if (!CondBranch && !UncondBranch) {
289       continue;
290     }
291 
292     // Simple unconditional branch.
293     if (!CondBranch) {
294       Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
295       continue;
296     }
297 
298     // CTCs: instruction annotations could be stripped, hence check the number
299     // of successors to identify conditional tail calls.
300     if (BB->succ_size() == 1) {
301       if (BB->branch_info_begin() != BB->branch_info_end())
302         Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
303       continue;
304     }
305 
306     // Conditional branch that could be followed by an unconditional branch.
307     uint64_t TakenCount = BB->getTakenBranchInfo().Count;
308     if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
309       TakenCount = 0;
310 
311     uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
312     if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
313       NonTakenCount = 0;
314 
315     if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
316       Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
317       Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
318     } else {
319       Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
320       Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
321     }
322 
323     if (UncondBranch) {
324       Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
325     }
326   }
327 
328   return Stats;
329 }
330 
331 } // namespace bolt
332 } // namespace llvm
333