xref: /llvm-project/bolt/lib/Core/DynoStats.cpp (revision c8fc234ee28d0e2a10bd88bae391cb3e32e2ee77)
1 //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the DynoStats class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/DynoStats.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include <algorithm>
21 #include <string>
22 
23 #undef  DEBUG_TYPE
24 #define DEBUG_TYPE "bolt"
25 
26 using namespace llvm;
27 using namespace bolt;
28 
29 namespace opts {
30 
31 extern cl::OptionCategory BoltCategory;
32 
33 static cl::opt<uint32_t>
34 DynoStatsScale("dyno-stats-scale",
35   cl::desc("scale to be applied while reporting dyno stats"),
36   cl::Optional,
37   cl::init(1),
38   cl::Hidden,
39   cl::cat(BoltCategory));
40 
41 static cl::opt<uint32_t>
42 PrintDynoOpcodeStat("print-dyno-opcode-stats",
43   cl::desc("print per instruction opcode dyno stats and the function"
44               "names:BB offsets of the nth highest execution counts"),
45   cl::init(0),
46   cl::Hidden,
47   cl::cat(BoltCategory));
48 
49 } // namespace opts
50 
51 namespace llvm {
52 namespace bolt {
53 
54 constexpr const char *DynoStats::Desc[];
55 
operator <(const DynoStats & Other) const56 bool DynoStats::operator<(const DynoStats &Other) const {
57   return std::lexicographical_compare(
58       &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
59       &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
60 }
61 
operator ==(const DynoStats & Other) const62 bool DynoStats::operator==(const DynoStats &Other) const {
63   return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
64                     &Other.Stats[FIRST_DYNO_STAT]);
65 }
66 
lessThan(const DynoStats & Other,ArrayRef<Category> Keys) const67 bool DynoStats::lessThan(const DynoStats &Other,
68                          ArrayRef<Category> Keys) const {
69   return std::lexicographical_compare(
70       Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
71       [this, &Other](const Category A, const Category) {
72         return Stats[A] < Other.Stats[A];
73       });
74 }
75 
print(raw_ostream & OS,const DynoStats * Other,MCInstPrinter * Printer) const76 void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
77                       MCInstPrinter *Printer) const {
78   auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
79                                 uint64_t OtherStat) {
80     OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
81     if (Other) {
82       if (Stat != OtherStat) {
83         OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
84         OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
85                                        (float)(OtherStat));
86       } else {
87         OS << " (=)";
88       }
89     }
90     OS << '\n';
91   };
92 
93   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
94        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
95 
96     if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
97       continue;
98 
99     printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
100   }
101   if (opts::PrintDynoOpcodeStat && Printer) {
102     OS << "\nProgram-wide opcode histogram:\n";
103     OS << "              Opcode,   Execution Count,     Max Exec Count, "
104           "Function Name:Offset ...\n";
105     std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
106     for (const OpcodeStatTy &Stat : OpcodeHistogram)
107       SortedHistogram.emplace_back(Stat.second.first, Stat.first);
108 
109     // Sort using lexicographic ordering
110     llvm::sort(SortedHistogram);
111 
112     // Dump in ascending order: Start with Opcode with Highest execution
113     // count.
114     for (auto &Stat : llvm::reverse(SortedHistogram)) {
115       OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat.second).data(),
116                    Stat.first * opts::DynoStatsScale);
117       auto It = OpcodeHistogram.find(Stat.second);
118       assert(It != OpcodeHistogram.end());
119       MaxOpcodeHistogramTy MaxMultiMap = It->second.second;
120       // Start with function name:BB offset with highest execution count.
121       for (auto &Max : llvm::reverse(MaxMultiMap)) {
122         OS << format(", %'18lld, ", Max.first * opts::DynoStatsScale)
123            << Max.second.first.str() << ':' << Max.second.second;
124       }
125       OS << '\n';
126     }
127   }
128 }
129 
operator +=(const DynoStats & Other)130 void DynoStats::operator+=(const DynoStats &Other) {
131   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
132        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
133     Stats[Stat] += Other[Stat];
134   }
135   for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
136     auto I = OpcodeHistogram.find(Stat.first);
137     if (I == OpcodeHistogram.end()) {
138       OpcodeHistogram.emplace(Stat);
139     } else {
140       // Merge other histograms, log only the opts::PrintDynoOpcodeStat'th
141       // maximum counts.
142       I->second.first += Stat.second.first;
143       auto &MMap = I->second.second;
144       auto &OtherMMap = Stat.second.second;
145       auto Size = MMap.size();
146       assert(Size <= opts::PrintDynoOpcodeStat);
147       for (auto OtherMMapPair : llvm::reverse(OtherMMap)) {
148         if (Size++ >= opts::PrintDynoOpcodeStat) {
149           auto First = MMap.begin();
150           if (OtherMMapPair.first <= First->first)
151             break;
152           MMap.erase(First);
153         }
154         MMap.emplace(OtherMMapPair);
155       }
156     }
157   }
158 }
159 
getDynoStats(BinaryFunction & BF)160 DynoStats getDynoStats(BinaryFunction &BF) {
161   auto &BC = BF.getBinaryContext();
162 
163   DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
164 
165   // Return empty-stats about the function we don't completely understand.
166   if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
167     return Stats;
168 
169   // Update enumeration of basic blocks for correct detection of branch'
170   // direction.
171   BF.getLayout().updateLayoutIndices();
172 
173   for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) {
174     // The basic block execution count equals to the sum of incoming branch
175     // frequencies. This may deviate from the sum of outgoing branches of the
176     // basic block especially since the block may contain a function that
177     // does not return or a function that throws an exception.
178     const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
179 
180     // Ignore empty blocks and blocks that were not executed.
181     if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
182       continue;
183 
184     // Count AArch64 linker-inserted veneers
185     if (BF.isAArch64Veneer())
186       Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
187 
188     // Count various instruction types by iterating through all instructions.
189     // When -print-dyno-opcode-stats is on, count per each opcode and record
190     // maximum execution counts.
191     for (const MCInst &Instr : *BB) {
192       if (opts::PrintDynoOpcodeStat) {
193         unsigned Opcode = Instr.getOpcode();
194         auto I = Stats.OpcodeHistogram.find(Opcode);
195         if (I == Stats.OpcodeHistogram.end()) {
196           DynoStats::MaxOpcodeHistogramTy MMap;
197           MMap.emplace(BBExecutionCount,
198                        std::make_pair(BF.getOneName(), BB->getOffset()));
199           Stats.OpcodeHistogram.emplace(Opcode,
200                                         std::make_pair(BBExecutionCount, MMap));
201         } else {
202           I->second.first += BBExecutionCount;
203           bool Insert = true;
204           if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
205             auto First = I->second.second.begin();
206             if (First->first < BBExecutionCount)
207               I->second.second.erase(First);
208             else
209               Insert = false;
210           }
211           if (Insert) {
212             I->second.second.emplace(
213                 BBExecutionCount,
214                 std::make_pair(BF.getOneName(), BB->getOffset()));
215           }
216         }
217       }
218 
219       if (BC.MIB->mayStore(Instr)) {
220         Stats[DynoStats::STORES] += BBExecutionCount;
221       }
222       if (BC.MIB->mayLoad(Instr)) {
223         Stats[DynoStats::LOADS] += BBExecutionCount;
224       }
225       if (!BC.MIB->isCall(Instr))
226         continue;
227 
228       uint64_t CallFreq = BBExecutionCount;
229       if (BC.MIB->getConditionalTailCall(Instr)) {
230         CallFreq =
231             BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
232       }
233       Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
234       if (BC.MIB->isIndirectCall(Instr)) {
235         Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
236       } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
237         const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
238         if (BF && BF->isPLTFunction()) {
239           Stats[DynoStats::PLT_CALLS] += CallFreq;
240 
241           // We don't process PLT functions and hence have to adjust relevant
242           // dynostats here for:
243           //
244           //   jmp *GOT_ENTRY(%rip)
245           //
246           // NOTE: this is arch-specific.
247           Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
248           Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
249           Stats[DynoStats::LOADS] += CallFreq;
250           Stats[DynoStats::INSTRUCTIONS] += CallFreq;
251         }
252       }
253     }
254 
255     Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
256 
257     // Jump tables.
258     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
259     if (BC.MIB->getJumpTable(*LastInstr)) {
260       Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
261       LLVM_DEBUG(
262         static uint64_t MostFrequentJT;
263         if (BBExecutionCount > MostFrequentJT) {
264           MostFrequentJT = BBExecutionCount;
265           dbgs() << "BOLT-INFO: most frequently executed jump table is in "
266                  << "function " << BF << " in basic block " << BB->getName()
267                  << " executed totally " << BBExecutionCount << " times.\n";
268         }
269       );
270       continue;
271     }
272 
273     if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
274       Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
275       continue;
276     }
277 
278     // Update stats for branches.
279     const MCSymbol *TBB = nullptr;
280     const MCSymbol *FBB = nullptr;
281     MCInst *CondBranch = nullptr;
282     MCInst *UncondBranch = nullptr;
283     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
284       continue;
285 
286     if (!CondBranch && !UncondBranch)
287       continue;
288 
289     // Simple unconditional branch.
290     if (!CondBranch) {
291       Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
292       continue;
293     }
294 
295     // CTCs: instruction annotations could be stripped, hence check the number
296     // of successors to identify conditional tail calls.
297     if (BB->succ_size() == 1) {
298       if (BB->branch_info_begin() != BB->branch_info_end())
299         Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
300       continue;
301     }
302 
303     // Conditional branch that could be followed by an unconditional branch.
304     uint64_t TakenCount = BB->getTakenBranchInfo().Count;
305     if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
306       TakenCount = 0;
307 
308     uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
309     if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
310       NonTakenCount = 0;
311 
312     if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
313       Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
314       Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
315     } else {
316       Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
317       Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
318     }
319 
320     if (UncondBranch) {
321       Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
322     }
323   }
324 
325   return Stats;
326 }
327 
328 } // namespace bolt
329 } // namespace llvm
330