xref: /llvm-project/bolt/lib/Core/DynoStats.cpp (revision c8fc234ee28d0e2a10bd88bae391cb3e32e2ee77)
12f09f445SMaksim Panchenko //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements the DynoStats class.
102f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler 
13a34c753fSRafael Auler #include "bolt/Core/DynoStats.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryBasicBlock.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16a34c753fSRafael Auler #include "llvm/ADT/StringRef.h"
17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
18a34c753fSRafael Auler #include "llvm/Support/Debug.h"
19a34c753fSRafael Auler #include "llvm/Support/raw_ostream.h"
20a34c753fSRafael Auler #include <algorithm>
21a34c753fSRafael Auler #include <string>
22a34c753fSRafael Auler 
23a34c753fSRafael Auler #undef  DEBUG_TYPE
24a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
25a34c753fSRafael Auler 
26a34c753fSRafael Auler using namespace llvm;
27a34c753fSRafael Auler using namespace bolt;
28a34c753fSRafael Auler 
29a34c753fSRafael Auler namespace opts {
30a34c753fSRafael Auler 
31a34c753fSRafael Auler extern cl::OptionCategory BoltCategory;
32a34c753fSRafael Auler 
33a34c753fSRafael Auler static cl::opt<uint32_t>
34a34c753fSRafael Auler DynoStatsScale("dyno-stats-scale",
35a34c753fSRafael Auler   cl::desc("scale to be applied while reporting dyno stats"),
36a34c753fSRafael Auler   cl::Optional,
37a34c753fSRafael Auler   cl::init(1),
38a34c753fSRafael Auler   cl::Hidden,
39a34c753fSRafael Auler   cl::cat(BoltCategory));
40a34c753fSRafael Auler 
41a34c753fSRafael Auler static cl::opt<uint32_t>
42a34c753fSRafael Auler PrintDynoOpcodeStat("print-dyno-opcode-stats",
43a34c753fSRafael Auler   cl::desc("print per instruction opcode dyno stats and the function"
44a34c753fSRafael Auler               "names:BB offsets of the nth highest execution counts"),
45a34c753fSRafael Auler   cl::init(0),
46a34c753fSRafael Auler   cl::Hidden,
47a34c753fSRafael Auler   cl::cat(BoltCategory));
48a34c753fSRafael Auler 
49a34c753fSRafael Auler } // namespace opts
50a34c753fSRafael Auler 
51a34c753fSRafael Auler namespace llvm {
52a34c753fSRafael Auler namespace bolt {
53a34c753fSRafael Auler 
54a34c753fSRafael Auler constexpr const char *DynoStats::Desc[];
55a34c753fSRafael Auler 
operator <(const DynoStats & Other) const56a34c753fSRafael Auler bool DynoStats::operator<(const DynoStats &Other) const {
57a34c753fSRafael Auler   return std::lexicographical_compare(
58a34c753fSRafael Auler       &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
5940c2e0faSMaksim Panchenko       &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
60a34c753fSRafael Auler }
61a34c753fSRafael Auler 
operator ==(const DynoStats & Other) const62a34c753fSRafael Auler bool DynoStats::operator==(const DynoStats &Other) const {
6340c2e0faSMaksim Panchenko   return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
6440c2e0faSMaksim Panchenko                     &Other.Stats[FIRST_DYNO_STAT]);
65a34c753fSRafael Auler }
66a34c753fSRafael Auler 
lessThan(const DynoStats & Other,ArrayRef<Category> Keys) const67a34c753fSRafael Auler bool DynoStats::lessThan(const DynoStats &Other,
68a34c753fSRafael Auler                          ArrayRef<Category> Keys) const {
69a34c753fSRafael Auler   return std::lexicographical_compare(
7040c2e0faSMaksim Panchenko       Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
71a34c753fSRafael Auler       [this, &Other](const Category A, const Category) {
72a34c753fSRafael Auler         return Stats[A] < Other.Stats[A];
7340c2e0faSMaksim Panchenko       });
74a34c753fSRafael Auler }
75a34c753fSRafael Auler 
print(raw_ostream & OS,const DynoStats * Other,MCInstPrinter * Printer) const76a34c753fSRafael Auler void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
77a34c753fSRafael Auler                       MCInstPrinter *Printer) const {
78a34c753fSRafael Auler   auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
79a34c753fSRafael Auler                                 uint64_t OtherStat) {
80a34c753fSRafael Auler     OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
81a34c753fSRafael Auler     if (Other) {
82a34c753fSRafael Auler       if (Stat != OtherStat) {
83a34c753fSRafael Auler         OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
8440c2e0faSMaksim Panchenko         OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
85a34c753fSRafael Auler                                        (float)(OtherStat));
86a34c753fSRafael Auler       } else {
87a34c753fSRafael Auler         OS << " (=)";
88a34c753fSRafael Auler       }
89a34c753fSRafael Auler     }
90a34c753fSRafael Auler     OS << '\n';
91a34c753fSRafael Auler   };
92a34c753fSRafael Auler 
93a34c753fSRafael Auler   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
9440c2e0faSMaksim Panchenko        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
95a34c753fSRafael Auler 
96a34c753fSRafael Auler     if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
97a34c753fSRafael Auler       continue;
98a34c753fSRafael Auler 
99a34c753fSRafael Auler     printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
100a34c753fSRafael Auler   }
101a34c753fSRafael Auler   if (opts::PrintDynoOpcodeStat && Printer) {
10252cf0711SAmir Ayupov     OS << "\nProgram-wide opcode histogram:\n";
103a34c753fSRafael Auler     OS << "              Opcode,   Execution Count,     Max Exec Count, "
104a34c753fSRafael Auler           "Function Name:Offset ...\n";
105a34c753fSRafael Auler     std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
106a34c753fSRafael Auler     for (const OpcodeStatTy &Stat : OpcodeHistogram)
107a34c753fSRafael Auler       SortedHistogram.emplace_back(Stat.second.first, Stat.first);
108a34c753fSRafael Auler 
109a34c753fSRafael Auler     // Sort using lexicographic ordering
110d2c87699SAmir Ayupov     llvm::sort(SortedHistogram);
111a34c753fSRafael Auler 
112a34c753fSRafael Auler     // Dump in ascending order: Start with Opcode with Highest execution
113a34c753fSRafael Auler     // count.
114f40d25ddSAmir Ayupov     for (auto &Stat : llvm::reverse(SortedHistogram)) {
115f40d25ddSAmir Ayupov       OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat.second).data(),
116f40d25ddSAmir Ayupov                    Stat.first * opts::DynoStatsScale);
117*c8fc234eSshaw young       auto It = OpcodeHistogram.find(Stat.second);
118*c8fc234eSshaw young       assert(It != OpcodeHistogram.end());
119*c8fc234eSshaw young       MaxOpcodeHistogramTy MaxMultiMap = It->second.second;
120a34c753fSRafael Auler       // Start with function name:BB offset with highest execution count.
121f40d25ddSAmir Ayupov       for (auto &Max : llvm::reverse(MaxMultiMap)) {
122f40d25ddSAmir Ayupov         OS << format(", %'18lld, ", Max.first * opts::DynoStatsScale)
123f40d25ddSAmir Ayupov            << Max.second.first.str() << ':' << Max.second.second;
124a34c753fSRafael Auler       }
125a34c753fSRafael Auler       OS << '\n';
126a34c753fSRafael Auler     }
127a34c753fSRafael Auler   }
128a34c753fSRafael Auler }
129a34c753fSRafael Auler 
operator +=(const DynoStats & Other)130a34c753fSRafael Auler void DynoStats::operator+=(const DynoStats &Other) {
131a34c753fSRafael Auler   for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
13240c2e0faSMaksim Panchenko        Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
133a34c753fSRafael Auler     Stats[Stat] += Other[Stat];
134a34c753fSRafael Auler   }
135a34c753fSRafael Auler   for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
136a34c753fSRafael Auler     auto I = OpcodeHistogram.find(Stat.first);
137a34c753fSRafael Auler     if (I == OpcodeHistogram.end()) {
138a34c753fSRafael Auler       OpcodeHistogram.emplace(Stat);
139a34c753fSRafael Auler     } else {
1401a2f8336Sspaette       // Merge other histograms, log only the opts::PrintDynoOpcodeStat'th
141a34c753fSRafael Auler       // maximum counts.
142a34c753fSRafael Auler       I->second.first += Stat.second.first;
143a34c753fSRafael Auler       auto &MMap = I->second.second;
144a34c753fSRafael Auler       auto &OtherMMap = Stat.second.second;
145a34c753fSRafael Auler       auto Size = MMap.size();
146a34c753fSRafael Auler       assert(Size <= opts::PrintDynoOpcodeStat);
147f40d25ddSAmir Ayupov       for (auto OtherMMapPair : llvm::reverse(OtherMMap)) {
148a34c753fSRafael Auler         if (Size++ >= opts::PrintDynoOpcodeStat) {
149a34c753fSRafael Auler           auto First = MMap.begin();
150f40d25ddSAmir Ayupov           if (OtherMMapPair.first <= First->first)
151a34c753fSRafael Auler             break;
152a34c753fSRafael Auler           MMap.erase(First);
153a34c753fSRafael Auler         }
154f40d25ddSAmir Ayupov         MMap.emplace(OtherMMapPair);
155a34c753fSRafael Auler       }
156a34c753fSRafael Auler     }
157a34c753fSRafael Auler   }
158a34c753fSRafael Auler }
159a34c753fSRafael Auler 
getDynoStats(BinaryFunction & BF)160d5c03defSFabian Parzefall DynoStats getDynoStats(BinaryFunction &BF) {
161a34c753fSRafael Auler   auto &BC = BF.getBinaryContext();
162a34c753fSRafael Auler 
163a34c753fSRafael Auler   DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
164a34c753fSRafael Auler 
165a34c753fSRafael Auler   // Return empty-stats about the function we don't completely understand.
166a34c753fSRafael Auler   if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
167a34c753fSRafael Auler     return Stats;
168a34c753fSRafael Auler 
169a34c753fSRafael Auler   // Update enumeration of basic blocks for correct detection of branch'
170a34c753fSRafael Auler   // direction.
1718477bc67SFabian Parzefall   BF.getLayout().updateLayoutIndices();
172a34c753fSRafael Auler 
1738477bc67SFabian Parzefall   for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) {
174a34c753fSRafael Auler     // The basic block execution count equals to the sum of incoming branch
175a34c753fSRafael Auler     // frequencies. This may deviate from the sum of outgoing branches of the
176a34c753fSRafael Auler     // basic block especially since the block may contain a function that
177a34c753fSRafael Auler     // does not return or a function that throws an exception.
178a34c753fSRafael Auler     const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
179a34c753fSRafael Auler 
180a34c753fSRafael Auler     // Ignore empty blocks and blocks that were not executed.
181a34c753fSRafael Auler     if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
182a34c753fSRafael Auler       continue;
183a34c753fSRafael Auler 
184a34c753fSRafael Auler     // Count AArch64 linker-inserted veneers
185a34c753fSRafael Auler     if (BF.isAArch64Veneer())
186a34c753fSRafael Auler       Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
187a34c753fSRafael Auler 
188a34c753fSRafael Auler     // Count various instruction types by iterating through all instructions.
189a34c753fSRafael Auler     // When -print-dyno-opcode-stats is on, count per each opcode and record
190a34c753fSRafael Auler     // maximum execution counts.
191a34c753fSRafael Auler     for (const MCInst &Instr : *BB) {
192a34c753fSRafael Auler       if (opts::PrintDynoOpcodeStat) {
193a34c753fSRafael Auler         unsigned Opcode = Instr.getOpcode();
194a34c753fSRafael Auler         auto I = Stats.OpcodeHistogram.find(Opcode);
195a34c753fSRafael Auler         if (I == Stats.OpcodeHistogram.end()) {
196a34c753fSRafael Auler           DynoStats::MaxOpcodeHistogramTy MMap;
197a34c753fSRafael Auler           MMap.emplace(BBExecutionCount,
198a34c753fSRafael Auler                        std::make_pair(BF.getOneName(), BB->getOffset()));
199a34c753fSRafael Auler           Stats.OpcodeHistogram.emplace(Opcode,
200a34c753fSRafael Auler                                         std::make_pair(BBExecutionCount, MMap));
201a34c753fSRafael Auler         } else {
202a34c753fSRafael Auler           I->second.first += BBExecutionCount;
203a34c753fSRafael Auler           bool Insert = true;
204a34c753fSRafael Auler           if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
205a34c753fSRafael Auler             auto First = I->second.second.begin();
206a34c753fSRafael Auler             if (First->first < BBExecutionCount)
207a34c753fSRafael Auler               I->second.second.erase(First);
208a34c753fSRafael Auler             else
209a34c753fSRafael Auler               Insert = false;
210a34c753fSRafael Auler           }
211a34c753fSRafael Auler           if (Insert) {
212a34c753fSRafael Auler             I->second.second.emplace(
213a34c753fSRafael Auler                 BBExecutionCount,
214a34c753fSRafael Auler                 std::make_pair(BF.getOneName(), BB->getOffset()));
215a34c753fSRafael Auler           }
216a34c753fSRafael Auler         }
217a34c753fSRafael Auler       }
218a34c753fSRafael Auler 
219eafe4ee2SJob Noorman       if (BC.MIB->mayStore(Instr)) {
220a34c753fSRafael Auler         Stats[DynoStats::STORES] += BBExecutionCount;
221a34c753fSRafael Auler       }
222eafe4ee2SJob Noorman       if (BC.MIB->mayLoad(Instr)) {
223a34c753fSRafael Auler         Stats[DynoStats::LOADS] += BBExecutionCount;
224a34c753fSRafael Auler       }
225a34c753fSRafael Auler       if (!BC.MIB->isCall(Instr))
226a34c753fSRafael Auler         continue;
227a34c753fSRafael Auler 
228a34c753fSRafael Auler       uint64_t CallFreq = BBExecutionCount;
229a34c753fSRafael Auler       if (BC.MIB->getConditionalTailCall(Instr)) {
230a34c753fSRafael Auler         CallFreq =
231a34c753fSRafael Auler             BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
232a34c753fSRafael Auler       }
233a34c753fSRafael Auler       Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
234a34c753fSRafael Auler       if (BC.MIB->isIndirectCall(Instr)) {
235a34c753fSRafael Auler         Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
236a34c753fSRafael Auler       } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
237a34c753fSRafael Auler         const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
238a34c753fSRafael Auler         if (BF && BF->isPLTFunction()) {
239a34c753fSRafael Auler           Stats[DynoStats::PLT_CALLS] += CallFreq;
240a34c753fSRafael Auler 
241a34c753fSRafael Auler           // We don't process PLT functions and hence have to adjust relevant
242a34c753fSRafael Auler           // dynostats here for:
243a34c753fSRafael Auler           //
244a34c753fSRafael Auler           //   jmp *GOT_ENTRY(%rip)
245a34c753fSRafael Auler           //
246a34c753fSRafael Auler           // NOTE: this is arch-specific.
247a34c753fSRafael Auler           Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
248a34c753fSRafael Auler           Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
249a34c753fSRafael Auler           Stats[DynoStats::LOADS] += CallFreq;
250a34c753fSRafael Auler           Stats[DynoStats::INSTRUCTIONS] += CallFreq;
251a34c753fSRafael Auler         }
252a34c753fSRafael Auler       }
253a34c753fSRafael Auler     }
254a34c753fSRafael Auler 
255a34c753fSRafael Auler     Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
256a34c753fSRafael Auler 
257a34c753fSRafael Auler     // Jump tables.
258a34c753fSRafael Auler     const MCInst *LastInstr = BB->getLastNonPseudoInstr();
259a34c753fSRafael Auler     if (BC.MIB->getJumpTable(*LastInstr)) {
260a34c753fSRafael Auler       Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
261a34c753fSRafael Auler       LLVM_DEBUG(
262a34c753fSRafael Auler         static uint64_t MostFrequentJT;
263a34c753fSRafael Auler         if (BBExecutionCount > MostFrequentJT) {
264a34c753fSRafael Auler           MostFrequentJT = BBExecutionCount;
265a34c753fSRafael Auler           dbgs() << "BOLT-INFO: most frequently executed jump table is in "
266a34c753fSRafael Auler                  << "function " << BF << " in basic block " << BB->getName()
267a34c753fSRafael Auler                  << " executed totally " << BBExecutionCount << " times.\n";
268a34c753fSRafael Auler         }
269a34c753fSRafael Auler       );
270a34c753fSRafael Auler       continue;
271a34c753fSRafael Auler     }
272a34c753fSRafael Auler 
273a34c753fSRafael Auler     if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
274a34c753fSRafael Auler       Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
275a34c753fSRafael Auler       continue;
276a34c753fSRafael Auler     }
277a34c753fSRafael Auler 
278a34c753fSRafael Auler     // Update stats for branches.
279a34c753fSRafael Auler     const MCSymbol *TBB = nullptr;
280a34c753fSRafael Auler     const MCSymbol *FBB = nullptr;
281a34c753fSRafael Auler     MCInst *CondBranch = nullptr;
282a34c753fSRafael Auler     MCInst *UncondBranch = nullptr;
2833652483cSRafael Auler     if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
284a34c753fSRafael Auler       continue;
285a34c753fSRafael Auler 
2863652483cSRafael Auler     if (!CondBranch && !UncondBranch)
287a34c753fSRafael Auler       continue;
288a34c753fSRafael Auler 
289a34c753fSRafael Auler     // Simple unconditional branch.
290a34c753fSRafael Auler     if (!CondBranch) {
291a34c753fSRafael Auler       Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
292a34c753fSRafael Auler       continue;
293a34c753fSRafael Auler     }
294a34c753fSRafael Auler 
295a34c753fSRafael Auler     // CTCs: instruction annotations could be stripped, hence check the number
296a34c753fSRafael Auler     // of successors to identify conditional tail calls.
297a34c753fSRafael Auler     if (BB->succ_size() == 1) {
298a34c753fSRafael Auler       if (BB->branch_info_begin() != BB->branch_info_end())
299a34c753fSRafael Auler         Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
300a34c753fSRafael Auler       continue;
301a34c753fSRafael Auler     }
302a34c753fSRafael Auler 
303a34c753fSRafael Auler     // Conditional branch that could be followed by an unconditional branch.
304a34c753fSRafael Auler     uint64_t TakenCount = BB->getTakenBranchInfo().Count;
305a34c753fSRafael Auler     if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
306a34c753fSRafael Auler       TakenCount = 0;
307a34c753fSRafael Auler 
308a34c753fSRafael Auler     uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
309a34c753fSRafael Auler     if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
310a34c753fSRafael Auler       NonTakenCount = 0;
311a34c753fSRafael Auler 
312a34c753fSRafael Auler     if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
313a34c753fSRafael Auler       Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
314a34c753fSRafael Auler       Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
315a34c753fSRafael Auler     } else {
316a34c753fSRafael Auler       Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
317a34c753fSRafael Auler       Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
318a34c753fSRafael Auler     }
319a34c753fSRafael Auler 
320a34c753fSRafael Auler     if (UncondBranch) {
321a34c753fSRafael Auler       Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
322a34c753fSRafael Auler     }
323a34c753fSRafael Auler   }
324a34c753fSRafael Auler 
325a34c753fSRafael Auler   return Stats;
326a34c753fSRafael Auler }
327a34c753fSRafael Auler 
328a34c753fSRafael Auler } // namespace bolt
329a34c753fSRafael Auler } // namespace llvm
330