12f09f445SMaksim Panchenko //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements the DynoStats class.
102f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler
13a34c753fSRafael Auler #include "bolt/Core/DynoStats.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryBasicBlock.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16a34c753fSRafael Auler #include "llvm/ADT/StringRef.h"
17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
18a34c753fSRafael Auler #include "llvm/Support/Debug.h"
19a34c753fSRafael Auler #include "llvm/Support/raw_ostream.h"
20a34c753fSRafael Auler #include <algorithm>
21a34c753fSRafael Auler #include <string>
22a34c753fSRafael Auler
23a34c753fSRafael Auler #undef DEBUG_TYPE
24a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
25a34c753fSRafael Auler
26a34c753fSRafael Auler using namespace llvm;
27a34c753fSRafael Auler using namespace bolt;
28a34c753fSRafael Auler
29a34c753fSRafael Auler namespace opts {
30a34c753fSRafael Auler
31a34c753fSRafael Auler extern cl::OptionCategory BoltCategory;
32a34c753fSRafael Auler
33a34c753fSRafael Auler static cl::opt<uint32_t>
34a34c753fSRafael Auler DynoStatsScale("dyno-stats-scale",
35a34c753fSRafael Auler cl::desc("scale to be applied while reporting dyno stats"),
36a34c753fSRafael Auler cl::Optional,
37a34c753fSRafael Auler cl::init(1),
38a34c753fSRafael Auler cl::Hidden,
39a34c753fSRafael Auler cl::cat(BoltCategory));
40a34c753fSRafael Auler
41a34c753fSRafael Auler static cl::opt<uint32_t>
42a34c753fSRafael Auler PrintDynoOpcodeStat("print-dyno-opcode-stats",
43a34c753fSRafael Auler cl::desc("print per instruction opcode dyno stats and the function"
44a34c753fSRafael Auler "names:BB offsets of the nth highest execution counts"),
45a34c753fSRafael Auler cl::init(0),
46a34c753fSRafael Auler cl::Hidden,
47a34c753fSRafael Auler cl::cat(BoltCategory));
48a34c753fSRafael Auler
49a34c753fSRafael Auler } // namespace opts
50a34c753fSRafael Auler
51a34c753fSRafael Auler namespace llvm {
52a34c753fSRafael Auler namespace bolt {
53a34c753fSRafael Auler
54a34c753fSRafael Auler constexpr const char *DynoStats::Desc[];
55a34c753fSRafael Auler
operator <(const DynoStats & Other) const56a34c753fSRafael Auler bool DynoStats::operator<(const DynoStats &Other) const {
57a34c753fSRafael Auler return std::lexicographical_compare(
58a34c753fSRafael Auler &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
5940c2e0faSMaksim Panchenko &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
60a34c753fSRafael Auler }
61a34c753fSRafael Auler
operator ==(const DynoStats & Other) const62a34c753fSRafael Auler bool DynoStats::operator==(const DynoStats &Other) const {
6340c2e0faSMaksim Panchenko return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
6440c2e0faSMaksim Panchenko &Other.Stats[FIRST_DYNO_STAT]);
65a34c753fSRafael Auler }
66a34c753fSRafael Auler
lessThan(const DynoStats & Other,ArrayRef<Category> Keys) const67a34c753fSRafael Auler bool DynoStats::lessThan(const DynoStats &Other,
68a34c753fSRafael Auler ArrayRef<Category> Keys) const {
69a34c753fSRafael Auler return std::lexicographical_compare(
7040c2e0faSMaksim Panchenko Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
71a34c753fSRafael Auler [this, &Other](const Category A, const Category) {
72a34c753fSRafael Auler return Stats[A] < Other.Stats[A];
7340c2e0faSMaksim Panchenko });
74a34c753fSRafael Auler }
75a34c753fSRafael Auler
print(raw_ostream & OS,const DynoStats * Other,MCInstPrinter * Printer) const76a34c753fSRafael Auler void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
77a34c753fSRafael Auler MCInstPrinter *Printer) const {
78a34c753fSRafael Auler auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
79a34c753fSRafael Auler uint64_t OtherStat) {
80a34c753fSRafael Auler OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
81a34c753fSRafael Auler if (Other) {
82a34c753fSRafael Auler if (Stat != OtherStat) {
83a34c753fSRafael Auler OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
8440c2e0faSMaksim Panchenko OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
85a34c753fSRafael Auler (float)(OtherStat));
86a34c753fSRafael Auler } else {
87a34c753fSRafael Auler OS << " (=)";
88a34c753fSRafael Auler }
89a34c753fSRafael Auler }
90a34c753fSRafael Auler OS << '\n';
91a34c753fSRafael Auler };
92a34c753fSRafael Auler
93a34c753fSRafael Auler for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
9440c2e0faSMaksim Panchenko Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
95a34c753fSRafael Auler
96a34c753fSRafael Auler if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
97a34c753fSRafael Auler continue;
98a34c753fSRafael Auler
99a34c753fSRafael Auler printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
100a34c753fSRafael Auler }
101a34c753fSRafael Auler if (opts::PrintDynoOpcodeStat && Printer) {
10252cf0711SAmir Ayupov OS << "\nProgram-wide opcode histogram:\n";
103a34c753fSRafael Auler OS << " Opcode, Execution Count, Max Exec Count, "
104a34c753fSRafael Auler "Function Name:Offset ...\n";
105a34c753fSRafael Auler std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
106a34c753fSRafael Auler for (const OpcodeStatTy &Stat : OpcodeHistogram)
107a34c753fSRafael Auler SortedHistogram.emplace_back(Stat.second.first, Stat.first);
108a34c753fSRafael Auler
109a34c753fSRafael Auler // Sort using lexicographic ordering
110d2c87699SAmir Ayupov llvm::sort(SortedHistogram);
111a34c753fSRafael Auler
112a34c753fSRafael Auler // Dump in ascending order: Start with Opcode with Highest execution
113a34c753fSRafael Auler // count.
114f40d25ddSAmir Ayupov for (auto &Stat : llvm::reverse(SortedHistogram)) {
115f40d25ddSAmir Ayupov OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat.second).data(),
116f40d25ddSAmir Ayupov Stat.first * opts::DynoStatsScale);
117*c8fc234eSshaw young auto It = OpcodeHistogram.find(Stat.second);
118*c8fc234eSshaw young assert(It != OpcodeHistogram.end());
119*c8fc234eSshaw young MaxOpcodeHistogramTy MaxMultiMap = It->second.second;
120a34c753fSRafael Auler // Start with function name:BB offset with highest execution count.
121f40d25ddSAmir Ayupov for (auto &Max : llvm::reverse(MaxMultiMap)) {
122f40d25ddSAmir Ayupov OS << format(", %'18lld, ", Max.first * opts::DynoStatsScale)
123f40d25ddSAmir Ayupov << Max.second.first.str() << ':' << Max.second.second;
124a34c753fSRafael Auler }
125a34c753fSRafael Auler OS << '\n';
126a34c753fSRafael Auler }
127a34c753fSRafael Auler }
128a34c753fSRafael Auler }
129a34c753fSRafael Auler
operator +=(const DynoStats & Other)130a34c753fSRafael Auler void DynoStats::operator+=(const DynoStats &Other) {
131a34c753fSRafael Auler for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
13240c2e0faSMaksim Panchenko Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
133a34c753fSRafael Auler Stats[Stat] += Other[Stat];
134a34c753fSRafael Auler }
135a34c753fSRafael Auler for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
136a34c753fSRafael Auler auto I = OpcodeHistogram.find(Stat.first);
137a34c753fSRafael Auler if (I == OpcodeHistogram.end()) {
138a34c753fSRafael Auler OpcodeHistogram.emplace(Stat);
139a34c753fSRafael Auler } else {
1401a2f8336Sspaette // Merge other histograms, log only the opts::PrintDynoOpcodeStat'th
141a34c753fSRafael Auler // maximum counts.
142a34c753fSRafael Auler I->second.first += Stat.second.first;
143a34c753fSRafael Auler auto &MMap = I->second.second;
144a34c753fSRafael Auler auto &OtherMMap = Stat.second.second;
145a34c753fSRafael Auler auto Size = MMap.size();
146a34c753fSRafael Auler assert(Size <= opts::PrintDynoOpcodeStat);
147f40d25ddSAmir Ayupov for (auto OtherMMapPair : llvm::reverse(OtherMMap)) {
148a34c753fSRafael Auler if (Size++ >= opts::PrintDynoOpcodeStat) {
149a34c753fSRafael Auler auto First = MMap.begin();
150f40d25ddSAmir Ayupov if (OtherMMapPair.first <= First->first)
151a34c753fSRafael Auler break;
152a34c753fSRafael Auler MMap.erase(First);
153a34c753fSRafael Auler }
154f40d25ddSAmir Ayupov MMap.emplace(OtherMMapPair);
155a34c753fSRafael Auler }
156a34c753fSRafael Auler }
157a34c753fSRafael Auler }
158a34c753fSRafael Auler }
159a34c753fSRafael Auler
getDynoStats(BinaryFunction & BF)160d5c03defSFabian Parzefall DynoStats getDynoStats(BinaryFunction &BF) {
161a34c753fSRafael Auler auto &BC = BF.getBinaryContext();
162a34c753fSRafael Auler
163a34c753fSRafael Auler DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
164a34c753fSRafael Auler
165a34c753fSRafael Auler // Return empty-stats about the function we don't completely understand.
166a34c753fSRafael Auler if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
167a34c753fSRafael Auler return Stats;
168a34c753fSRafael Auler
169a34c753fSRafael Auler // Update enumeration of basic blocks for correct detection of branch'
170a34c753fSRafael Auler // direction.
1718477bc67SFabian Parzefall BF.getLayout().updateLayoutIndices();
172a34c753fSRafael Auler
1738477bc67SFabian Parzefall for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) {
174a34c753fSRafael Auler // The basic block execution count equals to the sum of incoming branch
175a34c753fSRafael Auler // frequencies. This may deviate from the sum of outgoing branches of the
176a34c753fSRafael Auler // basic block especially since the block may contain a function that
177a34c753fSRafael Auler // does not return or a function that throws an exception.
178a34c753fSRafael Auler const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
179a34c753fSRafael Auler
180a34c753fSRafael Auler // Ignore empty blocks and blocks that were not executed.
181a34c753fSRafael Auler if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
182a34c753fSRafael Auler continue;
183a34c753fSRafael Auler
184a34c753fSRafael Auler // Count AArch64 linker-inserted veneers
185a34c753fSRafael Auler if (BF.isAArch64Veneer())
186a34c753fSRafael Auler Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
187a34c753fSRafael Auler
188a34c753fSRafael Auler // Count various instruction types by iterating through all instructions.
189a34c753fSRafael Auler // When -print-dyno-opcode-stats is on, count per each opcode and record
190a34c753fSRafael Auler // maximum execution counts.
191a34c753fSRafael Auler for (const MCInst &Instr : *BB) {
192a34c753fSRafael Auler if (opts::PrintDynoOpcodeStat) {
193a34c753fSRafael Auler unsigned Opcode = Instr.getOpcode();
194a34c753fSRafael Auler auto I = Stats.OpcodeHistogram.find(Opcode);
195a34c753fSRafael Auler if (I == Stats.OpcodeHistogram.end()) {
196a34c753fSRafael Auler DynoStats::MaxOpcodeHistogramTy MMap;
197a34c753fSRafael Auler MMap.emplace(BBExecutionCount,
198a34c753fSRafael Auler std::make_pair(BF.getOneName(), BB->getOffset()));
199a34c753fSRafael Auler Stats.OpcodeHistogram.emplace(Opcode,
200a34c753fSRafael Auler std::make_pair(BBExecutionCount, MMap));
201a34c753fSRafael Auler } else {
202a34c753fSRafael Auler I->second.first += BBExecutionCount;
203a34c753fSRafael Auler bool Insert = true;
204a34c753fSRafael Auler if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
205a34c753fSRafael Auler auto First = I->second.second.begin();
206a34c753fSRafael Auler if (First->first < BBExecutionCount)
207a34c753fSRafael Auler I->second.second.erase(First);
208a34c753fSRafael Auler else
209a34c753fSRafael Auler Insert = false;
210a34c753fSRafael Auler }
211a34c753fSRafael Auler if (Insert) {
212a34c753fSRafael Auler I->second.second.emplace(
213a34c753fSRafael Auler BBExecutionCount,
214a34c753fSRafael Auler std::make_pair(BF.getOneName(), BB->getOffset()));
215a34c753fSRafael Auler }
216a34c753fSRafael Auler }
217a34c753fSRafael Auler }
218a34c753fSRafael Auler
219eafe4ee2SJob Noorman if (BC.MIB->mayStore(Instr)) {
220a34c753fSRafael Auler Stats[DynoStats::STORES] += BBExecutionCount;
221a34c753fSRafael Auler }
222eafe4ee2SJob Noorman if (BC.MIB->mayLoad(Instr)) {
223a34c753fSRafael Auler Stats[DynoStats::LOADS] += BBExecutionCount;
224a34c753fSRafael Auler }
225a34c753fSRafael Auler if (!BC.MIB->isCall(Instr))
226a34c753fSRafael Auler continue;
227a34c753fSRafael Auler
228a34c753fSRafael Auler uint64_t CallFreq = BBExecutionCount;
229a34c753fSRafael Auler if (BC.MIB->getConditionalTailCall(Instr)) {
230a34c753fSRafael Auler CallFreq =
231a34c753fSRafael Auler BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
232a34c753fSRafael Auler }
233a34c753fSRafael Auler Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
234a34c753fSRafael Auler if (BC.MIB->isIndirectCall(Instr)) {
235a34c753fSRafael Auler Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
236a34c753fSRafael Auler } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
237a34c753fSRafael Auler const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
238a34c753fSRafael Auler if (BF && BF->isPLTFunction()) {
239a34c753fSRafael Auler Stats[DynoStats::PLT_CALLS] += CallFreq;
240a34c753fSRafael Auler
241a34c753fSRafael Auler // We don't process PLT functions and hence have to adjust relevant
242a34c753fSRafael Auler // dynostats here for:
243a34c753fSRafael Auler //
244a34c753fSRafael Auler // jmp *GOT_ENTRY(%rip)
245a34c753fSRafael Auler //
246a34c753fSRafael Auler // NOTE: this is arch-specific.
247a34c753fSRafael Auler Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
248a34c753fSRafael Auler Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
249a34c753fSRafael Auler Stats[DynoStats::LOADS] += CallFreq;
250a34c753fSRafael Auler Stats[DynoStats::INSTRUCTIONS] += CallFreq;
251a34c753fSRafael Auler }
252a34c753fSRafael Auler }
253a34c753fSRafael Auler }
254a34c753fSRafael Auler
255a34c753fSRafael Auler Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
256a34c753fSRafael Auler
257a34c753fSRafael Auler // Jump tables.
258a34c753fSRafael Auler const MCInst *LastInstr = BB->getLastNonPseudoInstr();
259a34c753fSRafael Auler if (BC.MIB->getJumpTable(*LastInstr)) {
260a34c753fSRafael Auler Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
261a34c753fSRafael Auler LLVM_DEBUG(
262a34c753fSRafael Auler static uint64_t MostFrequentJT;
263a34c753fSRafael Auler if (BBExecutionCount > MostFrequentJT) {
264a34c753fSRafael Auler MostFrequentJT = BBExecutionCount;
265a34c753fSRafael Auler dbgs() << "BOLT-INFO: most frequently executed jump table is in "
266a34c753fSRafael Auler << "function " << BF << " in basic block " << BB->getName()
267a34c753fSRafael Auler << " executed totally " << BBExecutionCount << " times.\n";
268a34c753fSRafael Auler }
269a34c753fSRafael Auler );
270a34c753fSRafael Auler continue;
271a34c753fSRafael Auler }
272a34c753fSRafael Auler
273a34c753fSRafael Auler if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
274a34c753fSRafael Auler Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
275a34c753fSRafael Auler continue;
276a34c753fSRafael Auler }
277a34c753fSRafael Auler
278a34c753fSRafael Auler // Update stats for branches.
279a34c753fSRafael Auler const MCSymbol *TBB = nullptr;
280a34c753fSRafael Auler const MCSymbol *FBB = nullptr;
281a34c753fSRafael Auler MCInst *CondBranch = nullptr;
282a34c753fSRafael Auler MCInst *UncondBranch = nullptr;
2833652483cSRafael Auler if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
284a34c753fSRafael Auler continue;
285a34c753fSRafael Auler
2863652483cSRafael Auler if (!CondBranch && !UncondBranch)
287a34c753fSRafael Auler continue;
288a34c753fSRafael Auler
289a34c753fSRafael Auler // Simple unconditional branch.
290a34c753fSRafael Auler if (!CondBranch) {
291a34c753fSRafael Auler Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
292a34c753fSRafael Auler continue;
293a34c753fSRafael Auler }
294a34c753fSRafael Auler
295a34c753fSRafael Auler // CTCs: instruction annotations could be stripped, hence check the number
296a34c753fSRafael Auler // of successors to identify conditional tail calls.
297a34c753fSRafael Auler if (BB->succ_size() == 1) {
298a34c753fSRafael Auler if (BB->branch_info_begin() != BB->branch_info_end())
299a34c753fSRafael Auler Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
300a34c753fSRafael Auler continue;
301a34c753fSRafael Auler }
302a34c753fSRafael Auler
303a34c753fSRafael Auler // Conditional branch that could be followed by an unconditional branch.
304a34c753fSRafael Auler uint64_t TakenCount = BB->getTakenBranchInfo().Count;
305a34c753fSRafael Auler if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
306a34c753fSRafael Auler TakenCount = 0;
307a34c753fSRafael Auler
308a34c753fSRafael Auler uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
309a34c753fSRafael Auler if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
310a34c753fSRafael Auler NonTakenCount = 0;
311a34c753fSRafael Auler
312a34c753fSRafael Auler if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
313a34c753fSRafael Auler Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
314a34c753fSRafael Auler Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
315a34c753fSRafael Auler } else {
316a34c753fSRafael Auler Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
317a34c753fSRafael Auler Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
318a34c753fSRafael Auler }
319a34c753fSRafael Auler
320a34c753fSRafael Auler if (UncondBranch) {
321a34c753fSRafael Auler Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
322a34c753fSRafael Auler }
323a34c753fSRafael Auler }
324a34c753fSRafael Auler
325a34c753fSRafael Auler return Stats;
326a34c753fSRafael Auler }
327a34c753fSRafael Auler
328a34c753fSRafael Auler } // namespace bolt
329a34c753fSRafael Auler } // namespace llvm
330