1 //===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the DynoStats class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Core/DynoStats.h" 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/Support/CommandLine.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 #include <algorithm> 21 #include <string> 22 23 #undef DEBUG_TYPE 24 #define DEBUG_TYPE "bolt" 25 26 using namespace llvm; 27 using namespace bolt; 28 29 namespace opts { 30 31 extern cl::OptionCategory BoltCategory; 32 33 static cl::opt<uint32_t> 34 DynoStatsScale("dyno-stats-scale", 35 cl::desc("scale to be applied while reporting dyno stats"), 36 cl::Optional, 37 cl::init(1), 38 cl::Hidden, 39 cl::cat(BoltCategory)); 40 41 static cl::opt<uint32_t> 42 PrintDynoOpcodeStat("print-dyno-opcode-stats", 43 cl::desc("print per instruction opcode dyno stats and the function" 44 "names:BB offsets of the nth highest execution counts"), 45 cl::init(0), 46 cl::Hidden, 47 cl::cat(BoltCategory)); 48 49 } // namespace opts 50 51 namespace llvm { 52 namespace bolt { 53 54 constexpr const char *DynoStats::Desc[]; 55 56 bool DynoStats::operator<(const DynoStats &Other) const { 57 return std::lexicographical_compare( 58 &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT], 59 &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]); 60 } 61 62 bool DynoStats::operator==(const DynoStats &Other) const { 63 return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT], 64 &Other.Stats[FIRST_DYNO_STAT]); 65 } 66 67 bool DynoStats::lessThan(const DynoStats &Other, 68 ArrayRef<Category> Keys) const { 69 return std::lexicographical_compare( 70 Keys.begin(), Keys.end(), Keys.begin(), Keys.end(), 71 [this, &Other](const Category A, const Category) { 72 return Stats[A] < Other.Stats[A]; 73 }); 74 } 75 76 void DynoStats::print(raw_ostream &OS, const DynoStats *Other, 77 MCInstPrinter *Printer) const { 78 auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat, 79 uint64_t OtherStat) { 80 OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name; 81 if (Other) { 82 if (Stat != OtherStat) { 83 OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0 84 OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 / 85 (float)(OtherStat)); 86 } else { 87 OS << " (=)"; 88 } 89 } 90 OS << '\n'; 91 }; 92 93 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1; 94 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) { 95 96 if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64) 97 continue; 98 99 printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0); 100 } 101 if (opts::PrintDynoOpcodeStat && Printer) { 102 outs() << "\nProgram-wide opcode histogram:\n"; 103 OS << " Opcode, Execution Count, Max Exec Count, " 104 "Function Name:Offset ...\n"; 105 std::vector<std::pair<uint64_t, unsigned>> SortedHistogram; 106 for (const OpcodeStatTy &Stat : OpcodeHistogram) 107 SortedHistogram.emplace_back(Stat.second.first, Stat.first); 108 109 // Sort using lexicographic ordering 110 std::sort(SortedHistogram.begin(), SortedHistogram.end()); 111 112 // Dump in ascending order: Start with Opcode with Highest execution 113 // count. 114 for (auto Stat = SortedHistogram.rbegin(); Stat != SortedHistogram.rend(); 115 ++Stat) { 116 OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat->second).data(), 117 Stat->first * opts::DynoStatsScale); 118 119 MaxOpcodeHistogramTy MaxMultiMap = 120 OpcodeHistogram.at(Stat->second).second; 121 // Start with function name:BB offset with highest execution count. 122 for (auto Max = MaxMultiMap.rbegin(); Max != MaxMultiMap.rend(); ++Max) { 123 OS << format(", %'18lld, ", Max->first * opts::DynoStatsScale) 124 << Max->second.first.str() << ':' << Max->second.second; 125 } 126 OS << '\n'; 127 } 128 } 129 } 130 131 void DynoStats::operator+=(const DynoStats &Other) { 132 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1; 133 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) { 134 Stats[Stat] += Other[Stat]; 135 } 136 for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) { 137 auto I = OpcodeHistogram.find(Stat.first); 138 if (I == OpcodeHistogram.end()) { 139 OpcodeHistogram.emplace(Stat); 140 } else { 141 // Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th 142 // maximum counts. 143 I->second.first += Stat.second.first; 144 auto &MMap = I->second.second; 145 auto &OtherMMap = Stat.second.second; 146 auto Size = MMap.size(); 147 assert(Size <= opts::PrintDynoOpcodeStat); 148 for (auto Iter = OtherMMap.rbegin(); Iter != OtherMMap.rend(); ++Iter) { 149 if (Size++ >= opts::PrintDynoOpcodeStat) { 150 auto First = MMap.begin(); 151 if (Iter->first <= First->first) 152 break; 153 MMap.erase(First); 154 } 155 MMap.emplace(*Iter); 156 } 157 } 158 } 159 } 160 161 DynoStats getDynoStats(const BinaryFunction &BF) { 162 auto &BC = BF.getBinaryContext(); 163 164 DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64()); 165 166 // Return empty-stats about the function we don't completely understand. 167 if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG()) 168 return Stats; 169 170 // Update enumeration of basic blocks for correct detection of branch' 171 // direction. 172 BF.updateLayoutIndices(); 173 174 for (BinaryBasicBlock *const &BB : BF.layout()) { 175 // The basic block execution count equals to the sum of incoming branch 176 // frequencies. This may deviate from the sum of outgoing branches of the 177 // basic block especially since the block may contain a function that 178 // does not return or a function that throws an exception. 179 const uint64_t BBExecutionCount = BB->getKnownExecutionCount(); 180 181 // Ignore empty blocks and blocks that were not executed. 182 if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0) 183 continue; 184 185 // Count AArch64 linker-inserted veneers 186 if (BF.isAArch64Veneer()) 187 Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount(); 188 189 // Count various instruction types by iterating through all instructions. 190 // When -print-dyno-opcode-stats is on, count per each opcode and record 191 // maximum execution counts. 192 for (const MCInst &Instr : *BB) { 193 if (opts::PrintDynoOpcodeStat) { 194 unsigned Opcode = Instr.getOpcode(); 195 auto I = Stats.OpcodeHistogram.find(Opcode); 196 if (I == Stats.OpcodeHistogram.end()) { 197 DynoStats::MaxOpcodeHistogramTy MMap; 198 MMap.emplace(BBExecutionCount, 199 std::make_pair(BF.getOneName(), BB->getOffset())); 200 Stats.OpcodeHistogram.emplace(Opcode, 201 std::make_pair(BBExecutionCount, MMap)); 202 } else { 203 I->second.first += BBExecutionCount; 204 bool Insert = true; 205 if (I->second.second.size() == opts::PrintDynoOpcodeStat) { 206 auto First = I->second.second.begin(); 207 if (First->first < BBExecutionCount) 208 I->second.second.erase(First); 209 else 210 Insert = false; 211 } 212 if (Insert) { 213 I->second.second.emplace( 214 BBExecutionCount, 215 std::make_pair(BF.getOneName(), BB->getOffset())); 216 } 217 } 218 } 219 220 if (BC.MIB->isStore(Instr)) { 221 Stats[DynoStats::STORES] += BBExecutionCount; 222 } 223 if (BC.MIB->isLoad(Instr)) { 224 Stats[DynoStats::LOADS] += BBExecutionCount; 225 } 226 if (!BC.MIB->isCall(Instr)) 227 continue; 228 229 uint64_t CallFreq = BBExecutionCount; 230 if (BC.MIB->getConditionalTailCall(Instr)) { 231 CallFreq = 232 BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount"); 233 } 234 Stats[DynoStats::FUNCTION_CALLS] += CallFreq; 235 if (BC.MIB->isIndirectCall(Instr)) { 236 Stats[DynoStats::INDIRECT_CALLS] += CallFreq; 237 } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) { 238 const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol); 239 if (BF && BF->isPLTFunction()) { 240 Stats[DynoStats::PLT_CALLS] += CallFreq; 241 242 // We don't process PLT functions and hence have to adjust relevant 243 // dynostats here for: 244 // 245 // jmp *GOT_ENTRY(%rip) 246 // 247 // NOTE: this is arch-specific. 248 Stats[DynoStats::FUNCTION_CALLS] += CallFreq; 249 Stats[DynoStats::INDIRECT_CALLS] += CallFreq; 250 Stats[DynoStats::LOADS] += CallFreq; 251 Stats[DynoStats::INSTRUCTIONS] += CallFreq; 252 } 253 } 254 } 255 256 Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount; 257 258 // Jump tables. 259 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 260 if (BC.MIB->getJumpTable(*LastInstr)) { 261 Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount; 262 LLVM_DEBUG( 263 static uint64_t MostFrequentJT; 264 if (BBExecutionCount > MostFrequentJT) { 265 MostFrequentJT = BBExecutionCount; 266 dbgs() << "BOLT-INFO: most frequently executed jump table is in " 267 << "function " << BF << " in basic block " << BB->getName() 268 << " executed totally " << BBExecutionCount << " times.\n"; 269 } 270 ); 271 continue; 272 } 273 274 if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) { 275 Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount; 276 continue; 277 } 278 279 // Update stats for branches. 280 const MCSymbol *TBB = nullptr; 281 const MCSymbol *FBB = nullptr; 282 MCInst *CondBranch = nullptr; 283 MCInst *UncondBranch = nullptr; 284 if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) { 285 continue; 286 } 287 288 if (!CondBranch && !UncondBranch) { 289 continue; 290 } 291 292 // Simple unconditional branch. 293 if (!CondBranch) { 294 Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount; 295 continue; 296 } 297 298 // CTCs: instruction annotations could be stripped, hence check the number 299 // of successors to identify conditional tail calls. 300 if (BB->succ_size() == 1) { 301 if (BB->branch_info_begin() != BB->branch_info_end()) 302 Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count; 303 continue; 304 } 305 306 // Conditional branch that could be followed by an unconditional branch. 307 uint64_t TakenCount = BB->getTakenBranchInfo().Count; 308 if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE) 309 TakenCount = 0; 310 311 uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count; 312 if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE) 313 NonTakenCount = 0; 314 315 if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) { 316 Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount; 317 Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount; 318 } else { 319 Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount; 320 Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount; 321 } 322 323 if (UncondBranch) { 324 Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount; 325 } 326 } 327 328 return Stats; 329 } 330 331 } // namespace bolt 332 } // namespace llvm 333