1 //===--- DynoStats.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/Core/DynoStats.h" 12 #include "bolt/Core/BinaryBasicBlock.h" 13 #include "bolt/Core/BinaryFunction.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/Debug.h" 17 #include "llvm/Support/raw_ostream.h" 18 #include <algorithm> 19 #include <string> 20 21 #undef DEBUG_TYPE 22 #define DEBUG_TYPE "bolt" 23 24 using namespace llvm; 25 using namespace bolt; 26 27 namespace opts { 28 29 extern cl::OptionCategory BoltCategory; 30 31 static cl::opt<uint32_t> 32 DynoStatsScale("dyno-stats-scale", 33 cl::desc("scale to be applied while reporting dyno stats"), 34 cl::Optional, 35 cl::init(1), 36 cl::Hidden, 37 cl::cat(BoltCategory)); 38 39 static cl::opt<uint32_t> 40 PrintDynoOpcodeStat("print-dyno-opcode-stats", 41 cl::desc("print per instruction opcode dyno stats and the function" 42 "names:BB offsets of the nth highest execution counts"), 43 cl::init(0), 44 cl::Hidden, 45 cl::cat(BoltCategory)); 46 47 } // namespace opts 48 49 namespace llvm { 50 namespace bolt { 51 52 constexpr const char *DynoStats::Desc[]; 53 54 bool DynoStats::operator<(const DynoStats &Other) const { 55 return std::lexicographical_compare( 56 &Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT], 57 &Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]); 58 } 59 60 bool DynoStats::operator==(const DynoStats &Other) const { 61 return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT], 62 &Other.Stats[FIRST_DYNO_STAT]); 63 } 64 65 bool DynoStats::lessThan(const DynoStats &Other, 66 ArrayRef<Category> Keys) const { 67 return std::lexicographical_compare( 68 Keys.begin(), Keys.end(), Keys.begin(), Keys.end(), 69 [this, &Other](const Category A, const Category) { 70 return Stats[A] < Other.Stats[A]; 71 }); 72 } 73 74 void DynoStats::print(raw_ostream &OS, const DynoStats *Other, 75 MCInstPrinter *Printer) const { 76 auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat, 77 uint64_t OtherStat) { 78 OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name; 79 if (Other) { 80 if (Stat != OtherStat) { 81 OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0 82 OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 / 83 (float)(OtherStat)); 84 } else { 85 OS << " (=)"; 86 } 87 } 88 OS << '\n'; 89 }; 90 91 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1; 92 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) { 93 94 if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64) 95 continue; 96 97 printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0); 98 } 99 if (opts::PrintDynoOpcodeStat && Printer) { 100 outs() << "\nProgram-wide opcode histogram:\n"; 101 OS << " Opcode, Execution Count, Max Exec Count, " 102 "Function Name:Offset ...\n"; 103 std::vector<std::pair<uint64_t, unsigned>> SortedHistogram; 104 for (const OpcodeStatTy &Stat : OpcodeHistogram) 105 SortedHistogram.emplace_back(Stat.second.first, Stat.first); 106 107 // Sort using lexicographic ordering 108 std::sort(SortedHistogram.begin(), SortedHistogram.end()); 109 110 // Dump in ascending order: Start with Opcode with Highest execution 111 // count. 112 for (auto Stat = SortedHistogram.rbegin(); Stat != SortedHistogram.rend(); 113 ++Stat) { 114 OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat->second).data(), 115 Stat->first * opts::DynoStatsScale); 116 117 MaxOpcodeHistogramTy MaxMultiMap = 118 OpcodeHistogram.at(Stat->second).second; 119 // Start with function name:BB offset with highest execution count. 120 for (auto Max = MaxMultiMap.rbegin(); Max != MaxMultiMap.rend(); ++Max) { 121 OS << format(", %'18lld, ", Max->first * opts::DynoStatsScale) 122 << Max->second.first.str() << ':' << Max->second.second; 123 } 124 OS << '\n'; 125 } 126 } 127 } 128 129 void DynoStats::operator+=(const DynoStats &Other) { 130 for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1; 131 Stat < DynoStats::LAST_DYNO_STAT; ++Stat) { 132 Stats[Stat] += Other[Stat]; 133 } 134 for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) { 135 auto I = OpcodeHistogram.find(Stat.first); 136 if (I == OpcodeHistogram.end()) { 137 OpcodeHistogram.emplace(Stat); 138 } else { 139 // Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th 140 // maximum counts. 141 I->second.first += Stat.second.first; 142 auto &MMap = I->second.second; 143 auto &OtherMMap = Stat.second.second; 144 auto Size = MMap.size(); 145 assert(Size <= opts::PrintDynoOpcodeStat); 146 for (auto Iter = OtherMMap.rbegin(); Iter != OtherMMap.rend(); ++Iter) { 147 if (Size++ >= opts::PrintDynoOpcodeStat) { 148 auto First = MMap.begin(); 149 if (Iter->first <= First->first) 150 break; 151 MMap.erase(First); 152 } 153 MMap.emplace(*Iter); 154 } 155 } 156 } 157 } 158 159 DynoStats getDynoStats(const BinaryFunction &BF) { 160 auto &BC = BF.getBinaryContext(); 161 162 DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64()); 163 164 // Return empty-stats about the function we don't completely understand. 165 if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG()) 166 return Stats; 167 168 // Update enumeration of basic blocks for correct detection of branch' 169 // direction. 170 BF.updateLayoutIndices(); 171 172 for (BinaryBasicBlock *const &BB : BF.layout()) { 173 // The basic block execution count equals to the sum of incoming branch 174 // frequencies. This may deviate from the sum of outgoing branches of the 175 // basic block especially since the block may contain a function that 176 // does not return or a function that throws an exception. 177 const uint64_t BBExecutionCount = BB->getKnownExecutionCount(); 178 179 // Ignore empty blocks and blocks that were not executed. 180 if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0) 181 continue; 182 183 // Count AArch64 linker-inserted veneers 184 if (BF.isAArch64Veneer()) 185 Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount(); 186 187 // Count various instruction types by iterating through all instructions. 188 // When -print-dyno-opcode-stats is on, count per each opcode and record 189 // maximum execution counts. 190 for (const MCInst &Instr : *BB) { 191 if (opts::PrintDynoOpcodeStat) { 192 unsigned Opcode = Instr.getOpcode(); 193 auto I = Stats.OpcodeHistogram.find(Opcode); 194 if (I == Stats.OpcodeHistogram.end()) { 195 DynoStats::MaxOpcodeHistogramTy MMap; 196 MMap.emplace(BBExecutionCount, 197 std::make_pair(BF.getOneName(), BB->getOffset())); 198 Stats.OpcodeHistogram.emplace(Opcode, 199 std::make_pair(BBExecutionCount, MMap)); 200 } else { 201 I->second.first += BBExecutionCount; 202 bool Insert = true; 203 if (I->second.second.size() == opts::PrintDynoOpcodeStat) { 204 auto First = I->second.second.begin(); 205 if (First->first < BBExecutionCount) 206 I->second.second.erase(First); 207 else 208 Insert = false; 209 } 210 if (Insert) { 211 I->second.second.emplace( 212 BBExecutionCount, 213 std::make_pair(BF.getOneName(), BB->getOffset())); 214 } 215 } 216 } 217 218 if (BC.MIB->isStore(Instr)) { 219 Stats[DynoStats::STORES] += BBExecutionCount; 220 } 221 if (BC.MIB->isLoad(Instr)) { 222 Stats[DynoStats::LOADS] += BBExecutionCount; 223 } 224 if (!BC.MIB->isCall(Instr)) 225 continue; 226 227 uint64_t CallFreq = BBExecutionCount; 228 if (BC.MIB->getConditionalTailCall(Instr)) { 229 CallFreq = 230 BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount"); 231 } 232 Stats[DynoStats::FUNCTION_CALLS] += CallFreq; 233 if (BC.MIB->isIndirectCall(Instr)) { 234 Stats[DynoStats::INDIRECT_CALLS] += CallFreq; 235 } else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) { 236 const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol); 237 if (BF && BF->isPLTFunction()) { 238 Stats[DynoStats::PLT_CALLS] += CallFreq; 239 240 // We don't process PLT functions and hence have to adjust relevant 241 // dynostats here for: 242 // 243 // jmp *GOT_ENTRY(%rip) 244 // 245 // NOTE: this is arch-specific. 246 Stats[DynoStats::FUNCTION_CALLS] += CallFreq; 247 Stats[DynoStats::INDIRECT_CALLS] += CallFreq; 248 Stats[DynoStats::LOADS] += CallFreq; 249 Stats[DynoStats::INSTRUCTIONS] += CallFreq; 250 } 251 } 252 } 253 254 Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount; 255 256 // Jump tables. 257 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 258 if (BC.MIB->getJumpTable(*LastInstr)) { 259 Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount; 260 LLVM_DEBUG( 261 static uint64_t MostFrequentJT; 262 if (BBExecutionCount > MostFrequentJT) { 263 MostFrequentJT = BBExecutionCount; 264 dbgs() << "BOLT-INFO: most frequently executed jump table is in " 265 << "function " << BF << " in basic block " << BB->getName() 266 << " executed totally " << BBExecutionCount << " times.\n"; 267 } 268 ); 269 continue; 270 } 271 272 if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) { 273 Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount; 274 continue; 275 } 276 277 // Update stats for branches. 278 const MCSymbol *TBB = nullptr; 279 const MCSymbol *FBB = nullptr; 280 MCInst *CondBranch = nullptr; 281 MCInst *UncondBranch = nullptr; 282 if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch)) { 283 continue; 284 } 285 286 if (!CondBranch && !UncondBranch) { 287 continue; 288 } 289 290 // Simple unconditional branch. 291 if (!CondBranch) { 292 Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount; 293 continue; 294 } 295 296 // CTCs: instruction annotations could be stripped, hence check the number 297 // of successors to identify conditional tail calls. 298 if (BB->succ_size() == 1) { 299 if (BB->branch_info_begin() != BB->branch_info_end()) 300 Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count; 301 continue; 302 } 303 304 // Conditional branch that could be followed by an unconditional branch. 305 uint64_t TakenCount = BB->getTakenBranchInfo().Count; 306 if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE) 307 TakenCount = 0; 308 309 uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count; 310 if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE) 311 NonTakenCount = 0; 312 313 if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) { 314 Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount; 315 Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount; 316 } else { 317 Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount; 318 Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount; 319 } 320 321 if (UncondBranch) { 322 Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount; 323 } 324 } 325 326 return Stats; 327 } 328 329 } // namespace bolt 330 } // namespace llvm 331