1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements BinaryFunction member functions related to processing 10 // the execution profile. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #undef DEBUG_TYPE 21 #define DEBUG_TYPE "bolt-prof" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 cl::opt<IndirectCallPromotionType> 31 IndirectCallPromotion("indirect-call-promotion", 32 cl::init(ICP_NONE), 33 cl::desc("indirect call promotion"), 34 cl::values( 35 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"), 36 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"), 37 clEnumValN(ICP_JUMP_TABLES, "jump-tables", "perform ICP on jump tables"), 38 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")), 39 cl::ZeroOrMore, 40 cl::cat(BoltOptCategory)); 41 42 extern cl::opt<JumpTableSupportLevel> JumpTables; 43 44 static cl::opt<bool> 45 FixFuncCounts("fix-func-counts", 46 cl::desc("adjust function counts based on basic blocks execution count"), 47 cl::init(false), 48 cl::ZeroOrMore, 49 cl::Hidden, 50 cl::cat(BoltOptCategory)); 51 52 static cl::opt<bool> 53 FixBlockCounts("fix-block-counts", 54 cl::desc("adjust block counts based on outgoing branch counts"), 55 cl::init(true), 56 cl::ZeroOrMore, 57 cl::Hidden, 58 cl::cat(BoltOptCategory)); 59 60 static cl::opt<bool> 61 InferFallThroughs("infer-fall-throughs", 62 cl::desc("infer execution count for fall-through blocks"), 63 cl::init(false), 64 cl::ZeroOrMore, 65 cl::Hidden, 66 cl::cat(BoltOptCategory)); 67 68 } // namespace opts 69 70 namespace llvm { 71 namespace bolt { 72 73 void BinaryFunction::postProcessProfile() { 74 if (!hasValidProfile()) { 75 clearProfile(); 76 return; 77 } 78 79 if (!(getProfileFlags() & PF_LBR)) 80 return; 81 82 // If we have at least some branch data for the function indicate that it 83 // was executed. 84 if (opts::FixFuncCounts && ExecutionCount == 0) { 85 ExecutionCount = 1; 86 } 87 88 // Compute preliminary execution count for each basic block. 89 for (BinaryBasicBlock *BB : BasicBlocks) { 90 if ((!BB->isEntryPoint() && !BB->isLandingPad()) || 91 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) 92 BB->ExecutionCount = 0; 93 } 94 for (BinaryBasicBlock *BB : BasicBlocks) { 95 auto SuccBIIter = BB->branch_info_begin(); 96 for (BinaryBasicBlock *Succ : BB->successors()) { 97 // All incoming edges to the primary entry have been accounted for, thus 98 // we skip the update here. 99 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 100 Succ != BasicBlocks.front()) 101 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); 102 ++SuccBIIter; 103 } 104 } 105 106 // Fix for old profiles. 107 for (BinaryBasicBlock *BB : BasicBlocks) { 108 if (BB->size() != 1 || BB->succ_size() != 1) 109 continue; 110 111 if (BB->getKnownExecutionCount() == 0) 112 continue; 113 114 MCInst *Instr = BB->getFirstNonPseudoInstr(); 115 assert(Instr && "expected non-pseudo instr"); 116 if (!BC.MIB->hasAnnotation(*Instr, "NOP")) 117 continue; 118 119 BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); 120 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor); 121 if (!BI.Count) { 122 BI.Count = BB->getKnownExecutionCount(); 123 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + 124 BI.Count); 125 } 126 } 127 128 if (opts::FixBlockCounts) { 129 for (BinaryBasicBlock *BB : BasicBlocks) { 130 // Make sure that execution count of a block is at least the branch count 131 // of an incoming/outgoing jump. 132 auto SuccBIIter = BB->branch_info_begin(); 133 for (BinaryBasicBlock *Succ : BB->successors()) { 134 uint64_t Count = SuccBIIter->Count; 135 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) { 136 Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count)); 137 BB->setExecutionCount(std::max(BB->getExecutionCount(), Count)); 138 } 139 ++SuccBIIter; 140 } 141 // Make sure that execution count of a block is at least the number of 142 // function calls from the block. 143 for (MCInst &Inst : *BB) { 144 // Ignore non-call instruction 145 if (!BC.MIB->isCall(Inst)) 146 continue; 147 148 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count"); 149 if (CountAnnt) { 150 BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt)); 151 } 152 } 153 } 154 } 155 156 if (opts::InferFallThroughs) 157 inferFallThroughCounts(); 158 159 // Update profile information for jump tables based on CFG branch data. 160 for (BinaryBasicBlock *BB : BasicBlocks) { 161 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 162 if (!LastInstr) 163 continue; 164 const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr); 165 if (!JTAddress) 166 continue; 167 JumpTable *JT = getJumpTableContainingAddress(JTAddress); 168 if (!JT) 169 continue; 170 171 uint64_t TotalBranchCount = 0; 172 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : 173 BB->branch_info()) { 174 TotalBranchCount += BranchInfo.Count; 175 } 176 JT->Count += TotalBranchCount; 177 178 if (opts::IndirectCallPromotion < ICP_JUMP_TABLES && 179 opts::JumpTables < JTS_AGGRESSIVE) 180 continue; 181 182 if (JT->Counts.empty()) 183 JT->Counts.resize(JT->Entries.size()); 184 auto EI = JT->Entries.begin(); 185 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize; 186 EI += Delta; 187 while (EI != JT->Entries.end()) { 188 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI); 189 if (TargetBB) { 190 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo = 191 BB->getBranchInfo(*TargetBB); 192 assert(Delta < JT->Counts.size()); 193 JT->Counts[Delta].Count += BranchInfo.Count; 194 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount; 195 } 196 ++Delta; 197 ++EI; 198 // A label marks the start of another jump table. 199 if (JT->Labels.count(Delta * JT->EntrySize)) 200 break; 201 } 202 } 203 } 204 205 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { 206 // No reason to merge invalid or empty profiles into BF. 207 if (!hasValidProfile()) 208 return; 209 210 // Update function execution count. 211 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) { 212 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount()); 213 } 214 215 // Since we are merging a valid profile, the new profile should be valid too. 216 // It has either already been valid, or it has been cleaned up. 217 BF.ProfileMatchRatio = 1.0f; 218 219 // Update basic block and edge counts. 220 auto BBMergeI = BF.begin(); 221 for (BinaryBasicBlock *BB : BasicBlocks) { 222 BinaryBasicBlock *BBMerge = &*BBMergeI; 223 assert(getIndex(BB) == BF.getIndex(BBMerge)); 224 225 // Update basic block count. 226 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) { 227 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() + 228 BB->getExecutionCount()); 229 } 230 231 // Update edge count for successors of this basic block. 232 auto BBMergeSI = BBMerge->succ_begin(); 233 auto BIMergeI = BBMerge->branch_info_begin(); 234 auto BII = BB->branch_info_begin(); 235 for (const BinaryBasicBlock *BBSucc : BB->successors()) { 236 (void)BBSucc; 237 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI)); 238 239 // At this point no branch count should be set to COUNT_NO_PROFILE. 240 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 241 "unexpected unknown branch profile"); 242 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 243 "unexpected unknown branch profile"); 244 245 BIMergeI->Count += BII->Count; 246 247 // When we merge inferred and real fall-through branch data, the merged 248 // data is considered inferred. 249 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED && 250 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { 251 BIMergeI->MispredictedCount += BII->MispredictedCount; 252 } else { 253 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 254 } 255 256 ++BBMergeSI; 257 ++BII; 258 ++BIMergeI; 259 } 260 assert(BBMergeSI == BBMerge->succ_end()); 261 262 ++BBMergeI; 263 } 264 assert(BBMergeI == BF.end()); 265 266 // Merge jump tables profile info. 267 auto JTMergeI = BF.JumpTables.begin(); 268 for (const auto &JTEntry : JumpTables) { 269 if (JTMergeI->second->Counts.empty()) 270 JTMergeI->second->Counts.resize(JTEntry.second->Counts.size()); 271 auto CountMergeI = JTMergeI->second->Counts.begin(); 272 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) { 273 CountMergeI->Count += JI.Count; 274 CountMergeI->Mispreds += JI.Mispreds; 275 ++CountMergeI; 276 } 277 assert(CountMergeI == JTMergeI->second->Counts.end()); 278 279 ++JTMergeI; 280 } 281 assert(JTMergeI == BF.JumpTables.end()); 282 } 283 284 void BinaryFunction::inferFallThroughCounts() { 285 // Work on a basic block at a time, propagating frequency information 286 // forwards. 287 // It is important to walk in the layout order. 288 for (BinaryBasicBlock *BB : BasicBlocks) { 289 const uint64_t BBExecCount = BB->getExecutionCount(); 290 291 // Propagate this information to successors, filling in fall-through edges 292 // with frequency information 293 if (BB->succ_size() == 0) 294 continue; 295 296 // Calculate frequency of outgoing branches from this node according to 297 // LBR data. 298 uint64_t ReportedBranches = 0; 299 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) { 300 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) 301 ReportedBranches += SuccBI.Count; 302 } 303 304 // Get taken count of conditional tail call if the block ends with one. 305 uint64_t CTCTakenCount = 0; 306 const MCInst *CTCInstr = BB->getLastNonPseudoInstr(); 307 if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) { 308 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( 309 *CTCInstr, "CTCTakenCount"); 310 } 311 312 // Calculate frequency of throws from this node according to LBR data 313 // for branching into associated landing pads. Since it is possible 314 // for a landing pad to be associated with more than one basic blocks, 315 // we may overestimate the frequency of throws for such blocks. 316 uint64_t ReportedThrows = 0; 317 for (const BinaryBasicBlock *LP : BB->landing_pads()) { 318 ReportedThrows += LP->getExecutionCount(); 319 } 320 321 const uint64_t TotalReportedJumps = 322 ReportedBranches + CTCTakenCount + ReportedThrows; 323 324 // Infer the frequency of the fall-through edge, representing not taking the 325 // branch. 326 uint64_t Inferred = 0; 327 if (BBExecCount > TotalReportedJumps) 328 Inferred = BBExecCount - TotalReportedJumps; 329 330 LLVM_DEBUG( 331 if (BBExecCount < TotalReportedJumps) dbgs() 332 << "Fall-through inference is slightly inconsistent. " 333 "exec frequency is less than the outgoing edges frequency (" 334 << BBExecCount << " < " << ReportedBranches 335 << ") for BB at offset 0x" 336 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';); 337 338 if (BB->succ_size() <= 2) { 339 // Skip if the last instruction is an unconditional jump. 340 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 341 if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) || 342 BC.MIB->isIndirectBranch(*LastInstr))) 343 continue; 344 // If there is an FT it will be the last successor. 345 auto &SuccBI = *BB->branch_info_rbegin(); 346 auto &Succ = *BB->succ_rbegin(); 347 if (SuccBI.Count == 0) { 348 SuccBI.Count = Inferred; 349 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 350 Succ->ExecutionCount += Inferred; 351 } 352 } 353 } 354 355 return; 356 } 357 358 void BinaryFunction::clearProfile() { 359 // Keep function execution profile the same. Only clear basic block and edge 360 // counts. 361 for (BinaryBasicBlock *BB : BasicBlocks) { 362 BB->ExecutionCount = 0; 363 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { 364 BI.Count = 0; 365 BI.MispredictedCount = 0; 366 } 367 } 368 } 369 370 } // namespace bolt 371 } // namespace llvm 372