1 //===--- BinaryFunctionProfile.cpp --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "bolt/Core/BinaryBasicBlock.h" 12 #include "bolt/Core/BinaryFunction.h" 13 #include "llvm/Support/CommandLine.h" 14 #include "llvm/Support/Debug.h" 15 #include "llvm/Support/raw_ostream.h" 16 17 #undef DEBUG_TYPE 18 #define DEBUG_TYPE "bolt-prof" 19 20 using namespace llvm; 21 using namespace bolt; 22 23 namespace opts { 24 25 extern cl::OptionCategory BoltOptCategory; 26 27 cl::opt<IndirectCallPromotionType> 28 IndirectCallPromotion("indirect-call-promotion", 29 cl::init(ICP_NONE), 30 cl::desc("indirect call promotion"), 31 cl::values( 32 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"), 33 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"), 34 clEnumValN(ICP_JUMP_TABLES, "jump-tables", "perform ICP on jump tables"), 35 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")), 36 cl::ZeroOrMore, 37 cl::cat(BoltOptCategory)); 38 39 extern cl::opt<JumpTableSupportLevel> JumpTables; 40 41 static cl::opt<bool> 42 FixFuncCounts("fix-func-counts", 43 cl::desc("adjust function counts based on basic blocks execution count"), 44 cl::init(false), 45 cl::ZeroOrMore, 46 cl::Hidden, 47 cl::cat(BoltOptCategory)); 48 49 static cl::opt<bool> 50 FixBlockCounts("fix-block-counts", 51 cl::desc("adjust block counts based on outgoing branch counts"), 52 cl::init(true), 53 cl::ZeroOrMore, 54 cl::Hidden, 55 cl::cat(BoltOptCategory)); 56 57 static cl::opt<bool> 58 InferFallThroughs("infer-fall-throughs", 59 cl::desc("infer execution count for fall-through blocks"), 60 cl::init(false), 61 cl::ZeroOrMore, 62 cl::Hidden, 63 cl::cat(BoltOptCategory)); 64 65 } // namespace opts 66 67 namespace llvm { 68 namespace bolt { 69 70 void BinaryFunction::postProcessProfile() { 71 if (!hasValidProfile()) { 72 clearProfile(); 73 return; 74 } 75 76 if (!(getProfileFlags() & PF_LBR)) 77 return; 78 79 // If we have at least some branch data for the function indicate that it 80 // was executed. 81 if (opts::FixFuncCounts && ExecutionCount == 0) { 82 ExecutionCount = 1; 83 } 84 85 // Compute preliminary execution count for each basic block. 86 for (BinaryBasicBlock *BB : BasicBlocks) { 87 if ((!BB->isEntryPoint() && !BB->isLandingPad()) || 88 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) 89 BB->ExecutionCount = 0; 90 } 91 for (BinaryBasicBlock *BB : BasicBlocks) { 92 auto SuccBIIter = BB->branch_info_begin(); 93 for (BinaryBasicBlock *Succ : BB->successors()) { 94 // All incoming edges to the primary entry have been accounted for, thus 95 // we skip the update here. 96 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 97 Succ != BasicBlocks.front()) 98 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); 99 ++SuccBIIter; 100 } 101 } 102 103 // Fix for old profiles. 104 for (BinaryBasicBlock *BB : BasicBlocks) { 105 if (BB->size() != 1 || BB->succ_size() != 1) 106 continue; 107 108 if (BB->getKnownExecutionCount() == 0) 109 continue; 110 111 MCInst *Instr = BB->getFirstNonPseudoInstr(); 112 assert(Instr && "expected non-pseudo instr"); 113 if (!BC.MIB->hasAnnotation(*Instr, "NOP")) 114 continue; 115 116 BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); 117 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor); 118 if (!BI.Count) { 119 BI.Count = BB->getKnownExecutionCount(); 120 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + 121 BI.Count); 122 } 123 } 124 125 if (opts::FixBlockCounts) { 126 for (BinaryBasicBlock *BB : BasicBlocks) { 127 // Make sure that execution count of a block is at least the branch count 128 // of an incoming/outgoing jump. 129 auto SuccBIIter = BB->branch_info_begin(); 130 for (BinaryBasicBlock *Succ : BB->successors()) { 131 uint64_t Count = SuccBIIter->Count; 132 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) { 133 Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count)); 134 BB->setExecutionCount(std::max(BB->getExecutionCount(), Count)); 135 } 136 ++SuccBIIter; 137 } 138 // Make sure that execution count of a block is at least the number of 139 // function calls from the block. 140 for (MCInst &Inst : *BB) { 141 // Ignore non-call instruction 142 if (!BC.MIB->isCall(Inst)) 143 continue; 144 145 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count"); 146 if (CountAnnt) { 147 BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt)); 148 } 149 } 150 } 151 } 152 153 if (opts::InferFallThroughs) 154 inferFallThroughCounts(); 155 156 // Update profile information for jump tables based on CFG branch data. 157 for (BinaryBasicBlock *BB : BasicBlocks) { 158 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 159 if (!LastInstr) 160 continue; 161 const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr); 162 if (!JTAddress) 163 continue; 164 JumpTable *JT = getJumpTableContainingAddress(JTAddress); 165 if (!JT) 166 continue; 167 168 uint64_t TotalBranchCount = 0; 169 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : 170 BB->branch_info()) { 171 TotalBranchCount += BranchInfo.Count; 172 } 173 JT->Count += TotalBranchCount; 174 175 if (opts::IndirectCallPromotion < ICP_JUMP_TABLES && 176 opts::JumpTables < JTS_AGGRESSIVE) 177 continue; 178 179 if (JT->Counts.empty()) 180 JT->Counts.resize(JT->Entries.size()); 181 auto EI = JT->Entries.begin(); 182 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize; 183 EI += Delta; 184 while (EI != JT->Entries.end()) { 185 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI); 186 if (TargetBB) { 187 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo = 188 BB->getBranchInfo(*TargetBB); 189 assert(Delta < JT->Counts.size()); 190 JT->Counts[Delta].Count += BranchInfo.Count; 191 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount; 192 } 193 ++Delta; 194 ++EI; 195 // A label marks the start of another jump table. 196 if (JT->Labels.count(Delta * JT->EntrySize)) 197 break; 198 } 199 } 200 } 201 202 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { 203 // No reason to merge invalid or empty profiles into BF. 204 if (!hasValidProfile()) 205 return; 206 207 // Update function execution count. 208 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) { 209 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount()); 210 } 211 212 // Since we are merging a valid profile, the new profile should be valid too. 213 // It has either already been valid, or it has been cleaned up. 214 BF.ProfileMatchRatio = 1.0f; 215 216 // Update basic block and edge counts. 217 auto BBMergeI = BF.begin(); 218 for (BinaryBasicBlock *BB : BasicBlocks) { 219 BinaryBasicBlock *BBMerge = &*BBMergeI; 220 assert(getIndex(BB) == BF.getIndex(BBMerge)); 221 222 // Update basic block count. 223 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) { 224 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() + 225 BB->getExecutionCount()); 226 } 227 228 // Update edge count for successors of this basic block. 229 auto BBMergeSI = BBMerge->succ_begin(); 230 auto BIMergeI = BBMerge->branch_info_begin(); 231 auto BII = BB->branch_info_begin(); 232 for (const BinaryBasicBlock *BBSucc : BB->successors()) { 233 (void)BBSucc; 234 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI)); 235 236 // At this point no branch count should be set to COUNT_NO_PROFILE. 237 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 238 "unexpected unknown branch profile"); 239 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 240 "unexpected unknown branch profile"); 241 242 BIMergeI->Count += BII->Count; 243 244 // When we merge inferred and real fall-through branch data, the merged 245 // data is considered inferred. 246 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED && 247 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { 248 BIMergeI->MispredictedCount += BII->MispredictedCount; 249 } else { 250 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 251 } 252 253 ++BBMergeSI; 254 ++BII; 255 ++BIMergeI; 256 } 257 assert(BBMergeSI == BBMerge->succ_end()); 258 259 ++BBMergeI; 260 } 261 assert(BBMergeI == BF.end()); 262 263 // Merge jump tables profile info. 264 auto JTMergeI = BF.JumpTables.begin(); 265 for (const auto &JTEntry : JumpTables) { 266 if (JTMergeI->second->Counts.empty()) 267 JTMergeI->second->Counts.resize(JTEntry.second->Counts.size()); 268 auto CountMergeI = JTMergeI->second->Counts.begin(); 269 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) { 270 CountMergeI->Count += JI.Count; 271 CountMergeI->Mispreds += JI.Mispreds; 272 ++CountMergeI; 273 } 274 assert(CountMergeI == JTMergeI->second->Counts.end()); 275 276 ++JTMergeI; 277 } 278 assert(JTMergeI == BF.JumpTables.end()); 279 } 280 281 void BinaryFunction::inferFallThroughCounts() { 282 // Work on a basic block at a time, propagating frequency information 283 // forwards. 284 // It is important to walk in the layout order. 285 for (BinaryBasicBlock *BB : BasicBlocks) { 286 const uint64_t BBExecCount = BB->getExecutionCount(); 287 288 // Propagate this information to successors, filling in fall-through edges 289 // with frequency information 290 if (BB->succ_size() == 0) 291 continue; 292 293 // Calculate frequency of outgoing branches from this node according to 294 // LBR data. 295 uint64_t ReportedBranches = 0; 296 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) { 297 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) 298 ReportedBranches += SuccBI.Count; 299 } 300 301 // Get taken count of conditional tail call if the block ends with one. 302 uint64_t CTCTakenCount = 0; 303 const MCInst *CTCInstr = BB->getLastNonPseudoInstr(); 304 if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) { 305 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( 306 *CTCInstr, "CTCTakenCount"); 307 } 308 309 // Calculate frequency of throws from this node according to LBR data 310 // for branching into associated landing pads. Since it is possible 311 // for a landing pad to be associated with more than one basic blocks, 312 // we may overestimate the frequency of throws for such blocks. 313 uint64_t ReportedThrows = 0; 314 for (const BinaryBasicBlock *LP : BB->landing_pads()) { 315 ReportedThrows += LP->getExecutionCount(); 316 } 317 318 const uint64_t TotalReportedJumps = 319 ReportedBranches + CTCTakenCount + ReportedThrows; 320 321 // Infer the frequency of the fall-through edge, representing not taking the 322 // branch. 323 uint64_t Inferred = 0; 324 if (BBExecCount > TotalReportedJumps) 325 Inferred = BBExecCount - TotalReportedJumps; 326 327 LLVM_DEBUG( 328 if (BBExecCount < TotalReportedJumps) dbgs() 329 << "Fall-through inference is slightly inconsistent. " 330 "exec frequency is less than the outgoing edges frequency (" 331 << BBExecCount << " < " << ReportedBranches 332 << ") for BB at offset 0x" 333 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';); 334 335 if (BB->succ_size() <= 2) { 336 // Skip if the last instruction is an unconditional jump. 337 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 338 if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) || 339 BC.MIB->isIndirectBranch(*LastInstr))) 340 continue; 341 // If there is an FT it will be the last successor. 342 auto &SuccBI = *BB->branch_info_rbegin(); 343 auto &Succ = *BB->succ_rbegin(); 344 if (SuccBI.Count == 0) { 345 SuccBI.Count = Inferred; 346 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 347 Succ->ExecutionCount += Inferred; 348 } 349 } 350 } 351 352 return; 353 } 354 355 void BinaryFunction::clearProfile() { 356 // Keep function execution profile the same. Only clear basic block and edge 357 // counts. 358 for (BinaryBasicBlock *BB : BasicBlocks) { 359 BB->ExecutionCount = 0; 360 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { 361 BI.Count = 0; 362 BI.MispredictedCount = 0; 363 } 364 } 365 } 366 367 } // namespace bolt 368 } // namespace llvm 369