1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements BinaryFunction member functions related to processing 10 // the execution profile. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #undef DEBUG_TYPE 21 #define DEBUG_TYPE "bolt-prof" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 cl::opt<IndirectCallPromotionType> ICP( 31 "indirect-call-promotion", cl::init(ICP_NONE), 32 cl::desc("indirect call promotion"), 33 cl::values( 34 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"), 35 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"), 36 clEnumValN(ICP_JUMP_TABLES, "jump-tables", 37 "perform ICP on jump tables"), 38 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")), 39 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 40 41 static cl::alias ICPAlias("icp", 42 cl::desc("Alias for --indirect-call-promotion"), 43 cl::aliasopt(ICP)); 44 45 extern cl::opt<JumpTableSupportLevel> JumpTables; 46 47 static cl::opt<bool> FixFuncCounts( 48 "fix-func-counts", 49 cl::desc("adjust function counts based on basic blocks execution count"), 50 cl::Hidden, cl::cat(BoltOptCategory)); 51 52 static cl::opt<bool> FixBlockCounts( 53 "fix-block-counts", 54 cl::desc("adjust block counts based on outgoing branch counts"), 55 cl::init(true), cl::Hidden, cl::cat(BoltOptCategory)); 56 57 static cl::opt<bool> 58 InferFallThroughs("infer-fall-throughs", 59 cl::desc("infer execution count for fall-through blocks"), 60 cl::Hidden, cl::cat(BoltOptCategory)); 61 62 } // namespace opts 63 64 namespace llvm { 65 namespace bolt { 66 67 void BinaryFunction::postProcessProfile() { 68 if (!hasValidProfile()) { 69 clearProfile(); 70 return; 71 } 72 73 if (!(getProfileFlags() & PF_LBR)) 74 return; 75 76 // If we have at least some branch data for the function indicate that it 77 // was executed. 78 if (opts::FixFuncCounts && ExecutionCount == 0) 79 ExecutionCount = 1; 80 81 // Compute preliminary execution count for each basic block. 82 for (BinaryBasicBlock *BB : BasicBlocks) { 83 if ((!BB->isEntryPoint() && !BB->isLandingPad()) || 84 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) 85 BB->ExecutionCount = 0; 86 } 87 for (BinaryBasicBlock *BB : BasicBlocks) { 88 auto SuccBIIter = BB->branch_info_begin(); 89 for (BinaryBasicBlock *Succ : BB->successors()) { 90 // All incoming edges to the primary entry have been accounted for, thus 91 // we skip the update here. 92 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 93 Succ != BasicBlocks.front()) 94 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); 95 ++SuccBIIter; 96 } 97 } 98 99 // Fix for old profiles. 100 for (BinaryBasicBlock *BB : BasicBlocks) { 101 if (BB->size() != 1 || BB->succ_size() != 1) 102 continue; 103 104 if (BB->getKnownExecutionCount() == 0) 105 continue; 106 107 MCInst *Instr = BB->getFirstNonPseudoInstr(); 108 assert(Instr && "expected non-pseudo instr"); 109 if (!BC.MIB->hasAnnotation(*Instr, "NOP")) 110 continue; 111 112 BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); 113 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor); 114 if (!BI.Count) { 115 BI.Count = BB->getKnownExecutionCount(); 116 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + 117 BI.Count); 118 } 119 } 120 121 if (opts::FixBlockCounts) { 122 for (BinaryBasicBlock *BB : BasicBlocks) { 123 // Make sure that execution count of a block is at least the branch count 124 // of an incoming/outgoing jump. 125 auto SuccBIIter = BB->branch_info_begin(); 126 for (BinaryBasicBlock *Succ : BB->successors()) { 127 uint64_t Count = SuccBIIter->Count; 128 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) { 129 Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count)); 130 BB->setExecutionCount(std::max(BB->getExecutionCount(), Count)); 131 } 132 ++SuccBIIter; 133 } 134 // Make sure that execution count of a block is at least the number of 135 // function calls from the block. 136 for (MCInst &Inst : *BB) { 137 // Ignore non-call instruction 138 if (!BC.MIB->isCall(Inst)) 139 continue; 140 141 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count"); 142 if (CountAnnt) 143 BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt)); 144 } 145 } 146 } 147 148 if (opts::InferFallThroughs) 149 inferFallThroughCounts(); 150 151 // Update profile information for jump tables based on CFG branch data. 152 for (BinaryBasicBlock *BB : BasicBlocks) { 153 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 154 if (!LastInstr) 155 continue; 156 const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr); 157 if (!JTAddress) 158 continue; 159 JumpTable *JT = getJumpTableContainingAddress(JTAddress); 160 if (!JT) 161 continue; 162 163 uint64_t TotalBranchCount = 0; 164 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : 165 BB->branch_info()) { 166 TotalBranchCount += BranchInfo.Count; 167 } 168 JT->Count += TotalBranchCount; 169 170 if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE) 171 continue; 172 173 if (JT->Counts.empty()) 174 JT->Counts.resize(JT->Entries.size()); 175 auto EI = JT->Entries.begin(); 176 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize; 177 EI += Delta; 178 while (EI != JT->Entries.end()) { 179 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI); 180 if (TargetBB) { 181 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo = 182 BB->getBranchInfo(*TargetBB); 183 assert(Delta < JT->Counts.size()); 184 JT->Counts[Delta].Count += BranchInfo.Count; 185 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount; 186 } 187 ++Delta; 188 ++EI; 189 // A label marks the start of another jump table. 190 if (JT->Labels.count(Delta * JT->EntrySize)) 191 break; 192 } 193 } 194 } 195 196 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { 197 // No reason to merge invalid or empty profiles into BF. 198 if (!hasValidProfile()) 199 return; 200 201 // Update function execution count. 202 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) 203 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount()); 204 205 // Since we are merging a valid profile, the new profile should be valid too. 206 // It has either already been valid, or it has been cleaned up. 207 BF.ProfileMatchRatio = 1.0f; 208 209 // Update basic block and edge counts. 210 auto BBMergeI = BF.begin(); 211 for (BinaryBasicBlock *BB : BasicBlocks) { 212 BinaryBasicBlock *BBMerge = &*BBMergeI; 213 assert(getIndex(BB) == BF.getIndex(BBMerge)); 214 215 // Update basic block count. 216 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) { 217 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() + 218 BB->getExecutionCount()); 219 } 220 221 // Update edge count for successors of this basic block. 222 auto BBMergeSI = BBMerge->succ_begin(); 223 auto BIMergeI = BBMerge->branch_info_begin(); 224 auto BII = BB->branch_info_begin(); 225 for (const BinaryBasicBlock *BBSucc : BB->successors()) { 226 (void)BBSucc; 227 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI)); 228 (void)BBMergeSI; 229 230 // At this point no branch count should be set to COUNT_NO_PROFILE. 231 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 232 "unexpected unknown branch profile"); 233 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 234 "unexpected unknown branch profile"); 235 236 BIMergeI->Count += BII->Count; 237 238 // When we merge inferred and real fall-through branch data, the merged 239 // data is considered inferred. 240 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED && 241 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { 242 BIMergeI->MispredictedCount += BII->MispredictedCount; 243 } else { 244 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 245 } 246 247 ++BBMergeSI; 248 ++BII; 249 ++BIMergeI; 250 } 251 assert(BBMergeSI == BBMerge->succ_end()); 252 253 ++BBMergeI; 254 } 255 assert(BBMergeI == BF.end()); 256 257 // Merge jump tables profile info. 258 auto JTMergeI = BF.JumpTables.begin(); 259 for (const auto &JTEntry : JumpTables) { 260 if (JTMergeI->second->Counts.empty()) 261 JTMergeI->second->Counts.resize(JTEntry.second->Counts.size()); 262 auto CountMergeI = JTMergeI->second->Counts.begin(); 263 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) { 264 CountMergeI->Count += JI.Count; 265 CountMergeI->Mispreds += JI.Mispreds; 266 ++CountMergeI; 267 } 268 assert(CountMergeI == JTMergeI->second->Counts.end()); 269 270 ++JTMergeI; 271 } 272 assert(JTMergeI == BF.JumpTables.end()); 273 } 274 275 void BinaryFunction::inferFallThroughCounts() { 276 // Work on a basic block at a time, propagating frequency information 277 // forwards. 278 // It is important to walk in the layout order. 279 for (BinaryBasicBlock *BB : BasicBlocks) { 280 const uint64_t BBExecCount = BB->getExecutionCount(); 281 282 // Propagate this information to successors, filling in fall-through edges 283 // with frequency information 284 if (BB->succ_size() == 0) 285 continue; 286 287 // Calculate frequency of outgoing branches from this node according to 288 // LBR data. 289 uint64_t ReportedBranches = 0; 290 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) 291 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) 292 ReportedBranches += SuccBI.Count; 293 294 // Get taken count of conditional tail call if the block ends with one. 295 uint64_t CTCTakenCount = 0; 296 const MCInst *CTCInstr = BB->getLastNonPseudoInstr(); 297 if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) { 298 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( 299 *CTCInstr, "CTCTakenCount"); 300 } 301 302 // Calculate frequency of throws from this node according to LBR data 303 // for branching into associated landing pads. Since it is possible 304 // for a landing pad to be associated with more than one basic blocks, 305 // we may overestimate the frequency of throws for such blocks. 306 uint64_t ReportedThrows = 0; 307 for (const BinaryBasicBlock *LP : BB->landing_pads()) 308 ReportedThrows += LP->getExecutionCount(); 309 310 const uint64_t TotalReportedJumps = 311 ReportedBranches + CTCTakenCount + ReportedThrows; 312 313 // Infer the frequency of the fall-through edge, representing not taking the 314 // branch. 315 uint64_t Inferred = 0; 316 if (BBExecCount > TotalReportedJumps) 317 Inferred = BBExecCount - TotalReportedJumps; 318 319 LLVM_DEBUG( 320 if (BBExecCount < TotalReportedJumps) dbgs() 321 << "Fall-through inference is slightly inconsistent. " 322 "exec frequency is less than the outgoing edges frequency (" 323 << BBExecCount << " < " << ReportedBranches 324 << ") for BB at offset 0x" 325 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';); 326 327 if (BB->succ_size() <= 2) { 328 // Skip if the last instruction is an unconditional jump. 329 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 330 if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) || 331 BC.MIB->isIndirectBranch(*LastInstr))) 332 continue; 333 // If there is an FT it will be the last successor. 334 auto &SuccBI = *BB->branch_info_rbegin(); 335 auto &Succ = *BB->succ_rbegin(); 336 if (SuccBI.Count == 0) { 337 SuccBI.Count = Inferred; 338 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 339 Succ->ExecutionCount = 340 std::max(Succ->getKnownExecutionCount(), Inferred); 341 } 342 } 343 } 344 } 345 346 void BinaryFunction::clearProfile() { 347 // Keep function execution profile the same. Only clear basic block and edge 348 // counts. 349 for (BinaryBasicBlock *BB : BasicBlocks) { 350 BB->ExecutionCount = 0; 351 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { 352 BI.Count = 0; 353 BI.MispredictedCount = 0; 354 } 355 } 356 } 357 358 } // namespace bolt 359 } // namespace llvm 360