1 //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements BinaryFunction member functions related to processing 10 // the execution profile. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "bolt/Core/BinaryBasicBlock.h" 15 #include "bolt/Core/BinaryFunction.h" 16 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/Debug.h" 18 #include "llvm/Support/raw_ostream.h" 19 20 #undef DEBUG_TYPE 21 #define DEBUG_TYPE "bolt-prof" 22 23 using namespace llvm; 24 using namespace bolt; 25 26 namespace opts { 27 28 extern cl::OptionCategory BoltOptCategory; 29 30 cl::opt<IndirectCallPromotionType> ICP( 31 "indirect-call-promotion", cl::init(ICP_NONE), 32 cl::desc("indirect call promotion"), 33 cl::values( 34 clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"), 35 clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"), 36 clEnumValN(ICP_JUMP_TABLES, "jump-tables", 37 "perform ICP on jump tables"), 38 clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")), 39 cl::ZeroOrMore, cl::cat(BoltOptCategory)); 40 41 extern cl::opt<JumpTableSupportLevel> JumpTables; 42 43 static cl::opt<bool> FixFuncCounts( 44 "fix-func-counts", 45 cl::desc("adjust function counts based on basic blocks execution count"), 46 cl::Hidden, cl::cat(BoltOptCategory)); 47 48 static cl::opt<bool> FixBlockCounts( 49 "fix-block-counts", 50 cl::desc("adjust block counts based on outgoing branch counts"), 51 cl::init(true), cl::Hidden, cl::cat(BoltOptCategory)); 52 53 static cl::opt<bool> 54 InferFallThroughs("infer-fall-throughs", 55 cl::desc("infer execution count for fall-through blocks"), 56 cl::Hidden, cl::cat(BoltOptCategory)); 57 58 } // namespace opts 59 60 namespace llvm { 61 namespace bolt { 62 63 void BinaryFunction::postProcessProfile() { 64 if (!hasValidProfile()) { 65 clearProfile(); 66 return; 67 } 68 69 if (!(getProfileFlags() & PF_LBR)) 70 return; 71 72 // If we have at least some branch data for the function indicate that it 73 // was executed. 74 if (opts::FixFuncCounts && ExecutionCount == 0) 75 ExecutionCount = 1; 76 77 // Compute preliminary execution count for each basic block. 78 for (BinaryBasicBlock *BB : BasicBlocks) { 79 if ((!BB->isEntryPoint() && !BB->isLandingPad()) || 80 BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) 81 BB->ExecutionCount = 0; 82 } 83 for (BinaryBasicBlock *BB : BasicBlocks) { 84 auto SuccBIIter = BB->branch_info_begin(); 85 for (BinaryBasicBlock *Succ : BB->successors()) { 86 // All incoming edges to the primary entry have been accounted for, thus 87 // we skip the update here. 88 if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 89 Succ != BasicBlocks.front()) 90 Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); 91 ++SuccBIIter; 92 } 93 } 94 95 // Fix for old profiles. 96 for (BinaryBasicBlock *BB : BasicBlocks) { 97 if (BB->size() != 1 || BB->succ_size() != 1) 98 continue; 99 100 if (BB->getKnownExecutionCount() == 0) 101 continue; 102 103 MCInst *Instr = BB->getFirstNonPseudoInstr(); 104 assert(Instr && "expected non-pseudo instr"); 105 if (!BC.MIB->hasAnnotation(*Instr, "NOP")) 106 continue; 107 108 BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); 109 BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor); 110 if (!BI.Count) { 111 BI.Count = BB->getKnownExecutionCount(); 112 FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + 113 BI.Count); 114 } 115 } 116 117 if (opts::FixBlockCounts) { 118 for (BinaryBasicBlock *BB : BasicBlocks) { 119 // Make sure that execution count of a block is at least the branch count 120 // of an incoming/outgoing jump. 121 auto SuccBIIter = BB->branch_info_begin(); 122 for (BinaryBasicBlock *Succ : BB->successors()) { 123 uint64_t Count = SuccBIIter->Count; 124 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) { 125 Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count)); 126 BB->setExecutionCount(std::max(BB->getExecutionCount(), Count)); 127 } 128 ++SuccBIIter; 129 } 130 // Make sure that execution count of a block is at least the number of 131 // function calls from the block. 132 for (MCInst &Inst : *BB) { 133 // Ignore non-call instruction 134 if (!BC.MIB->isCall(Inst)) 135 continue; 136 137 auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count"); 138 if (CountAnnt) 139 BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt)); 140 } 141 } 142 } 143 144 if (opts::InferFallThroughs) 145 inferFallThroughCounts(); 146 147 // Update profile information for jump tables based on CFG branch data. 148 for (BinaryBasicBlock *BB : BasicBlocks) { 149 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 150 if (!LastInstr) 151 continue; 152 const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr); 153 if (!JTAddress) 154 continue; 155 JumpTable *JT = getJumpTableContainingAddress(JTAddress); 156 if (!JT) 157 continue; 158 159 uint64_t TotalBranchCount = 0; 160 for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : 161 BB->branch_info()) { 162 TotalBranchCount += BranchInfo.Count; 163 } 164 JT->Count += TotalBranchCount; 165 166 if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE) 167 continue; 168 169 if (JT->Counts.empty()) 170 JT->Counts.resize(JT->Entries.size()); 171 auto EI = JT->Entries.begin(); 172 uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize; 173 EI += Delta; 174 while (EI != JT->Entries.end()) { 175 const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI); 176 if (TargetBB) { 177 const BinaryBasicBlock::BinaryBranchInfo &BranchInfo = 178 BB->getBranchInfo(*TargetBB); 179 assert(Delta < JT->Counts.size()); 180 JT->Counts[Delta].Count += BranchInfo.Count; 181 JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount; 182 } 183 ++Delta; 184 ++EI; 185 // A label marks the start of another jump table. 186 if (JT->Labels.count(Delta * JT->EntrySize)) 187 break; 188 } 189 } 190 } 191 192 void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { 193 // No reason to merge invalid or empty profiles into BF. 194 if (!hasValidProfile()) 195 return; 196 197 // Update function execution count. 198 if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) 199 BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount()); 200 201 // Since we are merging a valid profile, the new profile should be valid too. 202 // It has either already been valid, or it has been cleaned up. 203 BF.ProfileMatchRatio = 1.0f; 204 205 // Update basic block and edge counts. 206 auto BBMergeI = BF.begin(); 207 for (BinaryBasicBlock *BB : BasicBlocks) { 208 BinaryBasicBlock *BBMerge = &*BBMergeI; 209 assert(getIndex(BB) == BF.getIndex(BBMerge)); 210 211 // Update basic block count. 212 if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) { 213 BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() + 214 BB->getExecutionCount()); 215 } 216 217 // Update edge count for successors of this basic block. 218 auto BBMergeSI = BBMerge->succ_begin(); 219 auto BIMergeI = BBMerge->branch_info_begin(); 220 auto BII = BB->branch_info_begin(); 221 for (const BinaryBasicBlock *BBSucc : BB->successors()) { 222 (void)BBSucc; 223 assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI)); 224 225 // At this point no branch count should be set to COUNT_NO_PROFILE. 226 assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 227 "unexpected unknown branch profile"); 228 assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && 229 "unexpected unknown branch profile"); 230 231 BIMergeI->Count += BII->Count; 232 233 // When we merge inferred and real fall-through branch data, the merged 234 // data is considered inferred. 235 if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED && 236 BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { 237 BIMergeI->MispredictedCount += BII->MispredictedCount; 238 } else { 239 BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 240 } 241 242 ++BBMergeSI; 243 ++BII; 244 ++BIMergeI; 245 } 246 assert(BBMergeSI == BBMerge->succ_end()); 247 248 ++BBMergeI; 249 } 250 assert(BBMergeI == BF.end()); 251 252 // Merge jump tables profile info. 253 auto JTMergeI = BF.JumpTables.begin(); 254 for (const auto &JTEntry : JumpTables) { 255 if (JTMergeI->second->Counts.empty()) 256 JTMergeI->second->Counts.resize(JTEntry.second->Counts.size()); 257 auto CountMergeI = JTMergeI->second->Counts.begin(); 258 for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) { 259 CountMergeI->Count += JI.Count; 260 CountMergeI->Mispreds += JI.Mispreds; 261 ++CountMergeI; 262 } 263 assert(CountMergeI == JTMergeI->second->Counts.end()); 264 265 ++JTMergeI; 266 } 267 assert(JTMergeI == BF.JumpTables.end()); 268 } 269 270 void BinaryFunction::inferFallThroughCounts() { 271 // Work on a basic block at a time, propagating frequency information 272 // forwards. 273 // It is important to walk in the layout order. 274 for (BinaryBasicBlock *BB : BasicBlocks) { 275 const uint64_t BBExecCount = BB->getExecutionCount(); 276 277 // Propagate this information to successors, filling in fall-through edges 278 // with frequency information 279 if (BB->succ_size() == 0) 280 continue; 281 282 // Calculate frequency of outgoing branches from this node according to 283 // LBR data. 284 uint64_t ReportedBranches = 0; 285 for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) 286 if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) 287 ReportedBranches += SuccBI.Count; 288 289 // Get taken count of conditional tail call if the block ends with one. 290 uint64_t CTCTakenCount = 0; 291 const MCInst *CTCInstr = BB->getLastNonPseudoInstr(); 292 if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) { 293 CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( 294 *CTCInstr, "CTCTakenCount"); 295 } 296 297 // Calculate frequency of throws from this node according to LBR data 298 // for branching into associated landing pads. Since it is possible 299 // for a landing pad to be associated with more than one basic blocks, 300 // we may overestimate the frequency of throws for such blocks. 301 uint64_t ReportedThrows = 0; 302 for (const BinaryBasicBlock *LP : BB->landing_pads()) 303 ReportedThrows += LP->getExecutionCount(); 304 305 const uint64_t TotalReportedJumps = 306 ReportedBranches + CTCTakenCount + ReportedThrows; 307 308 // Infer the frequency of the fall-through edge, representing not taking the 309 // branch. 310 uint64_t Inferred = 0; 311 if (BBExecCount > TotalReportedJumps) 312 Inferred = BBExecCount - TotalReportedJumps; 313 314 LLVM_DEBUG( 315 if (BBExecCount < TotalReportedJumps) dbgs() 316 << "Fall-through inference is slightly inconsistent. " 317 "exec frequency is less than the outgoing edges frequency (" 318 << BBExecCount << " < " << ReportedBranches 319 << ") for BB at offset 0x" 320 << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';); 321 322 if (BB->succ_size() <= 2) { 323 // Skip if the last instruction is an unconditional jump. 324 const MCInst *LastInstr = BB->getLastNonPseudoInstr(); 325 if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) || 326 BC.MIB->isIndirectBranch(*LastInstr))) 327 continue; 328 // If there is an FT it will be the last successor. 329 auto &SuccBI = *BB->branch_info_rbegin(); 330 auto &Succ = *BB->succ_rbegin(); 331 if (SuccBI.Count == 0) { 332 SuccBI.Count = Inferred; 333 SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; 334 Succ->ExecutionCount += Inferred; 335 } 336 } 337 } 338 339 return; 340 } 341 342 void BinaryFunction::clearProfile() { 343 // Keep function execution profile the same. Only clear basic block and edge 344 // counts. 345 for (BinaryBasicBlock *BB : BasicBlocks) { 346 BB->ExecutionCount = 0; 347 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { 348 BI.Count = 0; 349 BI.MispredictedCount = 0; 350 } 351 } 352 } 353 354 } // namespace bolt 355 } // namespace llvm 356