//===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BinaryFunction member functions related to processing
// the execution profile.
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"

using namespace llvm;
using namespace bolt;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

cl::opt<IndirectCallPromotionType> ICP(
    "indirect-call-promotion", cl::init(ICP_NONE),
    cl::desc("indirect call promotion"),
    cl::values(
        clEnumValN(ICP_NONE, "none", "do not perform indirect call promotion"),
        clEnumValN(ICP_CALLS, "calls", "perform ICP on indirect calls"),
        clEnumValN(ICP_JUMP_TABLES, "jump-tables",
                   "perform ICP on jump tables"),
        clEnumValN(ICP_ALL, "all", "perform ICP on calls and jump tables")),
    cl::ZeroOrMore, cl::cat(BoltOptCategory));

static cl::alias ICPAlias("icp",
                          cl::desc("Alias for --indirect-call-promotion"),
                          cl::aliasopt(ICP));

extern cl::opt<JumpTableSupportLevel> JumpTables;

static cl::opt<bool> FixFuncCounts(
    "fix-func-counts",
    cl::desc("adjust function counts based on basic blocks execution count"),
    cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> FixBlockCounts(
    "fix-block-counts",
    cl::desc("adjust block counts based on outgoing branch counts"),
    cl::init(true), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool>
    InferFallThroughs("infer-fall-throughs",
                      cl::desc("infer execution count for fall-through blocks"),
                      cl::Hidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

void BinaryFunction::postProcessProfile() {
  if (!hasValidProfile()) {
    clearProfile();
    return;
  }

  if (!(getProfileFlags() & PF_LBR))
    return;

  // If we have at least some branch data for the function, indicate that it
  // was executed.
  if (opts::FixFuncCounts && ExecutionCount == 0)
    ExecutionCount = 1;

  // Compute preliminary execution count for each basic block.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if ((!BB->isEntryPoint() && !BB->isLandingPad()) ||
        BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
      BB->ExecutionCount = 0;
  }
  for (BinaryBasicBlock *BB : BasicBlocks) {
    auto SuccBIIter = BB->branch_info_begin();
    for (BinaryBasicBlock *Succ : BB->successors()) {
      // All incoming edges to the primary entry have been accounted for, thus
      // we skip the update here.
      if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Succ != BasicBlocks.front())
        Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
      ++SuccBIIter;
    }
  }

  // Fix for old profiles.
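  // The loop below patches a case seen with older profiles: a block that
  // consists of a single NOP and has exactly one successor may carry an
  // execution count while its only (fall-through) edge has none. In that
  // case the block count is copied onto the edge and added to the successor.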
  for (BinaryBasicBlock *BB : BasicBlocks) {
    if (BB->size() != 1 || BB->succ_size() != 1)
      continue;
    if (BB->getKnownExecutionCount() == 0)
      continue;
    MCInst *Instr = BB->getFirstNonPseudoInstr();
    assert(Instr && "expected non-pseudo instr");
    if (!BC.MIB->hasAnnotation(*Instr, "NOP"))
      continue;
    BinaryBasicBlock *FTSuccessor = BB->getSuccessor();
    BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(*FTSuccessor);
    if (!BI.Count) {
      BI.Count = BB->getKnownExecutionCount();
      FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() +
                                     BI.Count);
    }
  }

  if (opts::FixBlockCounts) {
    for (BinaryBasicBlock *BB : BasicBlocks) {
      // Make sure that execution count of a block is at least the branch count
      // of an incoming/outgoing jump.
      auto SuccBIIter = BB->branch_info_begin();
      for (BinaryBasicBlock *Succ : BB->successors()) {
        uint64_t Count = SuccBIIter->Count;
        if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) {
          Succ->setExecutionCount(std::max(Succ->getExecutionCount(), Count));
          BB->setExecutionCount(std::max(BB->getExecutionCount(), Count));
        }
        ++SuccBIIter;
      }
      // Make sure that execution count of a block is at least the number of
      // function calls from the block.
      for (MCInst &Inst : *BB) {
        // Ignore non-call instructions.
        if (!BC.MIB->isCall(Inst))
          continue;

        auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, "Count");
        if (CountAnnt)
          BB->setExecutionCount(std::max(BB->getExecutionCount(), *CountAnnt));
      }
    }
  }

  if (opts::InferFallThroughs)
    inferFallThroughCounts();

  // Update profile information for jump tables based on CFG branch data.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const MCInst *LastInstr = BB->getLastNonPseudoInstr();
    if (!LastInstr)
      continue;
    const uint64_t JTAddress = BC.MIB->getJumpTable(*LastInstr);
    if (!JTAddress)
      continue;
    JumpTable *JT = getJumpTableContainingAddress(JTAddress);
    if (!JT)
      continue;

    uint64_t TotalBranchCount = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
         BB->branch_info()) {
      TotalBranchCount += BranchInfo.Count;
    }
    JT->Count += TotalBranchCount;

    if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE)
      continue;

    if (JT->Counts.empty())
      JT->Counts.resize(JT->Entries.size());
    auto EI = JT->Entries.begin();
    uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize;
    EI += Delta;
    while (EI != JT->Entries.end()) {
      const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(*EI);
      if (TargetBB) {
        const BinaryBasicBlock::BinaryBranchInfo &BranchInfo =
            BB->getBranchInfo(*TargetBB);
        assert(Delta < JT->Counts.size());
        JT->Counts[Delta].Count += BranchInfo.Count;
        JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
      }
      ++Delta;
      ++EI;
      // A label marks the start of another jump table.
      if (JT->Labels.count(Delta * JT->EntrySize))
        break;
    }
  }
}

void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
  // No reason to merge invalid or empty profiles into BF.
  if (!hasValidProfile())
    return;

  // Update function execution count.
  if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE)
    BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());

  // Since we are merging a valid profile, the new profile should be valid too.
  // It has either already been valid, or it has been cleaned up.
  BF.ProfileMatchRatio = 1.0f;

  // Update basic block and edge counts.
  auto BBMergeI = BF.begin();
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BinaryBasicBlock *BBMerge = &*BBMergeI;
    assert(getIndex(BB) == BF.getIndex(BBMerge));

    // Update basic block count.
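    // Counts are accumulated rather than overwritten, so repeated merges keep
    // adding to the existing profile; source blocks without a known count
    // (COUNT_NO_PROFILE) contribute nothing.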
    if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
      BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() +
                                 BB->getExecutionCount());
    }

    // Update edge count for successors of this basic block.
    auto BBMergeSI = BBMerge->succ_begin();
    auto BIMergeI = BBMerge->branch_info_begin();
    auto BII = BB->branch_info_begin();
    for (const BinaryBasicBlock *BBSucc : BB->successors()) {
      (void)BBSucc;
      assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
      (void)BBMergeSI;

      // At this point no branch count should be set to COUNT_NO_PROFILE.
      assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");
      assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "unexpected unknown branch profile");

      BIMergeI->Count += BII->Count;

      // When we merge inferred and real fall-through branch data, the merged
      // data is considered inferred.
      if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
          BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
        BIMergeI->MispredictedCount += BII->MispredictedCount;
      } else {
        BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
      }

      ++BBMergeSI;
      ++BII;
      ++BIMergeI;
    }
    assert(BBMergeSI == BBMerge->succ_end());

    ++BBMergeI;
  }
  assert(BBMergeI == BF.end());

  // Merge jump tables profile info.
  auto JTMergeI = BF.JumpTables.begin();
  for (const auto &JTEntry : JumpTables) {
    if (JTMergeI->second->Counts.empty())
      JTMergeI->second->Counts.resize(JTEntry.second->Counts.size());
    auto CountMergeI = JTMergeI->second->Counts.begin();
    for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) {
      CountMergeI->Count += JI.Count;
      CountMergeI->Mispreds += JI.Mispreds;
      ++CountMergeI;
    }
    assert(CountMergeI == JTMergeI->second->Counts.end());

    ++JTMergeI;
  }
  assert(JTMergeI == BF.JumpTables.end());
}

void BinaryFunction::inferFallThroughCounts() {
  // Work on one basic block at a time, propagating frequency information
  // forward. It is important to walk in the layout order.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    const uint64_t BBExecCount = BB->getExecutionCount();

    // Propagate this information to successors, filling in fall-through edges
    // with frequency information.
    if (BB->succ_size() == 0)
      continue;

    // Calculate frequency of outgoing branches from this node according to
    // LBR data.
    uint64_t ReportedBranches = 0;
    for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info())
      if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
        ReportedBranches += SuccBI.Count;

    // Get taken count of conditional tail call if the block ends with one.
    uint64_t CTCTakenCount = 0;
    const MCInst *CTCInstr = BB->getLastNonPseudoInstr();
    if (CTCInstr && BC.MIB->getConditionalTailCall(*CTCInstr)) {
      CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>(
          *CTCInstr, "CTCTakenCount");
    }

    // Calculate frequency of throws from this node according to LBR data
    // for branching into associated landing pads. Since it is possible
    // for a landing pad to be associated with more than one basic block,
    // we may overestimate the frequency of throws for such blocks.
    uint64_t ReportedThrows = 0;
    for (const BinaryBasicBlock *LP : BB->landing_pads())
      ReportedThrows += LP->getExecutionCount();

    const uint64_t TotalReportedJumps =
        ReportedBranches + CTCTakenCount + ReportedThrows;

    // Infer the frequency of the fall-through edge, representing not taking
    // the branch.
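    // The inferred fall-through count is the portion of the block's execution
    // count not explained by recorded taken branches, the conditional tail
    // call, or throws:
    //   Inferred = max(0, BBExecCount - TotalReportedJumps).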
    uint64_t Inferred = 0;
    if (BBExecCount > TotalReportedJumps)
      Inferred = BBExecCount - TotalReportedJumps;

    LLVM_DEBUG(
        if (BBExecCount < TotalReportedJumps) dbgs()
            << "Fall-through inference is slightly inconsistent. "
               "exec frequency is less than the outgoing edges frequency ("
            << BBExecCount << " < " << ReportedBranches
            << ") for BB at offset 0x"
            << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';);

    if (BB->succ_size() <= 2) {
      // Skip if the last instruction is an unconditional jump.
      const MCInst *LastInstr = BB->getLastNonPseudoInstr();
      if (LastInstr && (BC.MIB->isUnconditionalBranch(*LastInstr) ||
                        BC.MIB->isIndirectBranch(*LastInstr)))
        continue;

      // If there is an FT, it will be the last successor.
      auto &SuccBI = *BB->branch_info_rbegin();
      auto &Succ = *BB->succ_rbegin();
      if (SuccBI.Count == 0) {
        SuccBI.Count = Inferred;
        SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
        Succ->ExecutionCount =
            std::max(Succ->getKnownExecutionCount(), Inferred);
      }
    }
  }
}

void BinaryFunction::clearProfile() {
  // Keep function execution profile the same. Only clear basic block and edge
  // counts.
  for (BinaryBasicBlock *BB : BasicBlocks) {
    BB->ExecutionCount = 0;
    for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) {
      BI.Count = 0;
      BI.MispredictedCount = 0;
    }
  }
}

} // namespace bolt
} // namespace llvm