164fa8cceSHongtao Yu //===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===// 264fa8cceSHongtao Yu // 364fa8cceSHongtao Yu // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 464fa8cceSHongtao Yu // See https://llvm.org/LICENSE.txt for license information. 564fa8cceSHongtao Yu // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 664fa8cceSHongtao Yu // 764fa8cceSHongtao Yu //===----------------------------------------------------------------------===// 864fa8cceSHongtao Yu // 964fa8cceSHongtao Yu // This file implements the SampleProfileProber transformation. 1064fa8cceSHongtao Yu // 1164fa8cceSHongtao Yu //===----------------------------------------------------------------------===// 1264fa8cceSHongtao Yu 1364fa8cceSHongtao Yu #include "llvm/Transforms/IPO/SampleProfileProbe.h" 1464fa8cceSHongtao Yu #include "llvm/ADT/Statistic.h" 153d89b3cbSHongtao Yu #include "llvm/Analysis/BlockFrequencyInfo.h" 16950487bdSHongtao Yu #include "llvm/Analysis/EHUtils.h" 17f1985a3fSserge-sans-paille #include "llvm/Analysis/LoopInfo.h" 1864fa8cceSHongtao Yu #include "llvm/IR/BasicBlock.h" 1964fa8cceSHongtao Yu #include "llvm/IR/DebugInfoMetadata.h" 20954af75cSDavide Italiano #include "llvm/IR/DiagnosticInfo.h" 2164fa8cceSHongtao Yu #include "llvm/IR/IRBuilder.h" 2264fa8cceSHongtao Yu #include "llvm/IR/Instruction.h" 23e188aae4Sserge-sans-paille #include "llvm/IR/IntrinsicInst.h" 2464fa8cceSHongtao Yu #include "llvm/IR/MDBuilder.h" 2574deadf1SNikita Popov #include "llvm/IR/Module.h" 26f1985a3fSserge-sans-paille #include "llvm/IR/PseudoProbe.h" 2764fa8cceSHongtao Yu #include "llvm/ProfileData/SampleProf.h" 2864fa8cceSHongtao Yu #include "llvm/Support/CRC.h" 293d89b3cbSHongtao Yu #include "llvm/Support/CommandLine.h" 30f1985a3fSserge-sans-paille #include "llvm/Target/TargetMachine.h" 312ae968a0SAntonio Frighetto #include "llvm/Transforms/Utils/Instrumentation.h" 3264fa8cceSHongtao Yu #include "llvm/Transforms/Utils/ModuleUtils.h" 333d89b3cbSHongtao Yu #include <unordered_set> 3464fa8cceSHongtao Yu #include <vector> 3564fa8cceSHongtao Yu 3664fa8cceSHongtao Yu using namespace llvm; 37950487bdSHongtao Yu #define DEBUG_TYPE "pseudo-probe" 3864fa8cceSHongtao Yu 3964fa8cceSHongtao Yu STATISTIC(ArtificialDbgLine, 4064fa8cceSHongtao Yu "Number of probes that have an artificial debug line"); 4164fa8cceSHongtao Yu 423d89b3cbSHongtao Yu static cl::opt<bool> 433d89b3cbSHongtao Yu VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden, 443d89b3cbSHongtao Yu cl::desc("Do pseudo probe verification")); 453d89b3cbSHongtao Yu 463d89b3cbSHongtao Yu static cl::list<std::string> VerifyPseudoProbeFuncList( 473d89b3cbSHongtao Yu "verify-pseudo-probe-funcs", cl::Hidden, 483d89b3cbSHongtao Yu cl::desc("The option to specify the name of the functions to verify.")); 493d89b3cbSHongtao Yu 503d89b3cbSHongtao Yu static cl::opt<bool> 513d89b3cbSHongtao Yu UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden, 523d89b3cbSHongtao Yu cl::desc("Update pseudo probe distribution factor")); 533d89b3cbSHongtao Yu 54f28ee1a2SHongtao Yu static uint64_t getCallStackHash(const DILocation *DIL) { 55f28ee1a2SHongtao Yu uint64_t Hash = 0; 56f28ee1a2SHongtao Yu const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() : nullptr; 57f28ee1a2SHongtao Yu while (InlinedAt) { 58f28ee1a2SHongtao Yu Hash ^= MD5Hash(std::to_string(InlinedAt->getLine())); 59f28ee1a2SHongtao Yu Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn())); 60ebe09e2aSRong Xu auto Name = InlinedAt->getSubprogramLinkageName(); 61f28ee1a2SHongtao Yu Hash ^= MD5Hash(Name); 62f28ee1a2SHongtao Yu InlinedAt = InlinedAt->getInlinedAt(); 63f28ee1a2SHongtao Yu } 64f28ee1a2SHongtao Yu return Hash; 65f28ee1a2SHongtao Yu } 66f28ee1a2SHongtao Yu 67f28ee1a2SHongtao Yu static uint64_t computeCallStackHash(const Instruction &Inst) { 68f28ee1a2SHongtao Yu return getCallStackHash(Inst.getDebugLoc()); 69f28ee1a2SHongtao Yu } 70f28ee1a2SHongtao Yu 713d89b3cbSHongtao Yu bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) { 723d89b3cbSHongtao Yu // Skip function declaration. 733d89b3cbSHongtao Yu if (F->isDeclaration()) 743d89b3cbSHongtao Yu return false; 753d89b3cbSHongtao Yu // Skip function that will not be emitted into object file. The prevailing 763d89b3cbSHongtao Yu // defintion will be verified instead. 773d89b3cbSHongtao Yu if (F->hasAvailableExternallyLinkage()) 783d89b3cbSHongtao Yu return false; 793d89b3cbSHongtao Yu // Do a name matching. 803d89b3cbSHongtao Yu static std::unordered_set<std::string> VerifyFuncNames( 813d89b3cbSHongtao Yu VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end()); 823d89b3cbSHongtao Yu return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str()); 833d89b3cbSHongtao Yu } 843d89b3cbSHongtao Yu 853d89b3cbSHongtao Yu void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) { 863d89b3cbSHongtao Yu if (VerifyPseudoProbe) { 873d89b3cbSHongtao Yu PIC.registerAfterPassCallback( 8819158eb7SSebastian Neubauer [this](StringRef P, Any IR, const PreservedAnalyses &) { 893d89b3cbSHongtao Yu this->runAfterPass(P, IR); 903d89b3cbSHongtao Yu }); 913d89b3cbSHongtao Yu } 923d89b3cbSHongtao Yu } 933d89b3cbSHongtao Yu 943d89b3cbSHongtao Yu // Callback to run after each transformation for the new pass manager. 9519158eb7SSebastian Neubauer void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) { 963d89b3cbSHongtao Yu std::string Banner = 973d89b3cbSHongtao Yu "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n"; 983d89b3cbSHongtao Yu dbgs() << Banner; 99f8a1c8b7Skazutakahirata if (const auto **M = llvm::any_cast<const Module *>(&IR)) 100bb7940e2SSebastian Neubauer runAfterPass(*M); 101f8a1c8b7Skazutakahirata else if (const auto **F = llvm::any_cast<const Function *>(&IR)) 102bb7940e2SSebastian Neubauer runAfterPass(*F); 103f8a1c8b7Skazutakahirata else if (const auto **C = llvm::any_cast<const LazyCallGraph::SCC *>(&IR)) 104bb7940e2SSebastian Neubauer runAfterPass(*C); 105f8a1c8b7Skazutakahirata else if (const auto **L = llvm::any_cast<const Loop *>(&IR)) 106bb7940e2SSebastian Neubauer runAfterPass(*L); 1073d89b3cbSHongtao Yu else 1083d89b3cbSHongtao Yu llvm_unreachable("Unknown IR unit"); 1093d89b3cbSHongtao Yu } 1103d89b3cbSHongtao Yu 1113d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const Module *M) { 1123d89b3cbSHongtao Yu for (const Function &F : *M) 1133d89b3cbSHongtao Yu runAfterPass(&F); 1143d89b3cbSHongtao Yu } 1153d89b3cbSHongtao Yu 1163d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) { 1173d89b3cbSHongtao Yu for (const LazyCallGraph::Node &N : *C) 1183d89b3cbSHongtao Yu runAfterPass(&N.getFunction()); 1193d89b3cbSHongtao Yu } 1203d89b3cbSHongtao Yu 1213d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const Function *F) { 1223d89b3cbSHongtao Yu if (!shouldVerifyFunction(F)) 1233d89b3cbSHongtao Yu return; 1243d89b3cbSHongtao Yu ProbeFactorMap ProbeFactors; 1253d89b3cbSHongtao Yu for (const auto &BB : *F) 1263d89b3cbSHongtao Yu collectProbeFactors(&BB, ProbeFactors); 1273d89b3cbSHongtao Yu verifyProbeFactors(F, ProbeFactors); 1283d89b3cbSHongtao Yu } 1293d89b3cbSHongtao Yu 1303d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const Loop *L) { 1313d89b3cbSHongtao Yu const Function *F = L->getHeader()->getParent(); 1323d89b3cbSHongtao Yu runAfterPass(F); 1333d89b3cbSHongtao Yu } 1343d89b3cbSHongtao Yu 1353d89b3cbSHongtao Yu void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block, 1363d89b3cbSHongtao Yu ProbeFactorMap &ProbeFactors) { 1373d89b3cbSHongtao Yu for (const auto &I : *Block) { 13889fae41eSFangrui Song if (std::optional<PseudoProbe> Probe = extractProbe(I)) { 139f28ee1a2SHongtao Yu uint64_t Hash = computeCallStackHash(I); 140f28ee1a2SHongtao Yu ProbeFactors[{Probe->Id, Hash}] += Probe->Factor; 141f28ee1a2SHongtao Yu } 1423d89b3cbSHongtao Yu } 1433d89b3cbSHongtao Yu } 1443d89b3cbSHongtao Yu 1453d89b3cbSHongtao Yu void PseudoProbeVerifier::verifyProbeFactors( 1463d89b3cbSHongtao Yu const Function *F, const ProbeFactorMap &ProbeFactors) { 1473d89b3cbSHongtao Yu bool BannerPrinted = false; 1483d89b3cbSHongtao Yu auto &PrevProbeFactors = FunctionProbeFactors[F->getName()]; 1493d89b3cbSHongtao Yu for (const auto &I : ProbeFactors) { 1503d89b3cbSHongtao Yu float CurProbeFactor = I.second; 1513d89b3cbSHongtao Yu if (PrevProbeFactors.count(I.first)) { 1523d89b3cbSHongtao Yu float PrevProbeFactor = PrevProbeFactors[I.first]; 1533d89b3cbSHongtao Yu if (std::abs(CurProbeFactor - PrevProbeFactor) > 1543d89b3cbSHongtao Yu DistributionFactorVariance) { 1553d89b3cbSHongtao Yu if (!BannerPrinted) { 1563d89b3cbSHongtao Yu dbgs() << "Function " << F->getName() << ":\n"; 1573d89b3cbSHongtao Yu BannerPrinted = true; 1583d89b3cbSHongtao Yu } 159f28ee1a2SHongtao Yu dbgs() << "Probe " << I.first.first << "\tprevious factor " 1603d89b3cbSHongtao Yu << format("%0.2f", PrevProbeFactor) << "\tcurrent factor " 1613d89b3cbSHongtao Yu << format("%0.2f", CurProbeFactor) << "\n"; 1623d89b3cbSHongtao Yu } 1633d89b3cbSHongtao Yu } 1643d89b3cbSHongtao Yu 1653d89b3cbSHongtao Yu // Update 1663d89b3cbSHongtao Yu PrevProbeFactors[I.first] = I.second; 1673d89b3cbSHongtao Yu } 1683d89b3cbSHongtao Yu } 1693d89b3cbSHongtao Yu 170705a4c14SHongtao Yu SampleProfileProber::SampleProfileProber(Function &Func, 171705a4c14SHongtao Yu const std::string &CurModuleUniqueId) 172705a4c14SHongtao Yu : F(&Func), CurModuleUniqueId(CurModuleUniqueId) { 17364fa8cceSHongtao Yu BlockProbeIds.clear(); 17424d4291cSHongtao Yu CallProbeIds.clear(); 17564fa8cceSHongtao Yu LastProbeId = (uint32_t)PseudoProbeReservedId::Last; 176b8cc3ba4SLei Wang 177b8cc3ba4SLei Wang DenseSet<BasicBlock *> BlocksToIgnore; 178b8cc3ba4SLei Wang DenseSet<BasicBlock *> BlocksAndCallsToIgnore; 179b8cc3ba4SLei Wang computeBlocksToIgnore(BlocksToIgnore, BlocksAndCallsToIgnore); 180b8cc3ba4SLei Wang 1815bbce06aSLei Wang computeProbeId(BlocksToIgnore, BlocksAndCallsToIgnore); 182b8cc3ba4SLei Wang computeCFGHash(BlocksToIgnore); 183b8cc3ba4SLei Wang } 184b8cc3ba4SLei Wang 185b8cc3ba4SLei Wang // Two purposes to compute the blocks to ignore: 186b8cc3ba4SLei Wang // 1. Reduce the IR size. 187b8cc3ba4SLei Wang // 2. Make the instrumentation(checksum) stable. e.g. the frondend may 188b8cc3ba4SLei Wang // generate unstable IR while optimizing nounwind attribute, some versions are 189b8cc3ba4SLei Wang // optimized with the call-to-invoke conversion, while other versions do not. 190b8cc3ba4SLei Wang // This discrepancy in probe ID could cause profile mismatching issues. 191b8cc3ba4SLei Wang // Note that those ignored blocks are either cold blocks or new split blocks 192b8cc3ba4SLei Wang // whose original blocks are instrumented, so it shouldn't degrade the profile 193b8cc3ba4SLei Wang // quality. 194b8cc3ba4SLei Wang void SampleProfileProber::computeBlocksToIgnore( 195b8cc3ba4SLei Wang DenseSet<BasicBlock *> &BlocksToIgnore, 196b8cc3ba4SLei Wang DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) { 197b8cc3ba4SLei Wang // Ignore the cold EH and unreachable blocks and calls. 198b8cc3ba4SLei Wang computeEHOnlyBlocks(*F, BlocksAndCallsToIgnore); 199b8cc3ba4SLei Wang findUnreachableBlocks(BlocksAndCallsToIgnore); 200b8cc3ba4SLei Wang 201b8cc3ba4SLei Wang BlocksToIgnore.insert(BlocksAndCallsToIgnore.begin(), 202b8cc3ba4SLei Wang BlocksAndCallsToIgnore.end()); 203b8cc3ba4SLei Wang 204b8cc3ba4SLei Wang // Handle the call-to-invoke conversion case: make sure that the probe id and 205b8cc3ba4SLei Wang // callsite id are consistent before and after the block split. For block 206b8cc3ba4SLei Wang // probe, we only keep the head block probe id and ignore the block ids of the 207b8cc3ba4SLei Wang // normal dests. For callsite probe, it's different to block probe, there is 208b8cc3ba4SLei Wang // no additional callsite in the normal dests, so we don't ignore the 209b8cc3ba4SLei Wang // callsites. 210b8cc3ba4SLei Wang findInvokeNormalDests(BlocksToIgnore); 211b8cc3ba4SLei Wang } 212b8cc3ba4SLei Wang 213b8cc3ba4SLei Wang // Unreachable blocks and calls are always cold, ignore them. 214b8cc3ba4SLei Wang void SampleProfileProber::findUnreachableBlocks( 215b8cc3ba4SLei Wang DenseSet<BasicBlock *> &BlocksToIgnore) { 216b8cc3ba4SLei Wang for (auto &BB : *F) { 217b8cc3ba4SLei Wang if (&BB != &F->getEntryBlock() && pred_size(&BB) == 0) 218b8cc3ba4SLei Wang BlocksToIgnore.insert(&BB); 219b8cc3ba4SLei Wang } 220b8cc3ba4SLei Wang } 221b8cc3ba4SLei Wang 222b8cc3ba4SLei Wang // In call-to-invoke conversion, basic block can be split into multiple blocks, 223b8cc3ba4SLei Wang // only instrument probe in the head block, ignore the normal dests. 224b8cc3ba4SLei Wang void SampleProfileProber::findInvokeNormalDests( 225b8cc3ba4SLei Wang DenseSet<BasicBlock *> &InvokeNormalDests) { 226b8cc3ba4SLei Wang for (auto &BB : *F) { 227b8cc3ba4SLei Wang auto *TI = BB.getTerminator(); 228b8cc3ba4SLei Wang if (auto *II = dyn_cast<InvokeInst>(TI)) { 229b8cc3ba4SLei Wang auto *ND = II->getNormalDest(); 230b8cc3ba4SLei Wang InvokeNormalDests.insert(ND); 231b8cc3ba4SLei Wang 232b8cc3ba4SLei Wang // The normal dest and the try/catch block are connected by an 233b8cc3ba4SLei Wang // unconditional branch. 234b8cc3ba4SLei Wang while (pred_size(ND) == 1) { 235b8cc3ba4SLei Wang auto *Pred = *pred_begin(ND); 236b8cc3ba4SLei Wang if (succ_size(Pred) == 1) { 237b8cc3ba4SLei Wang InvokeNormalDests.insert(Pred); 238b8cc3ba4SLei Wang ND = Pred; 239b8cc3ba4SLei Wang } else 240b8cc3ba4SLei Wang break; 241b8cc3ba4SLei Wang } 242b8cc3ba4SLei Wang } 243b8cc3ba4SLei Wang } 244b8cc3ba4SLei Wang } 245b8cc3ba4SLei Wang 246b8cc3ba4SLei Wang // The call-to-invoke conversion splits the original block into a list of block, 247b8cc3ba4SLei Wang // we need to compute the hash using the original block's successors to keep the 248b8cc3ba4SLei Wang // CFG Hash consistent. For a given head block, we keep searching the 249b8cc3ba4SLei Wang // succesor(normal dest or unconditional branch dest) to find the tail block, 250b8cc3ba4SLei Wang // the tail block's successors are the original block's successors. 251b8cc3ba4SLei Wang const Instruction *SampleProfileProber::getOriginalTerminator( 252b8cc3ba4SLei Wang const BasicBlock *Head, const DenseSet<BasicBlock *> &BlocksToIgnore) { 253b8cc3ba4SLei Wang auto *TI = Head->getTerminator(); 254b8cc3ba4SLei Wang if (auto *II = dyn_cast<InvokeInst>(TI)) { 255b8cc3ba4SLei Wang return getOriginalTerminator(II->getNormalDest(), BlocksToIgnore); 256b8cc3ba4SLei Wang } else if (succ_size(Head) == 1 && 257b8cc3ba4SLei Wang BlocksToIgnore.contains(*succ_begin(Head))) { 258b8cc3ba4SLei Wang // Go to the unconditional branch dest. 259b8cc3ba4SLei Wang return getOriginalTerminator(*succ_begin(Head), BlocksToIgnore); 260b8cc3ba4SLei Wang } 261b8cc3ba4SLei Wang return TI; 262705a4c14SHongtao Yu } 263705a4c14SHongtao Yu 264705a4c14SHongtao Yu // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 265705a4c14SHongtao Yu // value of each BB in the CFG. The higher 32 bits record the number of edges 266705a4c14SHongtao Yu // preceded by the number of indirect calls. 267705a4c14SHongtao Yu // This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash(). 268b8cc3ba4SLei Wang void SampleProfileProber::computeCFGHash( 269b8cc3ba4SLei Wang const DenseSet<BasicBlock *> &BlocksToIgnore) { 270705a4c14SHongtao Yu std::vector<uint8_t> Indexes; 271705a4c14SHongtao Yu JamCRC JC; 272705a4c14SHongtao Yu for (auto &BB : *F) { 273b8cc3ba4SLei Wang if (BlocksToIgnore.contains(&BB)) 274b8cc3ba4SLei Wang continue; 275b8cc3ba4SLei Wang 276b8cc3ba4SLei Wang auto *TI = getOriginalTerminator(&BB, BlocksToIgnore); 277b8cc3ba4SLei Wang for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 278b8cc3ba4SLei Wang auto *Succ = TI->getSuccessor(I); 279705a4c14SHongtao Yu auto Index = getBlockId(Succ); 280b8cc3ba4SLei Wang // Ingore ignored-block(zero ID) to avoid unstable checksum. 281b8cc3ba4SLei Wang if (Index == 0) 282b8cc3ba4SLei Wang continue; 283705a4c14SHongtao Yu for (int J = 0; J < 4; J++) 284705a4c14SHongtao Yu Indexes.push_back((uint8_t)(Index >> (J * 8))); 285705a4c14SHongtao Yu } 286705a4c14SHongtao Yu } 287705a4c14SHongtao Yu 288705a4c14SHongtao Yu JC.update(Indexes); 289705a4c14SHongtao Yu 290705a4c14SHongtao Yu FunctionHash = (uint64_t)CallProbeIds.size() << 48 | 291705a4c14SHongtao Yu (uint64_t)Indexes.size() << 32 | JC.getCRC(); 292705a4c14SHongtao Yu // Reserve bit 60-63 for other information purpose. 293705a4c14SHongtao Yu FunctionHash &= 0x0FFFFFFFFFFFFFFF; 294705a4c14SHongtao Yu assert(FunctionHash && "Function checksum should not be zero"); 295705a4c14SHongtao Yu LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName() 296705a4c14SHongtao Yu << ":\n" 297705a4c14SHongtao Yu << " CRC = " << JC.getCRC() << ", Edges = " 298705a4c14SHongtao Yu << Indexes.size() << ", ICSites = " << CallProbeIds.size() 299705a4c14SHongtao Yu << ", Hash = " << FunctionHash << "\n"); 30064fa8cceSHongtao Yu } 30164fa8cceSHongtao Yu 3025bbce06aSLei Wang void SampleProfileProber::computeProbeId( 3035bbce06aSLei Wang const DenseSet<BasicBlock *> &BlocksToIgnore, 304b8cc3ba4SLei Wang const DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) { 305954af75cSDavide Italiano LLVMContext &Ctx = F->getContext(); 306954af75cSDavide Italiano Module *M = F->getParent(); 307954af75cSDavide Italiano 30824d4291cSHongtao Yu for (auto &BB : *F) { 3095bbce06aSLei Wang if (!BlocksToIgnore.contains(&BB)) 3105bbce06aSLei Wang BlockProbeIds[&BB] = ++LastProbeId; 3115bbce06aSLei Wang 312b8cc3ba4SLei Wang if (BlocksAndCallsToIgnore.contains(&BB)) 313b8cc3ba4SLei Wang continue; 31424d4291cSHongtao Yu for (auto &I : BB) { 3155bbce06aSLei Wang if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I)) 31624d4291cSHongtao Yu continue; 317954af75cSDavide Italiano 318954af75cSDavide Italiano // The current implementation uses the lower 16 bits of the discriminator 319954af75cSDavide Italiano // so anything larger than 0xFFFF will be ignored. 320954af75cSDavide Italiano if (LastProbeId >= 0xFFFF) { 321954af75cSDavide Italiano std::string Msg = "Pseudo instrumentation incomplete for " + 322954af75cSDavide Italiano std::string(F->getName()) + " because it's too large"; 323615ebfc3SDavide Italiano Ctx.diagnose( 324615ebfc3SDavide Italiano DiagnosticInfoSampleProfile(M->getName().data(), Msg, DS_Warning)); 325954af75cSDavide Italiano return; 326954af75cSDavide Italiano } 327954af75cSDavide Italiano 32824d4291cSHongtao Yu CallProbeIds[&I] = ++LastProbeId; 32924d4291cSHongtao Yu } 33024d4291cSHongtao Yu } 33124d4291cSHongtao Yu } 33224d4291cSHongtao Yu 33364fa8cceSHongtao Yu uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const { 33464fa8cceSHongtao Yu auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB)); 33564fa8cceSHongtao Yu return I == BlockProbeIds.end() ? 0 : I->second; 33664fa8cceSHongtao Yu } 33764fa8cceSHongtao Yu 33824d4291cSHongtao Yu uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const { 33924d4291cSHongtao Yu auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call)); 34024d4291cSHongtao Yu return Iter == CallProbeIds.end() ? 0 : Iter->second; 34124d4291cSHongtao Yu } 34224d4291cSHongtao Yu 34364fa8cceSHongtao Yu void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { 34464fa8cceSHongtao Yu Module *M = F.getParent(); 34564fa8cceSHongtao Yu MDBuilder MDB(F.getContext()); 346d4a01549SJay Foad // Since the GUID from probe desc and inline stack are computed separately, we 347fd29a4d2Swlei // need to make sure their names are consistent, so here also use the name 348fd29a4d2Swlei // from debug info. 349fd29a4d2Swlei StringRef FName = F.getName(); 350fd29a4d2Swlei if (auto *SP = F.getSubprogram()) { 351fd29a4d2Swlei FName = SP->getLinkageName(); 352fd29a4d2Swlei if (FName.empty()) 353fd29a4d2Swlei FName = SP->getName(); 354fd29a4d2Swlei } 355fd29a4d2Swlei uint64_t Guid = Function::getGUID(FName); 35664fa8cceSHongtao Yu 35724d4291cSHongtao Yu // Assign an artificial debug line to a probe that doesn't come with a real 35824d4291cSHongtao Yu // line. A probe not having a debug line will get an incomplete inline 35924d4291cSHongtao Yu // context. This will cause samples collected on the probe to be counted 36024d4291cSHongtao Yu // into the base profile instead of a context profile. The line number 36124d4291cSHongtao Yu // itself is not important though. 36224d4291cSHongtao Yu auto AssignDebugLoc = [&](Instruction *I) { 36324d4291cSHongtao Yu assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) && 36424d4291cSHongtao Yu "Expecting pseudo probe or call instructions"); 36524d4291cSHongtao Yu if (!I->getDebugLoc()) { 36624d4291cSHongtao Yu if (auto *SP = F.getSubprogram()) { 367b5ad32efSFangrui Song auto DIL = DILocation::get(SP->getContext(), 0, 0, SP); 36824d4291cSHongtao Yu I->setDebugLoc(DIL); 36924d4291cSHongtao Yu ArtificialDbgLine++; 37024d4291cSHongtao Yu LLVM_DEBUG({ 37124d4291cSHongtao Yu dbgs() << "\nIn Function " << F.getName() 37224d4291cSHongtao Yu << " Probe gets an artificial debug line\n"; 37324d4291cSHongtao Yu I->dump(); 37424d4291cSHongtao Yu }); 37524d4291cSHongtao Yu } 37624d4291cSHongtao Yu } 37724d4291cSHongtao Yu }; 37824d4291cSHongtao Yu 37964fa8cceSHongtao Yu // Probe basic blocks. 38064fa8cceSHongtao Yu for (auto &I : BlockProbeIds) { 38164fa8cceSHongtao Yu BasicBlock *BB = I.first; 38264fa8cceSHongtao Yu uint32_t Index = I.second; 38364fa8cceSHongtao Yu // Insert a probe before an instruction with a valid debug line number which 38464fa8cceSHongtao Yu // will be assigned to the probe. The line number will be used later to 38564fa8cceSHongtao Yu // model the inline context when the probe is inlined into other functions. 38664fa8cceSHongtao Yu // Debug instructions, phi nodes and lifetime markers do not have an valid 38764fa8cceSHongtao Yu // line number. Real instructions generated by optimizations may not come 38864fa8cceSHongtao Yu // with a line number either. 38964fa8cceSHongtao Yu auto HasValidDbgLine = [](Instruction *J) { 39064fa8cceSHongtao Yu return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) && 39164fa8cceSHongtao Yu !J->isLifetimeStartOrEnd() && J->getDebugLoc(); 39264fa8cceSHongtao Yu }; 39364fa8cceSHongtao Yu 39464fa8cceSHongtao Yu Instruction *J = &*BB->getFirstInsertionPt(); 39564fa8cceSHongtao Yu while (J != BB->getTerminator() && !HasValidDbgLine(J)) { 39664fa8cceSHongtao Yu J = J->getNextNode(); 39764fa8cceSHongtao Yu } 39864fa8cceSHongtao Yu 39964fa8cceSHongtao Yu IRBuilder<> Builder(J); 40064fa8cceSHongtao Yu assert(Builder.GetInsertPoint() != BB->end() && 40164fa8cceSHongtao Yu "Cannot get the probing point"); 40264fa8cceSHongtao Yu Function *ProbeFn = 403*fa789dffSRahul Joshi llvm::Intrinsic::getOrInsertDeclaration(M, Intrinsic::pseudoprobe); 40464fa8cceSHongtao Yu Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index), 4053d89b3cbSHongtao Yu Builder.getInt32(0), 4063d89b3cbSHongtao Yu Builder.getInt64(PseudoProbeFullDistributionFactor)}; 40764fa8cceSHongtao Yu auto *Probe = Builder.CreateCall(ProbeFn, Args); 40824d4291cSHongtao Yu AssignDebugLoc(Probe); 4099272d0f0SHongtao Yu // Reset the dwarf discriminator if the debug location comes with any. The 4109272d0f0SHongtao Yu // discriminator field may be used by FS-AFDO later in the pipeline. 4119272d0f0SHongtao Yu if (auto DIL = Probe->getDebugLoc()) { 4129272d0f0SHongtao Yu if (DIL->getDiscriminator()) { 4139272d0f0SHongtao Yu DIL = DIL->cloneWithDiscriminator(0); 4149272d0f0SHongtao Yu Probe->setDebugLoc(DIL); 4159272d0f0SHongtao Yu } 4169272d0f0SHongtao Yu } 41764fa8cceSHongtao Yu } 41824d4291cSHongtao Yu 41924d4291cSHongtao Yu // Probe both direct calls and indirect calls. Direct calls are probed so that 42024d4291cSHongtao Yu // their probe ID can be used as an call site identifier to represent a 42124d4291cSHongtao Yu // calling context. 42224d4291cSHongtao Yu for (auto &I : CallProbeIds) { 42324d4291cSHongtao Yu auto *Call = I.first; 42424d4291cSHongtao Yu uint32_t Index = I.second; 42524d4291cSHongtao Yu uint32_t Type = cast<CallBase>(Call)->getCalledFunction() 42624d4291cSHongtao Yu ? (uint32_t)PseudoProbeType::DirectCall 42724d4291cSHongtao Yu : (uint32_t)PseudoProbeType::IndirectCall; 42824d4291cSHongtao Yu AssignDebugLoc(Call); 42924d4291cSHongtao Yu if (auto DIL = Call->getDebugLoc()) { 4309272d0f0SHongtao Yu // Levarge the 32-bit discriminator field of debug data to store the ID 4319272d0f0SHongtao Yu // and type of a callsite probe. This gets rid of the dependency on 4329272d0f0SHongtao Yu // plumbing a customized metadata through the codegen pipeline. 4339272d0f0SHongtao Yu uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( 434e20b9047SLei Wang Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor, 435e20b9047SLei Wang DIL->getBaseDiscriminator()); 43624d4291cSHongtao Yu DIL = DIL->cloneWithDiscriminator(V); 43724d4291cSHongtao Yu Call->setDebugLoc(DIL); 43864fa8cceSHongtao Yu } 43964fa8cceSHongtao Yu } 440705a4c14SHongtao Yu 441705a4c14SHongtao Yu // Create module-level metadata that contains function info necessary to 442705a4c14SHongtao Yu // synthesize probe-based sample counts, which are 443705a4c14SHongtao Yu // - FunctionGUID 444705a4c14SHongtao Yu // - FunctionHash. 445705a4c14SHongtao Yu // - FunctionName 446705a4c14SHongtao Yu auto Hash = getFunctionHash(); 447fd29a4d2Swlei auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, FName); 448705a4c14SHongtao Yu auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName); 449705a4c14SHongtao Yu assert(NMD && "llvm.pseudo_probe_desc should be pre-created"); 450705a4c14SHongtao Yu NMD->addOperand(MD); 45164fa8cceSHongtao Yu } 45264fa8cceSHongtao Yu 45364fa8cceSHongtao Yu PreservedAnalyses SampleProfileProbePass::run(Module &M, 45464fa8cceSHongtao Yu ModuleAnalysisManager &AM) { 455705a4c14SHongtao Yu auto ModuleId = getUniqueModuleId(&M); 456705a4c14SHongtao Yu // Create the pseudo probe desc metadata beforehand. 457705a4c14SHongtao Yu // Note that modules with only data but no functions will require this to 458705a4c14SHongtao Yu // be set up so that they will be known as probed later. 459705a4c14SHongtao Yu M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName); 460705a4c14SHongtao Yu 46164fa8cceSHongtao Yu for (auto &F : M) { 46264fa8cceSHongtao Yu if (F.isDeclaration()) 46364fa8cceSHongtao Yu continue; 464705a4c14SHongtao Yu SampleProfileProber ProbeManager(F, ModuleId); 46564fa8cceSHongtao Yu ProbeManager.instrumentOneFunc(F, TM); 46664fa8cceSHongtao Yu } 46764fa8cceSHongtao Yu 46864fa8cceSHongtao Yu return PreservedAnalyses::none(); 46964fa8cceSHongtao Yu } 4703d89b3cbSHongtao Yu 4713d89b3cbSHongtao Yu void PseudoProbeUpdatePass::runOnFunction(Function &F, 4723d89b3cbSHongtao Yu FunctionAnalysisManager &FAM) { 4733d89b3cbSHongtao Yu BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 4743d89b3cbSHongtao Yu auto BBProfileCount = [&BFI](BasicBlock *BB) { 475129b531cSKazu Hirata return BFI.getBlockProfileCount(BB).value_or(0); 4763d89b3cbSHongtao Yu }; 4773d89b3cbSHongtao Yu 4783d89b3cbSHongtao Yu // Collect the sum of execution weight for each probe. 4793d89b3cbSHongtao Yu ProbeFactorMap ProbeFactors; 4803d89b3cbSHongtao Yu for (auto &Block : F) { 4813d89b3cbSHongtao Yu for (auto &I : Block) { 48289fae41eSFangrui Song if (std::optional<PseudoProbe> Probe = extractProbe(I)) { 483f28ee1a2SHongtao Yu uint64_t Hash = computeCallStackHash(I); 484f28ee1a2SHongtao Yu ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block); 485f28ee1a2SHongtao Yu } 4863d89b3cbSHongtao Yu } 4873d89b3cbSHongtao Yu } 4883d89b3cbSHongtao Yu 4893d89b3cbSHongtao Yu // Fix up over-counted probes. 4903d89b3cbSHongtao Yu for (auto &Block : F) { 4913d89b3cbSHongtao Yu for (auto &I : Block) { 49289fae41eSFangrui Song if (std::optional<PseudoProbe> Probe = extractProbe(I)) { 493f28ee1a2SHongtao Yu uint64_t Hash = computeCallStackHash(I); 494f28ee1a2SHongtao Yu float Sum = ProbeFactors[{Probe->Id, Hash}]; 4953d89b3cbSHongtao Yu if (Sum != 0) 4963d89b3cbSHongtao Yu setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum); 4973d89b3cbSHongtao Yu } 4983d89b3cbSHongtao Yu } 4993d89b3cbSHongtao Yu } 5003d89b3cbSHongtao Yu } 5013d89b3cbSHongtao Yu 5023d89b3cbSHongtao Yu PreservedAnalyses PseudoProbeUpdatePass::run(Module &M, 5033d89b3cbSHongtao Yu ModuleAnalysisManager &AM) { 5043d89b3cbSHongtao Yu if (UpdatePseudoProbe) { 5053d89b3cbSHongtao Yu for (auto &F : M) { 5063d89b3cbSHongtao Yu if (F.isDeclaration()) 5073d89b3cbSHongtao Yu continue; 5083d89b3cbSHongtao Yu FunctionAnalysisManager &FAM = 5093d89b3cbSHongtao Yu AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 5103d89b3cbSHongtao Yu runOnFunction(F, FAM); 5113d89b3cbSHongtao Yu } 5123d89b3cbSHongtao Yu } 5133d89b3cbSHongtao Yu return PreservedAnalyses::none(); 5143d89b3cbSHongtao Yu } 515