xref: /llvm-project/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp (revision 98ea1a81a28a6dd36941456c8ab4ce46f665f57a)
164fa8cceSHongtao Yu //===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
264fa8cceSHongtao Yu //
364fa8cceSHongtao Yu // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
464fa8cceSHongtao Yu // See https://llvm.org/LICENSE.txt for license information.
564fa8cceSHongtao Yu // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
664fa8cceSHongtao Yu //
764fa8cceSHongtao Yu //===----------------------------------------------------------------------===//
864fa8cceSHongtao Yu //
964fa8cceSHongtao Yu // This file implements the SampleProfileProber transformation.
1064fa8cceSHongtao Yu //
1164fa8cceSHongtao Yu //===----------------------------------------------------------------------===//
1264fa8cceSHongtao Yu 
1364fa8cceSHongtao Yu #include "llvm/Transforms/IPO/SampleProfileProbe.h"
1464fa8cceSHongtao Yu #include "llvm/ADT/Statistic.h"
153d89b3cbSHongtao Yu #include "llvm/Analysis/BlockFrequencyInfo.h"
16950487bdSHongtao Yu #include "llvm/Analysis/EHUtils.h"
17f1985a3fSserge-sans-paille #include "llvm/Analysis/LoopInfo.h"
1864fa8cceSHongtao Yu #include "llvm/IR/BasicBlock.h"
1964fa8cceSHongtao Yu #include "llvm/IR/DebugInfoMetadata.h"
20954af75cSDavide Italiano #include "llvm/IR/DiagnosticInfo.h"
2164fa8cceSHongtao Yu #include "llvm/IR/IRBuilder.h"
2264fa8cceSHongtao Yu #include "llvm/IR/Instruction.h"
23e188aae4Sserge-sans-paille #include "llvm/IR/IntrinsicInst.h"
2464fa8cceSHongtao Yu #include "llvm/IR/MDBuilder.h"
2574deadf1SNikita Popov #include "llvm/IR/Module.h"
26f1985a3fSserge-sans-paille #include "llvm/IR/PseudoProbe.h"
2764fa8cceSHongtao Yu #include "llvm/ProfileData/SampleProf.h"
2864fa8cceSHongtao Yu #include "llvm/Support/CRC.h"
293d89b3cbSHongtao Yu #include "llvm/Support/CommandLine.h"
30f1985a3fSserge-sans-paille #include "llvm/Target/TargetMachine.h"
312ae968a0SAntonio Frighetto #include "llvm/Transforms/Utils/Instrumentation.h"
3264fa8cceSHongtao Yu #include "llvm/Transforms/Utils/ModuleUtils.h"
333d89b3cbSHongtao Yu #include <unordered_set>
3464fa8cceSHongtao Yu #include <vector>
3564fa8cceSHongtao Yu 
3664fa8cceSHongtao Yu using namespace llvm;
37950487bdSHongtao Yu #define DEBUG_TYPE "pseudo-probe"
3864fa8cceSHongtao Yu 
3964fa8cceSHongtao Yu STATISTIC(ArtificialDbgLine,
4064fa8cceSHongtao Yu           "Number of probes that have an artificial debug line");
4164fa8cceSHongtao Yu 
423d89b3cbSHongtao Yu static cl::opt<bool>
433d89b3cbSHongtao Yu     VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
443d89b3cbSHongtao Yu                       cl::desc("Do pseudo probe verification"));
453d89b3cbSHongtao Yu 
463d89b3cbSHongtao Yu static cl::list<std::string> VerifyPseudoProbeFuncList(
473d89b3cbSHongtao Yu     "verify-pseudo-probe-funcs", cl::Hidden,
483d89b3cbSHongtao Yu     cl::desc("The option to specify the name of the functions to verify."));
493d89b3cbSHongtao Yu 
503d89b3cbSHongtao Yu static cl::opt<bool>
513d89b3cbSHongtao Yu     UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
523d89b3cbSHongtao Yu                       cl::desc("Update pseudo probe distribution factor"));
533d89b3cbSHongtao Yu 
54f28ee1a2SHongtao Yu static uint64_t getCallStackHash(const DILocation *DIL) {
55f28ee1a2SHongtao Yu   uint64_t Hash = 0;
56f28ee1a2SHongtao Yu   const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() : nullptr;
57f28ee1a2SHongtao Yu   while (InlinedAt) {
58f28ee1a2SHongtao Yu     Hash ^= MD5Hash(std::to_string(InlinedAt->getLine()));
59f28ee1a2SHongtao Yu     Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn()));
60ebe09e2aSRong Xu     auto Name = InlinedAt->getSubprogramLinkageName();
61f28ee1a2SHongtao Yu     Hash ^= MD5Hash(Name);
62f28ee1a2SHongtao Yu     InlinedAt = InlinedAt->getInlinedAt();
63f28ee1a2SHongtao Yu   }
64f28ee1a2SHongtao Yu   return Hash;
65f28ee1a2SHongtao Yu }
66f28ee1a2SHongtao Yu 
67f28ee1a2SHongtao Yu static uint64_t computeCallStackHash(const Instruction &Inst) {
68f28ee1a2SHongtao Yu   return getCallStackHash(Inst.getDebugLoc());
69f28ee1a2SHongtao Yu }
70f28ee1a2SHongtao Yu 
713d89b3cbSHongtao Yu bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
723d89b3cbSHongtao Yu   // Skip function declaration.
733d89b3cbSHongtao Yu   if (F->isDeclaration())
743d89b3cbSHongtao Yu     return false;
753d89b3cbSHongtao Yu   // Skip function that will not be emitted into object file. The prevailing
763d89b3cbSHongtao Yu   // defintion will be verified instead.
773d89b3cbSHongtao Yu   if (F->hasAvailableExternallyLinkage())
783d89b3cbSHongtao Yu     return false;
793d89b3cbSHongtao Yu   // Do a name matching.
803d89b3cbSHongtao Yu   static std::unordered_set<std::string> VerifyFuncNames(
813d89b3cbSHongtao Yu       VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
823d89b3cbSHongtao Yu   return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
833d89b3cbSHongtao Yu }
843d89b3cbSHongtao Yu 
853d89b3cbSHongtao Yu void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
863d89b3cbSHongtao Yu   if (VerifyPseudoProbe) {
873d89b3cbSHongtao Yu     PIC.registerAfterPassCallback(
8819158eb7SSebastian Neubauer         [this](StringRef P, Any IR, const PreservedAnalyses &) {
893d89b3cbSHongtao Yu           this->runAfterPass(P, IR);
903d89b3cbSHongtao Yu         });
913d89b3cbSHongtao Yu   }
923d89b3cbSHongtao Yu }
933d89b3cbSHongtao Yu 
943d89b3cbSHongtao Yu // Callback to run after each transformation for the new pass manager.
9519158eb7SSebastian Neubauer void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
963d89b3cbSHongtao Yu   std::string Banner =
973d89b3cbSHongtao Yu       "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
983d89b3cbSHongtao Yu   dbgs() << Banner;
99f8a1c8b7Skazutakahirata   if (const auto **M = llvm::any_cast<const Module *>(&IR))
100bb7940e2SSebastian Neubauer     runAfterPass(*M);
101f8a1c8b7Skazutakahirata   else if (const auto **F = llvm::any_cast<const Function *>(&IR))
102bb7940e2SSebastian Neubauer     runAfterPass(*F);
103f8a1c8b7Skazutakahirata   else if (const auto **C = llvm::any_cast<const LazyCallGraph::SCC *>(&IR))
104bb7940e2SSebastian Neubauer     runAfterPass(*C);
105f8a1c8b7Skazutakahirata   else if (const auto **L = llvm::any_cast<const Loop *>(&IR))
106bb7940e2SSebastian Neubauer     runAfterPass(*L);
1073d89b3cbSHongtao Yu   else
1083d89b3cbSHongtao Yu     llvm_unreachable("Unknown IR unit");
1093d89b3cbSHongtao Yu }
1103d89b3cbSHongtao Yu 
1113d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const Module *M) {
1123d89b3cbSHongtao Yu   for (const Function &F : *M)
1133d89b3cbSHongtao Yu     runAfterPass(&F);
1143d89b3cbSHongtao Yu }
1153d89b3cbSHongtao Yu 
1163d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
1173d89b3cbSHongtao Yu   for (const LazyCallGraph::Node &N : *C)
1183d89b3cbSHongtao Yu     runAfterPass(&N.getFunction());
1193d89b3cbSHongtao Yu }
1203d89b3cbSHongtao Yu 
1213d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const Function *F) {
1223d89b3cbSHongtao Yu   if (!shouldVerifyFunction(F))
1233d89b3cbSHongtao Yu     return;
1243d89b3cbSHongtao Yu   ProbeFactorMap ProbeFactors;
1253d89b3cbSHongtao Yu   for (const auto &BB : *F)
1263d89b3cbSHongtao Yu     collectProbeFactors(&BB, ProbeFactors);
1273d89b3cbSHongtao Yu   verifyProbeFactors(F, ProbeFactors);
1283d89b3cbSHongtao Yu }
1293d89b3cbSHongtao Yu 
1303d89b3cbSHongtao Yu void PseudoProbeVerifier::runAfterPass(const Loop *L) {
1313d89b3cbSHongtao Yu   const Function *F = L->getHeader()->getParent();
1323d89b3cbSHongtao Yu   runAfterPass(F);
1333d89b3cbSHongtao Yu }
1343d89b3cbSHongtao Yu 
1353d89b3cbSHongtao Yu void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
1363d89b3cbSHongtao Yu                                               ProbeFactorMap &ProbeFactors) {
1373d89b3cbSHongtao Yu   for (const auto &I : *Block) {
13889fae41eSFangrui Song     if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
139f28ee1a2SHongtao Yu       uint64_t Hash = computeCallStackHash(I);
140f28ee1a2SHongtao Yu       ProbeFactors[{Probe->Id, Hash}] += Probe->Factor;
141f28ee1a2SHongtao Yu     }
1423d89b3cbSHongtao Yu   }
1433d89b3cbSHongtao Yu }
1443d89b3cbSHongtao Yu 
1453d89b3cbSHongtao Yu void PseudoProbeVerifier::verifyProbeFactors(
1463d89b3cbSHongtao Yu     const Function *F, const ProbeFactorMap &ProbeFactors) {
1473d89b3cbSHongtao Yu   bool BannerPrinted = false;
1483d89b3cbSHongtao Yu   auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
1493d89b3cbSHongtao Yu   for (const auto &I : ProbeFactors) {
1503d89b3cbSHongtao Yu     float CurProbeFactor = I.second;
1513d89b3cbSHongtao Yu     if (PrevProbeFactors.count(I.first)) {
1523d89b3cbSHongtao Yu       float PrevProbeFactor = PrevProbeFactors[I.first];
1533d89b3cbSHongtao Yu       if (std::abs(CurProbeFactor - PrevProbeFactor) >
1543d89b3cbSHongtao Yu           DistributionFactorVariance) {
1553d89b3cbSHongtao Yu         if (!BannerPrinted) {
1563d89b3cbSHongtao Yu           dbgs() << "Function " << F->getName() << ":\n";
1573d89b3cbSHongtao Yu           BannerPrinted = true;
1583d89b3cbSHongtao Yu         }
159f28ee1a2SHongtao Yu         dbgs() << "Probe " << I.first.first << "\tprevious factor "
1603d89b3cbSHongtao Yu                << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
1613d89b3cbSHongtao Yu                << format("%0.2f", CurProbeFactor) << "\n";
1623d89b3cbSHongtao Yu       }
1633d89b3cbSHongtao Yu     }
1643d89b3cbSHongtao Yu 
1653d89b3cbSHongtao Yu     // Update
1663d89b3cbSHongtao Yu     PrevProbeFactors[I.first] = I.second;
1673d89b3cbSHongtao Yu   }
1683d89b3cbSHongtao Yu }
1693d89b3cbSHongtao Yu 
170705a4c14SHongtao Yu SampleProfileProber::SampleProfileProber(Function &Func,
171705a4c14SHongtao Yu                                          const std::string &CurModuleUniqueId)
172705a4c14SHongtao Yu     : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
17364fa8cceSHongtao Yu   BlockProbeIds.clear();
17424d4291cSHongtao Yu   CallProbeIds.clear();
17564fa8cceSHongtao Yu   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
176b8cc3ba4SLei Wang 
177b8cc3ba4SLei Wang   DenseSet<BasicBlock *> BlocksToIgnore;
178b8cc3ba4SLei Wang   DenseSet<BasicBlock *> BlocksAndCallsToIgnore;
179b8cc3ba4SLei Wang   computeBlocksToIgnore(BlocksToIgnore, BlocksAndCallsToIgnore);
180b8cc3ba4SLei Wang 
1815bbce06aSLei Wang   computeProbeId(BlocksToIgnore, BlocksAndCallsToIgnore);
182b8cc3ba4SLei Wang   computeCFGHash(BlocksToIgnore);
183b8cc3ba4SLei Wang }
184b8cc3ba4SLei Wang 
185b8cc3ba4SLei Wang // Two purposes to compute the blocks to ignore:
186b8cc3ba4SLei Wang // 1. Reduce the IR size.
187b8cc3ba4SLei Wang // 2. Make the instrumentation(checksum) stable. e.g. the frondend may
188b8cc3ba4SLei Wang // generate unstable IR while optimizing nounwind attribute, some versions are
189b8cc3ba4SLei Wang // optimized with the call-to-invoke conversion, while other versions do not.
190b8cc3ba4SLei Wang // This discrepancy in probe ID could cause profile mismatching issues.
191b8cc3ba4SLei Wang // Note that those ignored blocks are either cold blocks or new split blocks
192b8cc3ba4SLei Wang // whose original blocks are instrumented, so it shouldn't degrade the profile
193b8cc3ba4SLei Wang // quality.
194b8cc3ba4SLei Wang void SampleProfileProber::computeBlocksToIgnore(
195b8cc3ba4SLei Wang     DenseSet<BasicBlock *> &BlocksToIgnore,
196b8cc3ba4SLei Wang     DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) {
197b8cc3ba4SLei Wang   // Ignore the cold EH and unreachable blocks and calls.
198b8cc3ba4SLei Wang   computeEHOnlyBlocks(*F, BlocksAndCallsToIgnore);
199b8cc3ba4SLei Wang   findUnreachableBlocks(BlocksAndCallsToIgnore);
200b8cc3ba4SLei Wang 
201b8cc3ba4SLei Wang   BlocksToIgnore.insert(BlocksAndCallsToIgnore.begin(),
202b8cc3ba4SLei Wang                         BlocksAndCallsToIgnore.end());
203b8cc3ba4SLei Wang 
204b8cc3ba4SLei Wang   // Handle the call-to-invoke conversion case: make sure that the probe id and
205b8cc3ba4SLei Wang   // callsite id are consistent before and after the block split. For block
206b8cc3ba4SLei Wang   // probe, we only keep the head block probe id and ignore the block ids of the
207b8cc3ba4SLei Wang   // normal dests. For callsite probe, it's different to block probe, there is
208b8cc3ba4SLei Wang   // no additional callsite in the normal dests, so we don't ignore the
209b8cc3ba4SLei Wang   // callsites.
210b8cc3ba4SLei Wang   findInvokeNormalDests(BlocksToIgnore);
211b8cc3ba4SLei Wang }
212b8cc3ba4SLei Wang 
213b8cc3ba4SLei Wang // Unreachable blocks and calls are always cold, ignore them.
214b8cc3ba4SLei Wang void SampleProfileProber::findUnreachableBlocks(
215b8cc3ba4SLei Wang     DenseSet<BasicBlock *> &BlocksToIgnore) {
216b8cc3ba4SLei Wang   for (auto &BB : *F) {
217b8cc3ba4SLei Wang     if (&BB != &F->getEntryBlock() && pred_size(&BB) == 0)
218b8cc3ba4SLei Wang       BlocksToIgnore.insert(&BB);
219b8cc3ba4SLei Wang   }
220b8cc3ba4SLei Wang }
221b8cc3ba4SLei Wang 
222b8cc3ba4SLei Wang // In call-to-invoke conversion, basic block can be split into multiple blocks,
223b8cc3ba4SLei Wang // only instrument probe in the head block, ignore the normal dests.
224b8cc3ba4SLei Wang void SampleProfileProber::findInvokeNormalDests(
225b8cc3ba4SLei Wang     DenseSet<BasicBlock *> &InvokeNormalDests) {
226b8cc3ba4SLei Wang   for (auto &BB : *F) {
227b8cc3ba4SLei Wang     auto *TI = BB.getTerminator();
228b8cc3ba4SLei Wang     if (auto *II = dyn_cast<InvokeInst>(TI)) {
229b8cc3ba4SLei Wang       auto *ND = II->getNormalDest();
230b8cc3ba4SLei Wang       InvokeNormalDests.insert(ND);
231b8cc3ba4SLei Wang 
232b8cc3ba4SLei Wang       // The normal dest and the try/catch block are connected by an
233b8cc3ba4SLei Wang       // unconditional branch.
234b8cc3ba4SLei Wang       while (pred_size(ND) == 1) {
235b8cc3ba4SLei Wang         auto *Pred = *pred_begin(ND);
236b8cc3ba4SLei Wang         if (succ_size(Pred) == 1) {
237b8cc3ba4SLei Wang           InvokeNormalDests.insert(Pred);
238b8cc3ba4SLei Wang           ND = Pred;
239b8cc3ba4SLei Wang         } else
240b8cc3ba4SLei Wang           break;
241b8cc3ba4SLei Wang       }
242b8cc3ba4SLei Wang     }
243b8cc3ba4SLei Wang   }
244b8cc3ba4SLei Wang }
245b8cc3ba4SLei Wang 
246b8cc3ba4SLei Wang // The call-to-invoke conversion splits the original block into a list of block,
247b8cc3ba4SLei Wang // we need to compute the hash using the original block's successors to keep the
248b8cc3ba4SLei Wang // CFG Hash consistent. For a given head block, we keep searching the
249b8cc3ba4SLei Wang // succesor(normal dest or unconditional branch dest) to find the tail block,
250b8cc3ba4SLei Wang // the tail block's successors are the original block's successors.
251b8cc3ba4SLei Wang const Instruction *SampleProfileProber::getOriginalTerminator(
252b8cc3ba4SLei Wang     const BasicBlock *Head, const DenseSet<BasicBlock *> &BlocksToIgnore) {
253b8cc3ba4SLei Wang   auto *TI = Head->getTerminator();
254b8cc3ba4SLei Wang   if (auto *II = dyn_cast<InvokeInst>(TI)) {
255b8cc3ba4SLei Wang     return getOriginalTerminator(II->getNormalDest(), BlocksToIgnore);
256b8cc3ba4SLei Wang   } else if (succ_size(Head) == 1 &&
257b8cc3ba4SLei Wang              BlocksToIgnore.contains(*succ_begin(Head))) {
258b8cc3ba4SLei Wang     // Go to the unconditional branch dest.
259b8cc3ba4SLei Wang     return getOriginalTerminator(*succ_begin(Head), BlocksToIgnore);
260b8cc3ba4SLei Wang   }
261b8cc3ba4SLei Wang   return TI;
262705a4c14SHongtao Yu }
263705a4c14SHongtao Yu 
264705a4c14SHongtao Yu // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
265705a4c14SHongtao Yu // value of each BB in the CFG. The higher 32 bits record the number of edges
266705a4c14SHongtao Yu // preceded by the number of indirect calls.
267705a4c14SHongtao Yu // This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
268b8cc3ba4SLei Wang void SampleProfileProber::computeCFGHash(
269b8cc3ba4SLei Wang     const DenseSet<BasicBlock *> &BlocksToIgnore) {
270705a4c14SHongtao Yu   std::vector<uint8_t> Indexes;
271705a4c14SHongtao Yu   JamCRC JC;
272705a4c14SHongtao Yu   for (auto &BB : *F) {
273b8cc3ba4SLei Wang     if (BlocksToIgnore.contains(&BB))
274b8cc3ba4SLei Wang       continue;
275b8cc3ba4SLei Wang 
276b8cc3ba4SLei Wang     auto *TI = getOriginalTerminator(&BB, BlocksToIgnore);
277b8cc3ba4SLei Wang     for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
278b8cc3ba4SLei Wang       auto *Succ = TI->getSuccessor(I);
279705a4c14SHongtao Yu       auto Index = getBlockId(Succ);
280b8cc3ba4SLei Wang       // Ingore ignored-block(zero ID) to avoid unstable checksum.
281b8cc3ba4SLei Wang       if (Index == 0)
282b8cc3ba4SLei Wang         continue;
283705a4c14SHongtao Yu       for (int J = 0; J < 4; J++)
284705a4c14SHongtao Yu         Indexes.push_back((uint8_t)(Index >> (J * 8)));
285705a4c14SHongtao Yu     }
286705a4c14SHongtao Yu   }
287705a4c14SHongtao Yu 
288705a4c14SHongtao Yu   JC.update(Indexes);
289705a4c14SHongtao Yu 
290705a4c14SHongtao Yu   FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
291705a4c14SHongtao Yu                  (uint64_t)Indexes.size() << 32 | JC.getCRC();
292705a4c14SHongtao Yu   // Reserve bit 60-63 for other information purpose.
293705a4c14SHongtao Yu   FunctionHash &= 0x0FFFFFFFFFFFFFFF;
294705a4c14SHongtao Yu   assert(FunctionHash && "Function checksum should not be zero");
295705a4c14SHongtao Yu   LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
296705a4c14SHongtao Yu                     << ":\n"
297705a4c14SHongtao Yu                     << " CRC = " << JC.getCRC() << ", Edges = "
298705a4c14SHongtao Yu                     << Indexes.size() << ", ICSites = " << CallProbeIds.size()
299705a4c14SHongtao Yu                     << ", Hash = " << FunctionHash << "\n");
30064fa8cceSHongtao Yu }
30164fa8cceSHongtao Yu 
3025bbce06aSLei Wang void SampleProfileProber::computeProbeId(
3035bbce06aSLei Wang     const DenseSet<BasicBlock *> &BlocksToIgnore,
304b8cc3ba4SLei Wang     const DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) {
305954af75cSDavide Italiano   LLVMContext &Ctx = F->getContext();
306954af75cSDavide Italiano   Module *M = F->getParent();
307954af75cSDavide Italiano 
30824d4291cSHongtao Yu   for (auto &BB : *F) {
3095bbce06aSLei Wang     if (!BlocksToIgnore.contains(&BB))
3105bbce06aSLei Wang       BlockProbeIds[&BB] = ++LastProbeId;
3115bbce06aSLei Wang 
312b8cc3ba4SLei Wang     if (BlocksAndCallsToIgnore.contains(&BB))
313b8cc3ba4SLei Wang       continue;
31424d4291cSHongtao Yu     for (auto &I : BB) {
3155bbce06aSLei Wang       if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I))
31624d4291cSHongtao Yu         continue;
317954af75cSDavide Italiano 
318954af75cSDavide Italiano       // The current implementation uses the lower 16 bits of the discriminator
319954af75cSDavide Italiano       // so anything larger than 0xFFFF will be ignored.
320954af75cSDavide Italiano       if (LastProbeId >= 0xFFFF) {
321954af75cSDavide Italiano         std::string Msg = "Pseudo instrumentation incomplete for " +
322954af75cSDavide Italiano                           std::string(F->getName()) + " because it's too large";
323615ebfc3SDavide Italiano         Ctx.diagnose(
324615ebfc3SDavide Italiano             DiagnosticInfoSampleProfile(M->getName().data(), Msg, DS_Warning));
325954af75cSDavide Italiano         return;
326954af75cSDavide Italiano       }
327954af75cSDavide Italiano 
32824d4291cSHongtao Yu       CallProbeIds[&I] = ++LastProbeId;
32924d4291cSHongtao Yu     }
33024d4291cSHongtao Yu   }
33124d4291cSHongtao Yu }
33224d4291cSHongtao Yu 
33364fa8cceSHongtao Yu uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
33464fa8cceSHongtao Yu   auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
33564fa8cceSHongtao Yu   return I == BlockProbeIds.end() ? 0 : I->second;
33664fa8cceSHongtao Yu }
33764fa8cceSHongtao Yu 
33824d4291cSHongtao Yu uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
33924d4291cSHongtao Yu   auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call));
34024d4291cSHongtao Yu   return Iter == CallProbeIds.end() ? 0 : Iter->second;
34124d4291cSHongtao Yu }
34224d4291cSHongtao Yu 
34364fa8cceSHongtao Yu void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
34464fa8cceSHongtao Yu   Module *M = F.getParent();
34564fa8cceSHongtao Yu   MDBuilder MDB(F.getContext());
346d4a01549SJay Foad   // Since the GUID from probe desc and inline stack are computed separately, we
347fd29a4d2Swlei   // need to make sure their names are consistent, so here also use the name
348fd29a4d2Swlei   // from debug info.
349fd29a4d2Swlei   StringRef FName = F.getName();
350fd29a4d2Swlei   if (auto *SP = F.getSubprogram()) {
351fd29a4d2Swlei     FName = SP->getLinkageName();
352fd29a4d2Swlei     if (FName.empty())
353fd29a4d2Swlei       FName = SP->getName();
354fd29a4d2Swlei   }
355fd29a4d2Swlei   uint64_t Guid = Function::getGUID(FName);
35664fa8cceSHongtao Yu 
35724d4291cSHongtao Yu   // Assign an artificial debug line to a probe that doesn't come with a real
35824d4291cSHongtao Yu   // line. A probe not having a debug line will get an incomplete inline
35924d4291cSHongtao Yu   // context. This will cause samples collected on the probe to be counted
36024d4291cSHongtao Yu   // into the base profile instead of a context profile. The line number
36124d4291cSHongtao Yu   // itself is not important though.
36224d4291cSHongtao Yu   auto AssignDebugLoc = [&](Instruction *I) {
36324d4291cSHongtao Yu     assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) &&
36424d4291cSHongtao Yu            "Expecting pseudo probe or call instructions");
36524d4291cSHongtao Yu     if (!I->getDebugLoc()) {
36624d4291cSHongtao Yu       if (auto *SP = F.getSubprogram()) {
367b5ad32efSFangrui Song         auto DIL = DILocation::get(SP->getContext(), 0, 0, SP);
36824d4291cSHongtao Yu         I->setDebugLoc(DIL);
36924d4291cSHongtao Yu         ArtificialDbgLine++;
37024d4291cSHongtao Yu         LLVM_DEBUG({
37124d4291cSHongtao Yu           dbgs() << "\nIn Function " << F.getName()
37224d4291cSHongtao Yu                  << " Probe gets an artificial debug line\n";
37324d4291cSHongtao Yu           I->dump();
37424d4291cSHongtao Yu         });
37524d4291cSHongtao Yu       }
37624d4291cSHongtao Yu     }
37724d4291cSHongtao Yu   };
37824d4291cSHongtao Yu 
37964fa8cceSHongtao Yu   // Probe basic blocks.
38064fa8cceSHongtao Yu   for (auto &I : BlockProbeIds) {
38164fa8cceSHongtao Yu     BasicBlock *BB = I.first;
38264fa8cceSHongtao Yu     uint32_t Index = I.second;
38364fa8cceSHongtao Yu     // Insert a probe before an instruction with a valid debug line number which
38464fa8cceSHongtao Yu     // will be assigned to the probe. The line number will be used later to
38564fa8cceSHongtao Yu     // model the inline context when the probe is inlined into other functions.
38664fa8cceSHongtao Yu     // Debug instructions, phi nodes and lifetime markers do not have an valid
38764fa8cceSHongtao Yu     // line number. Real instructions generated by optimizations may not come
38864fa8cceSHongtao Yu     // with a line number either.
38964fa8cceSHongtao Yu     auto HasValidDbgLine = [](Instruction *J) {
39064fa8cceSHongtao Yu       return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
39164fa8cceSHongtao Yu              !J->isLifetimeStartOrEnd() && J->getDebugLoc();
39264fa8cceSHongtao Yu     };
39364fa8cceSHongtao Yu 
39464fa8cceSHongtao Yu     Instruction *J = &*BB->getFirstInsertionPt();
39564fa8cceSHongtao Yu     while (J != BB->getTerminator() && !HasValidDbgLine(J)) {
39664fa8cceSHongtao Yu       J = J->getNextNode();
39764fa8cceSHongtao Yu     }
39864fa8cceSHongtao Yu 
39964fa8cceSHongtao Yu     IRBuilder<> Builder(J);
40064fa8cceSHongtao Yu     assert(Builder.GetInsertPoint() != BB->end() &&
40164fa8cceSHongtao Yu            "Cannot get the probing point");
40264fa8cceSHongtao Yu     Function *ProbeFn =
403*fa789dffSRahul Joshi         llvm::Intrinsic::getOrInsertDeclaration(M, Intrinsic::pseudoprobe);
40464fa8cceSHongtao Yu     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
4053d89b3cbSHongtao Yu                      Builder.getInt32(0),
4063d89b3cbSHongtao Yu                      Builder.getInt64(PseudoProbeFullDistributionFactor)};
40764fa8cceSHongtao Yu     auto *Probe = Builder.CreateCall(ProbeFn, Args);
40824d4291cSHongtao Yu     AssignDebugLoc(Probe);
4099272d0f0SHongtao Yu     // Reset the dwarf discriminator if the debug location comes with any. The
4109272d0f0SHongtao Yu     // discriminator field may be used by FS-AFDO later in the pipeline.
4119272d0f0SHongtao Yu     if (auto DIL = Probe->getDebugLoc()) {
4129272d0f0SHongtao Yu       if (DIL->getDiscriminator()) {
4139272d0f0SHongtao Yu         DIL = DIL->cloneWithDiscriminator(0);
4149272d0f0SHongtao Yu         Probe->setDebugLoc(DIL);
4159272d0f0SHongtao Yu       }
4169272d0f0SHongtao Yu     }
41764fa8cceSHongtao Yu   }
41824d4291cSHongtao Yu 
41924d4291cSHongtao Yu   // Probe both direct calls and indirect calls. Direct calls are probed so that
42024d4291cSHongtao Yu   // their probe ID can be used as an call site identifier to represent a
42124d4291cSHongtao Yu   // calling context.
42224d4291cSHongtao Yu   for (auto &I : CallProbeIds) {
42324d4291cSHongtao Yu     auto *Call = I.first;
42424d4291cSHongtao Yu     uint32_t Index = I.second;
42524d4291cSHongtao Yu     uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
42624d4291cSHongtao Yu                         ? (uint32_t)PseudoProbeType::DirectCall
42724d4291cSHongtao Yu                         : (uint32_t)PseudoProbeType::IndirectCall;
42824d4291cSHongtao Yu     AssignDebugLoc(Call);
42924d4291cSHongtao Yu     if (auto DIL = Call->getDebugLoc()) {
4309272d0f0SHongtao Yu       // Levarge the 32-bit discriminator field of debug data to store the ID
4319272d0f0SHongtao Yu       // and type of a callsite probe. This gets rid of the dependency on
4329272d0f0SHongtao Yu       // plumbing a customized metadata through the codegen pipeline.
4339272d0f0SHongtao Yu       uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
434e20b9047SLei Wang           Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor,
435e20b9047SLei Wang           DIL->getBaseDiscriminator());
43624d4291cSHongtao Yu       DIL = DIL->cloneWithDiscriminator(V);
43724d4291cSHongtao Yu       Call->setDebugLoc(DIL);
43864fa8cceSHongtao Yu     }
43964fa8cceSHongtao Yu   }
440705a4c14SHongtao Yu 
441705a4c14SHongtao Yu   // Create module-level metadata that contains function info necessary to
442705a4c14SHongtao Yu   // synthesize probe-based sample counts,  which are
443705a4c14SHongtao Yu   // - FunctionGUID
444705a4c14SHongtao Yu   // - FunctionHash.
445705a4c14SHongtao Yu   // - FunctionName
446705a4c14SHongtao Yu   auto Hash = getFunctionHash();
447fd29a4d2Swlei   auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, FName);
448705a4c14SHongtao Yu   auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
449705a4c14SHongtao Yu   assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
450705a4c14SHongtao Yu   NMD->addOperand(MD);
45164fa8cceSHongtao Yu }
45264fa8cceSHongtao Yu 
45364fa8cceSHongtao Yu PreservedAnalyses SampleProfileProbePass::run(Module &M,
45464fa8cceSHongtao Yu                                               ModuleAnalysisManager &AM) {
455705a4c14SHongtao Yu   auto ModuleId = getUniqueModuleId(&M);
456705a4c14SHongtao Yu   // Create the pseudo probe desc metadata beforehand.
457705a4c14SHongtao Yu   // Note that modules with only data but no functions will require this to
458705a4c14SHongtao Yu   // be set up so that they will be known as probed later.
459705a4c14SHongtao Yu   M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
460705a4c14SHongtao Yu 
46164fa8cceSHongtao Yu   for (auto &F : M) {
46264fa8cceSHongtao Yu     if (F.isDeclaration())
46364fa8cceSHongtao Yu       continue;
464705a4c14SHongtao Yu     SampleProfileProber ProbeManager(F, ModuleId);
46564fa8cceSHongtao Yu     ProbeManager.instrumentOneFunc(F, TM);
46664fa8cceSHongtao Yu   }
46764fa8cceSHongtao Yu 
46864fa8cceSHongtao Yu   return PreservedAnalyses::none();
46964fa8cceSHongtao Yu }
4703d89b3cbSHongtao Yu 
4713d89b3cbSHongtao Yu void PseudoProbeUpdatePass::runOnFunction(Function &F,
4723d89b3cbSHongtao Yu                                           FunctionAnalysisManager &FAM) {
4733d89b3cbSHongtao Yu   BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
4743d89b3cbSHongtao Yu   auto BBProfileCount = [&BFI](BasicBlock *BB) {
475129b531cSKazu Hirata     return BFI.getBlockProfileCount(BB).value_or(0);
4763d89b3cbSHongtao Yu   };
4773d89b3cbSHongtao Yu 
4783d89b3cbSHongtao Yu   // Collect the sum of execution weight for each probe.
4793d89b3cbSHongtao Yu   ProbeFactorMap ProbeFactors;
4803d89b3cbSHongtao Yu   for (auto &Block : F) {
4813d89b3cbSHongtao Yu     for (auto &I : Block) {
48289fae41eSFangrui Song       if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
483f28ee1a2SHongtao Yu         uint64_t Hash = computeCallStackHash(I);
484f28ee1a2SHongtao Yu         ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
485f28ee1a2SHongtao Yu       }
4863d89b3cbSHongtao Yu     }
4873d89b3cbSHongtao Yu   }
4883d89b3cbSHongtao Yu 
4893d89b3cbSHongtao Yu   // Fix up over-counted probes.
4903d89b3cbSHongtao Yu   for (auto &Block : F) {
4913d89b3cbSHongtao Yu     for (auto &I : Block) {
49289fae41eSFangrui Song       if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
493f28ee1a2SHongtao Yu         uint64_t Hash = computeCallStackHash(I);
494f28ee1a2SHongtao Yu         float Sum = ProbeFactors[{Probe->Id, Hash}];
4953d89b3cbSHongtao Yu         if (Sum != 0)
4963d89b3cbSHongtao Yu           setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
4973d89b3cbSHongtao Yu       }
4983d89b3cbSHongtao Yu     }
4993d89b3cbSHongtao Yu   }
5003d89b3cbSHongtao Yu }
5013d89b3cbSHongtao Yu 
5023d89b3cbSHongtao Yu PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
5033d89b3cbSHongtao Yu                                              ModuleAnalysisManager &AM) {
5043d89b3cbSHongtao Yu   if (UpdatePseudoProbe) {
5053d89b3cbSHongtao Yu     for (auto &F : M) {
5063d89b3cbSHongtao Yu       if (F.isDeclaration())
5073d89b3cbSHongtao Yu         continue;
5083d89b3cbSHongtao Yu       FunctionAnalysisManager &FAM =
5093d89b3cbSHongtao Yu           AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
5103d89b3cbSHongtao Yu       runOnFunction(F, FAM);
5113d89b3cbSHongtao Yu     }
5123d89b3cbSHongtao Yu   }
5133d89b3cbSHongtao Yu   return PreservedAnalyses::none();
5143d89b3cbSHongtao Yu }
515