1*0fca6ea1SDimitry Andric //===- PGOCtxProfLowering.cpp - Contextual PGO Instr. Lowering ------------===// 2*0fca6ea1SDimitry Andric // 3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0fca6ea1SDimitry Andric // 7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 8*0fca6ea1SDimitry Andric // 9*0fca6ea1SDimitry Andric 10*0fca6ea1SDimitry Andric #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" 11*0fca6ea1SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 12*0fca6ea1SDimitry Andric #include "llvm/IR/Analysis.h" 13*0fca6ea1SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 14*0fca6ea1SDimitry Andric #include "llvm/IR/IRBuilder.h" 15*0fca6ea1SDimitry Andric #include "llvm/IR/Instructions.h" 16*0fca6ea1SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 17*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h" 18*0fca6ea1SDimitry Andric #include "llvm/IR/PassManager.h" 19*0fca6ea1SDimitry Andric #include "llvm/Support/CommandLine.h" 20*0fca6ea1SDimitry Andric #include <utility> 21*0fca6ea1SDimitry Andric 22*0fca6ea1SDimitry Andric using namespace llvm; 23*0fca6ea1SDimitry Andric 24*0fca6ea1SDimitry Andric #define DEBUG_TYPE "ctx-instr-lower" 25*0fca6ea1SDimitry Andric 26*0fca6ea1SDimitry Andric static cl::list<std::string> ContextRoots( 27*0fca6ea1SDimitry Andric "profile-context-root", cl::Hidden, 28*0fca6ea1SDimitry Andric cl::desc( 29*0fca6ea1SDimitry Andric "A function name, assumed to be global, which will be treated as the " 30*0fca6ea1SDimitry Andric "root of an interesting graph, which will be profiled independently " 31*0fca6ea1SDimitry Andric "from other similar graphs.")); 32*0fca6ea1SDimitry Andric 33*0fca6ea1SDimitry Andric bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() { 34*0fca6ea1SDimitry Andric return !ContextRoots.empty(); 35*0fca6ea1SDimitry Andric } 36*0fca6ea1SDimitry Andric 37*0fca6ea1SDimitry Andric // the names of symbols we expect in compiler-rt. Using a namespace for 38*0fca6ea1SDimitry Andric // readability. 39*0fca6ea1SDimitry Andric namespace CompilerRtAPINames { 40*0fca6ea1SDimitry Andric static auto StartCtx = "__llvm_ctx_profile_start_context"; 41*0fca6ea1SDimitry Andric static auto ReleaseCtx = "__llvm_ctx_profile_release_context"; 42*0fca6ea1SDimitry Andric static auto GetCtx = "__llvm_ctx_profile_get_context"; 43*0fca6ea1SDimitry Andric static auto ExpectedCalleeTLS = "__llvm_ctx_profile_expected_callee"; 44*0fca6ea1SDimitry Andric static auto CallsiteTLS = "__llvm_ctx_profile_callsite"; 45*0fca6ea1SDimitry Andric } // namespace CompilerRtAPINames 46*0fca6ea1SDimitry Andric 47*0fca6ea1SDimitry Andric namespace { 48*0fca6ea1SDimitry Andric // The lowering logic and state. 49*0fca6ea1SDimitry Andric class CtxInstrumentationLowerer final { 50*0fca6ea1SDimitry Andric Module &M; 51*0fca6ea1SDimitry Andric ModuleAnalysisManager &MAM; 52*0fca6ea1SDimitry Andric Type *ContextNodeTy = nullptr; 53*0fca6ea1SDimitry Andric Type *ContextRootTy = nullptr; 54*0fca6ea1SDimitry Andric 55*0fca6ea1SDimitry Andric DenseMap<const Function *, Constant *> ContextRootMap; 56*0fca6ea1SDimitry Andric Function *StartCtx = nullptr; 57*0fca6ea1SDimitry Andric Function *GetCtx = nullptr; 58*0fca6ea1SDimitry Andric Function *ReleaseCtx = nullptr; 59*0fca6ea1SDimitry Andric GlobalVariable *ExpectedCalleeTLS = nullptr; 60*0fca6ea1SDimitry Andric GlobalVariable *CallsiteInfoTLS = nullptr; 61*0fca6ea1SDimitry Andric 62*0fca6ea1SDimitry Andric public: 63*0fca6ea1SDimitry Andric CtxInstrumentationLowerer(Module &M, ModuleAnalysisManager &MAM); 64*0fca6ea1SDimitry Andric // return true if lowering happened (i.e. a change was made) 65*0fca6ea1SDimitry Andric bool lowerFunction(Function &F); 66*0fca6ea1SDimitry Andric }; 67*0fca6ea1SDimitry Andric 68*0fca6ea1SDimitry Andric // llvm.instrprof.increment[.step] captures the total number of counters as one 69*0fca6ea1SDimitry Andric // of its parameters, and llvm.instrprof.callsite captures the total number of 70*0fca6ea1SDimitry Andric // callsites. Those values are the same for instances of those intrinsics in 71*0fca6ea1SDimitry Andric // this function. Find the first instance of each and return them. 72*0fca6ea1SDimitry Andric std::pair<uint32_t, uint32_t> getNrCountersAndCallsites(const Function &F) { 73*0fca6ea1SDimitry Andric uint32_t NrCounters = 0; 74*0fca6ea1SDimitry Andric uint32_t NrCallsites = 0; 75*0fca6ea1SDimitry Andric for (const auto &BB : F) { 76*0fca6ea1SDimitry Andric for (const auto &I : BB) { 77*0fca6ea1SDimitry Andric if (const auto *Incr = dyn_cast<InstrProfIncrementInst>(&I)) { 78*0fca6ea1SDimitry Andric uint32_t V = 79*0fca6ea1SDimitry Andric static_cast<uint32_t>(Incr->getNumCounters()->getZExtValue()); 80*0fca6ea1SDimitry Andric assert((!NrCounters || V == NrCounters) && 81*0fca6ea1SDimitry Andric "expected all llvm.instrprof.increment[.step] intrinsics to " 82*0fca6ea1SDimitry Andric "have the same total nr of counters parameter"); 83*0fca6ea1SDimitry Andric NrCounters = V; 84*0fca6ea1SDimitry Andric } else if (const auto *CSIntr = dyn_cast<InstrProfCallsite>(&I)) { 85*0fca6ea1SDimitry Andric uint32_t V = 86*0fca6ea1SDimitry Andric static_cast<uint32_t>(CSIntr->getNumCounters()->getZExtValue()); 87*0fca6ea1SDimitry Andric assert((!NrCallsites || V == NrCallsites) && 88*0fca6ea1SDimitry Andric "expected all llvm.instrprof.callsite intrinsics to have the " 89*0fca6ea1SDimitry Andric "same total nr of callsites parameter"); 90*0fca6ea1SDimitry Andric NrCallsites = V; 91*0fca6ea1SDimitry Andric } 92*0fca6ea1SDimitry Andric #if NDEBUG 93*0fca6ea1SDimitry Andric if (NrCounters && NrCallsites) 94*0fca6ea1SDimitry Andric return std::make_pair(NrCounters, NrCallsites); 95*0fca6ea1SDimitry Andric #endif 96*0fca6ea1SDimitry Andric } 97*0fca6ea1SDimitry Andric } 98*0fca6ea1SDimitry Andric return {NrCounters, NrCallsites}; 99*0fca6ea1SDimitry Andric } 100*0fca6ea1SDimitry Andric } // namespace 101*0fca6ea1SDimitry Andric 102*0fca6ea1SDimitry Andric // set up tie-in with compiler-rt. 103*0fca6ea1SDimitry Andric // NOTE!!! 104*0fca6ea1SDimitry Andric // These have to match compiler-rt/lib/ctx_profile/CtxInstrProfiling.h 105*0fca6ea1SDimitry Andric CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M, 106*0fca6ea1SDimitry Andric ModuleAnalysisManager &MAM) 107*0fca6ea1SDimitry Andric : M(M), MAM(MAM) { 108*0fca6ea1SDimitry Andric auto *PointerTy = PointerType::get(M.getContext(), 0); 109*0fca6ea1SDimitry Andric auto *SanitizerMutexType = Type::getInt8Ty(M.getContext()); 110*0fca6ea1SDimitry Andric auto *I32Ty = Type::getInt32Ty(M.getContext()); 111*0fca6ea1SDimitry Andric auto *I64Ty = Type::getInt64Ty(M.getContext()); 112*0fca6ea1SDimitry Andric 113*0fca6ea1SDimitry Andric // The ContextRoot type 114*0fca6ea1SDimitry Andric ContextRootTy = 115*0fca6ea1SDimitry Andric StructType::get(M.getContext(), { 116*0fca6ea1SDimitry Andric PointerTy, /*FirstNode*/ 117*0fca6ea1SDimitry Andric PointerTy, /*FirstMemBlock*/ 118*0fca6ea1SDimitry Andric PointerTy, /*CurrentMem*/ 119*0fca6ea1SDimitry Andric SanitizerMutexType, /*Taken*/ 120*0fca6ea1SDimitry Andric }); 121*0fca6ea1SDimitry Andric // The Context header. 122*0fca6ea1SDimitry Andric ContextNodeTy = StructType::get(M.getContext(), { 123*0fca6ea1SDimitry Andric I64Ty, /*Guid*/ 124*0fca6ea1SDimitry Andric PointerTy, /*Next*/ 125*0fca6ea1SDimitry Andric I32Ty, /*NrCounters*/ 126*0fca6ea1SDimitry Andric I32Ty, /*NrCallsites*/ 127*0fca6ea1SDimitry Andric }); 128*0fca6ea1SDimitry Andric 129*0fca6ea1SDimitry Andric // Define a global for each entrypoint. We'll reuse the entrypoint's name as 130*0fca6ea1SDimitry Andric // prefix. We assume the entrypoint names to be unique. 131*0fca6ea1SDimitry Andric for (const auto &Fname : ContextRoots) { 132*0fca6ea1SDimitry Andric if (const auto *F = M.getFunction(Fname)) { 133*0fca6ea1SDimitry Andric if (F->isDeclaration()) 134*0fca6ea1SDimitry Andric continue; 135*0fca6ea1SDimitry Andric auto *G = M.getOrInsertGlobal(Fname + "_ctx_root", ContextRootTy); 136*0fca6ea1SDimitry Andric cast<GlobalVariable>(G)->setInitializer( 137*0fca6ea1SDimitry Andric Constant::getNullValue(ContextRootTy)); 138*0fca6ea1SDimitry Andric ContextRootMap.insert(std::make_pair(F, G)); 139*0fca6ea1SDimitry Andric for (const auto &BB : *F) 140*0fca6ea1SDimitry Andric for (const auto &I : BB) 141*0fca6ea1SDimitry Andric if (const auto *CB = dyn_cast<CallBase>(&I)) 142*0fca6ea1SDimitry Andric if (CB->isMustTailCall()) { 143*0fca6ea1SDimitry Andric M.getContext().emitError( 144*0fca6ea1SDimitry Andric "The function " + Fname + 145*0fca6ea1SDimitry Andric " was indicated as a context root, but it features musttail " 146*0fca6ea1SDimitry Andric "calls, which is not supported."); 147*0fca6ea1SDimitry Andric } 148*0fca6ea1SDimitry Andric } 149*0fca6ea1SDimitry Andric } 150*0fca6ea1SDimitry Andric 151*0fca6ea1SDimitry Andric // Declare the functions we will call. 152*0fca6ea1SDimitry Andric StartCtx = cast<Function>( 153*0fca6ea1SDimitry Andric M.getOrInsertFunction( 154*0fca6ea1SDimitry Andric CompilerRtAPINames::StartCtx, 155*0fca6ea1SDimitry Andric FunctionType::get(ContextNodeTy->getPointerTo(), 156*0fca6ea1SDimitry Andric {ContextRootTy->getPointerTo(), /*ContextRoot*/ 157*0fca6ea1SDimitry Andric I64Ty, /*Guid*/ I32Ty, 158*0fca6ea1SDimitry Andric /*NrCounters*/ I32Ty /*NrCallsites*/}, 159*0fca6ea1SDimitry Andric false)) 160*0fca6ea1SDimitry Andric .getCallee()); 161*0fca6ea1SDimitry Andric GetCtx = cast<Function>( 162*0fca6ea1SDimitry Andric M.getOrInsertFunction(CompilerRtAPINames::GetCtx, 163*0fca6ea1SDimitry Andric FunctionType::get(ContextNodeTy->getPointerTo(), 164*0fca6ea1SDimitry Andric {PointerTy, /*Callee*/ 165*0fca6ea1SDimitry Andric I64Ty, /*Guid*/ 166*0fca6ea1SDimitry Andric I32Ty, /*NrCounters*/ 167*0fca6ea1SDimitry Andric I32Ty}, /*NrCallsites*/ 168*0fca6ea1SDimitry Andric false)) 169*0fca6ea1SDimitry Andric .getCallee()); 170*0fca6ea1SDimitry Andric ReleaseCtx = cast<Function>( 171*0fca6ea1SDimitry Andric M.getOrInsertFunction( 172*0fca6ea1SDimitry Andric CompilerRtAPINames::ReleaseCtx, 173*0fca6ea1SDimitry Andric FunctionType::get(Type::getVoidTy(M.getContext()), 174*0fca6ea1SDimitry Andric { 175*0fca6ea1SDimitry Andric ContextRootTy->getPointerTo(), /*ContextRoot*/ 176*0fca6ea1SDimitry Andric }, 177*0fca6ea1SDimitry Andric false)) 178*0fca6ea1SDimitry Andric .getCallee()); 179*0fca6ea1SDimitry Andric 180*0fca6ea1SDimitry Andric // Declare the TLSes we will need to use. 181*0fca6ea1SDimitry Andric CallsiteInfoTLS = 182*0fca6ea1SDimitry Andric new GlobalVariable(M, PointerTy, false, GlobalValue::ExternalLinkage, 183*0fca6ea1SDimitry Andric nullptr, CompilerRtAPINames::CallsiteTLS); 184*0fca6ea1SDimitry Andric CallsiteInfoTLS->setThreadLocal(true); 185*0fca6ea1SDimitry Andric CallsiteInfoTLS->setVisibility(llvm::GlobalValue::HiddenVisibility); 186*0fca6ea1SDimitry Andric ExpectedCalleeTLS = 187*0fca6ea1SDimitry Andric new GlobalVariable(M, PointerTy, false, GlobalValue::ExternalLinkage, 188*0fca6ea1SDimitry Andric nullptr, CompilerRtAPINames::ExpectedCalleeTLS); 189*0fca6ea1SDimitry Andric ExpectedCalleeTLS->setThreadLocal(true); 190*0fca6ea1SDimitry Andric ExpectedCalleeTLS->setVisibility(llvm::GlobalValue::HiddenVisibility); 191*0fca6ea1SDimitry Andric } 192*0fca6ea1SDimitry Andric 193*0fca6ea1SDimitry Andric PreservedAnalyses PGOCtxProfLoweringPass::run(Module &M, 194*0fca6ea1SDimitry Andric ModuleAnalysisManager &MAM) { 195*0fca6ea1SDimitry Andric CtxInstrumentationLowerer Lowerer(M, MAM); 196*0fca6ea1SDimitry Andric bool Changed = false; 197*0fca6ea1SDimitry Andric for (auto &F : M) 198*0fca6ea1SDimitry Andric Changed |= Lowerer.lowerFunction(F); 199*0fca6ea1SDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 200*0fca6ea1SDimitry Andric } 201*0fca6ea1SDimitry Andric 202*0fca6ea1SDimitry Andric bool CtxInstrumentationLowerer::lowerFunction(Function &F) { 203*0fca6ea1SDimitry Andric if (F.isDeclaration()) 204*0fca6ea1SDimitry Andric return false; 205*0fca6ea1SDimitry Andric auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 206*0fca6ea1SDimitry Andric auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 207*0fca6ea1SDimitry Andric 208*0fca6ea1SDimitry Andric Value *Guid = nullptr; 209*0fca6ea1SDimitry Andric auto [NrCounters, NrCallsites] = getNrCountersAndCallsites(F); 210*0fca6ea1SDimitry Andric 211*0fca6ea1SDimitry Andric Value *Context = nullptr; 212*0fca6ea1SDimitry Andric Value *RealContext = nullptr; 213*0fca6ea1SDimitry Andric 214*0fca6ea1SDimitry Andric StructType *ThisContextType = nullptr; 215*0fca6ea1SDimitry Andric Value *TheRootContext = nullptr; 216*0fca6ea1SDimitry Andric Value *ExpectedCalleeTLSAddr = nullptr; 217*0fca6ea1SDimitry Andric Value *CallsiteInfoTLSAddr = nullptr; 218*0fca6ea1SDimitry Andric 219*0fca6ea1SDimitry Andric auto &Head = F.getEntryBlock(); 220*0fca6ea1SDimitry Andric for (auto &I : Head) { 221*0fca6ea1SDimitry Andric // Find the increment intrinsic in the entry basic block. 222*0fca6ea1SDimitry Andric if (auto *Mark = dyn_cast<InstrProfIncrementInst>(&I)) { 223*0fca6ea1SDimitry Andric assert(Mark->getIndex()->isZero()); 224*0fca6ea1SDimitry Andric 225*0fca6ea1SDimitry Andric IRBuilder<> Builder(Mark); 226*0fca6ea1SDimitry Andric // FIXME(mtrofin): use InstrProfSymtab::getCanonicalName 227*0fca6ea1SDimitry Andric Guid = Builder.getInt64(F.getGUID()); 228*0fca6ea1SDimitry Andric // The type of the context of this function is now knowable since we have 229*0fca6ea1SDimitry Andric // NrCallsites and NrCounters. We delcare it here because it's more 230*0fca6ea1SDimitry Andric // convenient - we have the Builder. 231*0fca6ea1SDimitry Andric ThisContextType = StructType::get( 232*0fca6ea1SDimitry Andric F.getContext(), 233*0fca6ea1SDimitry Andric {ContextNodeTy, ArrayType::get(Builder.getInt64Ty(), NrCounters), 234*0fca6ea1SDimitry Andric ArrayType::get(Builder.getPtrTy(), NrCallsites)}); 235*0fca6ea1SDimitry Andric // Figure out which way we obtain the context object for this function - 236*0fca6ea1SDimitry Andric // if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the 237*0fca6ea1SDimitry Andric // former case, we also set TheRootContext since we need to release it 238*0fca6ea1SDimitry Andric // at the end (plus it can be used to know if we have an entrypoint or a 239*0fca6ea1SDimitry Andric // regular function) 240*0fca6ea1SDimitry Andric auto Iter = ContextRootMap.find(&F); 241*0fca6ea1SDimitry Andric if (Iter != ContextRootMap.end()) { 242*0fca6ea1SDimitry Andric TheRootContext = Iter->second; 243*0fca6ea1SDimitry Andric Context = Builder.CreateCall(StartCtx, {TheRootContext, Guid, 244*0fca6ea1SDimitry Andric Builder.getInt32(NrCounters), 245*0fca6ea1SDimitry Andric Builder.getInt32(NrCallsites)}); 246*0fca6ea1SDimitry Andric ORE.emit( 247*0fca6ea1SDimitry Andric [&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); }); 248*0fca6ea1SDimitry Andric } else { 249*0fca6ea1SDimitry Andric Context = 250*0fca6ea1SDimitry Andric Builder.CreateCall(GetCtx, {&F, Guid, Builder.getInt32(NrCounters), 251*0fca6ea1SDimitry Andric Builder.getInt32(NrCallsites)}); 252*0fca6ea1SDimitry Andric ORE.emit([&] { 253*0fca6ea1SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "RegularFunction", &F); 254*0fca6ea1SDimitry Andric }); 255*0fca6ea1SDimitry Andric } 256*0fca6ea1SDimitry Andric // The context could be scratch. 257*0fca6ea1SDimitry Andric auto *CtxAsInt = Builder.CreatePtrToInt(Context, Builder.getInt64Ty()); 258*0fca6ea1SDimitry Andric if (NrCallsites > 0) { 259*0fca6ea1SDimitry Andric // Figure out which index of the TLS 2-element buffers to use. 260*0fca6ea1SDimitry Andric // Scratch context => we use index == 1. Real contexts => index == 0. 261*0fca6ea1SDimitry Andric auto *Index = Builder.CreateAnd(CtxAsInt, Builder.getInt64(1)); 262*0fca6ea1SDimitry Andric // The GEPs corresponding to that index, in the respective TLS. 263*0fca6ea1SDimitry Andric ExpectedCalleeTLSAddr = Builder.CreateGEP( 264*0fca6ea1SDimitry Andric Builder.getInt8Ty()->getPointerTo(), 265*0fca6ea1SDimitry Andric Builder.CreateThreadLocalAddress(ExpectedCalleeTLS), {Index}); 266*0fca6ea1SDimitry Andric CallsiteInfoTLSAddr = Builder.CreateGEP( 267*0fca6ea1SDimitry Andric Builder.getInt32Ty(), 268*0fca6ea1SDimitry Andric Builder.CreateThreadLocalAddress(CallsiteInfoTLS), {Index}); 269*0fca6ea1SDimitry Andric } 270*0fca6ea1SDimitry Andric // Because the context pointer may have LSB set (to indicate scratch), 271*0fca6ea1SDimitry Andric // clear it for the value we use as base address for the counter vector. 272*0fca6ea1SDimitry Andric // This way, if later we want to have "real" (not clobbered) buffers 273*0fca6ea1SDimitry Andric // acting as scratch, the lowering (at least this part of it that deals 274*0fca6ea1SDimitry Andric // with counters) stays the same. 275*0fca6ea1SDimitry Andric RealContext = Builder.CreateIntToPtr( 276*0fca6ea1SDimitry Andric Builder.CreateAnd(CtxAsInt, Builder.getInt64(-2)), 277*0fca6ea1SDimitry Andric ThisContextType->getPointerTo()); 278*0fca6ea1SDimitry Andric I.eraseFromParent(); 279*0fca6ea1SDimitry Andric break; 280*0fca6ea1SDimitry Andric } 281*0fca6ea1SDimitry Andric } 282*0fca6ea1SDimitry Andric if (!Context) { 283*0fca6ea1SDimitry Andric ORE.emit([&] { 284*0fca6ea1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "Skip", &F) 285*0fca6ea1SDimitry Andric << "Function doesn't have instrumentation, skipping"; 286*0fca6ea1SDimitry Andric }); 287*0fca6ea1SDimitry Andric return false; 288*0fca6ea1SDimitry Andric } 289*0fca6ea1SDimitry Andric 290*0fca6ea1SDimitry Andric bool ContextWasReleased = false; 291*0fca6ea1SDimitry Andric for (auto &BB : F) { 292*0fca6ea1SDimitry Andric for (auto &I : llvm::make_early_inc_range(BB)) { 293*0fca6ea1SDimitry Andric if (auto *Instr = dyn_cast<InstrProfCntrInstBase>(&I)) { 294*0fca6ea1SDimitry Andric IRBuilder<> Builder(Instr); 295*0fca6ea1SDimitry Andric switch (Instr->getIntrinsicID()) { 296*0fca6ea1SDimitry Andric case llvm::Intrinsic::instrprof_increment: 297*0fca6ea1SDimitry Andric case llvm::Intrinsic::instrprof_increment_step: { 298*0fca6ea1SDimitry Andric // Increments (or increment-steps) are just a typical load - increment 299*0fca6ea1SDimitry Andric // - store in the RealContext. 300*0fca6ea1SDimitry Andric auto *AsStep = cast<InstrProfIncrementInst>(Instr); 301*0fca6ea1SDimitry Andric auto *GEP = Builder.CreateGEP( 302*0fca6ea1SDimitry Andric ThisContextType, RealContext, 303*0fca6ea1SDimitry Andric {Builder.getInt32(0), Builder.getInt32(1), AsStep->getIndex()}); 304*0fca6ea1SDimitry Andric Builder.CreateStore( 305*0fca6ea1SDimitry Andric Builder.CreateAdd(Builder.CreateLoad(Builder.getInt64Ty(), GEP), 306*0fca6ea1SDimitry Andric AsStep->getStep()), 307*0fca6ea1SDimitry Andric GEP); 308*0fca6ea1SDimitry Andric } break; 309*0fca6ea1SDimitry Andric case llvm::Intrinsic::instrprof_callsite: 310*0fca6ea1SDimitry Andric // callsite lowering: write the called value in the expected callee 311*0fca6ea1SDimitry Andric // TLS we treat the TLS as volatile because of signal handlers and to 312*0fca6ea1SDimitry Andric // avoid these being moved away from the callsite they decorate. 313*0fca6ea1SDimitry Andric auto *CSIntrinsic = dyn_cast<InstrProfCallsite>(Instr); 314*0fca6ea1SDimitry Andric Builder.CreateStore(CSIntrinsic->getCallee(), ExpectedCalleeTLSAddr, 315*0fca6ea1SDimitry Andric true); 316*0fca6ea1SDimitry Andric // write the GEP of the slot in the sub-contexts portion of the 317*0fca6ea1SDimitry Andric // context in TLS. Now, here, we use the actual Context value - as 318*0fca6ea1SDimitry Andric // returned from compiler-rt - which may have the LSB set if the 319*0fca6ea1SDimitry Andric // Context was scratch. Since the header of the context object and 320*0fca6ea1SDimitry Andric // then the values are all 8-aligned (or, really, insofar as we care, 321*0fca6ea1SDimitry Andric // they are even) - if the context is scratch (meaning, an odd value), 322*0fca6ea1SDimitry Andric // so will the GEP. This is important because this is then visible to 323*0fca6ea1SDimitry Andric // compiler-rt which will produce scratch contexts for callers that 324*0fca6ea1SDimitry Andric // have a scratch context. 325*0fca6ea1SDimitry Andric Builder.CreateStore( 326*0fca6ea1SDimitry Andric Builder.CreateGEP(ThisContextType, Context, 327*0fca6ea1SDimitry Andric {Builder.getInt32(0), Builder.getInt32(2), 328*0fca6ea1SDimitry Andric CSIntrinsic->getIndex()}), 329*0fca6ea1SDimitry Andric CallsiteInfoTLSAddr, true); 330*0fca6ea1SDimitry Andric break; 331*0fca6ea1SDimitry Andric } 332*0fca6ea1SDimitry Andric I.eraseFromParent(); 333*0fca6ea1SDimitry Andric } else if (TheRootContext && isa<ReturnInst>(I)) { 334*0fca6ea1SDimitry Andric // Remember to release the context if we are an entrypoint. 335*0fca6ea1SDimitry Andric IRBuilder<> Builder(&I); 336*0fca6ea1SDimitry Andric Builder.CreateCall(ReleaseCtx, {TheRootContext}); 337*0fca6ea1SDimitry Andric ContextWasReleased = true; 338*0fca6ea1SDimitry Andric } 339*0fca6ea1SDimitry Andric } 340*0fca6ea1SDimitry Andric } 341*0fca6ea1SDimitry Andric // FIXME: This would happen if the entrypoint tailcalls. A way to fix would be 342*0fca6ea1SDimitry Andric // to disallow this, (so this then stays as an error), another is to detect 343*0fca6ea1SDimitry Andric // that and then do a wrapper or disallow the tail call. This only affects 344*0fca6ea1SDimitry Andric // instrumentation, when we want to detect the call graph. 345*0fca6ea1SDimitry Andric if (TheRootContext && !ContextWasReleased) 346*0fca6ea1SDimitry Andric F.getContext().emitError( 347*0fca6ea1SDimitry Andric "[ctx_prof] An entrypoint was instrumented but it has no `ret` " 348*0fca6ea1SDimitry Andric "instructions above which to release the context: " + 349*0fca6ea1SDimitry Andric F.getName()); 350*0fca6ea1SDimitry Andric return true; 351*0fca6ea1SDimitry Andric } 352