1349cc55cSDimitry Andric //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===// 2349cc55cSDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6349cc55cSDimitry Andric // 7349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 8349cc55cSDimitry Andric // 9349cc55cSDimitry Andric /// \file This pass recursively promotes generic pointer arguments of a kernel 10349cc55cSDimitry Andric /// into the global address space. 11349cc55cSDimitry Andric /// 12349cc55cSDimitry Andric /// The pass walks kernel's pointer arguments, then loads from them. If a loaded 13349cc55cSDimitry Andric /// value is a pointer and loaded pointer is unmodified in the kernel before the 14349cc55cSDimitry Andric /// load, then promote loaded pointer to global. Then recursively continue. 15349cc55cSDimitry Andric // 16349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 17349cc55cSDimitry Andric 18349cc55cSDimitry Andric #include "AMDGPU.h" 19*81ad6265SDimitry Andric #include "Utils/AMDGPUMemoryUtils.h" 20349cc55cSDimitry Andric #include "llvm/ADT/SmallVector.h" 21*81ad6265SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 22349cc55cSDimitry Andric #include "llvm/Analysis/MemorySSA.h" 23349cc55cSDimitry Andric #include "llvm/IR/IRBuilder.h" 24349cc55cSDimitry Andric #include "llvm/InitializePasses.h" 25349cc55cSDimitry Andric 26349cc55cSDimitry Andric #define DEBUG_TYPE "amdgpu-promote-kernel-arguments" 27349cc55cSDimitry Andric 28349cc55cSDimitry Andric using namespace llvm; 29349cc55cSDimitry Andric 30349cc55cSDimitry Andric namespace { 31349cc55cSDimitry Andric 32349cc55cSDimitry Andric class AMDGPUPromoteKernelArguments : public FunctionPass { 33349cc55cSDimitry Andric MemorySSA *MSSA; 34349cc55cSDimitry Andric 35*81ad6265SDimitry Andric AliasAnalysis *AA; 36*81ad6265SDimitry Andric 37349cc55cSDimitry Andric Instruction *ArgCastInsertPt; 38349cc55cSDimitry Andric 39349cc55cSDimitry Andric SmallVector<Value *> Ptrs; 40349cc55cSDimitry Andric 41349cc55cSDimitry Andric void enqueueUsers(Value *Ptr); 42349cc55cSDimitry Andric 43349cc55cSDimitry Andric bool promotePointer(Value *Ptr); 44349cc55cSDimitry Andric 45*81ad6265SDimitry Andric bool promoteLoad(LoadInst *LI); 46*81ad6265SDimitry Andric 47349cc55cSDimitry Andric public: 48349cc55cSDimitry Andric static char ID; 49349cc55cSDimitry Andric 50349cc55cSDimitry Andric AMDGPUPromoteKernelArguments() : FunctionPass(ID) {} 51349cc55cSDimitry Andric 52*81ad6265SDimitry Andric bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA); 53349cc55cSDimitry Andric 54349cc55cSDimitry Andric bool runOnFunction(Function &F) override; 55349cc55cSDimitry Andric 56349cc55cSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 57*81ad6265SDimitry Andric AU.addRequired<AAResultsWrapperPass>(); 58349cc55cSDimitry Andric AU.addRequired<MemorySSAWrapperPass>(); 59349cc55cSDimitry Andric AU.setPreservesAll(); 60349cc55cSDimitry Andric } 61349cc55cSDimitry Andric }; 62349cc55cSDimitry Andric 63349cc55cSDimitry Andric } // end anonymous namespace 64349cc55cSDimitry Andric 65349cc55cSDimitry Andric void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) { 66349cc55cSDimitry Andric SmallVector<User *> PtrUsers(Ptr->users()); 67349cc55cSDimitry Andric 68349cc55cSDimitry Andric while (!PtrUsers.empty()) { 69349cc55cSDimitry Andric Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val()); 70349cc55cSDimitry Andric if (!U) 71349cc55cSDimitry Andric continue; 72349cc55cSDimitry Andric 73349cc55cSDimitry Andric switch (U->getOpcode()) { 74349cc55cSDimitry Andric default: 75349cc55cSDimitry Andric break; 76349cc55cSDimitry Andric case Instruction::Load: { 77349cc55cSDimitry Andric LoadInst *LD = cast<LoadInst>(U); 78*81ad6265SDimitry Andric if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr && 79*81ad6265SDimitry Andric !AMDGPU::isClobberedInFunction(LD, MSSA, AA)) 80349cc55cSDimitry Andric Ptrs.push_back(LD); 81*81ad6265SDimitry Andric 82349cc55cSDimitry Andric break; 83349cc55cSDimitry Andric } 84349cc55cSDimitry Andric case Instruction::GetElementPtr: 85349cc55cSDimitry Andric case Instruction::AddrSpaceCast: 86349cc55cSDimitry Andric case Instruction::BitCast: 87349cc55cSDimitry Andric if (U->getOperand(0)->stripInBoundsOffsets() == Ptr) 88349cc55cSDimitry Andric PtrUsers.append(U->user_begin(), U->user_end()); 89349cc55cSDimitry Andric break; 90349cc55cSDimitry Andric } 91349cc55cSDimitry Andric } 92349cc55cSDimitry Andric } 93349cc55cSDimitry Andric 94349cc55cSDimitry Andric bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) { 95*81ad6265SDimitry Andric bool Changed = false; 96*81ad6265SDimitry Andric 97*81ad6265SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(Ptr); 98*81ad6265SDimitry Andric if (LI) 99*81ad6265SDimitry Andric Changed |= promoteLoad(LI); 100*81ad6265SDimitry Andric 101*81ad6265SDimitry Andric PointerType *PT = dyn_cast<PointerType>(Ptr->getType()); 102*81ad6265SDimitry Andric if (!PT) 103*81ad6265SDimitry Andric return Changed; 104*81ad6265SDimitry Andric 105*81ad6265SDimitry Andric if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS || 106*81ad6265SDimitry Andric PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || 107*81ad6265SDimitry Andric PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) 108349cc55cSDimitry Andric enqueueUsers(Ptr); 109349cc55cSDimitry Andric 110349cc55cSDimitry Andric if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) 111*81ad6265SDimitry Andric return Changed; 112349cc55cSDimitry Andric 113*81ad6265SDimitry Andric IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator()) 114*81ad6265SDimitry Andric : ArgCastInsertPt); 115349cc55cSDimitry Andric 116349cc55cSDimitry Andric // Cast pointer to global address space and back to flat and let 117349cc55cSDimitry Andric // Infer Address Spaces pass to do all necessary rewriting. 118349cc55cSDimitry Andric PointerType *NewPT = 119349cc55cSDimitry Andric PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS); 120349cc55cSDimitry Andric Value *Cast = 121349cc55cSDimitry Andric B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global")); 122349cc55cSDimitry Andric Value *CastBack = 123349cc55cSDimitry Andric B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat")); 124349cc55cSDimitry Andric Ptr->replaceUsesWithIf(CastBack, 125349cc55cSDimitry Andric [Cast](Use &U) { return U.getUser() != Cast; }); 126349cc55cSDimitry Andric 127349cc55cSDimitry Andric return true; 128349cc55cSDimitry Andric } 129349cc55cSDimitry Andric 130*81ad6265SDimitry Andric bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) { 131*81ad6265SDimitry Andric if (!LI->isSimple()) 132*81ad6265SDimitry Andric return false; 133*81ad6265SDimitry Andric 134*81ad6265SDimitry Andric LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {})); 135*81ad6265SDimitry Andric return true; 136*81ad6265SDimitry Andric } 137*81ad6265SDimitry Andric 138349cc55cSDimitry Andric // skip allocas 139349cc55cSDimitry Andric static BasicBlock::iterator getInsertPt(BasicBlock &BB) { 140349cc55cSDimitry Andric BasicBlock::iterator InsPt = BB.getFirstInsertionPt(); 141349cc55cSDimitry Andric for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) { 142349cc55cSDimitry Andric AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt); 143349cc55cSDimitry Andric 144349cc55cSDimitry Andric // If this is a dynamic alloca, the value may depend on the loaded kernargs, 145349cc55cSDimitry Andric // so loads will need to be inserted before it. 146349cc55cSDimitry Andric if (!AI || !AI->isStaticAlloca()) 147349cc55cSDimitry Andric break; 148349cc55cSDimitry Andric } 149349cc55cSDimitry Andric 150349cc55cSDimitry Andric return InsPt; 151349cc55cSDimitry Andric } 152349cc55cSDimitry Andric 153*81ad6265SDimitry Andric bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA, 154*81ad6265SDimitry Andric AliasAnalysis &AA) { 155349cc55cSDimitry Andric if (skipFunction(F)) 156349cc55cSDimitry Andric return false; 157349cc55cSDimitry Andric 158349cc55cSDimitry Andric CallingConv::ID CC = F.getCallingConv(); 159349cc55cSDimitry Andric if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty()) 160349cc55cSDimitry Andric return false; 161349cc55cSDimitry Andric 162349cc55cSDimitry Andric ArgCastInsertPt = &*getInsertPt(*F.begin()); 163349cc55cSDimitry Andric this->MSSA = &MSSA; 164*81ad6265SDimitry Andric this->AA = &AA; 165349cc55cSDimitry Andric 166349cc55cSDimitry Andric for (Argument &Arg : F.args()) { 167349cc55cSDimitry Andric if (Arg.use_empty()) 168349cc55cSDimitry Andric continue; 169349cc55cSDimitry Andric 170349cc55cSDimitry Andric PointerType *PT = dyn_cast<PointerType>(Arg.getType()); 171349cc55cSDimitry Andric if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS && 172349cc55cSDimitry Andric PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS && 173349cc55cSDimitry Andric PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)) 174349cc55cSDimitry Andric continue; 175349cc55cSDimitry Andric 176349cc55cSDimitry Andric Ptrs.push_back(&Arg); 177349cc55cSDimitry Andric } 178349cc55cSDimitry Andric 179349cc55cSDimitry Andric bool Changed = false; 180349cc55cSDimitry Andric while (!Ptrs.empty()) { 181349cc55cSDimitry Andric Value *Ptr = Ptrs.pop_back_val(); 182349cc55cSDimitry Andric Changed |= promotePointer(Ptr); 183349cc55cSDimitry Andric } 184349cc55cSDimitry Andric 185349cc55cSDimitry Andric return Changed; 186349cc55cSDimitry Andric } 187349cc55cSDimitry Andric 188349cc55cSDimitry Andric bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) { 189349cc55cSDimitry Andric MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 190*81ad6265SDimitry Andric AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 191*81ad6265SDimitry Andric return run(F, MSSA, AA); 192349cc55cSDimitry Andric } 193349cc55cSDimitry Andric 194349cc55cSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE, 195349cc55cSDimitry Andric "AMDGPU Promote Kernel Arguments", false, false) 196*81ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 197349cc55cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 198349cc55cSDimitry Andric INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE, 199349cc55cSDimitry Andric "AMDGPU Promote Kernel Arguments", false, false) 200349cc55cSDimitry Andric 201349cc55cSDimitry Andric char AMDGPUPromoteKernelArguments::ID = 0; 202349cc55cSDimitry Andric 203349cc55cSDimitry Andric FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() { 204349cc55cSDimitry Andric return new AMDGPUPromoteKernelArguments(); 205349cc55cSDimitry Andric } 206349cc55cSDimitry Andric 207349cc55cSDimitry Andric PreservedAnalyses 208349cc55cSDimitry Andric AMDGPUPromoteKernelArgumentsPass::run(Function &F, 209349cc55cSDimitry Andric FunctionAnalysisManager &AM) { 210349cc55cSDimitry Andric MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); 211*81ad6265SDimitry Andric AliasAnalysis &AA = AM.getResult<AAManager>(F); 212*81ad6265SDimitry Andric if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) { 213349cc55cSDimitry Andric PreservedAnalyses PA; 214349cc55cSDimitry Andric PA.preserveSet<CFGAnalyses>(); 215349cc55cSDimitry Andric PA.preserve<MemorySSAAnalysis>(); 216349cc55cSDimitry Andric return PA; 217349cc55cSDimitry Andric } 218349cc55cSDimitry Andric return PreservedAnalyses::all(); 219349cc55cSDimitry Andric } 220