xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1349cc55cSDimitry Andric //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric //
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments and then the loads from them.
/// If a loaded value is a pointer, and the memory it is loaded from is not
/// modified in the kernel before the load, the loaded pointer is promoted to
/// global as well. The process then continues recursively from that pointer.
15349cc55cSDimitry Andric //
16349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
17349cc55cSDimitry Andric 
18349cc55cSDimitry Andric #include "AMDGPU.h"
1981ad6265SDimitry Andric #include "Utils/AMDGPUMemoryUtils.h"
20349cc55cSDimitry Andric #include "llvm/ADT/SmallVector.h"
2181ad6265SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
22349cc55cSDimitry Andric #include "llvm/Analysis/MemorySSA.h"
23349cc55cSDimitry Andric #include "llvm/IR/IRBuilder.h"
24349cc55cSDimitry Andric #include "llvm/InitializePasses.h"
25349cc55cSDimitry Andric 
26349cc55cSDimitry Andric #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27349cc55cSDimitry Andric 
28349cc55cSDimitry Andric using namespace llvm;
29349cc55cSDimitry Andric 
30349cc55cSDimitry Andric namespace {
31349cc55cSDimitry Andric 
32349cc55cSDimitry Andric class AMDGPUPromoteKernelArguments : public FunctionPass {
33349cc55cSDimitry Andric   MemorySSA *MSSA;
34349cc55cSDimitry Andric 
3581ad6265SDimitry Andric   AliasAnalysis *AA;
3681ad6265SDimitry Andric 
37349cc55cSDimitry Andric   Instruction *ArgCastInsertPt;
38349cc55cSDimitry Andric 
39349cc55cSDimitry Andric   SmallVector<Value *> Ptrs;
40349cc55cSDimitry Andric 
41349cc55cSDimitry Andric   void enqueueUsers(Value *Ptr);
42349cc55cSDimitry Andric 
43349cc55cSDimitry Andric   bool promotePointer(Value *Ptr);
44349cc55cSDimitry Andric 
4581ad6265SDimitry Andric   bool promoteLoad(LoadInst *LI);
4681ad6265SDimitry Andric 
47349cc55cSDimitry Andric public:
48349cc55cSDimitry Andric   static char ID;
49349cc55cSDimitry Andric 
AMDGPUPromoteKernelArguments()50349cc55cSDimitry Andric   AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51349cc55cSDimitry Andric 
5281ad6265SDimitry Andric   bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53349cc55cSDimitry Andric 
54349cc55cSDimitry Andric   bool runOnFunction(Function &F) override;
55349cc55cSDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const56349cc55cSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
5781ad6265SDimitry Andric     AU.addRequired<AAResultsWrapperPass>();
58349cc55cSDimitry Andric     AU.addRequired<MemorySSAWrapperPass>();
59349cc55cSDimitry Andric     AU.setPreservesAll();
60349cc55cSDimitry Andric   }
61349cc55cSDimitry Andric };
62349cc55cSDimitry Andric 
63349cc55cSDimitry Andric } // end anonymous namespace
64349cc55cSDimitry Andric 
enqueueUsers(Value * Ptr)65349cc55cSDimitry Andric void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66349cc55cSDimitry Andric   SmallVector<User *> PtrUsers(Ptr->users());
67349cc55cSDimitry Andric 
68349cc55cSDimitry Andric   while (!PtrUsers.empty()) {
69349cc55cSDimitry Andric     Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70349cc55cSDimitry Andric     if (!U)
71349cc55cSDimitry Andric       continue;
72349cc55cSDimitry Andric 
73349cc55cSDimitry Andric     switch (U->getOpcode()) {
74349cc55cSDimitry Andric     default:
75349cc55cSDimitry Andric       break;
76349cc55cSDimitry Andric     case Instruction::Load: {
77349cc55cSDimitry Andric       LoadInst *LD = cast<LoadInst>(U);
7881ad6265SDimitry Andric       if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
7981ad6265SDimitry Andric           !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
80349cc55cSDimitry Andric         Ptrs.push_back(LD);
8181ad6265SDimitry Andric 
82349cc55cSDimitry Andric       break;
83349cc55cSDimitry Andric     }
84349cc55cSDimitry Andric     case Instruction::GetElementPtr:
85349cc55cSDimitry Andric     case Instruction::AddrSpaceCast:
86349cc55cSDimitry Andric     case Instruction::BitCast:
87349cc55cSDimitry Andric       if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88349cc55cSDimitry Andric         PtrUsers.append(U->user_begin(), U->user_end());
89349cc55cSDimitry Andric       break;
90349cc55cSDimitry Andric     }
91349cc55cSDimitry Andric   }
92349cc55cSDimitry Andric }
93349cc55cSDimitry Andric 
promotePointer(Value * Ptr)94349cc55cSDimitry Andric bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
9581ad6265SDimitry Andric   bool Changed = false;
9681ad6265SDimitry Andric 
9781ad6265SDimitry Andric   LoadInst *LI = dyn_cast<LoadInst>(Ptr);
9881ad6265SDimitry Andric   if (LI)
9981ad6265SDimitry Andric     Changed |= promoteLoad(LI);
10081ad6265SDimitry Andric 
10181ad6265SDimitry Andric   PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
10281ad6265SDimitry Andric   if (!PT)
10381ad6265SDimitry Andric     return Changed;
10481ad6265SDimitry Andric 
10581ad6265SDimitry Andric   if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
10681ad6265SDimitry Andric       PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
10781ad6265SDimitry Andric       PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
108349cc55cSDimitry Andric     enqueueUsers(Ptr);
109349cc55cSDimitry Andric 
110349cc55cSDimitry Andric   if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
11181ad6265SDimitry Andric     return Changed;
112349cc55cSDimitry Andric 
11381ad6265SDimitry Andric   IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
11481ad6265SDimitry Andric                    : ArgCastInsertPt);
115349cc55cSDimitry Andric 
116349cc55cSDimitry Andric   // Cast pointer to global address space and back to flat and let
117349cc55cSDimitry Andric   // Infer Address Spaces pass to do all necessary rewriting.
118349cc55cSDimitry Andric   PointerType *NewPT =
119*06c3fb27SDimitry Andric       PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
120349cc55cSDimitry Andric   Value *Cast =
121349cc55cSDimitry Andric       B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122349cc55cSDimitry Andric   Value *CastBack =
123349cc55cSDimitry Andric       B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124349cc55cSDimitry Andric   Ptr->replaceUsesWithIf(CastBack,
125349cc55cSDimitry Andric                          [Cast](Use &U) { return U.getUser() != Cast; });
126349cc55cSDimitry Andric 
127349cc55cSDimitry Andric   return true;
128349cc55cSDimitry Andric }
129349cc55cSDimitry Andric 
promoteLoad(LoadInst * LI)13081ad6265SDimitry Andric bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
13181ad6265SDimitry Andric   if (!LI->isSimple())
13281ad6265SDimitry Andric     return false;
13381ad6265SDimitry Andric 
13481ad6265SDimitry Andric   LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
13581ad6265SDimitry Andric   return true;
13681ad6265SDimitry Andric }
13781ad6265SDimitry Andric 
138349cc55cSDimitry Andric // skip allocas
getInsertPt(BasicBlock & BB)139349cc55cSDimitry Andric static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
140349cc55cSDimitry Andric   BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
141349cc55cSDimitry Andric   for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
142349cc55cSDimitry Andric     AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
143349cc55cSDimitry Andric 
144349cc55cSDimitry Andric     // If this is a dynamic alloca, the value may depend on the loaded kernargs,
145349cc55cSDimitry Andric     // so loads will need to be inserted before it.
146349cc55cSDimitry Andric     if (!AI || !AI->isStaticAlloca())
147349cc55cSDimitry Andric       break;
148349cc55cSDimitry Andric   }
149349cc55cSDimitry Andric 
150349cc55cSDimitry Andric   return InsPt;
151349cc55cSDimitry Andric }
152349cc55cSDimitry Andric 
run(Function & F,MemorySSA & MSSA,AliasAnalysis & AA)15381ad6265SDimitry Andric bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
15481ad6265SDimitry Andric                                        AliasAnalysis &AA) {
155349cc55cSDimitry Andric   if (skipFunction(F))
156349cc55cSDimitry Andric     return false;
157349cc55cSDimitry Andric 
158349cc55cSDimitry Andric   CallingConv::ID CC = F.getCallingConv();
159349cc55cSDimitry Andric   if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
160349cc55cSDimitry Andric     return false;
161349cc55cSDimitry Andric 
162349cc55cSDimitry Andric   ArgCastInsertPt = &*getInsertPt(*F.begin());
163349cc55cSDimitry Andric   this->MSSA = &MSSA;
16481ad6265SDimitry Andric   this->AA = &AA;
165349cc55cSDimitry Andric 
166349cc55cSDimitry Andric   for (Argument &Arg : F.args()) {
167349cc55cSDimitry Andric     if (Arg.use_empty())
168349cc55cSDimitry Andric       continue;
169349cc55cSDimitry Andric 
170349cc55cSDimitry Andric     PointerType *PT = dyn_cast<PointerType>(Arg.getType());
171349cc55cSDimitry Andric     if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
172349cc55cSDimitry Andric                 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
173349cc55cSDimitry Andric                 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
174349cc55cSDimitry Andric       continue;
175349cc55cSDimitry Andric 
176349cc55cSDimitry Andric     Ptrs.push_back(&Arg);
177349cc55cSDimitry Andric   }
178349cc55cSDimitry Andric 
179349cc55cSDimitry Andric   bool Changed = false;
180349cc55cSDimitry Andric   while (!Ptrs.empty()) {
181349cc55cSDimitry Andric     Value *Ptr = Ptrs.pop_back_val();
182349cc55cSDimitry Andric     Changed |= promotePointer(Ptr);
183349cc55cSDimitry Andric   }
184349cc55cSDimitry Andric 
185349cc55cSDimitry Andric   return Changed;
186349cc55cSDimitry Andric }
187349cc55cSDimitry Andric 
runOnFunction(Function & F)188349cc55cSDimitry Andric bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
189349cc55cSDimitry Andric   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
19081ad6265SDimitry Andric   AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
19181ad6265SDimitry Andric   return run(F, MSSA, AA);
192349cc55cSDimitry Andric }
193349cc55cSDimitry Andric 
194349cc55cSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
195349cc55cSDimitry Andric                       "AMDGPU Promote Kernel Arguments", false, false)
19681ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
197349cc55cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
198349cc55cSDimitry Andric INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
199349cc55cSDimitry Andric                     "AMDGPU Promote Kernel Arguments", false, false)
200349cc55cSDimitry Andric 
201349cc55cSDimitry Andric char AMDGPUPromoteKernelArguments::ID = 0;
202349cc55cSDimitry Andric 
createAMDGPUPromoteKernelArgumentsPass()203349cc55cSDimitry Andric FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
204349cc55cSDimitry Andric   return new AMDGPUPromoteKernelArguments();
205349cc55cSDimitry Andric }
206349cc55cSDimitry Andric 
207349cc55cSDimitry Andric PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)208349cc55cSDimitry Andric AMDGPUPromoteKernelArgumentsPass::run(Function &F,
209349cc55cSDimitry Andric                                       FunctionAnalysisManager &AM) {
210349cc55cSDimitry Andric   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
21181ad6265SDimitry Andric   AliasAnalysis &AA = AM.getResult<AAManager>(F);
21281ad6265SDimitry Andric   if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
213349cc55cSDimitry Andric     PreservedAnalyses PA;
214349cc55cSDimitry Andric     PA.preserveSet<CFGAnalyses>();
215349cc55cSDimitry Andric     PA.preserve<MemorySSAAnalysis>();
216349cc55cSDimitry Andric     return PA;
217349cc55cSDimitry Andric   }
218349cc55cSDimitry Andric   return PreservedAnalyses::all();
219349cc55cSDimitry Andric }
220