1349cc55cSDimitry Andric //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric //
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments and then the loads from them.
/// If a loaded value is a pointer and it is not modified in the kernel before
/// the load, the loaded pointer is promoted to global as well. The pass then
/// continues recursively from that pointer.
15349cc55cSDimitry Andric //
16349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
17349cc55cSDimitry Andric
18349cc55cSDimitry Andric #include "AMDGPU.h"
1981ad6265SDimitry Andric #include "Utils/AMDGPUMemoryUtils.h"
20349cc55cSDimitry Andric #include "llvm/ADT/SmallVector.h"
2181ad6265SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
22349cc55cSDimitry Andric #include "llvm/Analysis/MemorySSA.h"
23349cc55cSDimitry Andric #include "llvm/IR/IRBuilder.h"
24349cc55cSDimitry Andric #include "llvm/InitializePasses.h"
25349cc55cSDimitry Andric
26349cc55cSDimitry Andric #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27349cc55cSDimitry Andric
28349cc55cSDimitry Andric using namespace llvm;
29349cc55cSDimitry Andric
30349cc55cSDimitry Andric namespace {
31349cc55cSDimitry Andric
32349cc55cSDimitry Andric class AMDGPUPromoteKernelArguments : public FunctionPass {
33349cc55cSDimitry Andric MemorySSA *MSSA;
34349cc55cSDimitry Andric
3581ad6265SDimitry Andric AliasAnalysis *AA;
3681ad6265SDimitry Andric
37349cc55cSDimitry Andric Instruction *ArgCastInsertPt;
38349cc55cSDimitry Andric
39349cc55cSDimitry Andric SmallVector<Value *> Ptrs;
40349cc55cSDimitry Andric
41349cc55cSDimitry Andric void enqueueUsers(Value *Ptr);
42349cc55cSDimitry Andric
43349cc55cSDimitry Andric bool promotePointer(Value *Ptr);
44349cc55cSDimitry Andric
4581ad6265SDimitry Andric bool promoteLoad(LoadInst *LI);
4681ad6265SDimitry Andric
47349cc55cSDimitry Andric public:
48349cc55cSDimitry Andric static char ID;
49349cc55cSDimitry Andric
AMDGPUPromoteKernelArguments()50349cc55cSDimitry Andric AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51349cc55cSDimitry Andric
5281ad6265SDimitry Andric bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53349cc55cSDimitry Andric
54349cc55cSDimitry Andric bool runOnFunction(Function &F) override;
55349cc55cSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const56349cc55cSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
5781ad6265SDimitry Andric AU.addRequired<AAResultsWrapperPass>();
58349cc55cSDimitry Andric AU.addRequired<MemorySSAWrapperPass>();
59349cc55cSDimitry Andric AU.setPreservesAll();
60349cc55cSDimitry Andric }
61349cc55cSDimitry Andric };
62349cc55cSDimitry Andric
63349cc55cSDimitry Andric } // end anonymous namespace
64349cc55cSDimitry Andric
enqueueUsers(Value * Ptr)65349cc55cSDimitry Andric void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66349cc55cSDimitry Andric SmallVector<User *> PtrUsers(Ptr->users());
67349cc55cSDimitry Andric
68349cc55cSDimitry Andric while (!PtrUsers.empty()) {
69349cc55cSDimitry Andric Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70349cc55cSDimitry Andric if (!U)
71349cc55cSDimitry Andric continue;
72349cc55cSDimitry Andric
73349cc55cSDimitry Andric switch (U->getOpcode()) {
74349cc55cSDimitry Andric default:
75349cc55cSDimitry Andric break;
76349cc55cSDimitry Andric case Instruction::Load: {
77349cc55cSDimitry Andric LoadInst *LD = cast<LoadInst>(U);
7881ad6265SDimitry Andric if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
7981ad6265SDimitry Andric !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
80349cc55cSDimitry Andric Ptrs.push_back(LD);
8181ad6265SDimitry Andric
82349cc55cSDimitry Andric break;
83349cc55cSDimitry Andric }
84349cc55cSDimitry Andric case Instruction::GetElementPtr:
85349cc55cSDimitry Andric case Instruction::AddrSpaceCast:
86349cc55cSDimitry Andric case Instruction::BitCast:
87349cc55cSDimitry Andric if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88349cc55cSDimitry Andric PtrUsers.append(U->user_begin(), U->user_end());
89349cc55cSDimitry Andric break;
90349cc55cSDimitry Andric }
91349cc55cSDimitry Andric }
92349cc55cSDimitry Andric }
93349cc55cSDimitry Andric
promotePointer(Value * Ptr)94349cc55cSDimitry Andric bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
9581ad6265SDimitry Andric bool Changed = false;
9681ad6265SDimitry Andric
9781ad6265SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(Ptr);
9881ad6265SDimitry Andric if (LI)
9981ad6265SDimitry Andric Changed |= promoteLoad(LI);
10081ad6265SDimitry Andric
10181ad6265SDimitry Andric PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
10281ad6265SDimitry Andric if (!PT)
10381ad6265SDimitry Andric return Changed;
10481ad6265SDimitry Andric
10581ad6265SDimitry Andric if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
10681ad6265SDimitry Andric PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
10781ad6265SDimitry Andric PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
108349cc55cSDimitry Andric enqueueUsers(Ptr);
109349cc55cSDimitry Andric
110349cc55cSDimitry Andric if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
11181ad6265SDimitry Andric return Changed;
112349cc55cSDimitry Andric
11381ad6265SDimitry Andric IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
11481ad6265SDimitry Andric : ArgCastInsertPt);
115349cc55cSDimitry Andric
116349cc55cSDimitry Andric // Cast pointer to global address space and back to flat and let
117349cc55cSDimitry Andric // Infer Address Spaces pass to do all necessary rewriting.
118349cc55cSDimitry Andric PointerType *NewPT =
119*06c3fb27SDimitry Andric PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
120349cc55cSDimitry Andric Value *Cast =
121349cc55cSDimitry Andric B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122349cc55cSDimitry Andric Value *CastBack =
123349cc55cSDimitry Andric B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124349cc55cSDimitry Andric Ptr->replaceUsesWithIf(CastBack,
125349cc55cSDimitry Andric [Cast](Use &U) { return U.getUser() != Cast; });
126349cc55cSDimitry Andric
127349cc55cSDimitry Andric return true;
128349cc55cSDimitry Andric }
129349cc55cSDimitry Andric
promoteLoad(LoadInst * LI)13081ad6265SDimitry Andric bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
13181ad6265SDimitry Andric if (!LI->isSimple())
13281ad6265SDimitry Andric return false;
13381ad6265SDimitry Andric
13481ad6265SDimitry Andric LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
13581ad6265SDimitry Andric return true;
13681ad6265SDimitry Andric }
13781ad6265SDimitry Andric
138349cc55cSDimitry Andric // skip allocas
getInsertPt(BasicBlock & BB)139349cc55cSDimitry Andric static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
140349cc55cSDimitry Andric BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
141349cc55cSDimitry Andric for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
142349cc55cSDimitry Andric AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
143349cc55cSDimitry Andric
144349cc55cSDimitry Andric // If this is a dynamic alloca, the value may depend on the loaded kernargs,
145349cc55cSDimitry Andric // so loads will need to be inserted before it.
146349cc55cSDimitry Andric if (!AI || !AI->isStaticAlloca())
147349cc55cSDimitry Andric break;
148349cc55cSDimitry Andric }
149349cc55cSDimitry Andric
150349cc55cSDimitry Andric return InsPt;
151349cc55cSDimitry Andric }
152349cc55cSDimitry Andric
run(Function & F,MemorySSA & MSSA,AliasAnalysis & AA)15381ad6265SDimitry Andric bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
15481ad6265SDimitry Andric AliasAnalysis &AA) {
155349cc55cSDimitry Andric if (skipFunction(F))
156349cc55cSDimitry Andric return false;
157349cc55cSDimitry Andric
158349cc55cSDimitry Andric CallingConv::ID CC = F.getCallingConv();
159349cc55cSDimitry Andric if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
160349cc55cSDimitry Andric return false;
161349cc55cSDimitry Andric
162349cc55cSDimitry Andric ArgCastInsertPt = &*getInsertPt(*F.begin());
163349cc55cSDimitry Andric this->MSSA = &MSSA;
16481ad6265SDimitry Andric this->AA = &AA;
165349cc55cSDimitry Andric
166349cc55cSDimitry Andric for (Argument &Arg : F.args()) {
167349cc55cSDimitry Andric if (Arg.use_empty())
168349cc55cSDimitry Andric continue;
169349cc55cSDimitry Andric
170349cc55cSDimitry Andric PointerType *PT = dyn_cast<PointerType>(Arg.getType());
171349cc55cSDimitry Andric if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
172349cc55cSDimitry Andric PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
173349cc55cSDimitry Andric PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
174349cc55cSDimitry Andric continue;
175349cc55cSDimitry Andric
176349cc55cSDimitry Andric Ptrs.push_back(&Arg);
177349cc55cSDimitry Andric }
178349cc55cSDimitry Andric
179349cc55cSDimitry Andric bool Changed = false;
180349cc55cSDimitry Andric while (!Ptrs.empty()) {
181349cc55cSDimitry Andric Value *Ptr = Ptrs.pop_back_val();
182349cc55cSDimitry Andric Changed |= promotePointer(Ptr);
183349cc55cSDimitry Andric }
184349cc55cSDimitry Andric
185349cc55cSDimitry Andric return Changed;
186349cc55cSDimitry Andric }
187349cc55cSDimitry Andric
runOnFunction(Function & F)188349cc55cSDimitry Andric bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
189349cc55cSDimitry Andric MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
19081ad6265SDimitry Andric AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
19181ad6265SDimitry Andric return run(F, MSSA, AA);
192349cc55cSDimitry Andric }
193349cc55cSDimitry Andric
194349cc55cSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
195349cc55cSDimitry Andric "AMDGPU Promote Kernel Arguments", false, false)
19681ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
197349cc55cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
198349cc55cSDimitry Andric INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
199349cc55cSDimitry Andric "AMDGPU Promote Kernel Arguments", false, false)
200349cc55cSDimitry Andric
201349cc55cSDimitry Andric char AMDGPUPromoteKernelArguments::ID = 0;
202349cc55cSDimitry Andric
createAMDGPUPromoteKernelArgumentsPass()203349cc55cSDimitry Andric FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
204349cc55cSDimitry Andric return new AMDGPUPromoteKernelArguments();
205349cc55cSDimitry Andric }
206349cc55cSDimitry Andric
207349cc55cSDimitry Andric PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)208349cc55cSDimitry Andric AMDGPUPromoteKernelArgumentsPass::run(Function &F,
209349cc55cSDimitry Andric FunctionAnalysisManager &AM) {
210349cc55cSDimitry Andric MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
21181ad6265SDimitry Andric AliasAnalysis &AA = AM.getResult<AAManager>(F);
21281ad6265SDimitry Andric if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
213349cc55cSDimitry Andric PreservedAnalyses PA;
214349cc55cSDimitry Andric PA.preserveSet<CFGAnalyses>();
215349cc55cSDimitry Andric PA.preserve<MemorySSAAnalysis>();
216349cc55cSDimitry Andric return PA;
217349cc55cSDimitry Andric }
218349cc55cSDimitry Andric return PreservedAnalyses::all();
219349cc55cSDimitry Andric }
220