xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1fe6060f1SDimitry Andric //===- AMDGPUAttributor.cpp -----------------------------------------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
10fe6060f1SDimitry Andric //
11fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
12fe6060f1SDimitry Andric 
13fe6060f1SDimitry Andric #include "AMDGPU.h"
14fe6060f1SDimitry Andric #include "GCNSubtarget.h"
1581ad6265SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
16bdd1243dSDimitry Andric #include "llvm/Analysis/CycleAnalysis.h"
17fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
18fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
19fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsR600.h"
20fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h"
21fe6060f1SDimitry Andric #include "llvm/Transforms/IPO/Attributor.h"
22fe6060f1SDimitry Andric 
23fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-attributor"
24fe6060f1SDimitry Andric 
25bdd1243dSDimitry Andric namespace llvm {
26bdd1243dSDimitry Andric void initializeCycleInfoWrapperPassPass(PassRegistry &);
27*0fca6ea1SDimitry Andric } // namespace llvm
28bdd1243dSDimitry Andric 
29fe6060f1SDimitry Andric using namespace llvm;
30fe6060f1SDimitry Andric 
// Command-line option "amdgpu-kernarg-preload-count"; it defaults to 0.
static cl::opt<unsigned> KernargPreloadCount(
    "amdgpu-kernarg-preload-count",
    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));

// X-macro pass 1: each AMDGPU_ATTRIBUTE(Name, Str) entry pulled in from
// AMDGPUAttributes.def expands to the enumerator Name_POS, giving every
// attribute its own bit position.
// NOTE(review): AMDGPU_ATTRIBUTE is redefined before each pass with no #undef
// visible here; presumably AMDGPUAttributes.def undefines it at its end --
// confirm.
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  // Follows the last attribute position; used below to build the full mask.
  LAST_ARG_POS
};

// X-macro pass 2: each entry expands to a single-bit mask Name, placed at the
// bit position assigned in pass 1.
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  // Value returned for intrinsics that consume no implicit input.
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  // All bits below LAST_ARG_POS set.
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

// X-macro pass 3: table pairing every mask bit with the string given as the
// second macro argument. The table is iterated to translate between function
// attributes and mask bits.
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
 #include "AMDGPUAttributes.def"
};
55fe6060f1SDimitry Andric 
56fe6060f1SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always
57fe6060f1SDimitry Andric // initialized.
58fe6060f1SDimitry Andric //
59fe6060f1SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup
60fe6060f1SDimitry Andric // size is 1 for y/z.
61349cc55cSDimitry Andric static ImplicitArgumentMask
6281ad6265SDimitry Andric intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
6306c3fb27SDimitry Andric                     bool HasApertureRegs, bool SupportsGetDoorBellID,
6406c3fb27SDimitry Andric                     unsigned CodeObjectVersion) {
65fe6060f1SDimitry Andric   switch (ID) {
66fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
67fe6060f1SDimitry Andric     NonKernelOnly = true;
68349cc55cSDimitry Andric     return WORKITEM_ID_X;
69fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_x:
70fe6060f1SDimitry Andric     NonKernelOnly = true;
71349cc55cSDimitry Andric     return WORKGROUP_ID_X;
72fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
73fe6060f1SDimitry Andric   case Intrinsic::r600_read_tidig_y:
74349cc55cSDimitry Andric     return WORKITEM_ID_Y;
75fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
76fe6060f1SDimitry Andric   case Intrinsic::r600_read_tidig_z:
77349cc55cSDimitry Andric     return WORKITEM_ID_Z;
78fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_y:
79fe6060f1SDimitry Andric   case Intrinsic::r600_read_tgid_y:
80349cc55cSDimitry Andric     return WORKGROUP_ID_Y;
81fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_z:
82fe6060f1SDimitry Andric   case Intrinsic::r600_read_tgid_z:
83349cc55cSDimitry Andric     return WORKGROUP_ID_Z;
84fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_lds_kernel_id:
85fcaf7f86SDimitry Andric     return LDS_KERNEL_ID;
86fe6060f1SDimitry Andric   case Intrinsic::amdgcn_dispatch_ptr:
87349cc55cSDimitry Andric     return DISPATCH_PTR;
88fe6060f1SDimitry Andric   case Intrinsic::amdgcn_dispatch_id:
89349cc55cSDimitry Andric     return DISPATCH_ID;
90fe6060f1SDimitry Andric   case Intrinsic::amdgcn_implicitarg_ptr:
91349cc55cSDimitry Andric     return IMPLICIT_ARG_PTR;
9281ad6265SDimitry Andric   // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
9381ad6265SDimitry Andric   // queue_ptr.
94fe6060f1SDimitry Andric   case Intrinsic::amdgcn_queue_ptr:
9506c3fb27SDimitry Andric     NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
9681ad6265SDimitry Andric     return QUEUE_PTR;
97fe6060f1SDimitry Andric   case Intrinsic::amdgcn_is_shared:
98fe6060f1SDimitry Andric   case Intrinsic::amdgcn_is_private:
9981ad6265SDimitry Andric     if (HasApertureRegs)
10081ad6265SDimitry Andric       return NOT_IMPLICIT_INPUT;
10181ad6265SDimitry Andric     // Under V5, we need implicitarg_ptr + offsets to access private_base or
10281ad6265SDimitry Andric     // shared_base. For pre-V5, however, need to access them through queue_ptr +
10381ad6265SDimitry Andric     // offsets.
10406c3fb27SDimitry Andric     return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR :
10506c3fb27SDimitry Andric                                                       QUEUE_PTR;
106fe6060f1SDimitry Andric   case Intrinsic::trap:
10781ad6265SDimitry Andric     if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
10806c3fb27SDimitry Andric       return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT :
10906c3fb27SDimitry Andric                                                         QUEUE_PTR;
11006c3fb27SDimitry Andric     NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
111349cc55cSDimitry Andric     return QUEUE_PTR;
112fe6060f1SDimitry Andric   default:
113349cc55cSDimitry Andric     return NOT_IMPLICIT_INPUT;
114fe6060f1SDimitry Andric   }
115fe6060f1SDimitry Andric }
116fe6060f1SDimitry Andric 
117fe6060f1SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
118fe6060f1SDimitry Andric   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
119fe6060f1SDimitry Andric }
120fe6060f1SDimitry Andric 
121fe6060f1SDimitry Andric static bool isDSAddress(const Constant *C) {
122fe6060f1SDimitry Andric   const GlobalValue *GV = dyn_cast<GlobalValue>(C);
123fe6060f1SDimitry Andric   if (!GV)
124fe6060f1SDimitry Andric     return false;
125fe6060f1SDimitry Andric   unsigned AS = GV->getAddressSpace();
126fe6060f1SDimitry Andric   return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
127fe6060f1SDimitry Andric }
128fe6060f1SDimitry Andric 
1290eae32dcSDimitry Andric /// Returns true if the function requires the implicit argument be passed
1300eae32dcSDimitry Andric /// regardless of the function contents.
13181ad6265SDimitry Andric static bool funcRequiresHostcallPtr(const Function &F) {
1320eae32dcSDimitry Andric   // Sanitizers require the hostcall buffer passed in the implicit arguments.
1330eae32dcSDimitry Andric   return F.hasFnAttribute(Attribute::SanitizeAddress) ||
1340eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeThread) ||
1350eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeMemory) ||
1360eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1370eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeMemTag);
1380eae32dcSDimitry Andric }
1390eae32dcSDimitry Andric 
140349cc55cSDimitry Andric namespace {
141fe6060f1SDimitry Andric class AMDGPUInformationCache : public InformationCache {
142fe6060f1SDimitry Andric public:
143fe6060f1SDimitry Andric   AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
144fe6060f1SDimitry Andric                          BumpPtrAllocator &Allocator,
145fe6060f1SDimitry Andric                          SetVector<Function *> *CGSCC, TargetMachine &TM)
14606c3fb27SDimitry Andric       : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
1477a6dacacSDimitry Andric         CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
14806c3fb27SDimitry Andric 
149fe6060f1SDimitry Andric   TargetMachine &TM;
150fe6060f1SDimitry Andric 
151fe6060f1SDimitry Andric   enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
152fe6060f1SDimitry Andric 
153fe6060f1SDimitry Andric   /// Check if the subtarget has aperture regs.
154fe6060f1SDimitry Andric   bool hasApertureRegs(Function &F) {
155fe6060f1SDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
156fe6060f1SDimitry Andric     return ST.hasApertureRegs();
157fe6060f1SDimitry Andric   }
158fe6060f1SDimitry Andric 
15981ad6265SDimitry Andric   /// Check if the subtarget supports GetDoorbellID.
16081ad6265SDimitry Andric   bool supportsGetDoorbellID(Function &F) {
16181ad6265SDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
16281ad6265SDimitry Andric     return ST.supportsGetDoorbellID();
16381ad6265SDimitry Andric   }
16481ad6265SDimitry Andric 
165349cc55cSDimitry Andric   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
166349cc55cSDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
167349cc55cSDimitry Andric     return ST.getFlatWorkGroupSizes(F);
168349cc55cSDimitry Andric   }
169349cc55cSDimitry Andric 
170349cc55cSDimitry Andric   std::pair<unsigned, unsigned>
171349cc55cSDimitry Andric   getMaximumFlatWorkGroupRange(const Function &F) {
172349cc55cSDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
173349cc55cSDimitry Andric     return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
174349cc55cSDimitry Andric   }
175349cc55cSDimitry Andric 
17606c3fb27SDimitry Andric   /// Get code object version.
17706c3fb27SDimitry Andric   unsigned getCodeObjectVersion() const {
17806c3fb27SDimitry Andric     return CodeObjectVersion;
17906c3fb27SDimitry Andric   }
18006c3fb27SDimitry Andric 
18106c3fb27SDimitry Andric   /// Get the effective value of "amdgpu-waves-per-eu" for the function,
18206c3fb27SDimitry Andric   /// accounting for the interaction with the passed value to use for
18306c3fb27SDimitry Andric   /// "amdgpu-flat-work-group-size".
18406c3fb27SDimitry Andric   std::pair<unsigned, unsigned>
18506c3fb27SDimitry Andric   getWavesPerEU(const Function &F,
18606c3fb27SDimitry Andric                 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
18706c3fb27SDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
18806c3fb27SDimitry Andric     return ST.getWavesPerEU(F, FlatWorkGroupSize);
18906c3fb27SDimitry Andric   }
19006c3fb27SDimitry Andric 
19106c3fb27SDimitry Andric   std::pair<unsigned, unsigned>
19206c3fb27SDimitry Andric   getEffectiveWavesPerEU(const Function &F,
19306c3fb27SDimitry Andric                          std::pair<unsigned, unsigned> WavesPerEU,
19406c3fb27SDimitry Andric                          std::pair<unsigned, unsigned> FlatWorkGroupSize) {
19506c3fb27SDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
19606c3fb27SDimitry Andric     return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
19706c3fb27SDimitry Andric   }
19806c3fb27SDimitry Andric 
19906c3fb27SDimitry Andric   unsigned getMaxWavesPerEU(const Function &F) {
20006c3fb27SDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
20106c3fb27SDimitry Andric     return ST.getMaxWavesPerEU();
20206c3fb27SDimitry Andric   }
20306c3fb27SDimitry Andric 
204fe6060f1SDimitry Andric private:
20581ad6265SDimitry Andric   /// Check if the ConstantExpr \p CE requires the queue pointer.
206fe6060f1SDimitry Andric   static bool visitConstExpr(const ConstantExpr *CE) {
207fe6060f1SDimitry Andric     if (CE->getOpcode() == Instruction::AddrSpaceCast) {
208fe6060f1SDimitry Andric       unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
209fe6060f1SDimitry Andric       return castRequiresQueuePtr(SrcAS);
210fe6060f1SDimitry Andric     }
211fe6060f1SDimitry Andric     return false;
212fe6060f1SDimitry Andric   }
213fe6060f1SDimitry Andric 
214fe6060f1SDimitry Andric   /// Get the constant access bitmap for \p C.
21506c3fb27SDimitry Andric   uint8_t getConstantAccess(const Constant *C,
21606c3fb27SDimitry Andric                             SmallPtrSetImpl<const Constant *> &Visited) {
217fe6060f1SDimitry Andric     auto It = ConstantStatus.find(C);
218fe6060f1SDimitry Andric     if (It != ConstantStatus.end())
219fe6060f1SDimitry Andric       return It->second;
220fe6060f1SDimitry Andric 
221fe6060f1SDimitry Andric     uint8_t Result = 0;
222fe6060f1SDimitry Andric     if (isDSAddress(C))
223fe6060f1SDimitry Andric       Result = DS_GLOBAL;
224fe6060f1SDimitry Andric 
225fe6060f1SDimitry Andric     if (const auto *CE = dyn_cast<ConstantExpr>(C))
226fe6060f1SDimitry Andric       if (visitConstExpr(CE))
227fe6060f1SDimitry Andric         Result |= ADDR_SPACE_CAST;
228fe6060f1SDimitry Andric 
229fe6060f1SDimitry Andric     for (const Use &U : C->operands()) {
230fe6060f1SDimitry Andric       const auto *OpC = dyn_cast<Constant>(U);
23106c3fb27SDimitry Andric       if (!OpC || !Visited.insert(OpC).second)
232fe6060f1SDimitry Andric         continue;
233fe6060f1SDimitry Andric 
23406c3fb27SDimitry Andric       Result |= getConstantAccess(OpC, Visited);
235fe6060f1SDimitry Andric     }
236fe6060f1SDimitry Andric     return Result;
237fe6060f1SDimitry Andric   }
238fe6060f1SDimitry Andric 
239fe6060f1SDimitry Andric public:
24081ad6265SDimitry Andric   /// Returns true if \p Fn needs the queue pointer because of \p C.
241fe6060f1SDimitry Andric   bool needsQueuePtr(const Constant *C, Function &Fn) {
242fe6060f1SDimitry Andric     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
243fe6060f1SDimitry Andric     bool HasAperture = hasApertureRegs(Fn);
244fe6060f1SDimitry Andric 
245fe6060f1SDimitry Andric     // No need to explore the constants.
246fe6060f1SDimitry Andric     if (!IsNonEntryFunc && HasAperture)
247fe6060f1SDimitry Andric       return false;
248fe6060f1SDimitry Andric 
24906c3fb27SDimitry Andric     SmallPtrSet<const Constant *, 8> Visited;
25006c3fb27SDimitry Andric     uint8_t Access = getConstantAccess(C, Visited);
251fe6060f1SDimitry Andric 
252fe6060f1SDimitry Andric     // We need to trap on DS globals in non-entry functions.
253fe6060f1SDimitry Andric     if (IsNonEntryFunc && (Access & DS_GLOBAL))
254fe6060f1SDimitry Andric       return true;
255fe6060f1SDimitry Andric 
256fe6060f1SDimitry Andric     return !HasAperture && (Access & ADDR_SPACE_CAST);
257fe6060f1SDimitry Andric   }
258fe6060f1SDimitry Andric 
259fe6060f1SDimitry Andric private:
26081ad6265SDimitry Andric   /// Used to determine if the Constant needs the queue pointer.
261fe6060f1SDimitry Andric   DenseMap<const Constant *, uint8_t> ConstantStatus;
26206c3fb27SDimitry Andric   const unsigned CodeObjectVersion;
263fe6060f1SDimitry Andric };
264fe6060f1SDimitry Andric 
265bdd1243dSDimitry Andric struct AAAMDAttributes
266bdd1243dSDimitry Andric     : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
267bdd1243dSDimitry Andric                           AbstractAttribute> {
268bdd1243dSDimitry Andric   using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
269349cc55cSDimitry Andric                             AbstractAttribute>;
270349cc55cSDimitry Andric 
271fe6060f1SDimitry Andric   AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
272fe6060f1SDimitry Andric 
273fe6060f1SDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
274fe6060f1SDimitry Andric   static AAAMDAttributes &createForPosition(const IRPosition &IRP,
275fe6060f1SDimitry Andric                                             Attributor &A);
276fe6060f1SDimitry Andric 
277fe6060f1SDimitry Andric   /// See AbstractAttribute::getName().
278fe6060f1SDimitry Andric   const std::string getName() const override { return "AAAMDAttributes"; }
279fe6060f1SDimitry Andric 
280fe6060f1SDimitry Andric   /// See AbstractAttribute::getIdAddr().
281fe6060f1SDimitry Andric   const char *getIdAddr() const override { return &ID; }
282fe6060f1SDimitry Andric 
283fe6060f1SDimitry Andric   /// This function should return true if the type of the \p AA is
284fe6060f1SDimitry Andric   /// AAAMDAttributes.
285fe6060f1SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
286fe6060f1SDimitry Andric     return (AA->getIdAddr() == &ID);
287fe6060f1SDimitry Andric   }
288fe6060f1SDimitry Andric 
289fe6060f1SDimitry Andric   /// Unique ID (due to the unique address)
290fe6060f1SDimitry Andric   static const char ID;
291fe6060f1SDimitry Andric };
292fe6060f1SDimitry Andric const char AAAMDAttributes::ID = 0;
293fe6060f1SDimitry Andric 
294349cc55cSDimitry Andric struct AAUniformWorkGroupSize
295fe6060f1SDimitry Andric     : public StateWrapper<BooleanState, AbstractAttribute> {
296fe6060f1SDimitry Andric   using Base = StateWrapper<BooleanState, AbstractAttribute>;
297349cc55cSDimitry Andric   AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
298fe6060f1SDimitry Andric 
299fe6060f1SDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
300349cc55cSDimitry Andric   static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
301fe6060f1SDimitry Andric                                                    Attributor &A);
302fe6060f1SDimitry Andric 
303fe6060f1SDimitry Andric   /// See AbstractAttribute::getName().
304349cc55cSDimitry Andric   const std::string getName() const override {
305349cc55cSDimitry Andric     return "AAUniformWorkGroupSize";
306349cc55cSDimitry Andric   }
307fe6060f1SDimitry Andric 
308fe6060f1SDimitry Andric   /// See AbstractAttribute::getIdAddr().
309fe6060f1SDimitry Andric   const char *getIdAddr() const override { return &ID; }
310fe6060f1SDimitry Andric 
311fe6060f1SDimitry Andric   /// This function should return true if the type of the \p AA is
312fe6060f1SDimitry Andric   /// AAAMDAttributes.
313fe6060f1SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
314fe6060f1SDimitry Andric     return (AA->getIdAddr() == &ID);
315fe6060f1SDimitry Andric   }
316fe6060f1SDimitry Andric 
317fe6060f1SDimitry Andric   /// Unique ID (due to the unique address)
318fe6060f1SDimitry Andric   static const char ID;
319fe6060f1SDimitry Andric };
320349cc55cSDimitry Andric const char AAUniformWorkGroupSize::ID = 0;
321fe6060f1SDimitry Andric 
322349cc55cSDimitry Andric struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
323349cc55cSDimitry Andric   AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
324349cc55cSDimitry Andric       : AAUniformWorkGroupSize(IRP, A) {}
325fe6060f1SDimitry Andric 
326fe6060f1SDimitry Andric   void initialize(Attributor &A) override {
327fe6060f1SDimitry Andric     Function *F = getAssociatedFunction();
328fe6060f1SDimitry Andric     CallingConv::ID CC = F->getCallingConv();
329fe6060f1SDimitry Andric 
330fe6060f1SDimitry Andric     if (CC != CallingConv::AMDGPU_KERNEL)
331fe6060f1SDimitry Andric       return;
332fe6060f1SDimitry Andric 
333fe6060f1SDimitry Andric     bool InitialValue = false;
334fe6060f1SDimitry Andric     if (F->hasFnAttribute("uniform-work-group-size"))
335*0fca6ea1SDimitry Andric       InitialValue =
336*0fca6ea1SDimitry Andric           F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
337*0fca6ea1SDimitry Andric           "true";
338fe6060f1SDimitry Andric 
339fe6060f1SDimitry Andric     if (InitialValue)
340fe6060f1SDimitry Andric       indicateOptimisticFixpoint();
341fe6060f1SDimitry Andric     else
342fe6060f1SDimitry Andric       indicatePessimisticFixpoint();
343fe6060f1SDimitry Andric   }
344fe6060f1SDimitry Andric 
345fe6060f1SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
346fe6060f1SDimitry Andric     ChangeStatus Change = ChangeStatus::UNCHANGED;
347fe6060f1SDimitry Andric 
348fe6060f1SDimitry Andric     auto CheckCallSite = [&](AbstractCallSite CS) {
349fe6060f1SDimitry Andric       Function *Caller = CS.getInstruction()->getFunction();
350349cc55cSDimitry Andric       LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
351fe6060f1SDimitry Andric                         << "->" << getAssociatedFunction()->getName() << "\n");
352fe6060f1SDimitry Andric 
35306c3fb27SDimitry Andric       const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
354fe6060f1SDimitry Andric           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
35506c3fb27SDimitry Andric       if (!CallerInfo)
35606c3fb27SDimitry Andric         return false;
357fe6060f1SDimitry Andric 
358fe6060f1SDimitry Andric       Change = Change | clampStateAndIndicateChange(this->getState(),
35906c3fb27SDimitry Andric                                                     CallerInfo->getState());
360fe6060f1SDimitry Andric 
361fe6060f1SDimitry Andric       return true;
362fe6060f1SDimitry Andric     };
363fe6060f1SDimitry Andric 
364fe6060f1SDimitry Andric     bool AllCallSitesKnown = true;
365fe6060f1SDimitry Andric     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
3660eae32dcSDimitry Andric       return indicatePessimisticFixpoint();
367fe6060f1SDimitry Andric 
368fe6060f1SDimitry Andric     return Change;
369fe6060f1SDimitry Andric   }
370fe6060f1SDimitry Andric 
371fe6060f1SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
372fe6060f1SDimitry Andric     SmallVector<Attribute, 8> AttrList;
373fe6060f1SDimitry Andric     LLVMContext &Ctx = getAssociatedFunction()->getContext();
374fe6060f1SDimitry Andric 
375fe6060f1SDimitry Andric     AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
376fe6060f1SDimitry Andric                                       getAssumed() ? "true" : "false"));
37706c3fb27SDimitry Andric     return A.manifestAttrs(getIRPosition(), AttrList,
378fe6060f1SDimitry Andric                            /* ForceReplace */ true);
379fe6060f1SDimitry Andric   }
380fe6060f1SDimitry Andric 
381fe6060f1SDimitry Andric   bool isValidState() const override {
382fe6060f1SDimitry Andric     // This state is always valid, even when the state is false.
383fe6060f1SDimitry Andric     return true;
384fe6060f1SDimitry Andric   }
385fe6060f1SDimitry Andric 
38606c3fb27SDimitry Andric   const std::string getAsStr(Attributor *) const override {
387fe6060f1SDimitry Andric     return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
388fe6060f1SDimitry Andric   }
389fe6060f1SDimitry Andric 
390fe6060f1SDimitry Andric   /// See AbstractAttribute::trackStatistics()
391fe6060f1SDimitry Andric   void trackStatistics() const override {}
392fe6060f1SDimitry Andric };
393fe6060f1SDimitry Andric 
394349cc55cSDimitry Andric AAUniformWorkGroupSize &
395349cc55cSDimitry Andric AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
396fe6060f1SDimitry Andric                                           Attributor &A) {
397fe6060f1SDimitry Andric   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
398349cc55cSDimitry Andric     return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
399349cc55cSDimitry Andric   llvm_unreachable(
400349cc55cSDimitry Andric       "AAUniformWorkGroupSize is only valid for function position");
401fe6060f1SDimitry Andric }
402fe6060f1SDimitry Andric 
403fe6060f1SDimitry Andric struct AAAMDAttributesFunction : public AAAMDAttributes {
  /// Construct the attribute for position \p IRP; both arguments are forwarded
  /// unchanged to the AAAMDAttributes base.
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}
406fe6060f1SDimitry Andric 
407fe6060f1SDimitry Andric   void initialize(Attributor &A) override {
408fe6060f1SDimitry Andric     Function *F = getAssociatedFunction();
4090eae32dcSDimitry Andric 
4100eae32dcSDimitry Andric     // If the function requires the implicit arg pointer due to sanitizers,
4110eae32dcSDimitry Andric     // assume it's needed even if explicitly marked as not requiring it.
41281ad6265SDimitry Andric     const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
41381ad6265SDimitry Andric     if (NeedsHostcall) {
4140eae32dcSDimitry Andric       removeAssumedBits(IMPLICIT_ARG_PTR);
41581ad6265SDimitry Andric       removeAssumedBits(HOSTCALL_PTR);
41681ad6265SDimitry Andric     }
4170eae32dcSDimitry Andric 
418349cc55cSDimitry Andric     for (auto Attr : ImplicitAttrs) {
41981ad6265SDimitry Andric       if (NeedsHostcall &&
42081ad6265SDimitry Andric           (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
4210eae32dcSDimitry Andric         continue;
4220eae32dcSDimitry Andric 
423349cc55cSDimitry Andric       if (F->hasFnAttribute(Attr.second))
424349cc55cSDimitry Andric         addKnownBits(Attr.first);
425fe6060f1SDimitry Andric     }
426fe6060f1SDimitry Andric 
427349cc55cSDimitry Andric     if (F->isDeclaration())
428349cc55cSDimitry Andric       return;
429349cc55cSDimitry Andric 
430fe6060f1SDimitry Andric     // Ignore functions with graphics calling conventions, these are currently
431fe6060f1SDimitry Andric     // not allowed to have kernel arguments.
432fe6060f1SDimitry Andric     if (AMDGPU::isGraphics(F->getCallingConv())) {
433fe6060f1SDimitry Andric       indicatePessimisticFixpoint();
434fe6060f1SDimitry Andric       return;
435fe6060f1SDimitry Andric     }
436fe6060f1SDimitry Andric   }
437fe6060f1SDimitry Andric 
438fe6060f1SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
439fe6060f1SDimitry Andric     Function *F = getAssociatedFunction();
440349cc55cSDimitry Andric     // The current assumed state used to determine a change.
441349cc55cSDimitry Andric     auto OrigAssumed = getAssumed();
442fe6060f1SDimitry Andric 
443fe6060f1SDimitry Andric     // Check for Intrinsics and propagate attributes.
44406c3fb27SDimitry Andric     const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
445fe6060f1SDimitry Andric         *this, this->getIRPosition(), DepClassTy::REQUIRED);
44606c3fb27SDimitry Andric     if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())
447349cc55cSDimitry Andric       return indicatePessimisticFixpoint();
448fe6060f1SDimitry Andric 
449349cc55cSDimitry Andric     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
450fe6060f1SDimitry Andric 
45181ad6265SDimitry Andric     bool NeedsImplicit = false;
45281ad6265SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
45381ad6265SDimitry Andric     bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
45481ad6265SDimitry Andric     bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
45506c3fb27SDimitry Andric     unsigned COV = InfoCache.getCodeObjectVersion();
456349cc55cSDimitry Andric 
45706c3fb27SDimitry Andric     for (Function *Callee : AAEdges->getOptimisticEdges()) {
458fe6060f1SDimitry Andric       Intrinsic::ID IID = Callee->getIntrinsicID();
459349cc55cSDimitry Andric       if (IID == Intrinsic::not_intrinsic) {
46006c3fb27SDimitry Andric         const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
461349cc55cSDimitry Andric             *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
46206c3fb27SDimitry Andric         if (!AAAMD)
46306c3fb27SDimitry Andric           return indicatePessimisticFixpoint();
46406c3fb27SDimitry Andric         *this &= *AAAMD;
465fe6060f1SDimitry Andric         continue;
466fe6060f1SDimitry Andric       }
467fe6060f1SDimitry Andric 
468fe6060f1SDimitry Andric       bool NonKernelOnly = false;
469349cc55cSDimitry Andric       ImplicitArgumentMask AttrMask =
47081ad6265SDimitry Andric           intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
47106c3fb27SDimitry Andric                               HasApertureRegs, SupportsGetDoorbellID, COV);
472349cc55cSDimitry Andric       if (AttrMask != NOT_IMPLICIT_INPUT) {
473349cc55cSDimitry Andric         if ((IsNonEntryFunc || !NonKernelOnly))
474349cc55cSDimitry Andric           removeAssumedBits(AttrMask);
475fe6060f1SDimitry Andric       }
476fe6060f1SDimitry Andric     }
477fe6060f1SDimitry Andric 
47881ad6265SDimitry Andric     // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
47981ad6265SDimitry Andric     if (NeedsImplicit)
48081ad6265SDimitry Andric       removeAssumedBits(IMPLICIT_ARG_PTR);
48181ad6265SDimitry Andric 
48281ad6265SDimitry Andric     if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
48381ad6265SDimitry Andric       // Under V5, we need implicitarg_ptr + offsets to access private_base or
48481ad6265SDimitry Andric       // shared_base. We do not actually need queue_ptr.
48506c3fb27SDimitry Andric       if (COV >= 5)
48681ad6265SDimitry Andric         removeAssumedBits(IMPLICIT_ARG_PTR);
48781ad6265SDimitry Andric       else
488349cc55cSDimitry Andric         removeAssumedBits(QUEUE_PTR);
489fe6060f1SDimitry Andric     }
490fe6060f1SDimitry Andric 
49106c3fb27SDimitry Andric     if (funcRetrievesMultigridSyncArg(A, COV)) {
49281ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) &&
49381ad6265SDimitry Andric              "multigrid_sync_arg needs implicitarg_ptr");
49481ad6265SDimitry Andric       removeAssumedBits(MULTIGRID_SYNC_ARG);
495349cc55cSDimitry Andric     }
496fe6060f1SDimitry Andric 
49706c3fb27SDimitry Andric     if (funcRetrievesHostcallPtr(A, COV)) {
49881ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
49981ad6265SDimitry Andric       removeAssumedBits(HOSTCALL_PTR);
50081ad6265SDimitry Andric     }
50181ad6265SDimitry Andric 
50206c3fb27SDimitry Andric     if (funcRetrievesHeapPtr(A, COV)) {
50381ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
50481ad6265SDimitry Andric       removeAssumedBits(HEAP_PTR);
50581ad6265SDimitry Andric     }
50681ad6265SDimitry Andric 
50706c3fb27SDimitry Andric     if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
50881ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
509349cc55cSDimitry Andric       removeAssumedBits(QUEUE_PTR);
510fe6060f1SDimitry Andric     }
511fe6060f1SDimitry Andric 
512fcaf7f86SDimitry Andric     if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
513fcaf7f86SDimitry Andric       removeAssumedBits(LDS_KERNEL_ID);
514fcaf7f86SDimitry Andric     }
515fcaf7f86SDimitry Andric 
51606c3fb27SDimitry Andric     if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
517bdd1243dSDimitry Andric       removeAssumedBits(DEFAULT_QUEUE);
518bdd1243dSDimitry Andric 
51906c3fb27SDimitry Andric     if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
520bdd1243dSDimitry Andric       removeAssumedBits(COMPLETION_ACTION);
521bdd1243dSDimitry Andric 
52281ad6265SDimitry Andric     return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
52381ad6265SDimitry Andric                                        : ChangeStatus::UNCHANGED;
524fe6060f1SDimitry Andric   }
525fe6060f1SDimitry Andric 
526fe6060f1SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
527fe6060f1SDimitry Andric     SmallVector<Attribute, 8> AttrList;
528fe6060f1SDimitry Andric     LLVMContext &Ctx = getAssociatedFunction()->getContext();
529fe6060f1SDimitry Andric 
530349cc55cSDimitry Andric     for (auto Attr : ImplicitAttrs) {
531349cc55cSDimitry Andric       if (isKnown(Attr.first))
532349cc55cSDimitry Andric         AttrList.push_back(Attribute::get(Ctx, Attr.second));
533349cc55cSDimitry Andric     }
534fe6060f1SDimitry Andric 
53506c3fb27SDimitry Andric     return A.manifestAttrs(getIRPosition(), AttrList,
536fe6060f1SDimitry Andric                            /* ForceReplace */ true);
537fe6060f1SDimitry Andric   }
538fe6060f1SDimitry Andric 
53906c3fb27SDimitry Andric   const std::string getAsStr(Attributor *) const override {
540349cc55cSDimitry Andric     std::string Str;
541349cc55cSDimitry Andric     raw_string_ostream OS(Str);
542349cc55cSDimitry Andric     OS << "AMDInfo[";
543349cc55cSDimitry Andric     for (auto Attr : ImplicitAttrs)
54406c3fb27SDimitry Andric       if (isAssumed(Attr.first))
545349cc55cSDimitry Andric         OS << ' ' << Attr.second;
546349cc55cSDimitry Andric     OS << " ]";
547349cc55cSDimitry Andric     return OS.str();
548fe6060f1SDimitry Andric   }
549fe6060f1SDimitry Andric 
550fe6060f1SDimitry Andric   /// See AbstractAttribute::trackStatistics()
551fe6060f1SDimitry Andric   void trackStatistics() const override {}
55281ad6265SDimitry Andric 
55381ad6265SDimitry Andric private:
55481ad6265SDimitry Andric   bool checkForQueuePtr(Attributor &A) {
55581ad6265SDimitry Andric     Function *F = getAssociatedFunction();
55681ad6265SDimitry Andric     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
55781ad6265SDimitry Andric 
55881ad6265SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
55981ad6265SDimitry Andric 
56081ad6265SDimitry Andric     bool NeedsQueuePtr = false;
56181ad6265SDimitry Andric 
56281ad6265SDimitry Andric     auto CheckAddrSpaceCasts = [&](Instruction &I) {
56381ad6265SDimitry Andric       unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
56481ad6265SDimitry Andric       if (castRequiresQueuePtr(SrcAS)) {
56581ad6265SDimitry Andric         NeedsQueuePtr = true;
56681ad6265SDimitry Andric         return false;
56781ad6265SDimitry Andric       }
56881ad6265SDimitry Andric       return true;
56981ad6265SDimitry Andric     };
57081ad6265SDimitry Andric 
57181ad6265SDimitry Andric     bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
57281ad6265SDimitry Andric 
57381ad6265SDimitry Andric     // `checkForAllInstructions` is much more cheaper than going through all
57481ad6265SDimitry Andric     // instructions, try it first.
57581ad6265SDimitry Andric 
57681ad6265SDimitry Andric     // The queue pointer is not needed if aperture regs is present.
57781ad6265SDimitry Andric     if (!HasApertureRegs) {
57881ad6265SDimitry Andric       bool UsedAssumedInformation = false;
57981ad6265SDimitry Andric       A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
58081ad6265SDimitry Andric                                 {Instruction::AddrSpaceCast},
58181ad6265SDimitry Andric                                 UsedAssumedInformation);
58281ad6265SDimitry Andric     }
58381ad6265SDimitry Andric 
58481ad6265SDimitry Andric     // If we found  that we need the queue pointer, nothing else to do.
58581ad6265SDimitry Andric     if (NeedsQueuePtr)
58681ad6265SDimitry Andric       return true;
58781ad6265SDimitry Andric 
58881ad6265SDimitry Andric     if (!IsNonEntryFunc && HasApertureRegs)
58981ad6265SDimitry Andric       return false;
59081ad6265SDimitry Andric 
59181ad6265SDimitry Andric     for (BasicBlock &BB : *F) {
59281ad6265SDimitry Andric       for (Instruction &I : BB) {
59381ad6265SDimitry Andric         for (const Use &U : I.operands()) {
59481ad6265SDimitry Andric           if (const auto *C = dyn_cast<Constant>(U)) {
59581ad6265SDimitry Andric             if (InfoCache.needsQueuePtr(C, *F))
59681ad6265SDimitry Andric               return true;
59781ad6265SDimitry Andric           }
59881ad6265SDimitry Andric         }
59981ad6265SDimitry Andric       }
60081ad6265SDimitry Andric     }
60181ad6265SDimitry Andric 
60281ad6265SDimitry Andric     return false;
60381ad6265SDimitry Andric   }
60481ad6265SDimitry Andric 
60506c3fb27SDimitry Andric   bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
60606c3fb27SDimitry Andric     auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
607bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
608bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
60981ad6265SDimitry Andric   }
61081ad6265SDimitry Andric 
61106c3fb27SDimitry Andric   bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
61206c3fb27SDimitry Andric     auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
613bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
614bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
615bdd1243dSDimitry Andric   }
616bdd1243dSDimitry Andric 
61706c3fb27SDimitry Andric   bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
61806c3fb27SDimitry Andric     auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
619bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
620bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
621bdd1243dSDimitry Andric   }
622bdd1243dSDimitry Andric 
62306c3fb27SDimitry Andric   bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
62406c3fb27SDimitry Andric     auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
625bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
626bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
62781ad6265SDimitry Andric   }
62881ad6265SDimitry Andric 
62906c3fb27SDimitry Andric   bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
63006c3fb27SDimitry Andric     if (COV < 5)
63181ad6265SDimitry Andric       return false;
632bdd1243dSDimitry Andric     AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
633bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
63481ad6265SDimitry Andric   }
63581ad6265SDimitry Andric 
63606c3fb27SDimitry Andric   bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
63706c3fb27SDimitry Andric     if (COV < 5)
63881ad6265SDimitry Andric       return false;
639bdd1243dSDimitry Andric     AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
640bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
64181ad6265SDimitry Andric   }
64281ad6265SDimitry Andric 
643bdd1243dSDimitry Andric   bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
64481ad6265SDimitry Andric     // Check if this is a call to the implicitarg_ptr builtin and it
64581ad6265SDimitry Andric     // is used to retrieve the hostcall pointer. The implicit arg for
64681ad6265SDimitry Andric     // hostcall is not used only if every use of the implicitarg_ptr
64781ad6265SDimitry Andric     // is a load that clearly does not retrieve any byte of the
64881ad6265SDimitry Andric     // hostcall pointer. We check this by tracing all the uses of the
64981ad6265SDimitry Andric     // initial call to the implicitarg_ptr intrinsic.
65081ad6265SDimitry Andric     auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
65181ad6265SDimitry Andric       auto &Call = cast<CallBase>(I);
65281ad6265SDimitry Andric       if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
65381ad6265SDimitry Andric         return true;
65481ad6265SDimitry Andric 
65506c3fb27SDimitry Andric       const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
65681ad6265SDimitry Andric           *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
65706c3fb27SDimitry Andric       if (!PointerInfoAA)
65806c3fb27SDimitry Andric         return false;
65981ad6265SDimitry Andric 
66006c3fb27SDimitry Andric       return PointerInfoAA->forallInterferingAccesses(
661bdd1243dSDimitry Andric           Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
66281ad6265SDimitry Andric             return Acc.getRemoteInst()->isDroppable();
66381ad6265SDimitry Andric           });
66481ad6265SDimitry Andric     };
66581ad6265SDimitry Andric 
66681ad6265SDimitry Andric     bool UsedAssumedInformation = false;
66781ad6265SDimitry Andric     return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
66881ad6265SDimitry Andric                                               UsedAssumedInformation);
66981ad6265SDimitry Andric   }
670fcaf7f86SDimitry Andric 
671fcaf7f86SDimitry Andric   bool funcRetrievesLDSKernelId(Attributor &A) {
672fcaf7f86SDimitry Andric     auto DoesNotRetrieve = [&](Instruction &I) {
673fcaf7f86SDimitry Andric       auto &Call = cast<CallBase>(I);
674fcaf7f86SDimitry Andric       return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
675fcaf7f86SDimitry Andric     };
676fcaf7f86SDimitry Andric     bool UsedAssumedInformation = false;
677fcaf7f86SDimitry Andric     return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
678fcaf7f86SDimitry Andric                                               UsedAssumedInformation);
679fcaf7f86SDimitry Andric   }
680fe6060f1SDimitry Andric };
681fe6060f1SDimitry Andric 
682fe6060f1SDimitry Andric AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
683fe6060f1SDimitry Andric                                                     Attributor &A) {
684fe6060f1SDimitry Andric   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
685fe6060f1SDimitry Andric     return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
686fe6060f1SDimitry Andric   llvm_unreachable("AAAMDAttributes is only valid for function position");
687fe6060f1SDimitry Andric }
688fe6060f1SDimitry Andric 
68906c3fb27SDimitry Andric /// Base class to derive different size ranges.
69006c3fb27SDimitry Andric struct AAAMDSizeRangeAttribute
691349cc55cSDimitry Andric     : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
692349cc55cSDimitry Andric   using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
693349cc55cSDimitry Andric 
69406c3fb27SDimitry Andric   StringRef AttrName;
69506c3fb27SDimitry Andric 
69606c3fb27SDimitry Andric   AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
69706c3fb27SDimitry Andric                           StringRef AttrName)
69806c3fb27SDimitry Andric       : Base(IRP, 32), AttrName(AttrName) {}
69906c3fb27SDimitry Andric 
70006c3fb27SDimitry Andric   /// See AbstractAttribute::trackStatistics()
70106c3fb27SDimitry Andric   void trackStatistics() const override {}
70206c3fb27SDimitry Andric 
70306c3fb27SDimitry Andric   template <class AttributeImpl>
70406c3fb27SDimitry Andric   ChangeStatus updateImplImpl(Attributor &A) {
70506c3fb27SDimitry Andric     ChangeStatus Change = ChangeStatus::UNCHANGED;
70606c3fb27SDimitry Andric 
70706c3fb27SDimitry Andric     auto CheckCallSite = [&](AbstractCallSite CS) {
70806c3fb27SDimitry Andric       Function *Caller = CS.getInstruction()->getFunction();
70906c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
71006c3fb27SDimitry Andric                         << "->" << getAssociatedFunction()->getName() << '\n');
71106c3fb27SDimitry Andric 
71206c3fb27SDimitry Andric       const auto *CallerInfo = A.getAAFor<AttributeImpl>(
71306c3fb27SDimitry Andric           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
71406c3fb27SDimitry Andric       if (!CallerInfo)
71506c3fb27SDimitry Andric         return false;
71606c3fb27SDimitry Andric 
71706c3fb27SDimitry Andric       Change |=
71806c3fb27SDimitry Andric           clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
71906c3fb27SDimitry Andric 
72006c3fb27SDimitry Andric       return true;
72106c3fb27SDimitry Andric     };
72206c3fb27SDimitry Andric 
72306c3fb27SDimitry Andric     bool AllCallSitesKnown = true;
72406c3fb27SDimitry Andric     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
72506c3fb27SDimitry Andric       return indicatePessimisticFixpoint();
72606c3fb27SDimitry Andric 
72706c3fb27SDimitry Andric     return Change;
72806c3fb27SDimitry Andric   }
72906c3fb27SDimitry Andric 
73006c3fb27SDimitry Andric   ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
73106c3fb27SDimitry Andric                                          unsigned Max) {
73206c3fb27SDimitry Andric     // Don't add the attribute if it's the implied default.
73306c3fb27SDimitry Andric     if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
73406c3fb27SDimitry Andric       return ChangeStatus::UNCHANGED;
73506c3fb27SDimitry Andric 
73606c3fb27SDimitry Andric     Function *F = getAssociatedFunction();
73706c3fb27SDimitry Andric     LLVMContext &Ctx = F->getContext();
73806c3fb27SDimitry Andric     SmallString<10> Buffer;
73906c3fb27SDimitry Andric     raw_svector_ostream OS(Buffer);
74006c3fb27SDimitry Andric     OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
74106c3fb27SDimitry Andric     return A.manifestAttrs(getIRPosition(),
74206c3fb27SDimitry Andric                            {Attribute::get(Ctx, AttrName, OS.str())},
74306c3fb27SDimitry Andric                            /* ForceReplace */ true);
74406c3fb27SDimitry Andric   }
74506c3fb27SDimitry Andric 
74606c3fb27SDimitry Andric   const std::string getAsStr(Attributor *) const override {
74706c3fb27SDimitry Andric     std::string Str;
74806c3fb27SDimitry Andric     raw_string_ostream OS(Str);
74906c3fb27SDimitry Andric     OS << getName() << '[';
75006c3fb27SDimitry Andric     OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
75106c3fb27SDimitry Andric     OS << ']';
75206c3fb27SDimitry Andric     return OS.str();
75306c3fb27SDimitry Andric   }
75406c3fb27SDimitry Andric };
75506c3fb27SDimitry Andric 
75606c3fb27SDimitry Andric /// Propagate amdgpu-flat-work-group-size attribute.
75706c3fb27SDimitry Andric struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
75806c3fb27SDimitry Andric   AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
75906c3fb27SDimitry Andric       : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
760349cc55cSDimitry Andric 
761349cc55cSDimitry Andric   void initialize(Attributor &A) override {
762349cc55cSDimitry Andric     Function *F = getAssociatedFunction();
763349cc55cSDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
764349cc55cSDimitry Andric     unsigned MinGroupSize, MaxGroupSize;
765349cc55cSDimitry Andric     std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
766349cc55cSDimitry Andric     intersectKnown(
767349cc55cSDimitry Andric         ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
7680eae32dcSDimitry Andric 
7690eae32dcSDimitry Andric     if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
7700eae32dcSDimitry Andric       indicatePessimisticFixpoint();
771349cc55cSDimitry Andric   }
772349cc55cSDimitry Andric 
773349cc55cSDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
77406c3fb27SDimitry Andric     return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
775349cc55cSDimitry Andric   }
776349cc55cSDimitry Andric 
777349cc55cSDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
778349cc55cSDimitry Andric   static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
779349cc55cSDimitry Andric                                                    Attributor &A);
780349cc55cSDimitry Andric 
78106c3fb27SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
78206c3fb27SDimitry Andric     Function *F = getAssociatedFunction();
78306c3fb27SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
78406c3fb27SDimitry Andric     unsigned Min, Max;
78506c3fb27SDimitry Andric     std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
78606c3fb27SDimitry Andric     return emitAttributeIfNotDefault(A, Min, Max);
78706c3fb27SDimitry Andric   }
78806c3fb27SDimitry Andric 
789349cc55cSDimitry Andric   /// See AbstractAttribute::getName()
790349cc55cSDimitry Andric   const std::string getName() const override {
791349cc55cSDimitry Andric     return "AAAMDFlatWorkGroupSize";
792349cc55cSDimitry Andric   }
793349cc55cSDimitry Andric 
794349cc55cSDimitry Andric   /// See AbstractAttribute::getIdAddr()
795349cc55cSDimitry Andric   const char *getIdAddr() const override { return &ID; }
796349cc55cSDimitry Andric 
797349cc55cSDimitry Andric   /// This function should return true if the type of the \p AA is
798349cc55cSDimitry Andric   /// AAAMDFlatWorkGroupSize
799349cc55cSDimitry Andric   static bool classof(const AbstractAttribute *AA) {
800349cc55cSDimitry Andric     return (AA->getIdAddr() == &ID);
801349cc55cSDimitry Andric   }
802349cc55cSDimitry Andric 
803349cc55cSDimitry Andric   /// Unique ID (due to the unique address)
804349cc55cSDimitry Andric   static const char ID;
805349cc55cSDimitry Andric };
806349cc55cSDimitry Andric 
807349cc55cSDimitry Andric const char AAAMDFlatWorkGroupSize::ID = 0;
808349cc55cSDimitry Andric 
809349cc55cSDimitry Andric AAAMDFlatWorkGroupSize &
810349cc55cSDimitry Andric AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
811349cc55cSDimitry Andric                                           Attributor &A) {
812349cc55cSDimitry Andric   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
813349cc55cSDimitry Andric     return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
814349cc55cSDimitry Andric   llvm_unreachable(
815349cc55cSDimitry Andric       "AAAMDFlatWorkGroupSize is only valid for function position");
816349cc55cSDimitry Andric }
817349cc55cSDimitry Andric 
81806c3fb27SDimitry Andric /// Propagate amdgpu-waves-per-eu attribute.
81906c3fb27SDimitry Andric struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
82006c3fb27SDimitry Andric   AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
82106c3fb27SDimitry Andric       : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
82206c3fb27SDimitry Andric 
82306c3fb27SDimitry Andric   bool isValidState() const override {
82406c3fb27SDimitry Andric     return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
82506c3fb27SDimitry Andric   }
82606c3fb27SDimitry Andric 
82706c3fb27SDimitry Andric   void initialize(Attributor &A) override {
82806c3fb27SDimitry Andric     Function *F = getAssociatedFunction();
82906c3fb27SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
83006c3fb27SDimitry Andric 
83106c3fb27SDimitry Andric     if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
83206c3fb27SDimitry Andric             *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {
83306c3fb27SDimitry Andric 
83406c3fb27SDimitry Andric       unsigned Min, Max;
83506c3fb27SDimitry Andric       std::tie(Min, Max) = InfoCache.getWavesPerEU(
83606c3fb27SDimitry Andric           *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
83706c3fb27SDimitry Andric                AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
83806c3fb27SDimitry Andric 
83906c3fb27SDimitry Andric       ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
84006c3fb27SDimitry Andric       intersectKnown(Range);
84106c3fb27SDimitry Andric     }
84206c3fb27SDimitry Andric 
84306c3fb27SDimitry Andric     if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
84406c3fb27SDimitry Andric       indicatePessimisticFixpoint();
84506c3fb27SDimitry Andric   }
84606c3fb27SDimitry Andric 
84706c3fb27SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
84806c3fb27SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
84906c3fb27SDimitry Andric     ChangeStatus Change = ChangeStatus::UNCHANGED;
85006c3fb27SDimitry Andric 
85106c3fb27SDimitry Andric     auto CheckCallSite = [&](AbstractCallSite CS) {
85206c3fb27SDimitry Andric       Function *Caller = CS.getInstruction()->getFunction();
85306c3fb27SDimitry Andric       Function *Func = getAssociatedFunction();
85406c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
85506c3fb27SDimitry Andric                         << "->" << Func->getName() << '\n');
85606c3fb27SDimitry Andric 
85706c3fb27SDimitry Andric       const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
85806c3fb27SDimitry Andric           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
85906c3fb27SDimitry Andric       const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
86006c3fb27SDimitry Andric           *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
86106c3fb27SDimitry Andric       if (!CallerInfo || !AssumedGroupSize)
86206c3fb27SDimitry Andric         return false;
86306c3fb27SDimitry Andric 
86406c3fb27SDimitry Andric       unsigned Min, Max;
86506c3fb27SDimitry Andric       std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
86606c3fb27SDimitry Andric           *Caller,
86706c3fb27SDimitry Andric           {CallerInfo->getAssumed().getLower().getZExtValue(),
86806c3fb27SDimitry Andric            CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
86906c3fb27SDimitry Andric           {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
87006c3fb27SDimitry Andric            AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
87106c3fb27SDimitry Andric       ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
87206c3fb27SDimitry Andric       IntegerRangeState CallerRangeState(CallerRange);
87306c3fb27SDimitry Andric       Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
87406c3fb27SDimitry Andric 
87506c3fb27SDimitry Andric       return true;
87606c3fb27SDimitry Andric     };
87706c3fb27SDimitry Andric 
87806c3fb27SDimitry Andric     bool AllCallSitesKnown = true;
87906c3fb27SDimitry Andric     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
88006c3fb27SDimitry Andric       return indicatePessimisticFixpoint();
88106c3fb27SDimitry Andric 
88206c3fb27SDimitry Andric     return Change;
88306c3fb27SDimitry Andric   }
88406c3fb27SDimitry Andric 
88506c3fb27SDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
88606c3fb27SDimitry Andric   static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
88706c3fb27SDimitry Andric                                             Attributor &A);
88806c3fb27SDimitry Andric 
88906c3fb27SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
89006c3fb27SDimitry Andric     Function *F = getAssociatedFunction();
89106c3fb27SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
89206c3fb27SDimitry Andric     unsigned Max = InfoCache.getMaxWavesPerEU(*F);
89306c3fb27SDimitry Andric     return emitAttributeIfNotDefault(A, 1, Max);
89406c3fb27SDimitry Andric   }
89506c3fb27SDimitry Andric 
89606c3fb27SDimitry Andric   /// See AbstractAttribute::getName()
89706c3fb27SDimitry Andric   const std::string getName() const override { return "AAAMDWavesPerEU"; }
89806c3fb27SDimitry Andric 
89906c3fb27SDimitry Andric   /// See AbstractAttribute::getIdAddr()
90006c3fb27SDimitry Andric   const char *getIdAddr() const override { return &ID; }
90106c3fb27SDimitry Andric 
90206c3fb27SDimitry Andric   /// This function should return true if the type of the \p AA is
90306c3fb27SDimitry Andric   /// AAAMDWavesPerEU
90406c3fb27SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
90506c3fb27SDimitry Andric     return (AA->getIdAddr() == &ID);
90606c3fb27SDimitry Andric   }
90706c3fb27SDimitry Andric 
90806c3fb27SDimitry Andric   /// Unique ID (due to the unique address)
90906c3fb27SDimitry Andric   static const char ID;
91006c3fb27SDimitry Andric };
91106c3fb27SDimitry Andric 
91206c3fb27SDimitry Andric const char AAAMDWavesPerEU::ID = 0;
91306c3fb27SDimitry Andric 
91406c3fb27SDimitry Andric AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
91506c3fb27SDimitry Andric                                                     Attributor &A) {
91606c3fb27SDimitry Andric   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
91706c3fb27SDimitry Andric     return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
91806c3fb27SDimitry Andric   llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
91906c3fb27SDimitry Andric }
92006c3fb27SDimitry Andric 
921*0fca6ea1SDimitry Andric static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
922*0fca6ea1SDimitry Andric   for (const auto &CI : IA->ParseConstraints()) {
923*0fca6ea1SDimitry Andric     for (StringRef Code : CI.Codes) {
924*0fca6ea1SDimitry Andric       Code.consume_front("{");
925*0fca6ea1SDimitry Andric       if (Code.starts_with("a"))
926*0fca6ea1SDimitry Andric         return true;
927*0fca6ea1SDimitry Andric     }
928*0fca6ea1SDimitry Andric   }
929*0fca6ea1SDimitry Andric 
930*0fca6ea1SDimitry Andric   return false;
931*0fca6ea1SDimitry Andric }
932*0fca6ea1SDimitry Andric 
933*0fca6ea1SDimitry Andric struct AAAMDGPUNoAGPR
934*0fca6ea1SDimitry Andric     : public IRAttribute<Attribute::NoUnwind,
935*0fca6ea1SDimitry Andric                          StateWrapper<BooleanState, AbstractAttribute>,
936*0fca6ea1SDimitry Andric                          AAAMDGPUNoAGPR> {
937*0fca6ea1SDimitry Andric   AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
938*0fca6ea1SDimitry Andric 
939*0fca6ea1SDimitry Andric   static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
940*0fca6ea1SDimitry Andric                                            Attributor &A) {
941*0fca6ea1SDimitry Andric     if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
942*0fca6ea1SDimitry Andric       return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
943*0fca6ea1SDimitry Andric     llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
944*0fca6ea1SDimitry Andric   }
945*0fca6ea1SDimitry Andric 
946*0fca6ea1SDimitry Andric   void initialize(Attributor &A) override {
947*0fca6ea1SDimitry Andric     Function *F = getAssociatedFunction();
948*0fca6ea1SDimitry Andric     if (F->hasFnAttribute("amdgpu-no-agpr"))
949*0fca6ea1SDimitry Andric       indicateOptimisticFixpoint();
950*0fca6ea1SDimitry Andric   }
951*0fca6ea1SDimitry Andric 
952*0fca6ea1SDimitry Andric   const std::string getAsStr(Attributor *A) const override {
953*0fca6ea1SDimitry Andric     return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
954*0fca6ea1SDimitry Andric   }
955*0fca6ea1SDimitry Andric 
956*0fca6ea1SDimitry Andric   void trackStatistics() const override {}
957*0fca6ea1SDimitry Andric 
958*0fca6ea1SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
959*0fca6ea1SDimitry Andric     // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
960*0fca6ea1SDimitry Andric 
961*0fca6ea1SDimitry Andric     auto CheckForNoAGPRs = [&](Instruction &I) {
962*0fca6ea1SDimitry Andric       const auto &CB = cast<CallBase>(I);
963*0fca6ea1SDimitry Andric       const Value *CalleeOp = CB.getCalledOperand();
964*0fca6ea1SDimitry Andric       const Function *Callee = dyn_cast<Function>(CalleeOp);
965*0fca6ea1SDimitry Andric       if (!Callee) {
966*0fca6ea1SDimitry Andric         if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
967*0fca6ea1SDimitry Andric           return !inlineAsmUsesAGPRs(IA);
968*0fca6ea1SDimitry Andric         return false;
969*0fca6ea1SDimitry Andric       }
970*0fca6ea1SDimitry Andric 
971*0fca6ea1SDimitry Andric       // Some intrinsics may use AGPRs, but if we have a choice, we are not
972*0fca6ea1SDimitry Andric       // required to use AGPRs.
973*0fca6ea1SDimitry Andric       if (Callee->isIntrinsic())
974*0fca6ea1SDimitry Andric         return true;
975*0fca6ea1SDimitry Andric 
976*0fca6ea1SDimitry Andric       // TODO: Handle callsite attributes
977*0fca6ea1SDimitry Andric       const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
978*0fca6ea1SDimitry Andric           *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
979*0fca6ea1SDimitry Andric       return CalleeInfo && CalleeInfo->getAssumed();
980*0fca6ea1SDimitry Andric     };
981*0fca6ea1SDimitry Andric 
982*0fca6ea1SDimitry Andric     bool UsedAssumedInformation = false;
983*0fca6ea1SDimitry Andric     if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
984*0fca6ea1SDimitry Andric                                            UsedAssumedInformation))
985*0fca6ea1SDimitry Andric       return indicatePessimisticFixpoint();
986*0fca6ea1SDimitry Andric     return ChangeStatus::UNCHANGED;
987*0fca6ea1SDimitry Andric   }
988*0fca6ea1SDimitry Andric 
989*0fca6ea1SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
990*0fca6ea1SDimitry Andric     if (!getAssumed())
991*0fca6ea1SDimitry Andric       return ChangeStatus::UNCHANGED;
992*0fca6ea1SDimitry Andric     LLVMContext &Ctx = getAssociatedFunction()->getContext();
993*0fca6ea1SDimitry Andric     return A.manifestAttrs(getIRPosition(),
994*0fca6ea1SDimitry Andric                            {Attribute::get(Ctx, "amdgpu-no-agpr")});
995*0fca6ea1SDimitry Andric   }
996*0fca6ea1SDimitry Andric 
997*0fca6ea1SDimitry Andric   const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
998*0fca6ea1SDimitry Andric   const char *getIdAddr() const override { return &ID; }
999*0fca6ea1SDimitry Andric 
1000*0fca6ea1SDimitry Andric   /// This function should return true if the type of the \p AA is
1001*0fca6ea1SDimitry Andric   /// AAAMDGPUNoAGPRs
1002*0fca6ea1SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
1003*0fca6ea1SDimitry Andric     return (AA->getIdAddr() == &ID);
1004*0fca6ea1SDimitry Andric   }
1005*0fca6ea1SDimitry Andric 
1006*0fca6ea1SDimitry Andric   static const char ID;
1007*0fca6ea1SDimitry Andric };
1008*0fca6ea1SDimitry Andric 
1009*0fca6ea1SDimitry Andric const char AAAMDGPUNoAGPR::ID = 0;
1010*0fca6ea1SDimitry Andric 
10115f757f3fSDimitry Andric static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
10125f757f3fSDimitry Andric   const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
10135f757f3fSDimitry Andric   for (unsigned I = 0;
10145f757f3fSDimitry Andric        I < F.arg_size() &&
10155f757f3fSDimitry Andric        I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());
10165f757f3fSDimitry Andric        ++I) {
10175f757f3fSDimitry Andric     Argument &Arg = *F.getArg(I);
10185f757f3fSDimitry Andric     // Check for incompatible attributes.
10195f757f3fSDimitry Andric     if (Arg.hasByRefAttr() || Arg.hasNestAttr())
10205f757f3fSDimitry Andric       break;
1021fe6060f1SDimitry Andric 
10225f757f3fSDimitry Andric     Arg.addAttr(Attribute::InReg);
10235f757f3fSDimitry Andric   }
1024fe6060f1SDimitry Andric }
1025fe6060f1SDimitry Andric 
10265f757f3fSDimitry Andric static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
1027fe6060f1SDimitry Andric   SetVector<Function *> Functions;
1028349cc55cSDimitry Andric   for (Function &F : M) {
1029349cc55cSDimitry Andric     if (!F.isIntrinsic())
1030fe6060f1SDimitry Andric       Functions.insert(&F);
1031349cc55cSDimitry Andric   }
1032fe6060f1SDimitry Andric 
1033fe6060f1SDimitry Andric   CallGraphUpdater CGUpdater;
1034fe6060f1SDimitry Andric   BumpPtrAllocator Allocator;
10355f757f3fSDimitry Andric   AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1036349cc55cSDimitry Andric   DenseSet<const char *> Allowed(
1037349cc55cSDimitry Andric       {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
103806c3fb27SDimitry Andric        &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1039*0fca6ea1SDimitry Andric        &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
1040*0fca6ea1SDimitry Andric        &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1041*0fca6ea1SDimitry Andric        &AAUnderlyingObjects::ID});
1042349cc55cSDimitry Andric 
104381ad6265SDimitry Andric   AttributorConfig AC(CGUpdater);
104481ad6265SDimitry Andric   AC.Allowed = &Allowed;
104581ad6265SDimitry Andric   AC.IsModulePass = true;
104681ad6265SDimitry Andric   AC.DefaultInitializeLiveInternals = false;
104706c3fb27SDimitry Andric   AC.IPOAmendableCB = [](const Function &F) {
104806c3fb27SDimitry Andric     return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
104906c3fb27SDimitry Andric   };
105081ad6265SDimitry Andric 
105181ad6265SDimitry Andric   Attributor A(Functions, InfoCache, AC);
1052fe6060f1SDimitry Andric 
1053fe6060f1SDimitry Andric   for (Function &F : M) {
1054349cc55cSDimitry Andric     if (!F.isIntrinsic()) {
1055fe6060f1SDimitry Andric       A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
1056349cc55cSDimitry Andric       A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
1057*0fca6ea1SDimitry Andric       A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
10585f757f3fSDimitry Andric       CallingConv::ID CC = F.getCallingConv();
10595f757f3fSDimitry Andric       if (!AMDGPU::isEntryFunctionCC(CC)) {
1060349cc55cSDimitry Andric         A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
106106c3fb27SDimitry Andric         A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
10625f757f3fSDimitry Andric       } else if (CC == CallingConv::AMDGPU_KERNEL) {
10635f757f3fSDimitry Andric         addPreloadKernArgHint(F, TM);
1064349cc55cSDimitry Andric       }
1065349cc55cSDimitry Andric     }
1066fe6060f1SDimitry Andric   }
1067fe6060f1SDimitry Andric 
1068fe6060f1SDimitry Andric   ChangeStatus Change = A.run();
1069fe6060f1SDimitry Andric   return Change == ChangeStatus::CHANGED;
1070fe6060f1SDimitry Andric }
1071fe6060f1SDimitry Andric 
10725f757f3fSDimitry Andric class AMDGPUAttributorLegacy : public ModulePass {
10735f757f3fSDimitry Andric public:
10745f757f3fSDimitry Andric   AMDGPUAttributorLegacy() : ModulePass(ID) {}
10755f757f3fSDimitry Andric 
10765f757f3fSDimitry Andric   /// doInitialization - Virtual method overridden by subclasses to do
10775f757f3fSDimitry Andric   /// any necessary initialization before any pass is run.
10785f757f3fSDimitry Andric   bool doInitialization(Module &) override {
10795f757f3fSDimitry Andric     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
10805f757f3fSDimitry Andric     if (!TPC)
10815f757f3fSDimitry Andric       report_fatal_error("TargetMachine is required");
10825f757f3fSDimitry Andric 
10835f757f3fSDimitry Andric     TM = &TPC->getTM<TargetMachine>();
10845f757f3fSDimitry Andric     return false;
10855f757f3fSDimitry Andric   }
10865f757f3fSDimitry Andric 
10875f757f3fSDimitry Andric   bool runOnModule(Module &M) override {
10885f757f3fSDimitry Andric     AnalysisGetter AG(this);
10895f757f3fSDimitry Andric     return runImpl(M, AG, *TM);
10905f757f3fSDimitry Andric   }
10915f757f3fSDimitry Andric 
1092bdd1243dSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
1093bdd1243dSDimitry Andric     AU.addRequired<CycleInfoWrapperPass>();
1094bdd1243dSDimitry Andric   }
1095bdd1243dSDimitry Andric 
1096fe6060f1SDimitry Andric   StringRef getPassName() const override { return "AMDGPU Attributor"; }
1097fe6060f1SDimitry Andric   TargetMachine *TM;
1098fe6060f1SDimitry Andric   static char ID;
1099fe6060f1SDimitry Andric };
1100349cc55cSDimitry Andric } // namespace
1101fe6060f1SDimitry Andric 
11025f757f3fSDimitry Andric PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
11035f757f3fSDimitry Andric                                                   ModuleAnalysisManager &AM) {
1104fe6060f1SDimitry Andric 
11055f757f3fSDimitry Andric   FunctionAnalysisManager &FAM =
11065f757f3fSDimitry Andric       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
11075f757f3fSDimitry Andric   AnalysisGetter AG(FAM);
11085f757f3fSDimitry Andric 
11095f757f3fSDimitry Andric   // TODO: Probably preserves CFG
11105f757f3fSDimitry Andric   return runImpl(M, AG, TM) ? PreservedAnalyses::none()
11115f757f3fSDimitry Andric                             : PreservedAnalyses::all();
11125f757f3fSDimitry Andric }
11135f757f3fSDimitry Andric 
11145f757f3fSDimitry Andric char AMDGPUAttributorLegacy::ID = 0;
11155f757f3fSDimitry Andric 
11165f757f3fSDimitry Andric Pass *llvm::createAMDGPUAttributorLegacyPass() {
11175f757f3fSDimitry Andric   return new AMDGPUAttributorLegacy();
11185f757f3fSDimitry Andric }
11195f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
11205f757f3fSDimitry Andric                       false, false)
1121bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);
11225f757f3fSDimitry Andric INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",
11235f757f3fSDimitry Andric                     false, false)
1124