xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1fe6060f1SDimitry Andric //===- AMDGPUAttributor.cpp -----------------------------------------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10fe6060f1SDimitry Andric //
11fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
12fe6060f1SDimitry Andric 
13fe6060f1SDimitry Andric #include "AMDGPU.h"
14fe6060f1SDimitry Andric #include "GCNSubtarget.h"
1581ad6265SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
16*bdd1243dSDimitry Andric #include "llvm/Analysis/CycleAnalysis.h"
17fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
18fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
19fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsR600.h"
20fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h"
21fe6060f1SDimitry Andric #include "llvm/Transforms/IPO/Attributor.h"
22fe6060f1SDimitry Andric 
23fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-attributor"
24fe6060f1SDimitry Andric 
25*bdd1243dSDimitry Andric namespace llvm {
26*bdd1243dSDimitry Andric void initializeCycleInfoWrapperPassPass(PassRegistry &);
27*bdd1243dSDimitry Andric }
28*bdd1243dSDimitry Andric 
29fe6060f1SDimitry Andric using namespace llvm;
30fe6060f1SDimitry Andric 
// The three definitions below are generated from "AMDGPUAttributes.def" by
// re-defining AMDGPU_ATTRIBUTE(Name, Str) before each inclusion (X-macro
// pattern).
// NOTE(review): AMDGPU_ATTRIBUTE is re-defined here without a visible #undef;
// presumably the .def file undefines it after use -- confirm.

// Pass 1: each entry expands to the enumerator `Name_POS`, so the entries get
// consecutive positions starting at 0.
3181ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
32349cc55cSDimitry Andric 
3381ad6265SDimitry Andric enum ImplicitArgumentPositions {
3481ad6265SDimitry Andric   #include "AMDGPUAttributes.def"
// One past the last entry, i.e. the number of entries in the .def file.
3581ad6265SDimitry Andric   LAST_ARG_POS
36349cc55cSDimitry Andric };
37349cc55cSDimitry Andric 
// Pass 2: each entry expands to `Name = 1 << Name_POS`, giving every entry its
// own single-bit mask.
3881ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
3981ad6265SDimitry Andric 
4081ad6265SDimitry Andric enum ImplicitArgumentMask {
// Value used for "no bit"; intrinsicToAttrMask returns it when an intrinsic
// maps to no entry.
4181ad6265SDimitry Andric   NOT_IMPLICIT_INPUT = 0,
4281ad6265SDimitry Andric   #include "AMDGPUAttributes.def"
// Union of all single-bit masks generated above.
4381ad6265SDimitry Andric   ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
4481ad6265SDimitry Andric };
4581ad6265SDimitry Andric 
// Pass 3: each entry expands to the pair `{Name, Str}`, building a table that
// associates every mask bit with its string.
4681ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
47349cc55cSDimitry Andric static constexpr std::pair<ImplicitArgumentMask,
48349cc55cSDimitry Andric                            StringLiteral> ImplicitAttrs[] = {
4981ad6265SDimitry Andric  #include "AMDGPUAttributes.def"
50349cc55cSDimitry Andric };
51fe6060f1SDimitry Andric 
52fe6060f1SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always
53fe6060f1SDimitry Andric // initialized.
54fe6060f1SDimitry Andric //
55fe6060f1SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup
56fe6060f1SDimitry Andric // size is 1 for y/z.
57349cc55cSDimitry Andric static ImplicitArgumentMask
5881ad6265SDimitry Andric intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
5981ad6265SDimitry Andric                     bool HasApertureRegs, bool SupportsGetDoorBellID) {
6081ad6265SDimitry Andric   unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
61fe6060f1SDimitry Andric   switch (ID) {
62fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
63fe6060f1SDimitry Andric     NonKernelOnly = true;
64349cc55cSDimitry Andric     return WORKITEM_ID_X;
65fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_x:
66fe6060f1SDimitry Andric     NonKernelOnly = true;
67349cc55cSDimitry Andric     return WORKGROUP_ID_X;
68fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
69fe6060f1SDimitry Andric   case Intrinsic::r600_read_tidig_y:
70349cc55cSDimitry Andric     return WORKITEM_ID_Y;
71fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
72fe6060f1SDimitry Andric   case Intrinsic::r600_read_tidig_z:
73349cc55cSDimitry Andric     return WORKITEM_ID_Z;
74fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_y:
75fe6060f1SDimitry Andric   case Intrinsic::r600_read_tgid_y:
76349cc55cSDimitry Andric     return WORKGROUP_ID_Y;
77fe6060f1SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_z:
78fe6060f1SDimitry Andric   case Intrinsic::r600_read_tgid_z:
79349cc55cSDimitry Andric     return WORKGROUP_ID_Z;
80fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_lds_kernel_id:
81fcaf7f86SDimitry Andric     return LDS_KERNEL_ID;
82fe6060f1SDimitry Andric   case Intrinsic::amdgcn_dispatch_ptr:
83349cc55cSDimitry Andric     return DISPATCH_PTR;
84fe6060f1SDimitry Andric   case Intrinsic::amdgcn_dispatch_id:
85349cc55cSDimitry Andric     return DISPATCH_ID;
86fe6060f1SDimitry Andric   case Intrinsic::amdgcn_implicitarg_ptr:
87349cc55cSDimitry Andric     return IMPLICIT_ARG_PTR;
8881ad6265SDimitry Andric   // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
8981ad6265SDimitry Andric   // queue_ptr.
90fe6060f1SDimitry Andric   case Intrinsic::amdgcn_queue_ptr:
9181ad6265SDimitry Andric     NeedsImplicit = (CodeObjectVersion == 5);
9281ad6265SDimitry Andric     return QUEUE_PTR;
93fe6060f1SDimitry Andric   case Intrinsic::amdgcn_is_shared:
94fe6060f1SDimitry Andric   case Intrinsic::amdgcn_is_private:
9581ad6265SDimitry Andric     if (HasApertureRegs)
9681ad6265SDimitry Andric       return NOT_IMPLICIT_INPUT;
9781ad6265SDimitry Andric     // Under V5, we need implicitarg_ptr + offsets to access private_base or
9881ad6265SDimitry Andric     // shared_base. For pre-V5, however, need to access them through queue_ptr +
9981ad6265SDimitry Andric     // offsets.
10081ad6265SDimitry Andric     return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
101fe6060f1SDimitry Andric   case Intrinsic::trap:
10281ad6265SDimitry Andric     if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
10381ad6265SDimitry Andric       return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
10481ad6265SDimitry Andric     NeedsImplicit = (CodeObjectVersion == 5); // Need impicitarg_ptr under V5.
105349cc55cSDimitry Andric     return QUEUE_PTR;
106fe6060f1SDimitry Andric   default:
107349cc55cSDimitry Andric     return NOT_IMPLICIT_INPUT;
108fe6060f1SDimitry Andric   }
109fe6060f1SDimitry Andric }
110fe6060f1SDimitry Andric 
111fe6060f1SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
112fe6060f1SDimitry Andric   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
113fe6060f1SDimitry Andric }
114fe6060f1SDimitry Andric 
115fe6060f1SDimitry Andric static bool isDSAddress(const Constant *C) {
116fe6060f1SDimitry Andric   const GlobalValue *GV = dyn_cast<GlobalValue>(C);
117fe6060f1SDimitry Andric   if (!GV)
118fe6060f1SDimitry Andric     return false;
119fe6060f1SDimitry Andric   unsigned AS = GV->getAddressSpace();
120fe6060f1SDimitry Andric   return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
121fe6060f1SDimitry Andric }
122fe6060f1SDimitry Andric 
1230eae32dcSDimitry Andric /// Returns true if the function requires the implicit argument be passed
1240eae32dcSDimitry Andric /// regardless of the function contents.
12581ad6265SDimitry Andric static bool funcRequiresHostcallPtr(const Function &F) {
1260eae32dcSDimitry Andric   // Sanitizers require the hostcall buffer passed in the implicit arguments.
1270eae32dcSDimitry Andric   return F.hasFnAttribute(Attribute::SanitizeAddress) ||
1280eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeThread) ||
1290eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeMemory) ||
1300eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1310eae32dcSDimitry Andric          F.hasFnAttribute(Attribute::SanitizeMemTag);
1320eae32dcSDimitry Andric }
1330eae32dcSDimitry Andric 
134349cc55cSDimitry Andric namespace {
135fe6060f1SDimitry Andric class AMDGPUInformationCache : public InformationCache {
136fe6060f1SDimitry Andric public:
137fe6060f1SDimitry Andric   AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
138fe6060f1SDimitry Andric                          BumpPtrAllocator &Allocator,
139fe6060f1SDimitry Andric                          SetVector<Function *> *CGSCC, TargetMachine &TM)
140fe6060f1SDimitry Andric       : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
141fe6060f1SDimitry Andric   TargetMachine &TM;
142fe6060f1SDimitry Andric 
143fe6060f1SDimitry Andric   enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
144fe6060f1SDimitry Andric 
145fe6060f1SDimitry Andric   /// Check if the subtarget has aperture regs.
146fe6060f1SDimitry Andric   bool hasApertureRegs(Function &F) {
147fe6060f1SDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
148fe6060f1SDimitry Andric     return ST.hasApertureRegs();
149fe6060f1SDimitry Andric   }
150fe6060f1SDimitry Andric 
15181ad6265SDimitry Andric   /// Check if the subtarget supports GetDoorbellID.
15281ad6265SDimitry Andric   bool supportsGetDoorbellID(Function &F) {
15381ad6265SDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
15481ad6265SDimitry Andric     return ST.supportsGetDoorbellID();
15581ad6265SDimitry Andric   }
15681ad6265SDimitry Andric 
157349cc55cSDimitry Andric   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
158349cc55cSDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
159349cc55cSDimitry Andric     return ST.getFlatWorkGroupSizes(F);
160349cc55cSDimitry Andric   }
161349cc55cSDimitry Andric 
162349cc55cSDimitry Andric   std::pair<unsigned, unsigned>
163349cc55cSDimitry Andric   getMaximumFlatWorkGroupRange(const Function &F) {
164349cc55cSDimitry Andric     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
165349cc55cSDimitry Andric     return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
166349cc55cSDimitry Andric   }
167349cc55cSDimitry Andric 
168fe6060f1SDimitry Andric private:
16981ad6265SDimitry Andric   /// Check if the ConstantExpr \p CE requires the queue pointer.
170fe6060f1SDimitry Andric   static bool visitConstExpr(const ConstantExpr *CE) {
171fe6060f1SDimitry Andric     if (CE->getOpcode() == Instruction::AddrSpaceCast) {
172fe6060f1SDimitry Andric       unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
173fe6060f1SDimitry Andric       return castRequiresQueuePtr(SrcAS);
174fe6060f1SDimitry Andric     }
175fe6060f1SDimitry Andric     return false;
176fe6060f1SDimitry Andric   }
177fe6060f1SDimitry Andric 
178fe6060f1SDimitry Andric   /// Get the constant access bitmap for \p C.
179fe6060f1SDimitry Andric   uint8_t getConstantAccess(const Constant *C) {
180fe6060f1SDimitry Andric     auto It = ConstantStatus.find(C);
181fe6060f1SDimitry Andric     if (It != ConstantStatus.end())
182fe6060f1SDimitry Andric       return It->second;
183fe6060f1SDimitry Andric 
184fe6060f1SDimitry Andric     uint8_t Result = 0;
185fe6060f1SDimitry Andric     if (isDSAddress(C))
186fe6060f1SDimitry Andric       Result = DS_GLOBAL;
187fe6060f1SDimitry Andric 
188fe6060f1SDimitry Andric     if (const auto *CE = dyn_cast<ConstantExpr>(C))
189fe6060f1SDimitry Andric       if (visitConstExpr(CE))
190fe6060f1SDimitry Andric         Result |= ADDR_SPACE_CAST;
191fe6060f1SDimitry Andric 
192fe6060f1SDimitry Andric     for (const Use &U : C->operands()) {
193fe6060f1SDimitry Andric       const auto *OpC = dyn_cast<Constant>(U);
194fe6060f1SDimitry Andric       if (!OpC)
195fe6060f1SDimitry Andric         continue;
196fe6060f1SDimitry Andric 
197fe6060f1SDimitry Andric       Result |= getConstantAccess(OpC);
198fe6060f1SDimitry Andric     }
199fe6060f1SDimitry Andric     return Result;
200fe6060f1SDimitry Andric   }
201fe6060f1SDimitry Andric 
202fe6060f1SDimitry Andric public:
20381ad6265SDimitry Andric   /// Returns true if \p Fn needs the queue pointer because of \p C.
204fe6060f1SDimitry Andric   bool needsQueuePtr(const Constant *C, Function &Fn) {
205fe6060f1SDimitry Andric     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
206fe6060f1SDimitry Andric     bool HasAperture = hasApertureRegs(Fn);
207fe6060f1SDimitry Andric 
208fe6060f1SDimitry Andric     // No need to explore the constants.
209fe6060f1SDimitry Andric     if (!IsNonEntryFunc && HasAperture)
210fe6060f1SDimitry Andric       return false;
211fe6060f1SDimitry Andric 
212fe6060f1SDimitry Andric     uint8_t Access = getConstantAccess(C);
213fe6060f1SDimitry Andric 
214fe6060f1SDimitry Andric     // We need to trap on DS globals in non-entry functions.
215fe6060f1SDimitry Andric     if (IsNonEntryFunc && (Access & DS_GLOBAL))
216fe6060f1SDimitry Andric       return true;
217fe6060f1SDimitry Andric 
218fe6060f1SDimitry Andric     return !HasAperture && (Access & ADDR_SPACE_CAST);
219fe6060f1SDimitry Andric   }
220fe6060f1SDimitry Andric 
221fe6060f1SDimitry Andric private:
22281ad6265SDimitry Andric   /// Used to determine if the Constant needs the queue pointer.
223fe6060f1SDimitry Andric   DenseMap<const Constant *, uint8_t> ConstantStatus;
224fe6060f1SDimitry Andric };
225fe6060f1SDimitry Andric 
226*bdd1243dSDimitry Andric struct AAAMDAttributes
227*bdd1243dSDimitry Andric     : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
228*bdd1243dSDimitry Andric                           AbstractAttribute> {
229*bdd1243dSDimitry Andric   using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
230349cc55cSDimitry Andric                             AbstractAttribute>;
231349cc55cSDimitry Andric 
232fe6060f1SDimitry Andric   AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
233fe6060f1SDimitry Andric 
234fe6060f1SDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
235fe6060f1SDimitry Andric   static AAAMDAttributes &createForPosition(const IRPosition &IRP,
236fe6060f1SDimitry Andric                                             Attributor &A);
237fe6060f1SDimitry Andric 
238fe6060f1SDimitry Andric   /// See AbstractAttribute::getName().
239fe6060f1SDimitry Andric   const std::string getName() const override { return "AAAMDAttributes"; }
240fe6060f1SDimitry Andric 
241fe6060f1SDimitry Andric   /// See AbstractAttribute::getIdAddr().
242fe6060f1SDimitry Andric   const char *getIdAddr() const override { return &ID; }
243fe6060f1SDimitry Andric 
244fe6060f1SDimitry Andric   /// This function should return true if the type of the \p AA is
245fe6060f1SDimitry Andric   /// AAAMDAttributes.
246fe6060f1SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
247fe6060f1SDimitry Andric     return (AA->getIdAddr() == &ID);
248fe6060f1SDimitry Andric   }
249fe6060f1SDimitry Andric 
250fe6060f1SDimitry Andric   /// Unique ID (due to the unique address)
251fe6060f1SDimitry Andric   static const char ID;
252fe6060f1SDimitry Andric };
253fe6060f1SDimitry Andric const char AAAMDAttributes::ID = 0;
254fe6060f1SDimitry Andric 
255349cc55cSDimitry Andric struct AAUniformWorkGroupSize
256fe6060f1SDimitry Andric     : public StateWrapper<BooleanState, AbstractAttribute> {
257fe6060f1SDimitry Andric   using Base = StateWrapper<BooleanState, AbstractAttribute>;
258349cc55cSDimitry Andric   AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
259fe6060f1SDimitry Andric 
260fe6060f1SDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
261349cc55cSDimitry Andric   static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
262fe6060f1SDimitry Andric                                                    Attributor &A);
263fe6060f1SDimitry Andric 
264fe6060f1SDimitry Andric   /// See AbstractAttribute::getName().
265349cc55cSDimitry Andric   const std::string getName() const override {
266349cc55cSDimitry Andric     return "AAUniformWorkGroupSize";
267349cc55cSDimitry Andric   }
268fe6060f1SDimitry Andric 
269fe6060f1SDimitry Andric   /// See AbstractAttribute::getIdAddr().
270fe6060f1SDimitry Andric   const char *getIdAddr() const override { return &ID; }
271fe6060f1SDimitry Andric 
272fe6060f1SDimitry Andric   /// This function should return true if the type of the \p AA is
273fe6060f1SDimitry Andric   /// AAAMDAttributes.
274fe6060f1SDimitry Andric   static bool classof(const AbstractAttribute *AA) {
275fe6060f1SDimitry Andric     return (AA->getIdAddr() == &ID);
276fe6060f1SDimitry Andric   }
277fe6060f1SDimitry Andric 
278fe6060f1SDimitry Andric   /// Unique ID (due to the unique address)
279fe6060f1SDimitry Andric   static const char ID;
280fe6060f1SDimitry Andric };
281349cc55cSDimitry Andric const char AAUniformWorkGroupSize::ID = 0;
282fe6060f1SDimitry Andric 
283349cc55cSDimitry Andric struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
284349cc55cSDimitry Andric   AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
285349cc55cSDimitry Andric       : AAUniformWorkGroupSize(IRP, A) {}
286fe6060f1SDimitry Andric 
287fe6060f1SDimitry Andric   void initialize(Attributor &A) override {
288fe6060f1SDimitry Andric     Function *F = getAssociatedFunction();
289fe6060f1SDimitry Andric     CallingConv::ID CC = F->getCallingConv();
290fe6060f1SDimitry Andric 
291fe6060f1SDimitry Andric     if (CC != CallingConv::AMDGPU_KERNEL)
292fe6060f1SDimitry Andric       return;
293fe6060f1SDimitry Andric 
294fe6060f1SDimitry Andric     bool InitialValue = false;
295fe6060f1SDimitry Andric     if (F->hasFnAttribute("uniform-work-group-size"))
296fe6060f1SDimitry Andric       InitialValue = F->getFnAttribute("uniform-work-group-size")
297fe6060f1SDimitry Andric                          .getValueAsString()
298fe6060f1SDimitry Andric                          .equals("true");
299fe6060f1SDimitry Andric 
300fe6060f1SDimitry Andric     if (InitialValue)
301fe6060f1SDimitry Andric       indicateOptimisticFixpoint();
302fe6060f1SDimitry Andric     else
303fe6060f1SDimitry Andric       indicatePessimisticFixpoint();
304fe6060f1SDimitry Andric   }
305fe6060f1SDimitry Andric 
306fe6060f1SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
307fe6060f1SDimitry Andric     ChangeStatus Change = ChangeStatus::UNCHANGED;
308fe6060f1SDimitry Andric 
309fe6060f1SDimitry Andric     auto CheckCallSite = [&](AbstractCallSite CS) {
310fe6060f1SDimitry Andric       Function *Caller = CS.getInstruction()->getFunction();
311349cc55cSDimitry Andric       LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
312fe6060f1SDimitry Andric                         << "->" << getAssociatedFunction()->getName() << "\n");
313fe6060f1SDimitry Andric 
314349cc55cSDimitry Andric       const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
315fe6060f1SDimitry Andric           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
316fe6060f1SDimitry Andric 
317fe6060f1SDimitry Andric       Change = Change | clampStateAndIndicateChange(this->getState(),
318fe6060f1SDimitry Andric                                                     CallerInfo.getState());
319fe6060f1SDimitry Andric 
320fe6060f1SDimitry Andric       return true;
321fe6060f1SDimitry Andric     };
322fe6060f1SDimitry Andric 
323fe6060f1SDimitry Andric     bool AllCallSitesKnown = true;
324fe6060f1SDimitry Andric     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
3250eae32dcSDimitry Andric       return indicatePessimisticFixpoint();
326fe6060f1SDimitry Andric 
327fe6060f1SDimitry Andric     return Change;
328fe6060f1SDimitry Andric   }
329fe6060f1SDimitry Andric 
330fe6060f1SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
331fe6060f1SDimitry Andric     SmallVector<Attribute, 8> AttrList;
332fe6060f1SDimitry Andric     LLVMContext &Ctx = getAssociatedFunction()->getContext();
333fe6060f1SDimitry Andric 
334fe6060f1SDimitry Andric     AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
335fe6060f1SDimitry Andric                                       getAssumed() ? "true" : "false"));
336fe6060f1SDimitry Andric     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
337fe6060f1SDimitry Andric                                               /* ForceReplace */ true);
338fe6060f1SDimitry Andric   }
339fe6060f1SDimitry Andric 
340fe6060f1SDimitry Andric   bool isValidState() const override {
341fe6060f1SDimitry Andric     // This state is always valid, even when the state is false.
342fe6060f1SDimitry Andric     return true;
343fe6060f1SDimitry Andric   }
344fe6060f1SDimitry Andric 
345fe6060f1SDimitry Andric   const std::string getAsStr() const override {
346fe6060f1SDimitry Andric     return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
347fe6060f1SDimitry Andric   }
348fe6060f1SDimitry Andric 
349fe6060f1SDimitry Andric   /// See AbstractAttribute::trackStatistics()
350fe6060f1SDimitry Andric   void trackStatistics() const override {}
351fe6060f1SDimitry Andric };
352fe6060f1SDimitry Andric 
353349cc55cSDimitry Andric AAUniformWorkGroupSize &
354349cc55cSDimitry Andric AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
355fe6060f1SDimitry Andric                                           Attributor &A) {
356fe6060f1SDimitry Andric   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
357349cc55cSDimitry Andric     return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
358349cc55cSDimitry Andric   llvm_unreachable(
359349cc55cSDimitry Andric       "AAUniformWorkGroupSize is only valid for function position");
360fe6060f1SDimitry Andric }
361fe6060f1SDimitry Andric 
362fe6060f1SDimitry Andric struct AAAMDAttributesFunction : public AAAMDAttributes {
363fe6060f1SDimitry Andric   AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
364fe6060f1SDimitry Andric       : AAAMDAttributes(IRP, A) {}
365fe6060f1SDimitry Andric 
366fe6060f1SDimitry Andric   void initialize(Attributor &A) override {
367fe6060f1SDimitry Andric     Function *F = getAssociatedFunction();
3680eae32dcSDimitry Andric 
3690eae32dcSDimitry Andric     // If the function requires the implicit arg pointer due to sanitizers,
3700eae32dcSDimitry Andric     // assume it's needed even if explicitly marked as not requiring it.
37181ad6265SDimitry Andric     const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
37281ad6265SDimitry Andric     if (NeedsHostcall) {
3730eae32dcSDimitry Andric       removeAssumedBits(IMPLICIT_ARG_PTR);
37481ad6265SDimitry Andric       removeAssumedBits(HOSTCALL_PTR);
37581ad6265SDimitry Andric     }
3760eae32dcSDimitry Andric 
377349cc55cSDimitry Andric     for (auto Attr : ImplicitAttrs) {
37881ad6265SDimitry Andric       if (NeedsHostcall &&
37981ad6265SDimitry Andric           (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
3800eae32dcSDimitry Andric         continue;
3810eae32dcSDimitry Andric 
382349cc55cSDimitry Andric       if (F->hasFnAttribute(Attr.second))
383349cc55cSDimitry Andric         addKnownBits(Attr.first);
384fe6060f1SDimitry Andric     }
385fe6060f1SDimitry Andric 
386349cc55cSDimitry Andric     if (F->isDeclaration())
387349cc55cSDimitry Andric       return;
388349cc55cSDimitry Andric 
389fe6060f1SDimitry Andric     // Ignore functions with graphics calling conventions, these are currently
390fe6060f1SDimitry Andric     // not allowed to have kernel arguments.
391fe6060f1SDimitry Andric     if (AMDGPU::isGraphics(F->getCallingConv())) {
392fe6060f1SDimitry Andric       indicatePessimisticFixpoint();
393fe6060f1SDimitry Andric       return;
394fe6060f1SDimitry Andric     }
395fe6060f1SDimitry Andric   }
396fe6060f1SDimitry Andric 
397fe6060f1SDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
398fe6060f1SDimitry Andric     Function *F = getAssociatedFunction();
399349cc55cSDimitry Andric     // The current assumed state used to determine a change.
400349cc55cSDimitry Andric     auto OrigAssumed = getAssumed();
401fe6060f1SDimitry Andric 
402fe6060f1SDimitry Andric     // Check for Intrinsics and propagate attributes.
403fe6060f1SDimitry Andric     const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
404fe6060f1SDimitry Andric         *this, this->getIRPosition(), DepClassTy::REQUIRED);
405349cc55cSDimitry Andric     if (AAEdges.hasNonAsmUnknownCallee())
406349cc55cSDimitry Andric       return indicatePessimisticFixpoint();
407fe6060f1SDimitry Andric 
408349cc55cSDimitry Andric     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
409fe6060f1SDimitry Andric 
41081ad6265SDimitry Andric     bool NeedsImplicit = false;
41181ad6265SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
41281ad6265SDimitry Andric     bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
41381ad6265SDimitry Andric     bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
414349cc55cSDimitry Andric 
415fe6060f1SDimitry Andric     for (Function *Callee : AAEdges.getOptimisticEdges()) {
416fe6060f1SDimitry Andric       Intrinsic::ID IID = Callee->getIntrinsicID();
417349cc55cSDimitry Andric       if (IID == Intrinsic::not_intrinsic) {
418349cc55cSDimitry Andric         const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
419349cc55cSDimitry Andric           *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
420349cc55cSDimitry Andric         *this &= AAAMD;
421fe6060f1SDimitry Andric         continue;
422fe6060f1SDimitry Andric       }
423fe6060f1SDimitry Andric 
424fe6060f1SDimitry Andric       bool NonKernelOnly = false;
425349cc55cSDimitry Andric       ImplicitArgumentMask AttrMask =
42681ad6265SDimitry Andric           intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
42781ad6265SDimitry Andric                               HasApertureRegs, SupportsGetDoorbellID);
428349cc55cSDimitry Andric       if (AttrMask != NOT_IMPLICIT_INPUT) {
429349cc55cSDimitry Andric         if ((IsNonEntryFunc || !NonKernelOnly))
430349cc55cSDimitry Andric           removeAssumedBits(AttrMask);
431fe6060f1SDimitry Andric       }
432fe6060f1SDimitry Andric     }
433fe6060f1SDimitry Andric 
43481ad6265SDimitry Andric     // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
43581ad6265SDimitry Andric     if (NeedsImplicit)
43681ad6265SDimitry Andric       removeAssumedBits(IMPLICIT_ARG_PTR);
43781ad6265SDimitry Andric 
43881ad6265SDimitry Andric     if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
43981ad6265SDimitry Andric       // Under V5, we need implicitarg_ptr + offsets to access private_base or
44081ad6265SDimitry Andric       // shared_base. We do not actually need queue_ptr.
44181ad6265SDimitry Andric       if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
44281ad6265SDimitry Andric         removeAssumedBits(IMPLICIT_ARG_PTR);
44381ad6265SDimitry Andric       else
444349cc55cSDimitry Andric         removeAssumedBits(QUEUE_PTR);
445fe6060f1SDimitry Andric     }
446fe6060f1SDimitry Andric 
44781ad6265SDimitry Andric     if (funcRetrievesMultigridSyncArg(A)) {
44881ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) &&
44981ad6265SDimitry Andric              "multigrid_sync_arg needs implicitarg_ptr");
45081ad6265SDimitry Andric       removeAssumedBits(MULTIGRID_SYNC_ARG);
451349cc55cSDimitry Andric     }
452fe6060f1SDimitry Andric 
45381ad6265SDimitry Andric     if (funcRetrievesHostcallPtr(A)) {
45481ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
45581ad6265SDimitry Andric       removeAssumedBits(HOSTCALL_PTR);
45681ad6265SDimitry Andric     }
45781ad6265SDimitry Andric 
45881ad6265SDimitry Andric     if (funcRetrievesHeapPtr(A)) {
45981ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
46081ad6265SDimitry Andric       removeAssumedBits(HEAP_PTR);
46181ad6265SDimitry Andric     }
46281ad6265SDimitry Andric 
46381ad6265SDimitry Andric     if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
46481ad6265SDimitry Andric       assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
465349cc55cSDimitry Andric       removeAssumedBits(QUEUE_PTR);
466fe6060f1SDimitry Andric     }
467fe6060f1SDimitry Andric 
468fcaf7f86SDimitry Andric     if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
469fcaf7f86SDimitry Andric       removeAssumedBits(LDS_KERNEL_ID);
470fcaf7f86SDimitry Andric     }
471fcaf7f86SDimitry Andric 
472*bdd1243dSDimitry Andric     if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A))
473*bdd1243dSDimitry Andric       removeAssumedBits(DEFAULT_QUEUE);
474*bdd1243dSDimitry Andric 
475*bdd1243dSDimitry Andric     if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A))
476*bdd1243dSDimitry Andric       removeAssumedBits(COMPLETION_ACTION);
477*bdd1243dSDimitry Andric 
47881ad6265SDimitry Andric     return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
47981ad6265SDimitry Andric                                        : ChangeStatus::UNCHANGED;
480fe6060f1SDimitry Andric   }
481fe6060f1SDimitry Andric 
482fe6060f1SDimitry Andric   ChangeStatus manifest(Attributor &A) override {
483fe6060f1SDimitry Andric     SmallVector<Attribute, 8> AttrList;
484fe6060f1SDimitry Andric     LLVMContext &Ctx = getAssociatedFunction()->getContext();
485fe6060f1SDimitry Andric 
486349cc55cSDimitry Andric     for (auto Attr : ImplicitAttrs) {
487349cc55cSDimitry Andric       if (isKnown(Attr.first))
488349cc55cSDimitry Andric         AttrList.push_back(Attribute::get(Ctx, Attr.second));
489349cc55cSDimitry Andric     }
490fe6060f1SDimitry Andric 
491fe6060f1SDimitry Andric     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
492fe6060f1SDimitry Andric                                               /* ForceReplace */ true);
493fe6060f1SDimitry Andric   }
494fe6060f1SDimitry Andric 
495fe6060f1SDimitry Andric   const std::string getAsStr() const override {
496349cc55cSDimitry Andric     std::string Str;
497349cc55cSDimitry Andric     raw_string_ostream OS(Str);
498349cc55cSDimitry Andric     OS << "AMDInfo[";
499349cc55cSDimitry Andric     for (auto Attr : ImplicitAttrs)
500349cc55cSDimitry Andric       OS << ' ' << Attr.second;
501349cc55cSDimitry Andric     OS << " ]";
502349cc55cSDimitry Andric     return OS.str();
503fe6060f1SDimitry Andric   }
504fe6060f1SDimitry Andric 
505fe6060f1SDimitry Andric   /// See AbstractAttribute::trackStatistics()
506fe6060f1SDimitry Andric   void trackStatistics() const override {}
50781ad6265SDimitry Andric 
50881ad6265SDimitry Andric private:
50981ad6265SDimitry Andric   bool checkForQueuePtr(Attributor &A) {
51081ad6265SDimitry Andric     Function *F = getAssociatedFunction();
51181ad6265SDimitry Andric     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
51281ad6265SDimitry Andric 
51381ad6265SDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
51481ad6265SDimitry Andric 
51581ad6265SDimitry Andric     bool NeedsQueuePtr = false;
51681ad6265SDimitry Andric 
51781ad6265SDimitry Andric     auto CheckAddrSpaceCasts = [&](Instruction &I) {
51881ad6265SDimitry Andric       unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
51981ad6265SDimitry Andric       if (castRequiresQueuePtr(SrcAS)) {
52081ad6265SDimitry Andric         NeedsQueuePtr = true;
52181ad6265SDimitry Andric         return false;
52281ad6265SDimitry Andric       }
52381ad6265SDimitry Andric       return true;
52481ad6265SDimitry Andric     };
52581ad6265SDimitry Andric 
52681ad6265SDimitry Andric     bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
52781ad6265SDimitry Andric 
52881ad6265SDimitry Andric     // `checkForAllInstructions` is much more cheaper than going through all
52981ad6265SDimitry Andric     // instructions, try it first.
53081ad6265SDimitry Andric 
53181ad6265SDimitry Andric     // The queue pointer is not needed if aperture regs is present.
53281ad6265SDimitry Andric     if (!HasApertureRegs) {
53381ad6265SDimitry Andric       bool UsedAssumedInformation = false;
53481ad6265SDimitry Andric       A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
53581ad6265SDimitry Andric                                 {Instruction::AddrSpaceCast},
53681ad6265SDimitry Andric                                 UsedAssumedInformation);
53781ad6265SDimitry Andric     }
53881ad6265SDimitry Andric 
53981ad6265SDimitry Andric     // If we found  that we need the queue pointer, nothing else to do.
54081ad6265SDimitry Andric     if (NeedsQueuePtr)
54181ad6265SDimitry Andric       return true;
54281ad6265SDimitry Andric 
54381ad6265SDimitry Andric     if (!IsNonEntryFunc && HasApertureRegs)
54481ad6265SDimitry Andric       return false;
54581ad6265SDimitry Andric 
54681ad6265SDimitry Andric     for (BasicBlock &BB : *F) {
54781ad6265SDimitry Andric       for (Instruction &I : BB) {
54881ad6265SDimitry Andric         for (const Use &U : I.operands()) {
54981ad6265SDimitry Andric           if (const auto *C = dyn_cast<Constant>(U)) {
55081ad6265SDimitry Andric             if (InfoCache.needsQueuePtr(C, *F))
55181ad6265SDimitry Andric               return true;
55281ad6265SDimitry Andric           }
55381ad6265SDimitry Andric         }
55481ad6265SDimitry Andric       }
55581ad6265SDimitry Andric     }
55681ad6265SDimitry Andric 
55781ad6265SDimitry Andric     return false;
55881ad6265SDimitry Andric   }
55981ad6265SDimitry Andric 
56081ad6265SDimitry Andric   bool funcRetrievesMultigridSyncArg(Attributor &A) {
56181ad6265SDimitry Andric     auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
562*bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
563*bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
56481ad6265SDimitry Andric   }
56581ad6265SDimitry Andric 
56681ad6265SDimitry Andric   bool funcRetrievesHostcallPtr(Attributor &A) {
56781ad6265SDimitry Andric     auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
568*bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
569*bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
570*bdd1243dSDimitry Andric   }
571*bdd1243dSDimitry Andric 
572*bdd1243dSDimitry Andric   bool funcRetrievesDefaultQueue(Attributor &A) {
573*bdd1243dSDimitry Andric     auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition();
574*bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
575*bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
576*bdd1243dSDimitry Andric   }
577*bdd1243dSDimitry Andric 
578*bdd1243dSDimitry Andric   bool funcRetrievesCompletionAction(Attributor &A) {
579*bdd1243dSDimitry Andric     auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition();
580*bdd1243dSDimitry Andric     AA::RangeTy Range(Pos, 8);
581*bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
58281ad6265SDimitry Andric   }
58381ad6265SDimitry Andric 
58481ad6265SDimitry Andric   bool funcRetrievesHeapPtr(Attributor &A) {
58581ad6265SDimitry Andric     if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
58681ad6265SDimitry Andric       return false;
587*bdd1243dSDimitry Andric     AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
588*bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
58981ad6265SDimitry Andric   }
59081ad6265SDimitry Andric 
59181ad6265SDimitry Andric   bool funcRetrievesQueuePtr(Attributor &A) {
59281ad6265SDimitry Andric     if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
59381ad6265SDimitry Andric       return false;
594*bdd1243dSDimitry Andric     AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
595*bdd1243dSDimitry Andric     return funcRetrievesImplicitKernelArg(A, Range);
59681ad6265SDimitry Andric   }
59781ad6265SDimitry Andric 
598*bdd1243dSDimitry Andric   bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
59981ad6265SDimitry Andric     // Check if this is a call to the implicitarg_ptr builtin and it
60081ad6265SDimitry Andric     // is used to retrieve the hostcall pointer. The implicit arg for
60181ad6265SDimitry Andric     // hostcall is not used only if every use of the implicitarg_ptr
60281ad6265SDimitry Andric     // is a load that clearly does not retrieve any byte of the
60381ad6265SDimitry Andric     // hostcall pointer. We check this by tracing all the uses of the
60481ad6265SDimitry Andric     // initial call to the implicitarg_ptr intrinsic.
60581ad6265SDimitry Andric     auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
60681ad6265SDimitry Andric       auto &Call = cast<CallBase>(I);
60781ad6265SDimitry Andric       if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
60881ad6265SDimitry Andric         return true;
60981ad6265SDimitry Andric 
61081ad6265SDimitry Andric       const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
61181ad6265SDimitry Andric           *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
61281ad6265SDimitry Andric 
61381ad6265SDimitry Andric       return PointerInfoAA.forallInterferingAccesses(
614*bdd1243dSDimitry Andric           Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
61581ad6265SDimitry Andric             return Acc.getRemoteInst()->isDroppable();
61681ad6265SDimitry Andric           });
61781ad6265SDimitry Andric     };
61881ad6265SDimitry Andric 
61981ad6265SDimitry Andric     bool UsedAssumedInformation = false;
62081ad6265SDimitry Andric     return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
62181ad6265SDimitry Andric                                               UsedAssumedInformation);
62281ad6265SDimitry Andric   }
623fcaf7f86SDimitry Andric 
624fcaf7f86SDimitry Andric   bool funcRetrievesLDSKernelId(Attributor &A) {
625fcaf7f86SDimitry Andric     auto DoesNotRetrieve = [&](Instruction &I) {
626fcaf7f86SDimitry Andric       auto &Call = cast<CallBase>(I);
627fcaf7f86SDimitry Andric       return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
628fcaf7f86SDimitry Andric     };
629fcaf7f86SDimitry Andric     bool UsedAssumedInformation = false;
630fcaf7f86SDimitry Andric     return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
631fcaf7f86SDimitry Andric                                               UsedAssumedInformation);
632fcaf7f86SDimitry Andric   }
633fe6060f1SDimitry Andric };
634fe6060f1SDimitry Andric 
635fe6060f1SDimitry Andric AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
636fe6060f1SDimitry Andric                                                     Attributor &A) {
637fe6060f1SDimitry Andric   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
638fe6060f1SDimitry Andric     return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
639fe6060f1SDimitry Andric   llvm_unreachable("AAAMDAttributes is only valid for function position");
640fe6060f1SDimitry Andric }
641fe6060f1SDimitry Andric 
642349cc55cSDimitry Andric /// Propagate amdgpu-flat-work-group-size attribute.
643349cc55cSDimitry Andric struct AAAMDFlatWorkGroupSize
644349cc55cSDimitry Andric     : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
645349cc55cSDimitry Andric   using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
646349cc55cSDimitry Andric   AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
647349cc55cSDimitry Andric       : Base(IRP, 32) {}
648349cc55cSDimitry Andric 
649349cc55cSDimitry Andric   /// See AbstractAttribute::getState(...).
650349cc55cSDimitry Andric   IntegerRangeState &getState() override { return *this; }
651349cc55cSDimitry Andric   const IntegerRangeState &getState() const override { return *this; }
652349cc55cSDimitry Andric 
653349cc55cSDimitry Andric   void initialize(Attributor &A) override {
654349cc55cSDimitry Andric     Function *F = getAssociatedFunction();
655349cc55cSDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
656349cc55cSDimitry Andric     unsigned MinGroupSize, MaxGroupSize;
657349cc55cSDimitry Andric     std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
658349cc55cSDimitry Andric     intersectKnown(
659349cc55cSDimitry Andric         ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
6600eae32dcSDimitry Andric 
6610eae32dcSDimitry Andric     if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
6620eae32dcSDimitry Andric       indicatePessimisticFixpoint();
663349cc55cSDimitry Andric   }
664349cc55cSDimitry Andric 
665349cc55cSDimitry Andric   ChangeStatus updateImpl(Attributor &A) override {
666349cc55cSDimitry Andric     ChangeStatus Change = ChangeStatus::UNCHANGED;
667349cc55cSDimitry Andric 
668349cc55cSDimitry Andric     auto CheckCallSite = [&](AbstractCallSite CS) {
669349cc55cSDimitry Andric       Function *Caller = CS.getInstruction()->getFunction();
670349cc55cSDimitry Andric       LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
671349cc55cSDimitry Andric                         << "->" << getAssociatedFunction()->getName() << '\n');
672349cc55cSDimitry Andric 
673349cc55cSDimitry Andric       const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
674349cc55cSDimitry Andric           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
675349cc55cSDimitry Andric 
676349cc55cSDimitry Andric       Change |=
677349cc55cSDimitry Andric           clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
678349cc55cSDimitry Andric 
679349cc55cSDimitry Andric       return true;
680349cc55cSDimitry Andric     };
681349cc55cSDimitry Andric 
682349cc55cSDimitry Andric     bool AllCallSitesKnown = true;
683349cc55cSDimitry Andric     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
684349cc55cSDimitry Andric       return indicatePessimisticFixpoint();
685349cc55cSDimitry Andric 
686349cc55cSDimitry Andric     return Change;
687349cc55cSDimitry Andric   }
688349cc55cSDimitry Andric 
689349cc55cSDimitry Andric   ChangeStatus manifest(Attributor &A) override {
690349cc55cSDimitry Andric     SmallVector<Attribute, 8> AttrList;
691349cc55cSDimitry Andric     Function *F = getAssociatedFunction();
692349cc55cSDimitry Andric     LLVMContext &Ctx = F->getContext();
693349cc55cSDimitry Andric 
694349cc55cSDimitry Andric     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
695349cc55cSDimitry Andric     unsigned Min, Max;
696349cc55cSDimitry Andric     std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
697349cc55cSDimitry Andric 
698349cc55cSDimitry Andric     // Don't add the attribute if it's the implied default.
699349cc55cSDimitry Andric     if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
700349cc55cSDimitry Andric       return ChangeStatus::UNCHANGED;
701349cc55cSDimitry Andric 
702349cc55cSDimitry Andric     SmallString<10> Buffer;
703349cc55cSDimitry Andric     raw_svector_ostream OS(Buffer);
704349cc55cSDimitry Andric     OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
705349cc55cSDimitry Andric 
706349cc55cSDimitry Andric     AttrList.push_back(
707349cc55cSDimitry Andric         Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
708349cc55cSDimitry Andric     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
709349cc55cSDimitry Andric                                               /* ForceReplace */ true);
710349cc55cSDimitry Andric   }
711349cc55cSDimitry Andric 
712349cc55cSDimitry Andric   const std::string getAsStr() const override {
713349cc55cSDimitry Andric     std::string Str;
714349cc55cSDimitry Andric     raw_string_ostream OS(Str);
715349cc55cSDimitry Andric     OS << "AMDFlatWorkGroupSize[";
716349cc55cSDimitry Andric     OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
717349cc55cSDimitry Andric     OS << ']';
718349cc55cSDimitry Andric     return OS.str();
719349cc55cSDimitry Andric   }
720349cc55cSDimitry Andric 
721349cc55cSDimitry Andric   /// See AbstractAttribute::trackStatistics()
722349cc55cSDimitry Andric   void trackStatistics() const override {}
723349cc55cSDimitry Andric 
724349cc55cSDimitry Andric   /// Create an abstract attribute view for the position \p IRP.
725349cc55cSDimitry Andric   static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
726349cc55cSDimitry Andric                                                    Attributor &A);
727349cc55cSDimitry Andric 
728349cc55cSDimitry Andric   /// See AbstractAttribute::getName()
729349cc55cSDimitry Andric   const std::string getName() const override {
730349cc55cSDimitry Andric     return "AAAMDFlatWorkGroupSize";
731349cc55cSDimitry Andric   }
732349cc55cSDimitry Andric 
733349cc55cSDimitry Andric   /// See AbstractAttribute::getIdAddr()
734349cc55cSDimitry Andric   const char *getIdAddr() const override { return &ID; }
735349cc55cSDimitry Andric 
736349cc55cSDimitry Andric   /// This function should return true if the type of the \p AA is
737349cc55cSDimitry Andric   /// AAAMDFlatWorkGroupSize
738349cc55cSDimitry Andric   static bool classof(const AbstractAttribute *AA) {
739349cc55cSDimitry Andric     return (AA->getIdAddr() == &ID);
740349cc55cSDimitry Andric   }
741349cc55cSDimitry Andric 
742349cc55cSDimitry Andric   /// Unique ID (due to the unique address)
743349cc55cSDimitry Andric   static const char ID;
744349cc55cSDimitry Andric };
745349cc55cSDimitry Andric 
746349cc55cSDimitry Andric const char AAAMDFlatWorkGroupSize::ID = 0;
747349cc55cSDimitry Andric 
748349cc55cSDimitry Andric AAAMDFlatWorkGroupSize &
749349cc55cSDimitry Andric AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
750349cc55cSDimitry Andric                                           Attributor &A) {
751349cc55cSDimitry Andric   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
752349cc55cSDimitry Andric     return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
753349cc55cSDimitry Andric   llvm_unreachable(
754349cc55cSDimitry Andric       "AAAMDFlatWorkGroupSize is only valid for function position");
755349cc55cSDimitry Andric }
756349cc55cSDimitry Andric 
757fe6060f1SDimitry Andric class AMDGPUAttributor : public ModulePass {
758fe6060f1SDimitry Andric public:
759fe6060f1SDimitry Andric   AMDGPUAttributor() : ModulePass(ID) {}
760fe6060f1SDimitry Andric 
761fe6060f1SDimitry Andric   /// doInitialization - Virtual method overridden by subclasses to do
762fe6060f1SDimitry Andric   /// any necessary initialization before any pass is run.
763fe6060f1SDimitry Andric   bool doInitialization(Module &) override {
764fe6060f1SDimitry Andric     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
765fe6060f1SDimitry Andric     if (!TPC)
766fe6060f1SDimitry Andric       report_fatal_error("TargetMachine is required");
767fe6060f1SDimitry Andric 
768fe6060f1SDimitry Andric     TM = &TPC->getTM<TargetMachine>();
769fe6060f1SDimitry Andric     return false;
770fe6060f1SDimitry Andric   }
771fe6060f1SDimitry Andric 
772fe6060f1SDimitry Andric   bool runOnModule(Module &M) override {
773fe6060f1SDimitry Andric     SetVector<Function *> Functions;
774*bdd1243dSDimitry Andric     AnalysisGetter AG(this);
775349cc55cSDimitry Andric     for (Function &F : M) {
776349cc55cSDimitry Andric       if (!F.isIntrinsic())
777fe6060f1SDimitry Andric         Functions.insert(&F);
778349cc55cSDimitry Andric     }
779fe6060f1SDimitry Andric 
780fe6060f1SDimitry Andric     CallGraphUpdater CGUpdater;
781fe6060f1SDimitry Andric     BumpPtrAllocator Allocator;
782fe6060f1SDimitry Andric     AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
783349cc55cSDimitry Andric     DenseSet<const char *> Allowed(
784349cc55cSDimitry Andric         {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
785fcaf7f86SDimitry Andric          &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
786*bdd1243dSDimitry Andric          &AAPointerInfo::ID, &AAPotentialConstantValues::ID});
787349cc55cSDimitry Andric 
78881ad6265SDimitry Andric     AttributorConfig AC(CGUpdater);
78981ad6265SDimitry Andric     AC.Allowed = &Allowed;
79081ad6265SDimitry Andric     AC.IsModulePass = true;
79181ad6265SDimitry Andric     AC.DefaultInitializeLiveInternals = false;
79281ad6265SDimitry Andric 
79381ad6265SDimitry Andric     Attributor A(Functions, InfoCache, AC);
794fe6060f1SDimitry Andric 
795fe6060f1SDimitry Andric     for (Function &F : M) {
796349cc55cSDimitry Andric       if (!F.isIntrinsic()) {
797fe6060f1SDimitry Andric         A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
798349cc55cSDimitry Andric         A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
799349cc55cSDimitry Andric         if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
800349cc55cSDimitry Andric           A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
801349cc55cSDimitry Andric         }
802349cc55cSDimitry Andric       }
803fe6060f1SDimitry Andric     }
804fe6060f1SDimitry Andric 
805fe6060f1SDimitry Andric     ChangeStatus Change = A.run();
806fe6060f1SDimitry Andric     return Change == ChangeStatus::CHANGED;
807fe6060f1SDimitry Andric   }
808fe6060f1SDimitry Andric 
809*bdd1243dSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
810*bdd1243dSDimitry Andric     AU.addRequired<CycleInfoWrapperPass>();
811*bdd1243dSDimitry Andric   }
812*bdd1243dSDimitry Andric 
813fe6060f1SDimitry Andric   StringRef getPassName() const override { return "AMDGPU Attributor"; }
814fe6060f1SDimitry Andric   TargetMachine *TM;
815fe6060f1SDimitry Andric   static char ID;
816fe6060f1SDimitry Andric };
817349cc55cSDimitry Andric } // namespace
818fe6060f1SDimitry Andric 
819fe6060f1SDimitry Andric char AMDGPUAttributor::ID = 0;
820fe6060f1SDimitry Andric 
821fe6060f1SDimitry Andric Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
822*bdd1243dSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
823*bdd1243dSDimitry Andric                       false)
824*bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);
825*bdd1243dSDimitry Andric INITIALIZE_PASS_END(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
826*bdd1243dSDimitry Andric                     false)
827