//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
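///
/// Per function, it works out which implicit kernel inputs (workitem and
/// workgroup ids, dispatch/queue/implicitarg pointers, hostcall buffer, and so
/// on) can be proven unnecessary and records the result as function
/// attributes, alongside "uniform-work-group-size" and
/// "amdgpu-flat-work-group-size". The per-input attribute strings themselves
/// are listed in AMDGPUAttributes.def.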
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
 #include "AMDGPUAttributes.def"
};
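
// The three inclusions of AMDGPUAttributes.def above are a classic X-macro:
// the .def file invokes AMDGPU_ATTRIBUTE(Name, Str) once per implicit input
// (and is expected to #undef the macro after each use), so a hypothetical
// entry such as AMDGPU_ATTRIBUTE(QUEUE_PTR, "...") expands to QUEUE_PTR_POS in
// ImplicitArgumentPositions, QUEUE_PTR = 1 << QUEUE_PTR_POS in
// ImplicitArgumentMask, and {QUEUE_PTR, "..."} in ImplicitAttrs. The attribute
// string is left as "..." here because the real spellings live in the .def
// file.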

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile-time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
  // queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID has been supported since V4.
      return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
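
// For example, llvm.amdgcn.workitem.id.y simply maps to WORKITEM_ID_Y, while
// llvm.amdgcn.is.shared maps to no implicit input at all when aperture
// registers are available, to IMPLICIT_ARG_PTR under code object V5, and to
// QUEUE_PTR otherwise.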

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
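
// For instance, a function built with -fsanitize=address carries the
// sanitize_address attribute, so it is treated as needing the hostcall buffer
// even if it never references it directly.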

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }
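
  // Illustrative example: for a constant such as
  //   addrspacecast (i32 addrspace(3)* @lds to i32*)
  // the addrspacecast from LOCAL_ADDRESS sets ADDR_SPACE_CAST, and the
  // recursion into the @lds operand adds DS_GLOBAL, so the outer constant ends
  // up with both bits set.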

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

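/// Abstract attribute tracking which implicit kernel inputs a function needs.
/// Every bit of the underlying BitIntegerState corresponds to one entry of
/// AMDGPUAttributes.def and starts out optimistically assumed; the function
/// flavour below clears a bit as soon as a use of that input is discovered,
/// and manifest() then emits the attribute string from ImplicitAttrs for each
/// bit that is known at that point, recording that the input was not found to
/// be needed.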
struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
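
  // Sketch of the propagation: a kernel marked "uniform-work-group-size"="true"
  // starts at an optimistic fixpoint; a device function keeps "true" only while
  // every known caller does, and any caller without the guarantee (or an
  // unknown call site) pessimizes it to "false".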

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }
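
  // Sketch: a non-kernel function whose only intrinsic call is
  // llvm.amdgcn.workgroup.id.y, and which contains no address-space casts or
  // implicitarg_ptr loads, only has WORKGROUP_ID_Y cleared here; every other
  // implicit-input bit stays assumed and is later manifested as its attribute.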

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions manually, so try it first.

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, there is nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A,
                                      AAPointerInfo::OffsetAndSize OAS) {
    // Check if this is a call to the implicitarg_ptr intrinsic and whether it
    // is used to retrieve the implicit kernel argument described by \p OAS
    // (e.g. the hostcall pointer). That argument is considered unused only if
    // every use of the implicitarg_ptr is a load that clearly does not touch
    // any byte of it. We check this by tracing all the uses of the initial
    // call to the implicitarg_ptr intrinsic.
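    //
    // The IR shape this catches looks roughly like (offsets are ABI-dependent
    // and shown only for illustration):
    //   %p = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
    //   %q = getelementptr i8, i8 addrspace(4)* %p, i64 <ArgOffset>
    //   %v = load i64, i64 addrspace(4)* <bitcast of %q>
    // If any such (non-droppable) load may overlap the 8-byte slot described
    // by \p OAS, the argument counts as retrieved.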
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
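
  // Sketch of the end result: a non-entry function reachable only from kernels
  // annotated "amdgpu-flat-work-group-size"="1,256" ends up with the same
  // "1,256" range; if the propagated range merely matches the subtarget's
  // default [min, max], no attribute is written.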

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)