1fe6060f1SDimitry Andric //===- AMDGPUAttributor.cpp -----------------------------------------------===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric /// \file This pass uses Attributor framework to deduce AMDGPU attributes. 10fe6060f1SDimitry Andric // 11fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 12fe6060f1SDimitry Andric 13fe6060f1SDimitry Andric #include "AMDGPU.h" 14fe6060f1SDimitry Andric #include "GCNSubtarget.h" 1581ad6265SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 16bdd1243dSDimitry Andric #include "llvm/Analysis/CycleAnalysis.h" 17fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 18fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 19fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsR600.h" 20fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h" 21fe6060f1SDimitry Andric #include "llvm/Transforms/IPO/Attributor.h" 22fe6060f1SDimitry Andric 23fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-attributor" 24fe6060f1SDimitry Andric 25bdd1243dSDimitry Andric namespace llvm { 26bdd1243dSDimitry Andric void initializeCycleInfoWrapperPassPass(PassRegistry &); 27*0fca6ea1SDimitry Andric } // namespace llvm 28bdd1243dSDimitry Andric 29fe6060f1SDimitry Andric using namespace llvm; 30fe6060f1SDimitry Andric 315f757f3fSDimitry Andric static cl::opt<unsigned> KernargPreloadCount( 325f757f3fSDimitry Andric "amdgpu-kernarg-preload-count", 335f757f3fSDimitry Andric cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0)); 345f757f3fSDimitry Andric 3581ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS, 36349cc55cSDimitry Andric 3781ad6265SDimitry Andric enum ImplicitArgumentPositions { 3881ad6265SDimitry Andric #include "AMDGPUAttributes.def" 3981ad6265SDimitry Andric LAST_ARG_POS 40349cc55cSDimitry Andric }; 41349cc55cSDimitry Andric 4281ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS, 4381ad6265SDimitry Andric 4481ad6265SDimitry Andric enum ImplicitArgumentMask { 4581ad6265SDimitry Andric NOT_IMPLICIT_INPUT = 0, 4681ad6265SDimitry Andric #include "AMDGPUAttributes.def" 4781ad6265SDimitry Andric ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1 4881ad6265SDimitry Andric }; 4981ad6265SDimitry Andric 5081ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str}, 51349cc55cSDimitry Andric static constexpr std::pair<ImplicitArgumentMask, 52349cc55cSDimitry Andric StringLiteral> ImplicitAttrs[] = { 5381ad6265SDimitry Andric #include "AMDGPUAttributes.def" 54349cc55cSDimitry Andric }; 55fe6060f1SDimitry Andric 56fe6060f1SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always 57fe6060f1SDimitry Andric // initialized. 58fe6060f1SDimitry Andric // 59fe6060f1SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup 60fe6060f1SDimitry Andric // size is 1 for y/z. 61349cc55cSDimitry Andric static ImplicitArgumentMask 6281ad6265SDimitry Andric intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, 6306c3fb27SDimitry Andric bool HasApertureRegs, bool SupportsGetDoorBellID, 6406c3fb27SDimitry Andric unsigned CodeObjectVersion) { 65fe6060f1SDimitry Andric switch (ID) { 66fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 67fe6060f1SDimitry Andric NonKernelOnly = true; 68349cc55cSDimitry Andric return WORKITEM_ID_X; 69fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_x: 70fe6060f1SDimitry Andric NonKernelOnly = true; 71349cc55cSDimitry Andric return WORKGROUP_ID_X; 72fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 73fe6060f1SDimitry Andric case Intrinsic::r600_read_tidig_y: 74349cc55cSDimitry Andric return WORKITEM_ID_Y; 75fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: 76fe6060f1SDimitry Andric case Intrinsic::r600_read_tidig_z: 77349cc55cSDimitry Andric return WORKITEM_ID_Z; 78fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_y: 79fe6060f1SDimitry Andric case Intrinsic::r600_read_tgid_y: 80349cc55cSDimitry Andric return WORKGROUP_ID_Y; 81fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_z: 82fe6060f1SDimitry Andric case Intrinsic::r600_read_tgid_z: 83349cc55cSDimitry Andric return WORKGROUP_ID_Z; 84fcaf7f86SDimitry Andric case Intrinsic::amdgcn_lds_kernel_id: 85fcaf7f86SDimitry Andric return LDS_KERNEL_ID; 86fe6060f1SDimitry Andric case Intrinsic::amdgcn_dispatch_ptr: 87349cc55cSDimitry Andric return DISPATCH_PTR; 88fe6060f1SDimitry Andric case Intrinsic::amdgcn_dispatch_id: 89349cc55cSDimitry Andric return DISPATCH_ID; 90fe6060f1SDimitry Andric case Intrinsic::amdgcn_implicitarg_ptr: 91349cc55cSDimitry Andric return IMPLICIT_ARG_PTR; 9281ad6265SDimitry Andric // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access 9381ad6265SDimitry Andric // queue_ptr. 94fe6060f1SDimitry Andric case Intrinsic::amdgcn_queue_ptr: 9506c3fb27SDimitry Andric NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5); 9681ad6265SDimitry Andric return QUEUE_PTR; 97fe6060f1SDimitry Andric case Intrinsic::amdgcn_is_shared: 98fe6060f1SDimitry Andric case Intrinsic::amdgcn_is_private: 9981ad6265SDimitry Andric if (HasApertureRegs) 10081ad6265SDimitry Andric return NOT_IMPLICIT_INPUT; 10181ad6265SDimitry Andric // Under V5, we need implicitarg_ptr + offsets to access private_base or 10281ad6265SDimitry Andric // shared_base. For pre-V5, however, need to access them through queue_ptr + 10381ad6265SDimitry Andric // offsets. 10406c3fb27SDimitry Andric return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR : 10506c3fb27SDimitry Andric QUEUE_PTR; 106fe6060f1SDimitry Andric case Intrinsic::trap: 10781ad6265SDimitry Andric if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4. 10806c3fb27SDimitry Andric return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT : 10906c3fb27SDimitry Andric QUEUE_PTR; 11006c3fb27SDimitry Andric NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5); 111349cc55cSDimitry Andric return QUEUE_PTR; 112fe6060f1SDimitry Andric default: 113349cc55cSDimitry Andric return NOT_IMPLICIT_INPUT; 114fe6060f1SDimitry Andric } 115fe6060f1SDimitry Andric } 116fe6060f1SDimitry Andric 117fe6060f1SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) { 118fe6060f1SDimitry Andric return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 119fe6060f1SDimitry Andric } 120fe6060f1SDimitry Andric 121fe6060f1SDimitry Andric static bool isDSAddress(const Constant *C) { 122fe6060f1SDimitry Andric const GlobalValue *GV = dyn_cast<GlobalValue>(C); 123fe6060f1SDimitry Andric if (!GV) 124fe6060f1SDimitry Andric return false; 125fe6060f1SDimitry Andric unsigned AS = GV->getAddressSpace(); 126fe6060f1SDimitry Andric return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; 127fe6060f1SDimitry Andric } 128fe6060f1SDimitry Andric 1290eae32dcSDimitry Andric /// Returns true if the function requires the implicit argument be passed 1300eae32dcSDimitry Andric /// regardless of the function contents. 13181ad6265SDimitry Andric static bool funcRequiresHostcallPtr(const Function &F) { 1320eae32dcSDimitry Andric // Sanitizers require the hostcall buffer passed in the implicit arguments. 1330eae32dcSDimitry Andric return F.hasFnAttribute(Attribute::SanitizeAddress) || 1340eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeThread) || 1350eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeMemory) || 1360eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeHWAddress) || 1370eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeMemTag); 1380eae32dcSDimitry Andric } 1390eae32dcSDimitry Andric 140349cc55cSDimitry Andric namespace { 141fe6060f1SDimitry Andric class AMDGPUInformationCache : public InformationCache { 142fe6060f1SDimitry Andric public: 143fe6060f1SDimitry Andric AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, 144fe6060f1SDimitry Andric BumpPtrAllocator &Allocator, 145fe6060f1SDimitry Andric SetVector<Function *> *CGSCC, TargetMachine &TM) 14606c3fb27SDimitry Andric : InformationCache(M, AG, Allocator, CGSCC), TM(TM), 1477a6dacacSDimitry Andric CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {} 14806c3fb27SDimitry Andric 149fe6060f1SDimitry Andric TargetMachine &TM; 150fe6060f1SDimitry Andric 151fe6060f1SDimitry Andric enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; 152fe6060f1SDimitry Andric 153fe6060f1SDimitry Andric /// Check if the subtarget has aperture regs. 154fe6060f1SDimitry Andric bool hasApertureRegs(Function &F) { 155fe6060f1SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 156fe6060f1SDimitry Andric return ST.hasApertureRegs(); 157fe6060f1SDimitry Andric } 158fe6060f1SDimitry Andric 15981ad6265SDimitry Andric /// Check if the subtarget supports GetDoorbellID. 16081ad6265SDimitry Andric bool supportsGetDoorbellID(Function &F) { 16181ad6265SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 16281ad6265SDimitry Andric return ST.supportsGetDoorbellID(); 16381ad6265SDimitry Andric } 16481ad6265SDimitry Andric 165349cc55cSDimitry Andric std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) { 166349cc55cSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 167349cc55cSDimitry Andric return ST.getFlatWorkGroupSizes(F); 168349cc55cSDimitry Andric } 169349cc55cSDimitry Andric 170349cc55cSDimitry Andric std::pair<unsigned, unsigned> 171349cc55cSDimitry Andric getMaximumFlatWorkGroupRange(const Function &F) { 172349cc55cSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 173349cc55cSDimitry Andric return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; 174349cc55cSDimitry Andric } 175349cc55cSDimitry Andric 17606c3fb27SDimitry Andric /// Get code object version. 17706c3fb27SDimitry Andric unsigned getCodeObjectVersion() const { 17806c3fb27SDimitry Andric return CodeObjectVersion; 17906c3fb27SDimitry Andric } 18006c3fb27SDimitry Andric 18106c3fb27SDimitry Andric /// Get the effective value of "amdgpu-waves-per-eu" for the function, 18206c3fb27SDimitry Andric /// accounting for the interaction with the passed value to use for 18306c3fb27SDimitry Andric /// "amdgpu-flat-work-group-size". 18406c3fb27SDimitry Andric std::pair<unsigned, unsigned> 18506c3fb27SDimitry Andric getWavesPerEU(const Function &F, 18606c3fb27SDimitry Andric std::pair<unsigned, unsigned> FlatWorkGroupSize) { 18706c3fb27SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 18806c3fb27SDimitry Andric return ST.getWavesPerEU(F, FlatWorkGroupSize); 18906c3fb27SDimitry Andric } 19006c3fb27SDimitry Andric 19106c3fb27SDimitry Andric std::pair<unsigned, unsigned> 19206c3fb27SDimitry Andric getEffectiveWavesPerEU(const Function &F, 19306c3fb27SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU, 19406c3fb27SDimitry Andric std::pair<unsigned, unsigned> FlatWorkGroupSize) { 19506c3fb27SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 19606c3fb27SDimitry Andric return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize); 19706c3fb27SDimitry Andric } 19806c3fb27SDimitry Andric 19906c3fb27SDimitry Andric unsigned getMaxWavesPerEU(const Function &F) { 20006c3fb27SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 20106c3fb27SDimitry Andric return ST.getMaxWavesPerEU(); 20206c3fb27SDimitry Andric } 20306c3fb27SDimitry Andric 204fe6060f1SDimitry Andric private: 20581ad6265SDimitry Andric /// Check if the ConstantExpr \p CE requires the queue pointer. 206fe6060f1SDimitry Andric static bool visitConstExpr(const ConstantExpr *CE) { 207fe6060f1SDimitry Andric if (CE->getOpcode() == Instruction::AddrSpaceCast) { 208fe6060f1SDimitry Andric unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 209fe6060f1SDimitry Andric return castRequiresQueuePtr(SrcAS); 210fe6060f1SDimitry Andric } 211fe6060f1SDimitry Andric return false; 212fe6060f1SDimitry Andric } 213fe6060f1SDimitry Andric 214fe6060f1SDimitry Andric /// Get the constant access bitmap for \p C. 21506c3fb27SDimitry Andric uint8_t getConstantAccess(const Constant *C, 21606c3fb27SDimitry Andric SmallPtrSetImpl<const Constant *> &Visited) { 217fe6060f1SDimitry Andric auto It = ConstantStatus.find(C); 218fe6060f1SDimitry Andric if (It != ConstantStatus.end()) 219fe6060f1SDimitry Andric return It->second; 220fe6060f1SDimitry Andric 221fe6060f1SDimitry Andric uint8_t Result = 0; 222fe6060f1SDimitry Andric if (isDSAddress(C)) 223fe6060f1SDimitry Andric Result = DS_GLOBAL; 224fe6060f1SDimitry Andric 225fe6060f1SDimitry Andric if (const auto *CE = dyn_cast<ConstantExpr>(C)) 226fe6060f1SDimitry Andric if (visitConstExpr(CE)) 227fe6060f1SDimitry Andric Result |= ADDR_SPACE_CAST; 228fe6060f1SDimitry Andric 229fe6060f1SDimitry Andric for (const Use &U : C->operands()) { 230fe6060f1SDimitry Andric const auto *OpC = dyn_cast<Constant>(U); 23106c3fb27SDimitry Andric if (!OpC || !Visited.insert(OpC).second) 232fe6060f1SDimitry Andric continue; 233fe6060f1SDimitry Andric 23406c3fb27SDimitry Andric Result |= getConstantAccess(OpC, Visited); 235fe6060f1SDimitry Andric } 236fe6060f1SDimitry Andric return Result; 237fe6060f1SDimitry Andric } 238fe6060f1SDimitry Andric 239fe6060f1SDimitry Andric public: 24081ad6265SDimitry Andric /// Returns true if \p Fn needs the queue pointer because of \p C. 241fe6060f1SDimitry Andric bool needsQueuePtr(const Constant *C, Function &Fn) { 242fe6060f1SDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()); 243fe6060f1SDimitry Andric bool HasAperture = hasApertureRegs(Fn); 244fe6060f1SDimitry Andric 245fe6060f1SDimitry Andric // No need to explore the constants. 246fe6060f1SDimitry Andric if (!IsNonEntryFunc && HasAperture) 247fe6060f1SDimitry Andric return false; 248fe6060f1SDimitry Andric 24906c3fb27SDimitry Andric SmallPtrSet<const Constant *, 8> Visited; 25006c3fb27SDimitry Andric uint8_t Access = getConstantAccess(C, Visited); 251fe6060f1SDimitry Andric 252fe6060f1SDimitry Andric // We need to trap on DS globals in non-entry functions. 253fe6060f1SDimitry Andric if (IsNonEntryFunc && (Access & DS_GLOBAL)) 254fe6060f1SDimitry Andric return true; 255fe6060f1SDimitry Andric 256fe6060f1SDimitry Andric return !HasAperture && (Access & ADDR_SPACE_CAST); 257fe6060f1SDimitry Andric } 258fe6060f1SDimitry Andric 259fe6060f1SDimitry Andric private: 26081ad6265SDimitry Andric /// Used to determine if the Constant needs the queue pointer. 261fe6060f1SDimitry Andric DenseMap<const Constant *, uint8_t> ConstantStatus; 26206c3fb27SDimitry Andric const unsigned CodeObjectVersion; 263fe6060f1SDimitry Andric }; 264fe6060f1SDimitry Andric 265bdd1243dSDimitry Andric struct AAAMDAttributes 266bdd1243dSDimitry Andric : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>, 267bdd1243dSDimitry Andric AbstractAttribute> { 268bdd1243dSDimitry Andric using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>, 269349cc55cSDimitry Andric AbstractAttribute>; 270349cc55cSDimitry Andric 271fe6060f1SDimitry Andric AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 272fe6060f1SDimitry Andric 273fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 274fe6060f1SDimitry Andric static AAAMDAttributes &createForPosition(const IRPosition &IRP, 275fe6060f1SDimitry Andric Attributor &A); 276fe6060f1SDimitry Andric 277fe6060f1SDimitry Andric /// See AbstractAttribute::getName(). 278fe6060f1SDimitry Andric const std::string getName() const override { return "AAAMDAttributes"; } 279fe6060f1SDimitry Andric 280fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr(). 281fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 282fe6060f1SDimitry Andric 283fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 284fe6060f1SDimitry Andric /// AAAMDAttributes. 285fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 286fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 287fe6060f1SDimitry Andric } 288fe6060f1SDimitry Andric 289fe6060f1SDimitry Andric /// Unique ID (due to the unique address) 290fe6060f1SDimitry Andric static const char ID; 291fe6060f1SDimitry Andric }; 292fe6060f1SDimitry Andric const char AAAMDAttributes::ID = 0; 293fe6060f1SDimitry Andric 294349cc55cSDimitry Andric struct AAUniformWorkGroupSize 295fe6060f1SDimitry Andric : public StateWrapper<BooleanState, AbstractAttribute> { 296fe6060f1SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>; 297349cc55cSDimitry Andric AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 298fe6060f1SDimitry Andric 299fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 300349cc55cSDimitry Andric static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP, 301fe6060f1SDimitry Andric Attributor &A); 302fe6060f1SDimitry Andric 303fe6060f1SDimitry Andric /// See AbstractAttribute::getName(). 304349cc55cSDimitry Andric const std::string getName() const override { 305349cc55cSDimitry Andric return "AAUniformWorkGroupSize"; 306349cc55cSDimitry Andric } 307fe6060f1SDimitry Andric 308fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr(). 309fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 310fe6060f1SDimitry Andric 311fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 312fe6060f1SDimitry Andric /// AAAMDAttributes. 313fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 314fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 315fe6060f1SDimitry Andric } 316fe6060f1SDimitry Andric 317fe6060f1SDimitry Andric /// Unique ID (due to the unique address) 318fe6060f1SDimitry Andric static const char ID; 319fe6060f1SDimitry Andric }; 320349cc55cSDimitry Andric const char AAUniformWorkGroupSize::ID = 0; 321fe6060f1SDimitry Andric 322349cc55cSDimitry Andric struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { 323349cc55cSDimitry Andric AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A) 324349cc55cSDimitry Andric : AAUniformWorkGroupSize(IRP, A) {} 325fe6060f1SDimitry Andric 326fe6060f1SDimitry Andric void initialize(Attributor &A) override { 327fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 328fe6060f1SDimitry Andric CallingConv::ID CC = F->getCallingConv(); 329fe6060f1SDimitry Andric 330fe6060f1SDimitry Andric if (CC != CallingConv::AMDGPU_KERNEL) 331fe6060f1SDimitry Andric return; 332fe6060f1SDimitry Andric 333fe6060f1SDimitry Andric bool InitialValue = false; 334fe6060f1SDimitry Andric if (F->hasFnAttribute("uniform-work-group-size")) 335*0fca6ea1SDimitry Andric InitialValue = 336*0fca6ea1SDimitry Andric F->getFnAttribute("uniform-work-group-size").getValueAsString() == 337*0fca6ea1SDimitry Andric "true"; 338fe6060f1SDimitry Andric 339fe6060f1SDimitry Andric if (InitialValue) 340fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 341fe6060f1SDimitry Andric else 342fe6060f1SDimitry Andric indicatePessimisticFixpoint(); 343fe6060f1SDimitry Andric } 344fe6060f1SDimitry Andric 345fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 346fe6060f1SDimitry Andric ChangeStatus Change = ChangeStatus::UNCHANGED; 347fe6060f1SDimitry Andric 348fe6060f1SDimitry Andric auto CheckCallSite = [&](AbstractCallSite CS) { 349fe6060f1SDimitry Andric Function *Caller = CS.getInstruction()->getFunction(); 350349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName() 351fe6060f1SDimitry Andric << "->" << getAssociatedFunction()->getName() << "\n"); 352fe6060f1SDimitry Andric 35306c3fb27SDimitry Andric const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>( 354fe6060f1SDimitry Andric *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 35506c3fb27SDimitry Andric if (!CallerInfo) 35606c3fb27SDimitry Andric return false; 357fe6060f1SDimitry Andric 358fe6060f1SDimitry Andric Change = Change | clampStateAndIndicateChange(this->getState(), 35906c3fb27SDimitry Andric CallerInfo->getState()); 360fe6060f1SDimitry Andric 361fe6060f1SDimitry Andric return true; 362fe6060f1SDimitry Andric }; 363fe6060f1SDimitry Andric 364fe6060f1SDimitry Andric bool AllCallSitesKnown = true; 365fe6060f1SDimitry Andric if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 3660eae32dcSDimitry Andric return indicatePessimisticFixpoint(); 367fe6060f1SDimitry Andric 368fe6060f1SDimitry Andric return Change; 369fe6060f1SDimitry Andric } 370fe6060f1SDimitry Andric 371fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 372fe6060f1SDimitry Andric SmallVector<Attribute, 8> AttrList; 373fe6060f1SDimitry Andric LLVMContext &Ctx = getAssociatedFunction()->getContext(); 374fe6060f1SDimitry Andric 375fe6060f1SDimitry Andric AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size", 376fe6060f1SDimitry Andric getAssumed() ? "true" : "false")); 37706c3fb27SDimitry Andric return A.manifestAttrs(getIRPosition(), AttrList, 378fe6060f1SDimitry Andric /* ForceReplace */ true); 379fe6060f1SDimitry Andric } 380fe6060f1SDimitry Andric 381fe6060f1SDimitry Andric bool isValidState() const override { 382fe6060f1SDimitry Andric // This state is always valid, even when the state is false. 383fe6060f1SDimitry Andric return true; 384fe6060f1SDimitry Andric } 385fe6060f1SDimitry Andric 38606c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 387fe6060f1SDimitry Andric return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]"; 388fe6060f1SDimitry Andric } 389fe6060f1SDimitry Andric 390fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics() 391fe6060f1SDimitry Andric void trackStatistics() const override {} 392fe6060f1SDimitry Andric }; 393fe6060f1SDimitry Andric 394349cc55cSDimitry Andric AAUniformWorkGroupSize & 395349cc55cSDimitry Andric AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP, 396fe6060f1SDimitry Andric Attributor &A) { 397fe6060f1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 398349cc55cSDimitry Andric return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A); 399349cc55cSDimitry Andric llvm_unreachable( 400349cc55cSDimitry Andric "AAUniformWorkGroupSize is only valid for function position"); 401fe6060f1SDimitry Andric } 402fe6060f1SDimitry Andric 403fe6060f1SDimitry Andric struct AAAMDAttributesFunction : public AAAMDAttributes { 404fe6060f1SDimitry Andric AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) 405fe6060f1SDimitry Andric : AAAMDAttributes(IRP, A) {} 406fe6060f1SDimitry Andric 407fe6060f1SDimitry Andric void initialize(Attributor &A) override { 408fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 4090eae32dcSDimitry Andric 4100eae32dcSDimitry Andric // If the function requires the implicit arg pointer due to sanitizers, 4110eae32dcSDimitry Andric // assume it's needed even if explicitly marked as not requiring it. 41281ad6265SDimitry Andric const bool NeedsHostcall = funcRequiresHostcallPtr(*F); 41381ad6265SDimitry Andric if (NeedsHostcall) { 4140eae32dcSDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 41581ad6265SDimitry Andric removeAssumedBits(HOSTCALL_PTR); 41681ad6265SDimitry Andric } 4170eae32dcSDimitry Andric 418349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) { 41981ad6265SDimitry Andric if (NeedsHostcall && 42081ad6265SDimitry Andric (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) 4210eae32dcSDimitry Andric continue; 4220eae32dcSDimitry Andric 423349cc55cSDimitry Andric if (F->hasFnAttribute(Attr.second)) 424349cc55cSDimitry Andric addKnownBits(Attr.first); 425fe6060f1SDimitry Andric } 426fe6060f1SDimitry Andric 427349cc55cSDimitry Andric if (F->isDeclaration()) 428349cc55cSDimitry Andric return; 429349cc55cSDimitry Andric 430fe6060f1SDimitry Andric // Ignore functions with graphics calling conventions, these are currently 431fe6060f1SDimitry Andric // not allowed to have kernel arguments. 432fe6060f1SDimitry Andric if (AMDGPU::isGraphics(F->getCallingConv())) { 433fe6060f1SDimitry Andric indicatePessimisticFixpoint(); 434fe6060f1SDimitry Andric return; 435fe6060f1SDimitry Andric } 436fe6060f1SDimitry Andric } 437fe6060f1SDimitry Andric 438fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 439fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 440349cc55cSDimitry Andric // The current assumed state used to determine a change. 441349cc55cSDimitry Andric auto OrigAssumed = getAssumed(); 442fe6060f1SDimitry Andric 443fe6060f1SDimitry Andric // Check for Intrinsics and propagate attributes. 44406c3fb27SDimitry Andric const AACallEdges *AAEdges = A.getAAFor<AACallEdges>( 445fe6060f1SDimitry Andric *this, this->getIRPosition(), DepClassTy::REQUIRED); 44606c3fb27SDimitry Andric if (!AAEdges || AAEdges->hasNonAsmUnknownCallee()) 447349cc55cSDimitry Andric return indicatePessimisticFixpoint(); 448fe6060f1SDimitry Andric 449349cc55cSDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 450fe6060f1SDimitry Andric 45181ad6265SDimitry Andric bool NeedsImplicit = false; 45281ad6265SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 45381ad6265SDimitry Andric bool HasApertureRegs = InfoCache.hasApertureRegs(*F); 45481ad6265SDimitry Andric bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F); 45506c3fb27SDimitry Andric unsigned COV = InfoCache.getCodeObjectVersion(); 456349cc55cSDimitry Andric 45706c3fb27SDimitry Andric for (Function *Callee : AAEdges->getOptimisticEdges()) { 458fe6060f1SDimitry Andric Intrinsic::ID IID = Callee->getIntrinsicID(); 459349cc55cSDimitry Andric if (IID == Intrinsic::not_intrinsic) { 46006c3fb27SDimitry Andric const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>( 461349cc55cSDimitry Andric *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); 46206c3fb27SDimitry Andric if (!AAAMD) 46306c3fb27SDimitry Andric return indicatePessimisticFixpoint(); 46406c3fb27SDimitry Andric *this &= *AAAMD; 465fe6060f1SDimitry Andric continue; 466fe6060f1SDimitry Andric } 467fe6060f1SDimitry Andric 468fe6060f1SDimitry Andric bool NonKernelOnly = false; 469349cc55cSDimitry Andric ImplicitArgumentMask AttrMask = 47081ad6265SDimitry Andric intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit, 47106c3fb27SDimitry Andric HasApertureRegs, SupportsGetDoorbellID, COV); 472349cc55cSDimitry Andric if (AttrMask != NOT_IMPLICIT_INPUT) { 473349cc55cSDimitry Andric if ((IsNonEntryFunc || !NonKernelOnly)) 474349cc55cSDimitry Andric removeAssumedBits(AttrMask); 475fe6060f1SDimitry Andric } 476fe6060f1SDimitry Andric } 477fe6060f1SDimitry Andric 47881ad6265SDimitry Andric // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base. 47981ad6265SDimitry Andric if (NeedsImplicit) 48081ad6265SDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 48181ad6265SDimitry Andric 48281ad6265SDimitry Andric if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) { 48381ad6265SDimitry Andric // Under V5, we need implicitarg_ptr + offsets to access private_base or 48481ad6265SDimitry Andric // shared_base. We do not actually need queue_ptr. 48506c3fb27SDimitry Andric if (COV >= 5) 48681ad6265SDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 48781ad6265SDimitry Andric else 488349cc55cSDimitry Andric removeAssumedBits(QUEUE_PTR); 489fe6060f1SDimitry Andric } 490fe6060f1SDimitry Andric 49106c3fb27SDimitry Andric if (funcRetrievesMultigridSyncArg(A, COV)) { 49281ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && 49381ad6265SDimitry Andric "multigrid_sync_arg needs implicitarg_ptr"); 49481ad6265SDimitry Andric removeAssumedBits(MULTIGRID_SYNC_ARG); 495349cc55cSDimitry Andric } 496fe6060f1SDimitry Andric 49706c3fb27SDimitry Andric if (funcRetrievesHostcallPtr(A, COV)) { 49881ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr"); 49981ad6265SDimitry Andric removeAssumedBits(HOSTCALL_PTR); 50081ad6265SDimitry Andric } 50181ad6265SDimitry Andric 50206c3fb27SDimitry Andric if (funcRetrievesHeapPtr(A, COV)) { 50381ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr"); 50481ad6265SDimitry Andric removeAssumedBits(HEAP_PTR); 50581ad6265SDimitry Andric } 50681ad6265SDimitry Andric 50706c3fb27SDimitry Andric if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) { 50881ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr"); 509349cc55cSDimitry Andric removeAssumedBits(QUEUE_PTR); 510fe6060f1SDimitry Andric } 511fe6060f1SDimitry Andric 512fcaf7f86SDimitry Andric if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) { 513fcaf7f86SDimitry Andric removeAssumedBits(LDS_KERNEL_ID); 514fcaf7f86SDimitry Andric } 515fcaf7f86SDimitry Andric 51606c3fb27SDimitry Andric if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV)) 517bdd1243dSDimitry Andric removeAssumedBits(DEFAULT_QUEUE); 518bdd1243dSDimitry Andric 51906c3fb27SDimitry Andric if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV)) 520bdd1243dSDimitry Andric removeAssumedBits(COMPLETION_ACTION); 521bdd1243dSDimitry Andric 52281ad6265SDimitry Andric return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED 52381ad6265SDimitry Andric : ChangeStatus::UNCHANGED; 524fe6060f1SDimitry Andric } 525fe6060f1SDimitry Andric 526fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 527fe6060f1SDimitry Andric SmallVector<Attribute, 8> AttrList; 528fe6060f1SDimitry Andric LLVMContext &Ctx = getAssociatedFunction()->getContext(); 529fe6060f1SDimitry Andric 530349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) { 531349cc55cSDimitry Andric if (isKnown(Attr.first)) 532349cc55cSDimitry Andric AttrList.push_back(Attribute::get(Ctx, Attr.second)); 533349cc55cSDimitry Andric } 534fe6060f1SDimitry Andric 53506c3fb27SDimitry Andric return A.manifestAttrs(getIRPosition(), AttrList, 536fe6060f1SDimitry Andric /* ForceReplace */ true); 537fe6060f1SDimitry Andric } 538fe6060f1SDimitry Andric 53906c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 540349cc55cSDimitry Andric std::string Str; 541349cc55cSDimitry Andric raw_string_ostream OS(Str); 542349cc55cSDimitry Andric OS << "AMDInfo["; 543349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) 54406c3fb27SDimitry Andric if (isAssumed(Attr.first)) 545349cc55cSDimitry Andric OS << ' ' << Attr.second; 546349cc55cSDimitry Andric OS << " ]"; 547349cc55cSDimitry Andric return OS.str(); 548fe6060f1SDimitry Andric } 549fe6060f1SDimitry Andric 550fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics() 551fe6060f1SDimitry Andric void trackStatistics() const override {} 55281ad6265SDimitry Andric 55381ad6265SDimitry Andric private: 55481ad6265SDimitry Andric bool checkForQueuePtr(Attributor &A) { 55581ad6265SDimitry Andric Function *F = getAssociatedFunction(); 55681ad6265SDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 55781ad6265SDimitry Andric 55881ad6265SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 55981ad6265SDimitry Andric 56081ad6265SDimitry Andric bool NeedsQueuePtr = false; 56181ad6265SDimitry Andric 56281ad6265SDimitry Andric auto CheckAddrSpaceCasts = [&](Instruction &I) { 56381ad6265SDimitry Andric unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace(); 56481ad6265SDimitry Andric if (castRequiresQueuePtr(SrcAS)) { 56581ad6265SDimitry Andric NeedsQueuePtr = true; 56681ad6265SDimitry Andric return false; 56781ad6265SDimitry Andric } 56881ad6265SDimitry Andric return true; 56981ad6265SDimitry Andric }; 57081ad6265SDimitry Andric 57181ad6265SDimitry Andric bool HasApertureRegs = InfoCache.hasApertureRegs(*F); 57281ad6265SDimitry Andric 57381ad6265SDimitry Andric // `checkForAllInstructions` is much more cheaper than going through all 57481ad6265SDimitry Andric // instructions, try it first. 57581ad6265SDimitry Andric 57681ad6265SDimitry Andric // The queue pointer is not needed if aperture regs is present. 57781ad6265SDimitry Andric if (!HasApertureRegs) { 57881ad6265SDimitry Andric bool UsedAssumedInformation = false; 57981ad6265SDimitry Andric A.checkForAllInstructions(CheckAddrSpaceCasts, *this, 58081ad6265SDimitry Andric {Instruction::AddrSpaceCast}, 58181ad6265SDimitry Andric UsedAssumedInformation); 58281ad6265SDimitry Andric } 58381ad6265SDimitry Andric 58481ad6265SDimitry Andric // If we found that we need the queue pointer, nothing else to do. 58581ad6265SDimitry Andric if (NeedsQueuePtr) 58681ad6265SDimitry Andric return true; 58781ad6265SDimitry Andric 58881ad6265SDimitry Andric if (!IsNonEntryFunc && HasApertureRegs) 58981ad6265SDimitry Andric return false; 59081ad6265SDimitry Andric 59181ad6265SDimitry Andric for (BasicBlock &BB : *F) { 59281ad6265SDimitry Andric for (Instruction &I : BB) { 59381ad6265SDimitry Andric for (const Use &U : I.operands()) { 59481ad6265SDimitry Andric if (const auto *C = dyn_cast<Constant>(U)) { 59581ad6265SDimitry Andric if (InfoCache.needsQueuePtr(C, *F)) 59681ad6265SDimitry Andric return true; 59781ad6265SDimitry Andric } 59881ad6265SDimitry Andric } 59981ad6265SDimitry Andric } 60081ad6265SDimitry Andric } 60181ad6265SDimitry Andric 60281ad6265SDimitry Andric return false; 60381ad6265SDimitry Andric } 60481ad6265SDimitry Andric 60506c3fb27SDimitry Andric bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) { 60606c3fb27SDimitry Andric auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV); 607bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 608bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 60981ad6265SDimitry Andric } 61081ad6265SDimitry Andric 61106c3fb27SDimitry Andric bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) { 61206c3fb27SDimitry Andric auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV); 613bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 614bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 615bdd1243dSDimitry Andric } 616bdd1243dSDimitry Andric 61706c3fb27SDimitry Andric bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) { 61806c3fb27SDimitry Andric auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV); 619bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 620bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 621bdd1243dSDimitry Andric } 622bdd1243dSDimitry Andric 62306c3fb27SDimitry Andric bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) { 62406c3fb27SDimitry Andric auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV); 625bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 626bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 62781ad6265SDimitry Andric } 62881ad6265SDimitry Andric 62906c3fb27SDimitry Andric bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) { 63006c3fb27SDimitry Andric if (COV < 5) 63181ad6265SDimitry Andric return false; 632bdd1243dSDimitry Andric AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8); 633bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 63481ad6265SDimitry Andric } 63581ad6265SDimitry Andric 63606c3fb27SDimitry Andric bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) { 63706c3fb27SDimitry Andric if (COV < 5) 63881ad6265SDimitry Andric return false; 639bdd1243dSDimitry Andric AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8); 640bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 64181ad6265SDimitry Andric } 64281ad6265SDimitry Andric 643bdd1243dSDimitry Andric bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) { 64481ad6265SDimitry Andric // Check if this is a call to the implicitarg_ptr builtin and it 64581ad6265SDimitry Andric // is used to retrieve the hostcall pointer. The implicit arg for 64681ad6265SDimitry Andric // hostcall is not used only if every use of the implicitarg_ptr 64781ad6265SDimitry Andric // is a load that clearly does not retrieve any byte of the 64881ad6265SDimitry Andric // hostcall pointer. We check this by tracing all the uses of the 64981ad6265SDimitry Andric // initial call to the implicitarg_ptr intrinsic. 65081ad6265SDimitry Andric auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) { 65181ad6265SDimitry Andric auto &Call = cast<CallBase>(I); 65281ad6265SDimitry Andric if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr) 65381ad6265SDimitry Andric return true; 65481ad6265SDimitry Andric 65506c3fb27SDimitry Andric const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>( 65681ad6265SDimitry Andric *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); 65706c3fb27SDimitry Andric if (!PointerInfoAA) 65806c3fb27SDimitry Andric return false; 65981ad6265SDimitry Andric 66006c3fb27SDimitry Andric return PointerInfoAA->forallInterferingAccesses( 661bdd1243dSDimitry Andric Range, [](const AAPointerInfo::Access &Acc, bool IsExact) { 66281ad6265SDimitry Andric return Acc.getRemoteInst()->isDroppable(); 66381ad6265SDimitry Andric }); 66481ad6265SDimitry Andric }; 66581ad6265SDimitry Andric 66681ad6265SDimitry Andric bool UsedAssumedInformation = false; 66781ad6265SDimitry Andric return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this, 66881ad6265SDimitry Andric UsedAssumedInformation); 66981ad6265SDimitry Andric } 670fcaf7f86SDimitry Andric 671fcaf7f86SDimitry Andric bool funcRetrievesLDSKernelId(Attributor &A) { 672fcaf7f86SDimitry Andric auto DoesNotRetrieve = [&](Instruction &I) { 673fcaf7f86SDimitry Andric auto &Call = cast<CallBase>(I); 674fcaf7f86SDimitry Andric return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id; 675fcaf7f86SDimitry Andric }; 676fcaf7f86SDimitry Andric bool UsedAssumedInformation = false; 677fcaf7f86SDimitry Andric return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this, 678fcaf7f86SDimitry Andric UsedAssumedInformation); 679fcaf7f86SDimitry Andric } 680fe6060f1SDimitry Andric }; 681fe6060f1SDimitry Andric 682fe6060f1SDimitry Andric AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, 683fe6060f1SDimitry Andric Attributor &A) { 684fe6060f1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 685fe6060f1SDimitry Andric return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); 686fe6060f1SDimitry Andric llvm_unreachable("AAAMDAttributes is only valid for function position"); 687fe6060f1SDimitry Andric } 688fe6060f1SDimitry Andric 68906c3fb27SDimitry Andric /// Base class to derive different size ranges. 69006c3fb27SDimitry Andric struct AAAMDSizeRangeAttribute 691349cc55cSDimitry Andric : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> { 692349cc55cSDimitry Andric using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>; 693349cc55cSDimitry Andric 69406c3fb27SDimitry Andric StringRef AttrName; 69506c3fb27SDimitry Andric 69606c3fb27SDimitry Andric AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A, 69706c3fb27SDimitry Andric StringRef AttrName) 69806c3fb27SDimitry Andric : Base(IRP, 32), AttrName(AttrName) {} 69906c3fb27SDimitry Andric 70006c3fb27SDimitry Andric /// See AbstractAttribute::trackStatistics() 70106c3fb27SDimitry Andric void trackStatistics() const override {} 70206c3fb27SDimitry Andric 70306c3fb27SDimitry Andric template <class AttributeImpl> 70406c3fb27SDimitry Andric ChangeStatus updateImplImpl(Attributor &A) { 70506c3fb27SDimitry Andric ChangeStatus Change = ChangeStatus::UNCHANGED; 70606c3fb27SDimitry Andric 70706c3fb27SDimitry Andric auto CheckCallSite = [&](AbstractCallSite CS) { 70806c3fb27SDimitry Andric Function *Caller = CS.getInstruction()->getFunction(); 70906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName() 71006c3fb27SDimitry Andric << "->" << getAssociatedFunction()->getName() << '\n'); 71106c3fb27SDimitry Andric 71206c3fb27SDimitry Andric const auto *CallerInfo = A.getAAFor<AttributeImpl>( 71306c3fb27SDimitry Andric *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 71406c3fb27SDimitry Andric if (!CallerInfo) 71506c3fb27SDimitry Andric return false; 71606c3fb27SDimitry Andric 71706c3fb27SDimitry Andric Change |= 71806c3fb27SDimitry Andric clampStateAndIndicateChange(this->getState(), CallerInfo->getState()); 71906c3fb27SDimitry Andric 72006c3fb27SDimitry Andric return true; 72106c3fb27SDimitry Andric }; 72206c3fb27SDimitry Andric 72306c3fb27SDimitry Andric bool AllCallSitesKnown = true; 72406c3fb27SDimitry Andric if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 72506c3fb27SDimitry Andric return indicatePessimisticFixpoint(); 72606c3fb27SDimitry Andric 72706c3fb27SDimitry Andric return Change; 72806c3fb27SDimitry Andric } 72906c3fb27SDimitry Andric 73006c3fb27SDimitry Andric ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, 73106c3fb27SDimitry Andric unsigned Max) { 73206c3fb27SDimitry Andric // Don't add the attribute if it's the implied default. 73306c3fb27SDimitry Andric if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) 73406c3fb27SDimitry Andric return ChangeStatus::UNCHANGED; 73506c3fb27SDimitry Andric 73606c3fb27SDimitry Andric Function *F = getAssociatedFunction(); 73706c3fb27SDimitry Andric LLVMContext &Ctx = F->getContext(); 73806c3fb27SDimitry Andric SmallString<10> Buffer; 73906c3fb27SDimitry Andric raw_svector_ostream OS(Buffer); 74006c3fb27SDimitry Andric OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 74106c3fb27SDimitry Andric return A.manifestAttrs(getIRPosition(), 74206c3fb27SDimitry Andric {Attribute::get(Ctx, AttrName, OS.str())}, 74306c3fb27SDimitry Andric /* ForceReplace */ true); 74406c3fb27SDimitry Andric } 74506c3fb27SDimitry Andric 74606c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 74706c3fb27SDimitry Andric std::string Str; 74806c3fb27SDimitry Andric raw_string_ostream OS(Str); 74906c3fb27SDimitry Andric OS << getName() << '['; 75006c3fb27SDimitry Andric OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 75106c3fb27SDimitry Andric OS << ']'; 75206c3fb27SDimitry Andric return OS.str(); 75306c3fb27SDimitry Andric } 75406c3fb27SDimitry Andric }; 75506c3fb27SDimitry Andric 75606c3fb27SDimitry Andric /// Propagate amdgpu-flat-work-group-size attribute. 75706c3fb27SDimitry Andric struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { 75806c3fb27SDimitry Andric AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) 75906c3fb27SDimitry Andric : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {} 760349cc55cSDimitry Andric 761349cc55cSDimitry Andric void initialize(Attributor &A) override { 762349cc55cSDimitry Andric Function *F = getAssociatedFunction(); 763349cc55cSDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 764349cc55cSDimitry Andric unsigned MinGroupSize, MaxGroupSize; 765349cc55cSDimitry Andric std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); 766349cc55cSDimitry Andric intersectKnown( 767349cc55cSDimitry Andric ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); 7680eae32dcSDimitry Andric 7690eae32dcSDimitry Andric if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) 7700eae32dcSDimitry Andric indicatePessimisticFixpoint(); 771349cc55cSDimitry Andric } 772349cc55cSDimitry Andric 773349cc55cSDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 77406c3fb27SDimitry Andric return updateImplImpl<AAAMDFlatWorkGroupSize>(A); 775349cc55cSDimitry Andric } 776349cc55cSDimitry Andric 777349cc55cSDimitry Andric /// Create an abstract attribute view for the position \p IRP. 778349cc55cSDimitry Andric static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP, 779349cc55cSDimitry Andric Attributor &A); 780349cc55cSDimitry Andric 78106c3fb27SDimitry Andric ChangeStatus manifest(Attributor &A) override { 78206c3fb27SDimitry Andric Function *F = getAssociatedFunction(); 78306c3fb27SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 78406c3fb27SDimitry Andric unsigned Min, Max; 78506c3fb27SDimitry Andric std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); 78606c3fb27SDimitry Andric return emitAttributeIfNotDefault(A, Min, Max); 78706c3fb27SDimitry Andric } 78806c3fb27SDimitry Andric 789349cc55cSDimitry Andric /// See AbstractAttribute::getName() 790349cc55cSDimitry Andric const std::string getName() const override { 791349cc55cSDimitry Andric return "AAAMDFlatWorkGroupSize"; 792349cc55cSDimitry Andric } 793349cc55cSDimitry Andric 794349cc55cSDimitry Andric /// See AbstractAttribute::getIdAddr() 795349cc55cSDimitry Andric const char *getIdAddr() const override { return &ID; } 796349cc55cSDimitry Andric 797349cc55cSDimitry Andric /// This function should return true if the type of the \p AA is 798349cc55cSDimitry Andric /// AAAMDFlatWorkGroupSize 799349cc55cSDimitry Andric static bool classof(const AbstractAttribute *AA) { 800349cc55cSDimitry Andric return (AA->getIdAddr() == &ID); 801349cc55cSDimitry Andric } 802349cc55cSDimitry Andric 803349cc55cSDimitry Andric /// Unique ID (due to the unique address) 804349cc55cSDimitry Andric static const char ID; 805349cc55cSDimitry Andric }; 806349cc55cSDimitry Andric 807349cc55cSDimitry Andric const char AAAMDFlatWorkGroupSize::ID = 0; 808349cc55cSDimitry Andric 809349cc55cSDimitry Andric AAAMDFlatWorkGroupSize & 810349cc55cSDimitry Andric AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP, 811349cc55cSDimitry Andric Attributor &A) { 812349cc55cSDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 813349cc55cSDimitry Andric return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A); 814349cc55cSDimitry Andric llvm_unreachable( 815349cc55cSDimitry Andric "AAAMDFlatWorkGroupSize is only valid for function position"); 816349cc55cSDimitry Andric } 817349cc55cSDimitry Andric 81806c3fb27SDimitry Andric /// Propagate amdgpu-waves-per-eu attribute. 81906c3fb27SDimitry Andric struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { 82006c3fb27SDimitry Andric AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) 82106c3fb27SDimitry Andric : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} 82206c3fb27SDimitry Andric 82306c3fb27SDimitry Andric bool isValidState() const override { 82406c3fb27SDimitry Andric return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); 82506c3fb27SDimitry Andric } 82606c3fb27SDimitry Andric 82706c3fb27SDimitry Andric void initialize(Attributor &A) override { 82806c3fb27SDimitry Andric Function *F = getAssociatedFunction(); 82906c3fb27SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 83006c3fb27SDimitry Andric 83106c3fb27SDimitry Andric if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>( 83206c3fb27SDimitry Andric *this, IRPosition::function(*F), DepClassTy::REQUIRED)) { 83306c3fb27SDimitry Andric 83406c3fb27SDimitry Andric unsigned Min, Max; 83506c3fb27SDimitry Andric std::tie(Min, Max) = InfoCache.getWavesPerEU( 83606c3fb27SDimitry Andric *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(), 83706c3fb27SDimitry Andric AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); 83806c3fb27SDimitry Andric 83906c3fb27SDimitry Andric ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); 84006c3fb27SDimitry Andric intersectKnown(Range); 84106c3fb27SDimitry Andric } 84206c3fb27SDimitry Andric 84306c3fb27SDimitry Andric if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) 84406c3fb27SDimitry Andric indicatePessimisticFixpoint(); 84506c3fb27SDimitry Andric } 84606c3fb27SDimitry Andric 84706c3fb27SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 84806c3fb27SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 84906c3fb27SDimitry Andric ChangeStatus Change = ChangeStatus::UNCHANGED; 85006c3fb27SDimitry Andric 85106c3fb27SDimitry Andric auto CheckCallSite = [&](AbstractCallSite CS) { 85206c3fb27SDimitry Andric Function *Caller = CS.getInstruction()->getFunction(); 85306c3fb27SDimitry Andric Function *Func = getAssociatedFunction(); 85406c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName() 85506c3fb27SDimitry Andric << "->" << Func->getName() << '\n'); 85606c3fb27SDimitry Andric 85706c3fb27SDimitry Andric const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>( 85806c3fb27SDimitry Andric *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 85906c3fb27SDimitry Andric const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>( 86006c3fb27SDimitry Andric *this, IRPosition::function(*Func), DepClassTy::REQUIRED); 86106c3fb27SDimitry Andric if (!CallerInfo || !AssumedGroupSize) 86206c3fb27SDimitry Andric return false; 86306c3fb27SDimitry Andric 86406c3fb27SDimitry Andric unsigned Min, Max; 86506c3fb27SDimitry Andric std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU( 86606c3fb27SDimitry Andric *Caller, 86706c3fb27SDimitry Andric {CallerInfo->getAssumed().getLower().getZExtValue(), 86806c3fb27SDimitry Andric CallerInfo->getAssumed().getUpper().getZExtValue() - 1}, 86906c3fb27SDimitry Andric {AssumedGroupSize->getAssumed().getLower().getZExtValue(), 87006c3fb27SDimitry Andric AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1}); 87106c3fb27SDimitry Andric ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1)); 87206c3fb27SDimitry Andric IntegerRangeState CallerRangeState(CallerRange); 87306c3fb27SDimitry Andric Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState); 87406c3fb27SDimitry Andric 87506c3fb27SDimitry Andric return true; 87606c3fb27SDimitry Andric }; 87706c3fb27SDimitry Andric 87806c3fb27SDimitry Andric bool AllCallSitesKnown = true; 87906c3fb27SDimitry Andric if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 88006c3fb27SDimitry Andric return indicatePessimisticFixpoint(); 88106c3fb27SDimitry Andric 88206c3fb27SDimitry Andric return Change; 88306c3fb27SDimitry Andric } 88406c3fb27SDimitry Andric 88506c3fb27SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 88606c3fb27SDimitry Andric static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP, 88706c3fb27SDimitry Andric Attributor &A); 88806c3fb27SDimitry Andric 88906c3fb27SDimitry Andric ChangeStatus manifest(Attributor &A) override { 89006c3fb27SDimitry Andric Function *F = getAssociatedFunction(); 89106c3fb27SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 89206c3fb27SDimitry Andric unsigned Max = InfoCache.getMaxWavesPerEU(*F); 89306c3fb27SDimitry Andric return emitAttributeIfNotDefault(A, 1, Max); 89406c3fb27SDimitry Andric } 89506c3fb27SDimitry Andric 89606c3fb27SDimitry Andric /// See AbstractAttribute::getName() 89706c3fb27SDimitry Andric const std::string getName() const override { return "AAAMDWavesPerEU"; } 89806c3fb27SDimitry Andric 89906c3fb27SDimitry Andric /// See AbstractAttribute::getIdAddr() 90006c3fb27SDimitry Andric const char *getIdAddr() const override { return &ID; } 90106c3fb27SDimitry Andric 90206c3fb27SDimitry Andric /// This function should return true if the type of the \p AA is 90306c3fb27SDimitry Andric /// AAAMDWavesPerEU 90406c3fb27SDimitry Andric static bool classof(const AbstractAttribute *AA) { 90506c3fb27SDimitry Andric return (AA->getIdAddr() == &ID); 90606c3fb27SDimitry Andric } 90706c3fb27SDimitry Andric 90806c3fb27SDimitry Andric /// Unique ID (due to the unique address) 90906c3fb27SDimitry Andric static const char ID; 91006c3fb27SDimitry Andric }; 91106c3fb27SDimitry Andric 91206c3fb27SDimitry Andric const char AAAMDWavesPerEU::ID = 0; 91306c3fb27SDimitry Andric 91406c3fb27SDimitry Andric AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP, 91506c3fb27SDimitry Andric Attributor &A) { 91606c3fb27SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 91706c3fb27SDimitry Andric return *new (A.Allocator) AAAMDWavesPerEU(IRP, A); 91806c3fb27SDimitry Andric llvm_unreachable("AAAMDWavesPerEU is only valid for function position"); 91906c3fb27SDimitry Andric } 92006c3fb27SDimitry Andric 921*0fca6ea1SDimitry Andric static bool inlineAsmUsesAGPRs(const InlineAsm *IA) { 922*0fca6ea1SDimitry Andric for (const auto &CI : IA->ParseConstraints()) { 923*0fca6ea1SDimitry Andric for (StringRef Code : CI.Codes) { 924*0fca6ea1SDimitry Andric Code.consume_front("{"); 925*0fca6ea1SDimitry Andric if (Code.starts_with("a")) 926*0fca6ea1SDimitry Andric return true; 927*0fca6ea1SDimitry Andric } 928*0fca6ea1SDimitry Andric } 929*0fca6ea1SDimitry Andric 930*0fca6ea1SDimitry Andric return false; 931*0fca6ea1SDimitry Andric } 932*0fca6ea1SDimitry Andric 933*0fca6ea1SDimitry Andric struct AAAMDGPUNoAGPR 934*0fca6ea1SDimitry Andric : public IRAttribute<Attribute::NoUnwind, 935*0fca6ea1SDimitry Andric StateWrapper<BooleanState, AbstractAttribute>, 936*0fca6ea1SDimitry Andric AAAMDGPUNoAGPR> { 937*0fca6ea1SDimitry Andric AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} 938*0fca6ea1SDimitry Andric 939*0fca6ea1SDimitry Andric static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP, 940*0fca6ea1SDimitry Andric Attributor &A) { 941*0fca6ea1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 942*0fca6ea1SDimitry Andric return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A); 943*0fca6ea1SDimitry Andric llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position"); 944*0fca6ea1SDimitry Andric } 945*0fca6ea1SDimitry Andric 946*0fca6ea1SDimitry Andric void initialize(Attributor &A) override { 947*0fca6ea1SDimitry Andric Function *F = getAssociatedFunction(); 948*0fca6ea1SDimitry Andric if (F->hasFnAttribute("amdgpu-no-agpr")) 949*0fca6ea1SDimitry Andric indicateOptimisticFixpoint(); 950*0fca6ea1SDimitry Andric } 951*0fca6ea1SDimitry Andric 952*0fca6ea1SDimitry Andric const std::string getAsStr(Attributor *A) const override { 953*0fca6ea1SDimitry Andric return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr"; 954*0fca6ea1SDimitry Andric } 955*0fca6ea1SDimitry Andric 956*0fca6ea1SDimitry Andric void trackStatistics() const override {} 957*0fca6ea1SDimitry Andric 958*0fca6ea1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 959*0fca6ea1SDimitry Andric // TODO: Use AACallEdges, but then we need a way to inspect asm edges. 960*0fca6ea1SDimitry Andric 961*0fca6ea1SDimitry Andric auto CheckForNoAGPRs = [&](Instruction &I) { 962*0fca6ea1SDimitry Andric const auto &CB = cast<CallBase>(I); 963*0fca6ea1SDimitry Andric const Value *CalleeOp = CB.getCalledOperand(); 964*0fca6ea1SDimitry Andric const Function *Callee = dyn_cast<Function>(CalleeOp); 965*0fca6ea1SDimitry Andric if (!Callee) { 966*0fca6ea1SDimitry Andric if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) 967*0fca6ea1SDimitry Andric return !inlineAsmUsesAGPRs(IA); 968*0fca6ea1SDimitry Andric return false; 969*0fca6ea1SDimitry Andric } 970*0fca6ea1SDimitry Andric 971*0fca6ea1SDimitry Andric // Some intrinsics may use AGPRs, but if we have a choice, we are not 972*0fca6ea1SDimitry Andric // required to use AGPRs. 973*0fca6ea1SDimitry Andric if (Callee->isIntrinsic()) 974*0fca6ea1SDimitry Andric return true; 975*0fca6ea1SDimitry Andric 976*0fca6ea1SDimitry Andric // TODO: Handle callsite attributes 977*0fca6ea1SDimitry Andric const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>( 978*0fca6ea1SDimitry Andric *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); 979*0fca6ea1SDimitry Andric return CalleeInfo && CalleeInfo->getAssumed(); 980*0fca6ea1SDimitry Andric }; 981*0fca6ea1SDimitry Andric 982*0fca6ea1SDimitry Andric bool UsedAssumedInformation = false; 983*0fca6ea1SDimitry Andric if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this, 984*0fca6ea1SDimitry Andric UsedAssumedInformation)) 985*0fca6ea1SDimitry Andric return indicatePessimisticFixpoint(); 986*0fca6ea1SDimitry Andric return ChangeStatus::UNCHANGED; 987*0fca6ea1SDimitry Andric } 988*0fca6ea1SDimitry Andric 989*0fca6ea1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 990*0fca6ea1SDimitry Andric if (!getAssumed()) 991*0fca6ea1SDimitry Andric return ChangeStatus::UNCHANGED; 992*0fca6ea1SDimitry Andric LLVMContext &Ctx = getAssociatedFunction()->getContext(); 993*0fca6ea1SDimitry Andric return A.manifestAttrs(getIRPosition(), 994*0fca6ea1SDimitry Andric {Attribute::get(Ctx, "amdgpu-no-agpr")}); 995*0fca6ea1SDimitry Andric } 996*0fca6ea1SDimitry Andric 997*0fca6ea1SDimitry Andric const std::string getName() const override { return "AAAMDGPUNoAGPR"; } 998*0fca6ea1SDimitry Andric const char *getIdAddr() const override { return &ID; } 999*0fca6ea1SDimitry Andric 1000*0fca6ea1SDimitry Andric /// This function should return true if the type of the \p AA is 1001*0fca6ea1SDimitry Andric /// AAAMDGPUNoAGPRs 1002*0fca6ea1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 1003*0fca6ea1SDimitry Andric return (AA->getIdAddr() == &ID); 1004*0fca6ea1SDimitry Andric } 1005*0fca6ea1SDimitry Andric 1006*0fca6ea1SDimitry Andric static const char ID; 1007*0fca6ea1SDimitry Andric }; 1008*0fca6ea1SDimitry Andric 1009*0fca6ea1SDimitry Andric const char AAAMDGPUNoAGPR::ID = 0; 1010*0fca6ea1SDimitry Andric 10115f757f3fSDimitry Andric static void addPreloadKernArgHint(Function &F, TargetMachine &TM) { 10125f757f3fSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 10135f757f3fSDimitry Andric for (unsigned I = 0; 10145f757f3fSDimitry Andric I < F.arg_size() && 10155f757f3fSDimitry Andric I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs()); 10165f757f3fSDimitry Andric ++I) { 10175f757f3fSDimitry Andric Argument &Arg = *F.getArg(I); 10185f757f3fSDimitry Andric // Check for incompatible attributes. 10195f757f3fSDimitry Andric if (Arg.hasByRefAttr() || Arg.hasNestAttr()) 10205f757f3fSDimitry Andric break; 1021fe6060f1SDimitry Andric 10225f757f3fSDimitry Andric Arg.addAttr(Attribute::InReg); 10235f757f3fSDimitry Andric } 1024fe6060f1SDimitry Andric } 1025fe6060f1SDimitry Andric 10265f757f3fSDimitry Andric static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) { 1027fe6060f1SDimitry Andric SetVector<Function *> Functions; 1028349cc55cSDimitry Andric for (Function &F : M) { 1029349cc55cSDimitry Andric if (!F.isIntrinsic()) 1030fe6060f1SDimitry Andric Functions.insert(&F); 1031349cc55cSDimitry Andric } 1032fe6060f1SDimitry Andric 1033fe6060f1SDimitry Andric CallGraphUpdater CGUpdater; 1034fe6060f1SDimitry Andric BumpPtrAllocator Allocator; 10355f757f3fSDimitry Andric AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM); 1036349cc55cSDimitry Andric DenseSet<const char *> Allowed( 1037349cc55cSDimitry Andric {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, 103806c3fb27SDimitry Andric &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, 1039*0fca6ea1SDimitry Andric &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID, 1040*0fca6ea1SDimitry Andric &AAPointerInfo::ID, &AAPotentialConstantValues::ID, 1041*0fca6ea1SDimitry Andric &AAUnderlyingObjects::ID}); 1042349cc55cSDimitry Andric 104381ad6265SDimitry Andric AttributorConfig AC(CGUpdater); 104481ad6265SDimitry Andric AC.Allowed = &Allowed; 104581ad6265SDimitry Andric AC.IsModulePass = true; 104681ad6265SDimitry Andric AC.DefaultInitializeLiveInternals = false; 104706c3fb27SDimitry Andric AC.IPOAmendableCB = [](const Function &F) { 104806c3fb27SDimitry Andric return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; 104906c3fb27SDimitry Andric }; 105081ad6265SDimitry Andric 105181ad6265SDimitry Andric Attributor A(Functions, InfoCache, AC); 1052fe6060f1SDimitry Andric 1053fe6060f1SDimitry Andric for (Function &F : M) { 1054349cc55cSDimitry Andric if (!F.isIntrinsic()) { 1055fe6060f1SDimitry Andric A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F)); 1056349cc55cSDimitry Andric A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F)); 1057*0fca6ea1SDimitry Andric A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F)); 10585f757f3fSDimitry Andric CallingConv::ID CC = F.getCallingConv(); 10595f757f3fSDimitry Andric if (!AMDGPU::isEntryFunctionCC(CC)) { 1060349cc55cSDimitry Andric A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F)); 106106c3fb27SDimitry Andric A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F)); 10625f757f3fSDimitry Andric } else if (CC == CallingConv::AMDGPU_KERNEL) { 10635f757f3fSDimitry Andric addPreloadKernArgHint(F, TM); 1064349cc55cSDimitry Andric } 1065349cc55cSDimitry Andric } 1066fe6060f1SDimitry Andric } 1067fe6060f1SDimitry Andric 1068fe6060f1SDimitry Andric ChangeStatus Change = A.run(); 1069fe6060f1SDimitry Andric return Change == ChangeStatus::CHANGED; 1070fe6060f1SDimitry Andric } 1071fe6060f1SDimitry Andric 10725f757f3fSDimitry Andric class AMDGPUAttributorLegacy : public ModulePass { 10735f757f3fSDimitry Andric public: 10745f757f3fSDimitry Andric AMDGPUAttributorLegacy() : ModulePass(ID) {} 10755f757f3fSDimitry Andric 10765f757f3fSDimitry Andric /// doInitialization - Virtual method overridden by subclasses to do 10775f757f3fSDimitry Andric /// any necessary initialization before any pass is run. 10785f757f3fSDimitry Andric bool doInitialization(Module &) override { 10795f757f3fSDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 10805f757f3fSDimitry Andric if (!TPC) 10815f757f3fSDimitry Andric report_fatal_error("TargetMachine is required"); 10825f757f3fSDimitry Andric 10835f757f3fSDimitry Andric TM = &TPC->getTM<TargetMachine>(); 10845f757f3fSDimitry Andric return false; 10855f757f3fSDimitry Andric } 10865f757f3fSDimitry Andric 10875f757f3fSDimitry Andric bool runOnModule(Module &M) override { 10885f757f3fSDimitry Andric AnalysisGetter AG(this); 10895f757f3fSDimitry Andric return runImpl(M, AG, *TM); 10905f757f3fSDimitry Andric } 10915f757f3fSDimitry Andric 1092bdd1243dSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1093bdd1243dSDimitry Andric AU.addRequired<CycleInfoWrapperPass>(); 1094bdd1243dSDimitry Andric } 1095bdd1243dSDimitry Andric 1096fe6060f1SDimitry Andric StringRef getPassName() const override { return "AMDGPU Attributor"; } 1097fe6060f1SDimitry Andric TargetMachine *TM; 1098fe6060f1SDimitry Andric static char ID; 1099fe6060f1SDimitry Andric }; 1100349cc55cSDimitry Andric } // namespace 1101fe6060f1SDimitry Andric 11025f757f3fSDimitry Andric PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M, 11035f757f3fSDimitry Andric ModuleAnalysisManager &AM) { 1104fe6060f1SDimitry Andric 11055f757f3fSDimitry Andric FunctionAnalysisManager &FAM = 11065f757f3fSDimitry Andric AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 11075f757f3fSDimitry Andric AnalysisGetter AG(FAM); 11085f757f3fSDimitry Andric 11095f757f3fSDimitry Andric // TODO: Probably preserves CFG 11105f757f3fSDimitry Andric return runImpl(M, AG, TM) ? PreservedAnalyses::none() 11115f757f3fSDimitry Andric : PreservedAnalyses::all(); 11125f757f3fSDimitry Andric } 11135f757f3fSDimitry Andric 11145f757f3fSDimitry Andric char AMDGPUAttributorLegacy::ID = 0; 11155f757f3fSDimitry Andric 11165f757f3fSDimitry Andric Pass *llvm::createAMDGPUAttributorLegacyPass() { 11175f757f3fSDimitry Andric return new AMDGPUAttributorLegacy(); 11185f757f3fSDimitry Andric } 11195f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor", 11205f757f3fSDimitry Andric false, false) 1121bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass); 11225f757f3fSDimitry Andric INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor", 11235f757f3fSDimitry Andric false, false) 1124