//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  #include "AMDGPUAttributes.def"
};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
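//
// Map an intrinsic ID to the implicit kernel argument (as a mask bit) it
// requires, or NOT_IMPLICIT_INPUT if it needs none. NonKernelOnly is set for
// inputs that kernels always receive, so the attribute only matters for
// non-kernel functions; NeedsImplicit is set when the code object version
// additionally forces access through the implicit argument pointer.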
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID) {
  unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to
  // access queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion == 5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion == 5); // Need implicitarg_ptr under V5.
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes : public StateWrapper<
  BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
"true" : "false")); 328fe6060f1SDimitry Andric return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 329fe6060f1SDimitry Andric /* ForceReplace */ true); 330fe6060f1SDimitry Andric } 331fe6060f1SDimitry Andric 332fe6060f1SDimitry Andric bool isValidState() const override { 333fe6060f1SDimitry Andric // This state is always valid, even when the state is false. 334fe6060f1SDimitry Andric return true; 335fe6060f1SDimitry Andric } 336fe6060f1SDimitry Andric 337fe6060f1SDimitry Andric const std::string getAsStr() const override { 338fe6060f1SDimitry Andric return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]"; 339fe6060f1SDimitry Andric } 340fe6060f1SDimitry Andric 341fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics() 342fe6060f1SDimitry Andric void trackStatistics() const override {} 343fe6060f1SDimitry Andric }; 344fe6060f1SDimitry Andric 345349cc55cSDimitry Andric AAUniformWorkGroupSize & 346349cc55cSDimitry Andric AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP, 347fe6060f1SDimitry Andric Attributor &A) { 348fe6060f1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 349349cc55cSDimitry Andric return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A); 350349cc55cSDimitry Andric llvm_unreachable( 351349cc55cSDimitry Andric "AAUniformWorkGroupSize is only valid for function position"); 352fe6060f1SDimitry Andric } 353fe6060f1SDimitry Andric 354fe6060f1SDimitry Andric struct AAAMDAttributesFunction : public AAAMDAttributes { 355fe6060f1SDimitry Andric AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) 356fe6060f1SDimitry Andric : AAAMDAttributes(IRP, A) {} 357fe6060f1SDimitry Andric 358fe6060f1SDimitry Andric void initialize(Attributor &A) override { 359fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 3600eae32dcSDimitry Andric 3610eae32dcSDimitry Andric // If the function requires the implicit arg pointer due to sanitizers, 3620eae32dcSDimitry Andric // assume it's needed even if explicitly marked as not requiring it. 363*81ad6265SDimitry Andric const bool NeedsHostcall = funcRequiresHostcallPtr(*F); 364*81ad6265SDimitry Andric if (NeedsHostcall) { 3650eae32dcSDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 366*81ad6265SDimitry Andric removeAssumedBits(HOSTCALL_PTR); 367*81ad6265SDimitry Andric } 3680eae32dcSDimitry Andric 369349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) { 370*81ad6265SDimitry Andric if (NeedsHostcall && 371*81ad6265SDimitry Andric (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) 3720eae32dcSDimitry Andric continue; 3730eae32dcSDimitry Andric 374349cc55cSDimitry Andric if (F->hasFnAttribute(Attr.second)) 375349cc55cSDimitry Andric addKnownBits(Attr.first); 376fe6060f1SDimitry Andric } 377fe6060f1SDimitry Andric 378349cc55cSDimitry Andric if (F->isDeclaration()) 379349cc55cSDimitry Andric return; 380349cc55cSDimitry Andric 381fe6060f1SDimitry Andric // Ignore functions with graphics calling conventions, these are currently 382fe6060f1SDimitry Andric // not allowed to have kernel arguments. 
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (AAEdges.hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        *this &= AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID);
      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

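    // Instruction visitor for checkForAllInstructions below: record that the
    // queue pointer is needed as soon as an addrspacecast from the local or
    // private address space is seen, and return false to stop the traversal.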
    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHostcallPtr(Attributor &A) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
    AAPointerInfo::OffsetAndSize OAS(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesHeapPtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesQueuePtr(Attributor &A) {
    if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
      return false;
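    // Under V5 the queue pointer lives at a fixed offset in the implicit
    // argument block; check whether any load through implicitarg_ptr can
    // reach those 8 bytes.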
    AAPointerInfo::OffsetAndSize OAS(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, OAS);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A,
                                      AAPointerInfo::OffsetAndSize OAS) {
    // Check if this is a call to the implicitarg_ptr intrinsic and whether it
    // is used to retrieve the implicit kernel argument in the byte range
    // \p OAS (e.g. the hostcall pointer). The argument is considered unused
    // only if every use of the implicitarg_ptr is a load that clearly does
    // not retrieve any byte in that range. We check this by tracing all the
    // uses of the initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }

INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)