1fe6060f1SDimitry Andric //===- AMDGPUAttributor.cpp -----------------------------------------------===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric /// \file This pass uses Attributor framework to deduce AMDGPU attributes. 10fe6060f1SDimitry Andric // 11fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 12fe6060f1SDimitry Andric 13fe6060f1SDimitry Andric #include "AMDGPU.h" 14fe6060f1SDimitry Andric #include "GCNSubtarget.h" 15fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 16fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 17fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsR600.h" 18fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h" 19fe6060f1SDimitry Andric #include "llvm/Transforms/IPO/Attributor.h" 20fe6060f1SDimitry Andric 21fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-attributor" 22fe6060f1SDimitry Andric 23fe6060f1SDimitry Andric using namespace llvm; 24fe6060f1SDimitry Andric 25349cc55cSDimitry Andric enum ImplicitArgumentMask { 26349cc55cSDimitry Andric NOT_IMPLICIT_INPUT = 0, 27349cc55cSDimitry Andric 28349cc55cSDimitry Andric // SGPRs 29349cc55cSDimitry Andric DISPATCH_PTR = 1 << 0, 30349cc55cSDimitry Andric QUEUE_PTR = 1 << 1, 31349cc55cSDimitry Andric DISPATCH_ID = 1 << 2, 32349cc55cSDimitry Andric IMPLICIT_ARG_PTR = 1 << 3, 33349cc55cSDimitry Andric WORKGROUP_ID_X = 1 << 4, 34349cc55cSDimitry Andric WORKGROUP_ID_Y = 1 << 5, 35349cc55cSDimitry Andric WORKGROUP_ID_Z = 1 << 6, 36349cc55cSDimitry Andric 37349cc55cSDimitry Andric // VGPRS: 38349cc55cSDimitry Andric WORKITEM_ID_X = 1 << 7, 39349cc55cSDimitry Andric WORKITEM_ID_Y = 1 << 8, 40349cc55cSDimitry Andric WORKITEM_ID_Z = 1 << 9, 41349cc55cSDimitry Andric ALL_ARGUMENT_MASK = (1 << 10) - 1 42349cc55cSDimitry Andric }; 43349cc55cSDimitry Andric 44349cc55cSDimitry Andric static constexpr std::pair<ImplicitArgumentMask, 45349cc55cSDimitry Andric StringLiteral> ImplicitAttrs[] = { 46349cc55cSDimitry Andric {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"}, 47349cc55cSDimitry Andric {QUEUE_PTR, "amdgpu-no-queue-ptr"}, 48349cc55cSDimitry Andric {DISPATCH_ID, "amdgpu-no-dispatch-id"}, 49349cc55cSDimitry Andric {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"}, 50349cc55cSDimitry Andric {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"}, 51349cc55cSDimitry Andric {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"}, 52349cc55cSDimitry Andric {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"}, 53349cc55cSDimitry Andric {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"}, 54349cc55cSDimitry Andric {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"}, 55349cc55cSDimitry Andric {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"} 56349cc55cSDimitry Andric }; 57fe6060f1SDimitry Andric 58fe6060f1SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always 59fe6060f1SDimitry Andric // initialized. 60fe6060f1SDimitry Andric // 61fe6060f1SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup 62fe6060f1SDimitry Andric // size is 1 for y/z. 63349cc55cSDimitry Andric static ImplicitArgumentMask 64349cc55cSDimitry Andric intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) { 65fe6060f1SDimitry Andric switch (ID) { 66fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 67fe6060f1SDimitry Andric NonKernelOnly = true; 68349cc55cSDimitry Andric return WORKITEM_ID_X; 69fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_x: 70fe6060f1SDimitry Andric NonKernelOnly = true; 71349cc55cSDimitry Andric return WORKGROUP_ID_X; 72fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 73fe6060f1SDimitry Andric case Intrinsic::r600_read_tidig_y: 74349cc55cSDimitry Andric return WORKITEM_ID_Y; 75fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: 76fe6060f1SDimitry Andric case Intrinsic::r600_read_tidig_z: 77349cc55cSDimitry Andric return WORKITEM_ID_Z; 78fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_y: 79fe6060f1SDimitry Andric case Intrinsic::r600_read_tgid_y: 80349cc55cSDimitry Andric return WORKGROUP_ID_Y; 81fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_z: 82fe6060f1SDimitry Andric case Intrinsic::r600_read_tgid_z: 83349cc55cSDimitry Andric return WORKGROUP_ID_Z; 84fe6060f1SDimitry Andric case Intrinsic::amdgcn_dispatch_ptr: 85349cc55cSDimitry Andric return DISPATCH_PTR; 86fe6060f1SDimitry Andric case Intrinsic::amdgcn_dispatch_id: 87349cc55cSDimitry Andric return DISPATCH_ID; 88fe6060f1SDimitry Andric case Intrinsic::amdgcn_implicitarg_ptr: 89349cc55cSDimitry Andric return IMPLICIT_ARG_PTR; 90fe6060f1SDimitry Andric case Intrinsic::amdgcn_queue_ptr: 91fe6060f1SDimitry Andric case Intrinsic::amdgcn_is_shared: 92fe6060f1SDimitry Andric case Intrinsic::amdgcn_is_private: 93fe6060f1SDimitry Andric // TODO: Does not require queue ptr on gfx9+ 94fe6060f1SDimitry Andric case Intrinsic::trap: 95fe6060f1SDimitry Andric case Intrinsic::debugtrap: 96fe6060f1SDimitry Andric IsQueuePtr = true; 97349cc55cSDimitry Andric return QUEUE_PTR; 98fe6060f1SDimitry Andric default: 99349cc55cSDimitry Andric return NOT_IMPLICIT_INPUT; 100fe6060f1SDimitry Andric } 101fe6060f1SDimitry Andric } 102fe6060f1SDimitry Andric 103fe6060f1SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) { 104fe6060f1SDimitry Andric return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 105fe6060f1SDimitry Andric } 106fe6060f1SDimitry Andric 107fe6060f1SDimitry Andric static bool isDSAddress(const Constant *C) { 108fe6060f1SDimitry Andric const GlobalValue *GV = dyn_cast<GlobalValue>(C); 109fe6060f1SDimitry Andric if (!GV) 110fe6060f1SDimitry Andric return false; 111fe6060f1SDimitry Andric unsigned AS = GV->getAddressSpace(); 112fe6060f1SDimitry Andric return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; 113fe6060f1SDimitry Andric } 114fe6060f1SDimitry Andric 115*0eae32dcSDimitry Andric /// Returns true if the function requires the implicit argument be passed 116*0eae32dcSDimitry Andric /// regardless of the function contents. 117*0eae32dcSDimitry Andric static bool funcRequiresImplicitArgPtr(const Function &F) { 118*0eae32dcSDimitry Andric // Sanitizers require the hostcall buffer passed in the implicit arguments. 119*0eae32dcSDimitry Andric return F.hasFnAttribute(Attribute::SanitizeAddress) || 120*0eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeThread) || 121*0eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeMemory) || 122*0eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeHWAddress) || 123*0eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeMemTag); 124*0eae32dcSDimitry Andric } 125*0eae32dcSDimitry Andric 126349cc55cSDimitry Andric namespace { 127fe6060f1SDimitry Andric class AMDGPUInformationCache : public InformationCache { 128fe6060f1SDimitry Andric public: 129fe6060f1SDimitry Andric AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, 130fe6060f1SDimitry Andric BumpPtrAllocator &Allocator, 131fe6060f1SDimitry Andric SetVector<Function *> *CGSCC, TargetMachine &TM) 132fe6060f1SDimitry Andric : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {} 133fe6060f1SDimitry Andric TargetMachine &TM; 134fe6060f1SDimitry Andric 135fe6060f1SDimitry Andric enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; 136fe6060f1SDimitry Andric 137fe6060f1SDimitry Andric /// Check if the subtarget has aperture regs. 138fe6060f1SDimitry Andric bool hasApertureRegs(Function &F) { 139fe6060f1SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 140fe6060f1SDimitry Andric return ST.hasApertureRegs(); 141fe6060f1SDimitry Andric } 142fe6060f1SDimitry Andric 143349cc55cSDimitry Andric std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) { 144349cc55cSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 145349cc55cSDimitry Andric return ST.getFlatWorkGroupSizes(F); 146349cc55cSDimitry Andric } 147349cc55cSDimitry Andric 148349cc55cSDimitry Andric std::pair<unsigned, unsigned> 149349cc55cSDimitry Andric getMaximumFlatWorkGroupRange(const Function &F) { 150349cc55cSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 151349cc55cSDimitry Andric return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; 152349cc55cSDimitry Andric } 153349cc55cSDimitry Andric 154fe6060f1SDimitry Andric private: 155fe6060f1SDimitry Andric /// Check if the ConstantExpr \p CE requires queue ptr attribute. 156fe6060f1SDimitry Andric static bool visitConstExpr(const ConstantExpr *CE) { 157fe6060f1SDimitry Andric if (CE->getOpcode() == Instruction::AddrSpaceCast) { 158fe6060f1SDimitry Andric unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 159fe6060f1SDimitry Andric return castRequiresQueuePtr(SrcAS); 160fe6060f1SDimitry Andric } 161fe6060f1SDimitry Andric return false; 162fe6060f1SDimitry Andric } 163fe6060f1SDimitry Andric 164fe6060f1SDimitry Andric /// Get the constant access bitmap for \p C. 165fe6060f1SDimitry Andric uint8_t getConstantAccess(const Constant *C) { 166fe6060f1SDimitry Andric auto It = ConstantStatus.find(C); 167fe6060f1SDimitry Andric if (It != ConstantStatus.end()) 168fe6060f1SDimitry Andric return It->second; 169fe6060f1SDimitry Andric 170fe6060f1SDimitry Andric uint8_t Result = 0; 171fe6060f1SDimitry Andric if (isDSAddress(C)) 172fe6060f1SDimitry Andric Result = DS_GLOBAL; 173fe6060f1SDimitry Andric 174fe6060f1SDimitry Andric if (const auto *CE = dyn_cast<ConstantExpr>(C)) 175fe6060f1SDimitry Andric if (visitConstExpr(CE)) 176fe6060f1SDimitry Andric Result |= ADDR_SPACE_CAST; 177fe6060f1SDimitry Andric 178fe6060f1SDimitry Andric for (const Use &U : C->operands()) { 179fe6060f1SDimitry Andric const auto *OpC = dyn_cast<Constant>(U); 180fe6060f1SDimitry Andric if (!OpC) 181fe6060f1SDimitry Andric continue; 182fe6060f1SDimitry Andric 183fe6060f1SDimitry Andric Result |= getConstantAccess(OpC); 184fe6060f1SDimitry Andric } 185fe6060f1SDimitry Andric return Result; 186fe6060f1SDimitry Andric } 187fe6060f1SDimitry Andric 188fe6060f1SDimitry Andric public: 189fe6060f1SDimitry Andric /// Returns true if \p Fn needs a queue ptr attribute because of \p C. 190fe6060f1SDimitry Andric bool needsQueuePtr(const Constant *C, Function &Fn) { 191fe6060f1SDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()); 192fe6060f1SDimitry Andric bool HasAperture = hasApertureRegs(Fn); 193fe6060f1SDimitry Andric 194fe6060f1SDimitry Andric // No need to explore the constants. 195fe6060f1SDimitry Andric if (!IsNonEntryFunc && HasAperture) 196fe6060f1SDimitry Andric return false; 197fe6060f1SDimitry Andric 198fe6060f1SDimitry Andric uint8_t Access = getConstantAccess(C); 199fe6060f1SDimitry Andric 200fe6060f1SDimitry Andric // We need to trap on DS globals in non-entry functions. 201fe6060f1SDimitry Andric if (IsNonEntryFunc && (Access & DS_GLOBAL)) 202fe6060f1SDimitry Andric return true; 203fe6060f1SDimitry Andric 204fe6060f1SDimitry Andric return !HasAperture && (Access & ADDR_SPACE_CAST); 205fe6060f1SDimitry Andric } 206fe6060f1SDimitry Andric 207fe6060f1SDimitry Andric private: 208fe6060f1SDimitry Andric /// Used to determine if the Constant needs a queue ptr attribute. 209fe6060f1SDimitry Andric DenseMap<const Constant *, uint8_t> ConstantStatus; 210fe6060f1SDimitry Andric }; 211fe6060f1SDimitry Andric 212349cc55cSDimitry Andric struct AAAMDAttributes : public StateWrapper< 213349cc55cSDimitry Andric BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> { 214349cc55cSDimitry Andric using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, 215349cc55cSDimitry Andric AbstractAttribute>; 216349cc55cSDimitry Andric 217fe6060f1SDimitry Andric AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 218fe6060f1SDimitry Andric 219fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 220fe6060f1SDimitry Andric static AAAMDAttributes &createForPosition(const IRPosition &IRP, 221fe6060f1SDimitry Andric Attributor &A); 222fe6060f1SDimitry Andric 223fe6060f1SDimitry Andric /// See AbstractAttribute::getName(). 224fe6060f1SDimitry Andric const std::string getName() const override { return "AAAMDAttributes"; } 225fe6060f1SDimitry Andric 226fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr(). 227fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 228fe6060f1SDimitry Andric 229fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 230fe6060f1SDimitry Andric /// AAAMDAttributes. 231fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 232fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 233fe6060f1SDimitry Andric } 234fe6060f1SDimitry Andric 235fe6060f1SDimitry Andric /// Unique ID (due to the unique address) 236fe6060f1SDimitry Andric static const char ID; 237fe6060f1SDimitry Andric }; 238fe6060f1SDimitry Andric const char AAAMDAttributes::ID = 0; 239fe6060f1SDimitry Andric 240349cc55cSDimitry Andric struct AAUniformWorkGroupSize 241fe6060f1SDimitry Andric : public StateWrapper<BooleanState, AbstractAttribute> { 242fe6060f1SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>; 243349cc55cSDimitry Andric AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 244fe6060f1SDimitry Andric 245fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 246349cc55cSDimitry Andric static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP, 247fe6060f1SDimitry Andric Attributor &A); 248fe6060f1SDimitry Andric 249fe6060f1SDimitry Andric /// See AbstractAttribute::getName(). 250349cc55cSDimitry Andric const std::string getName() const override { 251349cc55cSDimitry Andric return "AAUniformWorkGroupSize"; 252349cc55cSDimitry Andric } 253fe6060f1SDimitry Andric 254fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr(). 255fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 256fe6060f1SDimitry Andric 257fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 258fe6060f1SDimitry Andric /// AAAMDAttributes. 259fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 260fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 261fe6060f1SDimitry Andric } 262fe6060f1SDimitry Andric 263fe6060f1SDimitry Andric /// Unique ID (due to the unique address) 264fe6060f1SDimitry Andric static const char ID; 265fe6060f1SDimitry Andric }; 266349cc55cSDimitry Andric const char AAUniformWorkGroupSize::ID = 0; 267fe6060f1SDimitry Andric 268349cc55cSDimitry Andric struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { 269349cc55cSDimitry Andric AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A) 270349cc55cSDimitry Andric : AAUniformWorkGroupSize(IRP, A) {} 271fe6060f1SDimitry Andric 272fe6060f1SDimitry Andric void initialize(Attributor &A) override { 273fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 274fe6060f1SDimitry Andric CallingConv::ID CC = F->getCallingConv(); 275fe6060f1SDimitry Andric 276fe6060f1SDimitry Andric if (CC != CallingConv::AMDGPU_KERNEL) 277fe6060f1SDimitry Andric return; 278fe6060f1SDimitry Andric 279fe6060f1SDimitry Andric bool InitialValue = false; 280fe6060f1SDimitry Andric if (F->hasFnAttribute("uniform-work-group-size")) 281fe6060f1SDimitry Andric InitialValue = F->getFnAttribute("uniform-work-group-size") 282fe6060f1SDimitry Andric .getValueAsString() 283fe6060f1SDimitry Andric .equals("true"); 284fe6060f1SDimitry Andric 285fe6060f1SDimitry Andric if (InitialValue) 286fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 287fe6060f1SDimitry Andric else 288fe6060f1SDimitry Andric indicatePessimisticFixpoint(); 289fe6060f1SDimitry Andric } 290fe6060f1SDimitry Andric 291fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 292fe6060f1SDimitry Andric ChangeStatus Change = ChangeStatus::UNCHANGED; 293fe6060f1SDimitry Andric 294fe6060f1SDimitry Andric auto CheckCallSite = [&](AbstractCallSite CS) { 295fe6060f1SDimitry Andric Function *Caller = CS.getInstruction()->getFunction(); 296349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName() 297fe6060f1SDimitry Andric << "->" << getAssociatedFunction()->getName() << "\n"); 298fe6060f1SDimitry Andric 299349cc55cSDimitry Andric const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>( 300fe6060f1SDimitry Andric *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 301fe6060f1SDimitry Andric 302fe6060f1SDimitry Andric Change = Change | clampStateAndIndicateChange(this->getState(), 303fe6060f1SDimitry Andric CallerInfo.getState()); 304fe6060f1SDimitry Andric 305fe6060f1SDimitry Andric return true; 306fe6060f1SDimitry Andric }; 307fe6060f1SDimitry Andric 308fe6060f1SDimitry Andric bool AllCallSitesKnown = true; 309fe6060f1SDimitry Andric if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 310*0eae32dcSDimitry Andric return indicatePessimisticFixpoint(); 311fe6060f1SDimitry Andric 312fe6060f1SDimitry Andric return Change; 313fe6060f1SDimitry Andric } 314fe6060f1SDimitry Andric 315fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 316fe6060f1SDimitry Andric SmallVector<Attribute, 8> AttrList; 317fe6060f1SDimitry Andric LLVMContext &Ctx = getAssociatedFunction()->getContext(); 318fe6060f1SDimitry Andric 319fe6060f1SDimitry Andric AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size", 320fe6060f1SDimitry Andric getAssumed() ? "true" : "false")); 321fe6060f1SDimitry Andric return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 322fe6060f1SDimitry Andric /* ForceReplace */ true); 323fe6060f1SDimitry Andric } 324fe6060f1SDimitry Andric 325fe6060f1SDimitry Andric bool isValidState() const override { 326fe6060f1SDimitry Andric // This state is always valid, even when the state is false. 327fe6060f1SDimitry Andric return true; 328fe6060f1SDimitry Andric } 329fe6060f1SDimitry Andric 330fe6060f1SDimitry Andric const std::string getAsStr() const override { 331fe6060f1SDimitry Andric return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]"; 332fe6060f1SDimitry Andric } 333fe6060f1SDimitry Andric 334fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics() 335fe6060f1SDimitry Andric void trackStatistics() const override {} 336fe6060f1SDimitry Andric }; 337fe6060f1SDimitry Andric 338349cc55cSDimitry Andric AAUniformWorkGroupSize & 339349cc55cSDimitry Andric AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP, 340fe6060f1SDimitry Andric Attributor &A) { 341fe6060f1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 342349cc55cSDimitry Andric return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A); 343349cc55cSDimitry Andric llvm_unreachable( 344349cc55cSDimitry Andric "AAUniformWorkGroupSize is only valid for function position"); 345fe6060f1SDimitry Andric } 346fe6060f1SDimitry Andric 347fe6060f1SDimitry Andric struct AAAMDAttributesFunction : public AAAMDAttributes { 348fe6060f1SDimitry Andric AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) 349fe6060f1SDimitry Andric : AAAMDAttributes(IRP, A) {} 350fe6060f1SDimitry Andric 351fe6060f1SDimitry Andric void initialize(Attributor &A) override { 352fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 353*0eae32dcSDimitry Andric 354*0eae32dcSDimitry Andric // If the function requires the implicit arg pointer due to sanitizers, 355*0eae32dcSDimitry Andric // assume it's needed even if explicitly marked as not requiring it. 356*0eae32dcSDimitry Andric const bool NeedsImplicit = funcRequiresImplicitArgPtr(*F); 357*0eae32dcSDimitry Andric if (NeedsImplicit) 358*0eae32dcSDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 359*0eae32dcSDimitry Andric 360349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) { 361*0eae32dcSDimitry Andric if (NeedsImplicit && Attr.first == IMPLICIT_ARG_PTR) 362*0eae32dcSDimitry Andric continue; 363*0eae32dcSDimitry Andric 364349cc55cSDimitry Andric if (F->hasFnAttribute(Attr.second)) 365349cc55cSDimitry Andric addKnownBits(Attr.first); 366fe6060f1SDimitry Andric } 367fe6060f1SDimitry Andric 368349cc55cSDimitry Andric if (F->isDeclaration()) 369349cc55cSDimitry Andric return; 370349cc55cSDimitry Andric 371fe6060f1SDimitry Andric // Ignore functions with graphics calling conventions, these are currently 372fe6060f1SDimitry Andric // not allowed to have kernel arguments. 373fe6060f1SDimitry Andric if (AMDGPU::isGraphics(F->getCallingConv())) { 374fe6060f1SDimitry Andric indicatePessimisticFixpoint(); 375fe6060f1SDimitry Andric return; 376fe6060f1SDimitry Andric } 377fe6060f1SDimitry Andric } 378fe6060f1SDimitry Andric 379fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 380fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 381349cc55cSDimitry Andric // The current assumed state used to determine a change. 382349cc55cSDimitry Andric auto OrigAssumed = getAssumed(); 383fe6060f1SDimitry Andric 384fe6060f1SDimitry Andric // Check for Intrinsics and propagate attributes. 385fe6060f1SDimitry Andric const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( 386fe6060f1SDimitry Andric *this, this->getIRPosition(), DepClassTy::REQUIRED); 387349cc55cSDimitry Andric if (AAEdges.hasNonAsmUnknownCallee()) 388349cc55cSDimitry Andric return indicatePessimisticFixpoint(); 389fe6060f1SDimitry Andric 390349cc55cSDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 391349cc55cSDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 392fe6060f1SDimitry Andric 393fe6060f1SDimitry Andric bool NeedsQueuePtr = false; 394349cc55cSDimitry Andric 395fe6060f1SDimitry Andric for (Function *Callee : AAEdges.getOptimisticEdges()) { 396fe6060f1SDimitry Andric Intrinsic::ID IID = Callee->getIntrinsicID(); 397349cc55cSDimitry Andric if (IID == Intrinsic::not_intrinsic) { 398349cc55cSDimitry Andric const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>( 399349cc55cSDimitry Andric *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); 400349cc55cSDimitry Andric *this &= AAAMD; 401fe6060f1SDimitry Andric continue; 402fe6060f1SDimitry Andric } 403fe6060f1SDimitry Andric 404fe6060f1SDimitry Andric bool NonKernelOnly = false; 405349cc55cSDimitry Andric ImplicitArgumentMask AttrMask = 406349cc55cSDimitry Andric intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr); 407349cc55cSDimitry Andric if (AttrMask != NOT_IMPLICIT_INPUT) { 408349cc55cSDimitry Andric if ((IsNonEntryFunc || !NonKernelOnly)) 409349cc55cSDimitry Andric removeAssumedBits(AttrMask); 410fe6060f1SDimitry Andric } 411fe6060f1SDimitry Andric } 412fe6060f1SDimitry Andric 413fe6060f1SDimitry Andric // If we found that we need amdgpu-queue-ptr, nothing else to do. 414349cc55cSDimitry Andric if (NeedsQueuePtr) { 415349cc55cSDimitry Andric removeAssumedBits(QUEUE_PTR); 416349cc55cSDimitry Andric return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : 417349cc55cSDimitry Andric ChangeStatus::UNCHANGED; 418fe6060f1SDimitry Andric } 419fe6060f1SDimitry Andric 420fe6060f1SDimitry Andric auto CheckAddrSpaceCasts = [&](Instruction &I) { 421fe6060f1SDimitry Andric unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace(); 422fe6060f1SDimitry Andric if (castRequiresQueuePtr(SrcAS)) { 423fe6060f1SDimitry Andric NeedsQueuePtr = true; 424fe6060f1SDimitry Andric return false; 425fe6060f1SDimitry Andric } 426fe6060f1SDimitry Andric return true; 427fe6060f1SDimitry Andric }; 428fe6060f1SDimitry Andric 429fe6060f1SDimitry Andric bool HasApertureRegs = InfoCache.hasApertureRegs(*F); 430fe6060f1SDimitry Andric 431fe6060f1SDimitry Andric // `checkForAllInstructions` is much more cheaper than going through all 432fe6060f1SDimitry Andric // instructions, try it first. 433fe6060f1SDimitry Andric 434fe6060f1SDimitry Andric // amdgpu-queue-ptr is not needed if aperture regs is present. 435349cc55cSDimitry Andric if (!HasApertureRegs) { 436349cc55cSDimitry Andric bool UsedAssumedInformation = false; 437fe6060f1SDimitry Andric A.checkForAllInstructions(CheckAddrSpaceCasts, *this, 438fe6060f1SDimitry Andric {Instruction::AddrSpaceCast}, 439fe6060f1SDimitry Andric UsedAssumedInformation); 440349cc55cSDimitry Andric } 441fe6060f1SDimitry Andric 442fe6060f1SDimitry Andric // If we found that we need amdgpu-queue-ptr, nothing else to do. 443fe6060f1SDimitry Andric if (NeedsQueuePtr) { 444349cc55cSDimitry Andric removeAssumedBits(QUEUE_PTR); 445349cc55cSDimitry Andric return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : 446349cc55cSDimitry Andric ChangeStatus::UNCHANGED; 447fe6060f1SDimitry Andric } 448fe6060f1SDimitry Andric 449349cc55cSDimitry Andric if (!IsNonEntryFunc && HasApertureRegs) { 450349cc55cSDimitry Andric return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : 451349cc55cSDimitry Andric ChangeStatus::UNCHANGED; 452349cc55cSDimitry Andric } 453fe6060f1SDimitry Andric 454fe6060f1SDimitry Andric for (BasicBlock &BB : *F) { 455fe6060f1SDimitry Andric for (Instruction &I : BB) { 456fe6060f1SDimitry Andric for (const Use &U : I.operands()) { 457fe6060f1SDimitry Andric if (const auto *C = dyn_cast<Constant>(U)) { 458fe6060f1SDimitry Andric if (InfoCache.needsQueuePtr(C, *F)) { 459349cc55cSDimitry Andric removeAssumedBits(QUEUE_PTR); 460349cc55cSDimitry Andric return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : 461349cc55cSDimitry Andric ChangeStatus::UNCHANGED; 462fe6060f1SDimitry Andric } 463fe6060f1SDimitry Andric } 464fe6060f1SDimitry Andric } 465fe6060f1SDimitry Andric } 466fe6060f1SDimitry Andric } 467fe6060f1SDimitry Andric 468349cc55cSDimitry Andric return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED : 469349cc55cSDimitry Andric ChangeStatus::UNCHANGED; 470fe6060f1SDimitry Andric } 471fe6060f1SDimitry Andric 472fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 473fe6060f1SDimitry Andric SmallVector<Attribute, 8> AttrList; 474fe6060f1SDimitry Andric LLVMContext &Ctx = getAssociatedFunction()->getContext(); 475fe6060f1SDimitry Andric 476349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) { 477349cc55cSDimitry Andric if (isKnown(Attr.first)) 478349cc55cSDimitry Andric AttrList.push_back(Attribute::get(Ctx, Attr.second)); 479349cc55cSDimitry Andric } 480fe6060f1SDimitry Andric 481fe6060f1SDimitry Andric return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 482fe6060f1SDimitry Andric /* ForceReplace */ true); 483fe6060f1SDimitry Andric } 484fe6060f1SDimitry Andric 485fe6060f1SDimitry Andric const std::string getAsStr() const override { 486349cc55cSDimitry Andric std::string Str; 487349cc55cSDimitry Andric raw_string_ostream OS(Str); 488349cc55cSDimitry Andric OS << "AMDInfo["; 489349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) 490349cc55cSDimitry Andric OS << ' ' << Attr.second; 491349cc55cSDimitry Andric OS << " ]"; 492349cc55cSDimitry Andric return OS.str(); 493fe6060f1SDimitry Andric } 494fe6060f1SDimitry Andric 495fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics() 496fe6060f1SDimitry Andric void trackStatistics() const override {} 497fe6060f1SDimitry Andric }; 498fe6060f1SDimitry Andric 499fe6060f1SDimitry Andric AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, 500fe6060f1SDimitry Andric Attributor &A) { 501fe6060f1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 502fe6060f1SDimitry Andric return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); 503fe6060f1SDimitry Andric llvm_unreachable("AAAMDAttributes is only valid for function position"); 504fe6060f1SDimitry Andric } 505fe6060f1SDimitry Andric 506349cc55cSDimitry Andric /// Propagate amdgpu-flat-work-group-size attribute. 507349cc55cSDimitry Andric struct AAAMDFlatWorkGroupSize 508349cc55cSDimitry Andric : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> { 509349cc55cSDimitry Andric using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>; 510349cc55cSDimitry Andric AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) 511349cc55cSDimitry Andric : Base(IRP, 32) {} 512349cc55cSDimitry Andric 513349cc55cSDimitry Andric /// See AbstractAttribute::getState(...). 514349cc55cSDimitry Andric IntegerRangeState &getState() override { return *this; } 515349cc55cSDimitry Andric const IntegerRangeState &getState() const override { return *this; } 516349cc55cSDimitry Andric 517349cc55cSDimitry Andric void initialize(Attributor &A) override { 518349cc55cSDimitry Andric Function *F = getAssociatedFunction(); 519349cc55cSDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 520349cc55cSDimitry Andric unsigned MinGroupSize, MaxGroupSize; 521349cc55cSDimitry Andric std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); 522349cc55cSDimitry Andric intersectKnown( 523349cc55cSDimitry Andric ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); 524*0eae32dcSDimitry Andric 525*0eae32dcSDimitry Andric if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) 526*0eae32dcSDimitry Andric indicatePessimisticFixpoint(); 527349cc55cSDimitry Andric } 528349cc55cSDimitry Andric 529349cc55cSDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 530349cc55cSDimitry Andric ChangeStatus Change = ChangeStatus::UNCHANGED; 531349cc55cSDimitry Andric 532349cc55cSDimitry Andric auto CheckCallSite = [&](AbstractCallSite CS) { 533349cc55cSDimitry Andric Function *Caller = CS.getInstruction()->getFunction(); 534349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName() 535349cc55cSDimitry Andric << "->" << getAssociatedFunction()->getName() << '\n'); 536349cc55cSDimitry Andric 537349cc55cSDimitry Andric const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>( 538349cc55cSDimitry Andric *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 539349cc55cSDimitry Andric 540349cc55cSDimitry Andric Change |= 541349cc55cSDimitry Andric clampStateAndIndicateChange(this->getState(), CallerInfo.getState()); 542349cc55cSDimitry Andric 543349cc55cSDimitry Andric return true; 544349cc55cSDimitry Andric }; 545349cc55cSDimitry Andric 546349cc55cSDimitry Andric bool AllCallSitesKnown = true; 547349cc55cSDimitry Andric if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 548349cc55cSDimitry Andric return indicatePessimisticFixpoint(); 549349cc55cSDimitry Andric 550349cc55cSDimitry Andric return Change; 551349cc55cSDimitry Andric } 552349cc55cSDimitry Andric 553349cc55cSDimitry Andric ChangeStatus manifest(Attributor &A) override { 554349cc55cSDimitry Andric SmallVector<Attribute, 8> AttrList; 555349cc55cSDimitry Andric Function *F = getAssociatedFunction(); 556349cc55cSDimitry Andric LLVMContext &Ctx = F->getContext(); 557349cc55cSDimitry Andric 558349cc55cSDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 559349cc55cSDimitry Andric unsigned Min, Max; 560349cc55cSDimitry Andric std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); 561349cc55cSDimitry Andric 562349cc55cSDimitry Andric // Don't add the attribute if it's the implied default. 563349cc55cSDimitry Andric if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) 564349cc55cSDimitry Andric return ChangeStatus::UNCHANGED; 565349cc55cSDimitry Andric 566349cc55cSDimitry Andric SmallString<10> Buffer; 567349cc55cSDimitry Andric raw_svector_ostream OS(Buffer); 568349cc55cSDimitry Andric OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 569349cc55cSDimitry Andric 570349cc55cSDimitry Andric AttrList.push_back( 571349cc55cSDimitry Andric Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str())); 572349cc55cSDimitry Andric return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 573349cc55cSDimitry Andric /* ForceReplace */ true); 574349cc55cSDimitry Andric } 575349cc55cSDimitry Andric 576349cc55cSDimitry Andric const std::string getAsStr() const override { 577349cc55cSDimitry Andric std::string Str; 578349cc55cSDimitry Andric raw_string_ostream OS(Str); 579349cc55cSDimitry Andric OS << "AMDFlatWorkGroupSize["; 580349cc55cSDimitry Andric OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 581349cc55cSDimitry Andric OS << ']'; 582349cc55cSDimitry Andric return OS.str(); 583349cc55cSDimitry Andric } 584349cc55cSDimitry Andric 585349cc55cSDimitry Andric /// See AbstractAttribute::trackStatistics() 586349cc55cSDimitry Andric void trackStatistics() const override {} 587349cc55cSDimitry Andric 588349cc55cSDimitry Andric /// Create an abstract attribute view for the position \p IRP. 589349cc55cSDimitry Andric static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP, 590349cc55cSDimitry Andric Attributor &A); 591349cc55cSDimitry Andric 592349cc55cSDimitry Andric /// See AbstractAttribute::getName() 593349cc55cSDimitry Andric const std::string getName() const override { 594349cc55cSDimitry Andric return "AAAMDFlatWorkGroupSize"; 595349cc55cSDimitry Andric } 596349cc55cSDimitry Andric 597349cc55cSDimitry Andric /// See AbstractAttribute::getIdAddr() 598349cc55cSDimitry Andric const char *getIdAddr() const override { return &ID; } 599349cc55cSDimitry Andric 600349cc55cSDimitry Andric /// This function should return true if the type of the \p AA is 601349cc55cSDimitry Andric /// AAAMDFlatWorkGroupSize 602349cc55cSDimitry Andric static bool classof(const AbstractAttribute *AA) { 603349cc55cSDimitry Andric return (AA->getIdAddr() == &ID); 604349cc55cSDimitry Andric } 605349cc55cSDimitry Andric 606349cc55cSDimitry Andric /// Unique ID (due to the unique address) 607349cc55cSDimitry Andric static const char ID; 608349cc55cSDimitry Andric }; 609349cc55cSDimitry Andric 610349cc55cSDimitry Andric const char AAAMDFlatWorkGroupSize::ID = 0; 611349cc55cSDimitry Andric 612349cc55cSDimitry Andric AAAMDFlatWorkGroupSize & 613349cc55cSDimitry Andric AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP, 614349cc55cSDimitry Andric Attributor &A) { 615349cc55cSDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 616349cc55cSDimitry Andric return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A); 617349cc55cSDimitry Andric llvm_unreachable( 618349cc55cSDimitry Andric "AAAMDFlatWorkGroupSize is only valid for function position"); 619349cc55cSDimitry Andric } 620349cc55cSDimitry Andric 621fe6060f1SDimitry Andric class AMDGPUAttributor : public ModulePass { 622fe6060f1SDimitry Andric public: 623fe6060f1SDimitry Andric AMDGPUAttributor() : ModulePass(ID) {} 624fe6060f1SDimitry Andric 625fe6060f1SDimitry Andric /// doInitialization - Virtual method overridden by subclasses to do 626fe6060f1SDimitry Andric /// any necessary initialization before any pass is run. 627fe6060f1SDimitry Andric bool doInitialization(Module &) override { 628fe6060f1SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 629fe6060f1SDimitry Andric if (!TPC) 630fe6060f1SDimitry Andric report_fatal_error("TargetMachine is required"); 631fe6060f1SDimitry Andric 632fe6060f1SDimitry Andric TM = &TPC->getTM<TargetMachine>(); 633fe6060f1SDimitry Andric return false; 634fe6060f1SDimitry Andric } 635fe6060f1SDimitry Andric 636fe6060f1SDimitry Andric bool runOnModule(Module &M) override { 637fe6060f1SDimitry Andric SetVector<Function *> Functions; 638fe6060f1SDimitry Andric AnalysisGetter AG; 639349cc55cSDimitry Andric for (Function &F : M) { 640349cc55cSDimitry Andric if (!F.isIntrinsic()) 641fe6060f1SDimitry Andric Functions.insert(&F); 642349cc55cSDimitry Andric } 643fe6060f1SDimitry Andric 644fe6060f1SDimitry Andric CallGraphUpdater CGUpdater; 645fe6060f1SDimitry Andric BumpPtrAllocator Allocator; 646fe6060f1SDimitry Andric AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); 647349cc55cSDimitry Andric DenseSet<const char *> Allowed( 648349cc55cSDimitry Andric {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, 649349cc55cSDimitry Andric &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID}); 650349cc55cSDimitry Andric 651349cc55cSDimitry Andric Attributor A(Functions, InfoCache, CGUpdater, &Allowed); 652fe6060f1SDimitry Andric 653fe6060f1SDimitry Andric for (Function &F : M) { 654349cc55cSDimitry Andric if (!F.isIntrinsic()) { 655fe6060f1SDimitry Andric A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F)); 656349cc55cSDimitry Andric A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F)); 657349cc55cSDimitry Andric if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) { 658349cc55cSDimitry Andric A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F)); 659349cc55cSDimitry Andric } 660349cc55cSDimitry Andric } 661fe6060f1SDimitry Andric } 662fe6060f1SDimitry Andric 663fe6060f1SDimitry Andric ChangeStatus Change = A.run(); 664fe6060f1SDimitry Andric return Change == ChangeStatus::CHANGED; 665fe6060f1SDimitry Andric } 666fe6060f1SDimitry Andric 667fe6060f1SDimitry Andric StringRef getPassName() const override { return "AMDGPU Attributor"; } 668fe6060f1SDimitry Andric TargetMachine *TM; 669fe6060f1SDimitry Andric static char ID; 670fe6060f1SDimitry Andric }; 671349cc55cSDimitry Andric } // namespace 672fe6060f1SDimitry Andric 673fe6060f1SDimitry Andric char AMDGPUAttributor::ID = 0; 674fe6060f1SDimitry Andric 675fe6060f1SDimitry Andric Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); } 676fe6060f1SDimitry Andric INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false) 677