1fe6060f1SDimitry Andric //===- AMDGPUAttributor.cpp -----------------------------------------------===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric /// \file This pass uses Attributor framework to deduce AMDGPU attributes. 10fe6060f1SDimitry Andric // 11fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 12fe6060f1SDimitry Andric 13fe6060f1SDimitry Andric #include "AMDGPU.h" 14fe6060f1SDimitry Andric #include "GCNSubtarget.h" 1581ad6265SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 16*bdd1243dSDimitry Andric #include "llvm/Analysis/CycleAnalysis.h" 17fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 18fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 19fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsR600.h" 20fe6060f1SDimitry Andric #include "llvm/Target/TargetMachine.h" 21fe6060f1SDimitry Andric #include "llvm/Transforms/IPO/Attributor.h" 22fe6060f1SDimitry Andric 23fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-attributor" 24fe6060f1SDimitry Andric 25*bdd1243dSDimitry Andric namespace llvm { 26*bdd1243dSDimitry Andric void initializeCycleInfoWrapperPassPass(PassRegistry &); 27*bdd1243dSDimitry Andric } 28*bdd1243dSDimitry Andric 29fe6060f1SDimitry Andric using namespace llvm; 30fe6060f1SDimitry Andric 3181ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS, 32349cc55cSDimitry Andric 3381ad6265SDimitry Andric enum ImplicitArgumentPositions { 3481ad6265SDimitry Andric #include "AMDGPUAttributes.def" 3581ad6265SDimitry Andric LAST_ARG_POS 36349cc55cSDimitry Andric }; 37349cc55cSDimitry Andric 3881ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS, 3981ad6265SDimitry Andric 4081ad6265SDimitry Andric enum ImplicitArgumentMask { 4181ad6265SDimitry Andric NOT_IMPLICIT_INPUT = 0, 4281ad6265SDimitry Andric #include "AMDGPUAttributes.def" 4381ad6265SDimitry Andric ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1 4481ad6265SDimitry Andric }; 4581ad6265SDimitry Andric 4681ad6265SDimitry Andric #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str}, 47349cc55cSDimitry Andric static constexpr std::pair<ImplicitArgumentMask, 48349cc55cSDimitry Andric StringLiteral> ImplicitAttrs[] = { 4981ad6265SDimitry Andric #include "AMDGPUAttributes.def" 50349cc55cSDimitry Andric }; 51fe6060f1SDimitry Andric 52fe6060f1SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always 53fe6060f1SDimitry Andric // initialized. 54fe6060f1SDimitry Andric // 55fe6060f1SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup 56fe6060f1SDimitry Andric // size is 1 for y/z. 57349cc55cSDimitry Andric static ImplicitArgumentMask 5881ad6265SDimitry Andric intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, 5981ad6265SDimitry Andric bool HasApertureRegs, bool SupportsGetDoorBellID) { 6081ad6265SDimitry Andric unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion(); 61fe6060f1SDimitry Andric switch (ID) { 62fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 63fe6060f1SDimitry Andric NonKernelOnly = true; 64349cc55cSDimitry Andric return WORKITEM_ID_X; 65fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_x: 66fe6060f1SDimitry Andric NonKernelOnly = true; 67349cc55cSDimitry Andric return WORKGROUP_ID_X; 68fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 69fe6060f1SDimitry Andric case Intrinsic::r600_read_tidig_y: 70349cc55cSDimitry Andric return WORKITEM_ID_Y; 71fe6060f1SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: 72fe6060f1SDimitry Andric case Intrinsic::r600_read_tidig_z: 73349cc55cSDimitry Andric return WORKITEM_ID_Z; 74fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_y: 75fe6060f1SDimitry Andric case Intrinsic::r600_read_tgid_y: 76349cc55cSDimitry Andric return WORKGROUP_ID_Y; 77fe6060f1SDimitry Andric case Intrinsic::amdgcn_workgroup_id_z: 78fe6060f1SDimitry Andric case Intrinsic::r600_read_tgid_z: 79349cc55cSDimitry Andric return WORKGROUP_ID_Z; 80fcaf7f86SDimitry Andric case Intrinsic::amdgcn_lds_kernel_id: 81fcaf7f86SDimitry Andric return LDS_KERNEL_ID; 82fe6060f1SDimitry Andric case Intrinsic::amdgcn_dispatch_ptr: 83349cc55cSDimitry Andric return DISPATCH_PTR; 84fe6060f1SDimitry Andric case Intrinsic::amdgcn_dispatch_id: 85349cc55cSDimitry Andric return DISPATCH_ID; 86fe6060f1SDimitry Andric case Intrinsic::amdgcn_implicitarg_ptr: 87349cc55cSDimitry Andric return IMPLICIT_ARG_PTR; 8881ad6265SDimitry Andric // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access 8981ad6265SDimitry Andric // queue_ptr. 90fe6060f1SDimitry Andric case Intrinsic::amdgcn_queue_ptr: 9181ad6265SDimitry Andric NeedsImplicit = (CodeObjectVersion == 5); 9281ad6265SDimitry Andric return QUEUE_PTR; 93fe6060f1SDimitry Andric case Intrinsic::amdgcn_is_shared: 94fe6060f1SDimitry Andric case Intrinsic::amdgcn_is_private: 9581ad6265SDimitry Andric if (HasApertureRegs) 9681ad6265SDimitry Andric return NOT_IMPLICIT_INPUT; 9781ad6265SDimitry Andric // Under V5, we need implicitarg_ptr + offsets to access private_base or 9881ad6265SDimitry Andric // shared_base. For pre-V5, however, need to access them through queue_ptr + 9981ad6265SDimitry Andric // offsets. 10081ad6265SDimitry Andric return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR; 101fe6060f1SDimitry Andric case Intrinsic::trap: 10281ad6265SDimitry Andric if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4. 10381ad6265SDimitry Andric return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR; 10481ad6265SDimitry Andric NeedsImplicit = (CodeObjectVersion == 5); // Need impicitarg_ptr under V5. 105349cc55cSDimitry Andric return QUEUE_PTR; 106fe6060f1SDimitry Andric default: 107349cc55cSDimitry Andric return NOT_IMPLICIT_INPUT; 108fe6060f1SDimitry Andric } 109fe6060f1SDimitry Andric } 110fe6060f1SDimitry Andric 111fe6060f1SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) { 112fe6060f1SDimitry Andric return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 113fe6060f1SDimitry Andric } 114fe6060f1SDimitry Andric 115fe6060f1SDimitry Andric static bool isDSAddress(const Constant *C) { 116fe6060f1SDimitry Andric const GlobalValue *GV = dyn_cast<GlobalValue>(C); 117fe6060f1SDimitry Andric if (!GV) 118fe6060f1SDimitry Andric return false; 119fe6060f1SDimitry Andric unsigned AS = GV->getAddressSpace(); 120fe6060f1SDimitry Andric return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; 121fe6060f1SDimitry Andric } 122fe6060f1SDimitry Andric 1230eae32dcSDimitry Andric /// Returns true if the function requires the implicit argument be passed 1240eae32dcSDimitry Andric /// regardless of the function contents. 12581ad6265SDimitry Andric static bool funcRequiresHostcallPtr(const Function &F) { 1260eae32dcSDimitry Andric // Sanitizers require the hostcall buffer passed in the implicit arguments. 1270eae32dcSDimitry Andric return F.hasFnAttribute(Attribute::SanitizeAddress) || 1280eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeThread) || 1290eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeMemory) || 1300eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeHWAddress) || 1310eae32dcSDimitry Andric F.hasFnAttribute(Attribute::SanitizeMemTag); 1320eae32dcSDimitry Andric } 1330eae32dcSDimitry Andric 134349cc55cSDimitry Andric namespace { 135fe6060f1SDimitry Andric class AMDGPUInformationCache : public InformationCache { 136fe6060f1SDimitry Andric public: 137fe6060f1SDimitry Andric AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, 138fe6060f1SDimitry Andric BumpPtrAllocator &Allocator, 139fe6060f1SDimitry Andric SetVector<Function *> *CGSCC, TargetMachine &TM) 140fe6060f1SDimitry Andric : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {} 141fe6060f1SDimitry Andric TargetMachine &TM; 142fe6060f1SDimitry Andric 143fe6060f1SDimitry Andric enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 }; 144fe6060f1SDimitry Andric 145fe6060f1SDimitry Andric /// Check if the subtarget has aperture regs. 146fe6060f1SDimitry Andric bool hasApertureRegs(Function &F) { 147fe6060f1SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 148fe6060f1SDimitry Andric return ST.hasApertureRegs(); 149fe6060f1SDimitry Andric } 150fe6060f1SDimitry Andric 15181ad6265SDimitry Andric /// Check if the subtarget supports GetDoorbellID. 15281ad6265SDimitry Andric bool supportsGetDoorbellID(Function &F) { 15381ad6265SDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 15481ad6265SDimitry Andric return ST.supportsGetDoorbellID(); 15581ad6265SDimitry Andric } 15681ad6265SDimitry Andric 157349cc55cSDimitry Andric std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) { 158349cc55cSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 159349cc55cSDimitry Andric return ST.getFlatWorkGroupSizes(F); 160349cc55cSDimitry Andric } 161349cc55cSDimitry Andric 162349cc55cSDimitry Andric std::pair<unsigned, unsigned> 163349cc55cSDimitry Andric getMaximumFlatWorkGroupRange(const Function &F) { 164349cc55cSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F); 165349cc55cSDimitry Andric return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; 166349cc55cSDimitry Andric } 167349cc55cSDimitry Andric 168fe6060f1SDimitry Andric private: 16981ad6265SDimitry Andric /// Check if the ConstantExpr \p CE requires the queue pointer. 170fe6060f1SDimitry Andric static bool visitConstExpr(const ConstantExpr *CE) { 171fe6060f1SDimitry Andric if (CE->getOpcode() == Instruction::AddrSpaceCast) { 172fe6060f1SDimitry Andric unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 173fe6060f1SDimitry Andric return castRequiresQueuePtr(SrcAS); 174fe6060f1SDimitry Andric } 175fe6060f1SDimitry Andric return false; 176fe6060f1SDimitry Andric } 177fe6060f1SDimitry Andric 178fe6060f1SDimitry Andric /// Get the constant access bitmap for \p C. 179fe6060f1SDimitry Andric uint8_t getConstantAccess(const Constant *C) { 180fe6060f1SDimitry Andric auto It = ConstantStatus.find(C); 181fe6060f1SDimitry Andric if (It != ConstantStatus.end()) 182fe6060f1SDimitry Andric return It->second; 183fe6060f1SDimitry Andric 184fe6060f1SDimitry Andric uint8_t Result = 0; 185fe6060f1SDimitry Andric if (isDSAddress(C)) 186fe6060f1SDimitry Andric Result = DS_GLOBAL; 187fe6060f1SDimitry Andric 188fe6060f1SDimitry Andric if (const auto *CE = dyn_cast<ConstantExpr>(C)) 189fe6060f1SDimitry Andric if (visitConstExpr(CE)) 190fe6060f1SDimitry Andric Result |= ADDR_SPACE_CAST; 191fe6060f1SDimitry Andric 192fe6060f1SDimitry Andric for (const Use &U : C->operands()) { 193fe6060f1SDimitry Andric const auto *OpC = dyn_cast<Constant>(U); 194fe6060f1SDimitry Andric if (!OpC) 195fe6060f1SDimitry Andric continue; 196fe6060f1SDimitry Andric 197fe6060f1SDimitry Andric Result |= getConstantAccess(OpC); 198fe6060f1SDimitry Andric } 199fe6060f1SDimitry Andric return Result; 200fe6060f1SDimitry Andric } 201fe6060f1SDimitry Andric 202fe6060f1SDimitry Andric public: 20381ad6265SDimitry Andric /// Returns true if \p Fn needs the queue pointer because of \p C. 204fe6060f1SDimitry Andric bool needsQueuePtr(const Constant *C, Function &Fn) { 205fe6060f1SDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv()); 206fe6060f1SDimitry Andric bool HasAperture = hasApertureRegs(Fn); 207fe6060f1SDimitry Andric 208fe6060f1SDimitry Andric // No need to explore the constants. 209fe6060f1SDimitry Andric if (!IsNonEntryFunc && HasAperture) 210fe6060f1SDimitry Andric return false; 211fe6060f1SDimitry Andric 212fe6060f1SDimitry Andric uint8_t Access = getConstantAccess(C); 213fe6060f1SDimitry Andric 214fe6060f1SDimitry Andric // We need to trap on DS globals in non-entry functions. 215fe6060f1SDimitry Andric if (IsNonEntryFunc && (Access & DS_GLOBAL)) 216fe6060f1SDimitry Andric return true; 217fe6060f1SDimitry Andric 218fe6060f1SDimitry Andric return !HasAperture && (Access & ADDR_SPACE_CAST); 219fe6060f1SDimitry Andric } 220fe6060f1SDimitry Andric 221fe6060f1SDimitry Andric private: 22281ad6265SDimitry Andric /// Used to determine if the Constant needs the queue pointer. 223fe6060f1SDimitry Andric DenseMap<const Constant *, uint8_t> ConstantStatus; 224fe6060f1SDimitry Andric }; 225fe6060f1SDimitry Andric 226*bdd1243dSDimitry Andric struct AAAMDAttributes 227*bdd1243dSDimitry Andric : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>, 228*bdd1243dSDimitry Andric AbstractAttribute> { 229*bdd1243dSDimitry Andric using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>, 230349cc55cSDimitry Andric AbstractAttribute>; 231349cc55cSDimitry Andric 232fe6060f1SDimitry Andric AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 233fe6060f1SDimitry Andric 234fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 235fe6060f1SDimitry Andric static AAAMDAttributes &createForPosition(const IRPosition &IRP, 236fe6060f1SDimitry Andric Attributor &A); 237fe6060f1SDimitry Andric 238fe6060f1SDimitry Andric /// See AbstractAttribute::getName(). 239fe6060f1SDimitry Andric const std::string getName() const override { return "AAAMDAttributes"; } 240fe6060f1SDimitry Andric 241fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr(). 242fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 243fe6060f1SDimitry Andric 244fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 245fe6060f1SDimitry Andric /// AAAMDAttributes. 246fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 247fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 248fe6060f1SDimitry Andric } 249fe6060f1SDimitry Andric 250fe6060f1SDimitry Andric /// Unique ID (due to the unique address) 251fe6060f1SDimitry Andric static const char ID; 252fe6060f1SDimitry Andric }; 253fe6060f1SDimitry Andric const char AAAMDAttributes::ID = 0; 254fe6060f1SDimitry Andric 255349cc55cSDimitry Andric struct AAUniformWorkGroupSize 256fe6060f1SDimitry Andric : public StateWrapper<BooleanState, AbstractAttribute> { 257fe6060f1SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>; 258349cc55cSDimitry Andric AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 259fe6060f1SDimitry Andric 260fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 261349cc55cSDimitry Andric static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP, 262fe6060f1SDimitry Andric Attributor &A); 263fe6060f1SDimitry Andric 264fe6060f1SDimitry Andric /// See AbstractAttribute::getName(). 265349cc55cSDimitry Andric const std::string getName() const override { 266349cc55cSDimitry Andric return "AAUniformWorkGroupSize"; 267349cc55cSDimitry Andric } 268fe6060f1SDimitry Andric 269fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr(). 270fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 271fe6060f1SDimitry Andric 272fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 273fe6060f1SDimitry Andric /// AAAMDAttributes. 274fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 275fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 276fe6060f1SDimitry Andric } 277fe6060f1SDimitry Andric 278fe6060f1SDimitry Andric /// Unique ID (due to the unique address) 279fe6060f1SDimitry Andric static const char ID; 280fe6060f1SDimitry Andric }; 281349cc55cSDimitry Andric const char AAUniformWorkGroupSize::ID = 0; 282fe6060f1SDimitry Andric 283349cc55cSDimitry Andric struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { 284349cc55cSDimitry Andric AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A) 285349cc55cSDimitry Andric : AAUniformWorkGroupSize(IRP, A) {} 286fe6060f1SDimitry Andric 287fe6060f1SDimitry Andric void initialize(Attributor &A) override { 288fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 289fe6060f1SDimitry Andric CallingConv::ID CC = F->getCallingConv(); 290fe6060f1SDimitry Andric 291fe6060f1SDimitry Andric if (CC != CallingConv::AMDGPU_KERNEL) 292fe6060f1SDimitry Andric return; 293fe6060f1SDimitry Andric 294fe6060f1SDimitry Andric bool InitialValue = false; 295fe6060f1SDimitry Andric if (F->hasFnAttribute("uniform-work-group-size")) 296fe6060f1SDimitry Andric InitialValue = F->getFnAttribute("uniform-work-group-size") 297fe6060f1SDimitry Andric .getValueAsString() 298fe6060f1SDimitry Andric .equals("true"); 299fe6060f1SDimitry Andric 300fe6060f1SDimitry Andric if (InitialValue) 301fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 302fe6060f1SDimitry Andric else 303fe6060f1SDimitry Andric indicatePessimisticFixpoint(); 304fe6060f1SDimitry Andric } 305fe6060f1SDimitry Andric 306fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 307fe6060f1SDimitry Andric ChangeStatus Change = ChangeStatus::UNCHANGED; 308fe6060f1SDimitry Andric 309fe6060f1SDimitry Andric auto CheckCallSite = [&](AbstractCallSite CS) { 310fe6060f1SDimitry Andric Function *Caller = CS.getInstruction()->getFunction(); 311349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName() 312fe6060f1SDimitry Andric << "->" << getAssociatedFunction()->getName() << "\n"); 313fe6060f1SDimitry Andric 314349cc55cSDimitry Andric const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>( 315fe6060f1SDimitry Andric *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 316fe6060f1SDimitry Andric 317fe6060f1SDimitry Andric Change = Change | clampStateAndIndicateChange(this->getState(), 318fe6060f1SDimitry Andric CallerInfo.getState()); 319fe6060f1SDimitry Andric 320fe6060f1SDimitry Andric return true; 321fe6060f1SDimitry Andric }; 322fe6060f1SDimitry Andric 323fe6060f1SDimitry Andric bool AllCallSitesKnown = true; 324fe6060f1SDimitry Andric if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 3250eae32dcSDimitry Andric return indicatePessimisticFixpoint(); 326fe6060f1SDimitry Andric 327fe6060f1SDimitry Andric return Change; 328fe6060f1SDimitry Andric } 329fe6060f1SDimitry Andric 330fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 331fe6060f1SDimitry Andric SmallVector<Attribute, 8> AttrList; 332fe6060f1SDimitry Andric LLVMContext &Ctx = getAssociatedFunction()->getContext(); 333fe6060f1SDimitry Andric 334fe6060f1SDimitry Andric AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size", 335fe6060f1SDimitry Andric getAssumed() ? "true" : "false")); 336fe6060f1SDimitry Andric return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 337fe6060f1SDimitry Andric /* ForceReplace */ true); 338fe6060f1SDimitry Andric } 339fe6060f1SDimitry Andric 340fe6060f1SDimitry Andric bool isValidState() const override { 341fe6060f1SDimitry Andric // This state is always valid, even when the state is false. 342fe6060f1SDimitry Andric return true; 343fe6060f1SDimitry Andric } 344fe6060f1SDimitry Andric 345fe6060f1SDimitry Andric const std::string getAsStr() const override { 346fe6060f1SDimitry Andric return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]"; 347fe6060f1SDimitry Andric } 348fe6060f1SDimitry Andric 349fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics() 350fe6060f1SDimitry Andric void trackStatistics() const override {} 351fe6060f1SDimitry Andric }; 352fe6060f1SDimitry Andric 353349cc55cSDimitry Andric AAUniformWorkGroupSize & 354349cc55cSDimitry Andric AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP, 355fe6060f1SDimitry Andric Attributor &A) { 356fe6060f1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 357349cc55cSDimitry Andric return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A); 358349cc55cSDimitry Andric llvm_unreachable( 359349cc55cSDimitry Andric "AAUniformWorkGroupSize is only valid for function position"); 360fe6060f1SDimitry Andric } 361fe6060f1SDimitry Andric 362fe6060f1SDimitry Andric struct AAAMDAttributesFunction : public AAAMDAttributes { 363fe6060f1SDimitry Andric AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A) 364fe6060f1SDimitry Andric : AAAMDAttributes(IRP, A) {} 365fe6060f1SDimitry Andric 366fe6060f1SDimitry Andric void initialize(Attributor &A) override { 367fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 3680eae32dcSDimitry Andric 3690eae32dcSDimitry Andric // If the function requires the implicit arg pointer due to sanitizers, 3700eae32dcSDimitry Andric // assume it's needed even if explicitly marked as not requiring it. 37181ad6265SDimitry Andric const bool NeedsHostcall = funcRequiresHostcallPtr(*F); 37281ad6265SDimitry Andric if (NeedsHostcall) { 3730eae32dcSDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 37481ad6265SDimitry Andric removeAssumedBits(HOSTCALL_PTR); 37581ad6265SDimitry Andric } 3760eae32dcSDimitry Andric 377349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) { 37881ad6265SDimitry Andric if (NeedsHostcall && 37981ad6265SDimitry Andric (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR)) 3800eae32dcSDimitry Andric continue; 3810eae32dcSDimitry Andric 382349cc55cSDimitry Andric if (F->hasFnAttribute(Attr.second)) 383349cc55cSDimitry Andric addKnownBits(Attr.first); 384fe6060f1SDimitry Andric } 385fe6060f1SDimitry Andric 386349cc55cSDimitry Andric if (F->isDeclaration()) 387349cc55cSDimitry Andric return; 388349cc55cSDimitry Andric 389fe6060f1SDimitry Andric // Ignore functions with graphics calling conventions, these are currently 390fe6060f1SDimitry Andric // not allowed to have kernel arguments. 391fe6060f1SDimitry Andric if (AMDGPU::isGraphics(F->getCallingConv())) { 392fe6060f1SDimitry Andric indicatePessimisticFixpoint(); 393fe6060f1SDimitry Andric return; 394fe6060f1SDimitry Andric } 395fe6060f1SDimitry Andric } 396fe6060f1SDimitry Andric 397fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 398fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 399349cc55cSDimitry Andric // The current assumed state used to determine a change. 400349cc55cSDimitry Andric auto OrigAssumed = getAssumed(); 401fe6060f1SDimitry Andric 402fe6060f1SDimitry Andric // Check for Intrinsics and propagate attributes. 403fe6060f1SDimitry Andric const AACallEdges &AAEdges = A.getAAFor<AACallEdges>( 404fe6060f1SDimitry Andric *this, this->getIRPosition(), DepClassTy::REQUIRED); 405349cc55cSDimitry Andric if (AAEdges.hasNonAsmUnknownCallee()) 406349cc55cSDimitry Andric return indicatePessimisticFixpoint(); 407fe6060f1SDimitry Andric 408349cc55cSDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 409fe6060f1SDimitry Andric 41081ad6265SDimitry Andric bool NeedsImplicit = false; 41181ad6265SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 41281ad6265SDimitry Andric bool HasApertureRegs = InfoCache.hasApertureRegs(*F); 41381ad6265SDimitry Andric bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F); 414349cc55cSDimitry Andric 415fe6060f1SDimitry Andric for (Function *Callee : AAEdges.getOptimisticEdges()) { 416fe6060f1SDimitry Andric Intrinsic::ID IID = Callee->getIntrinsicID(); 417349cc55cSDimitry Andric if (IID == Intrinsic::not_intrinsic) { 418349cc55cSDimitry Andric const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>( 419349cc55cSDimitry Andric *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); 420349cc55cSDimitry Andric *this &= AAAMD; 421fe6060f1SDimitry Andric continue; 422fe6060f1SDimitry Andric } 423fe6060f1SDimitry Andric 424fe6060f1SDimitry Andric bool NonKernelOnly = false; 425349cc55cSDimitry Andric ImplicitArgumentMask AttrMask = 42681ad6265SDimitry Andric intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit, 42781ad6265SDimitry Andric HasApertureRegs, SupportsGetDoorbellID); 428349cc55cSDimitry Andric if (AttrMask != NOT_IMPLICIT_INPUT) { 429349cc55cSDimitry Andric if ((IsNonEntryFunc || !NonKernelOnly)) 430349cc55cSDimitry Andric removeAssumedBits(AttrMask); 431fe6060f1SDimitry Andric } 432fe6060f1SDimitry Andric } 433fe6060f1SDimitry Andric 43481ad6265SDimitry Andric // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base. 43581ad6265SDimitry Andric if (NeedsImplicit) 43681ad6265SDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 43781ad6265SDimitry Andric 43881ad6265SDimitry Andric if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) { 43981ad6265SDimitry Andric // Under V5, we need implicitarg_ptr + offsets to access private_base or 44081ad6265SDimitry Andric // shared_base. We do not actually need queue_ptr. 44181ad6265SDimitry Andric if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) 44281ad6265SDimitry Andric removeAssumedBits(IMPLICIT_ARG_PTR); 44381ad6265SDimitry Andric else 444349cc55cSDimitry Andric removeAssumedBits(QUEUE_PTR); 445fe6060f1SDimitry Andric } 446fe6060f1SDimitry Andric 44781ad6265SDimitry Andric if (funcRetrievesMultigridSyncArg(A)) { 44881ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && 44981ad6265SDimitry Andric "multigrid_sync_arg needs implicitarg_ptr"); 45081ad6265SDimitry Andric removeAssumedBits(MULTIGRID_SYNC_ARG); 451349cc55cSDimitry Andric } 452fe6060f1SDimitry Andric 45381ad6265SDimitry Andric if (funcRetrievesHostcallPtr(A)) { 45481ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr"); 45581ad6265SDimitry Andric removeAssumedBits(HOSTCALL_PTR); 45681ad6265SDimitry Andric } 45781ad6265SDimitry Andric 45881ad6265SDimitry Andric if (funcRetrievesHeapPtr(A)) { 45981ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr"); 46081ad6265SDimitry Andric removeAssumedBits(HEAP_PTR); 46181ad6265SDimitry Andric } 46281ad6265SDimitry Andric 46381ad6265SDimitry Andric if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) { 46481ad6265SDimitry Andric assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr"); 465349cc55cSDimitry Andric removeAssumedBits(QUEUE_PTR); 466fe6060f1SDimitry Andric } 467fe6060f1SDimitry Andric 468fcaf7f86SDimitry Andric if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) { 469fcaf7f86SDimitry Andric removeAssumedBits(LDS_KERNEL_ID); 470fcaf7f86SDimitry Andric } 471fcaf7f86SDimitry Andric 472*bdd1243dSDimitry Andric if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A)) 473*bdd1243dSDimitry Andric removeAssumedBits(DEFAULT_QUEUE); 474*bdd1243dSDimitry Andric 475*bdd1243dSDimitry Andric if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A)) 476*bdd1243dSDimitry Andric removeAssumedBits(COMPLETION_ACTION); 477*bdd1243dSDimitry Andric 47881ad6265SDimitry Andric return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED 47981ad6265SDimitry Andric : ChangeStatus::UNCHANGED; 480fe6060f1SDimitry Andric } 481fe6060f1SDimitry Andric 482fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 483fe6060f1SDimitry Andric SmallVector<Attribute, 8> AttrList; 484fe6060f1SDimitry Andric LLVMContext &Ctx = getAssociatedFunction()->getContext(); 485fe6060f1SDimitry Andric 486349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) { 487349cc55cSDimitry Andric if (isKnown(Attr.first)) 488349cc55cSDimitry Andric AttrList.push_back(Attribute::get(Ctx, Attr.second)); 489349cc55cSDimitry Andric } 490fe6060f1SDimitry Andric 491fe6060f1SDimitry Andric return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 492fe6060f1SDimitry Andric /* ForceReplace */ true); 493fe6060f1SDimitry Andric } 494fe6060f1SDimitry Andric 495fe6060f1SDimitry Andric const std::string getAsStr() const override { 496349cc55cSDimitry Andric std::string Str; 497349cc55cSDimitry Andric raw_string_ostream OS(Str); 498349cc55cSDimitry Andric OS << "AMDInfo["; 499349cc55cSDimitry Andric for (auto Attr : ImplicitAttrs) 500349cc55cSDimitry Andric OS << ' ' << Attr.second; 501349cc55cSDimitry Andric OS << " ]"; 502349cc55cSDimitry Andric return OS.str(); 503fe6060f1SDimitry Andric } 504fe6060f1SDimitry Andric 505fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics() 506fe6060f1SDimitry Andric void trackStatistics() const override {} 50781ad6265SDimitry Andric 50881ad6265SDimitry Andric private: 50981ad6265SDimitry Andric bool checkForQueuePtr(Attributor &A) { 51081ad6265SDimitry Andric Function *F = getAssociatedFunction(); 51181ad6265SDimitry Andric bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); 51281ad6265SDimitry Andric 51381ad6265SDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 51481ad6265SDimitry Andric 51581ad6265SDimitry Andric bool NeedsQueuePtr = false; 51681ad6265SDimitry Andric 51781ad6265SDimitry Andric auto CheckAddrSpaceCasts = [&](Instruction &I) { 51881ad6265SDimitry Andric unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace(); 51981ad6265SDimitry Andric if (castRequiresQueuePtr(SrcAS)) { 52081ad6265SDimitry Andric NeedsQueuePtr = true; 52181ad6265SDimitry Andric return false; 52281ad6265SDimitry Andric } 52381ad6265SDimitry Andric return true; 52481ad6265SDimitry Andric }; 52581ad6265SDimitry Andric 52681ad6265SDimitry Andric bool HasApertureRegs = InfoCache.hasApertureRegs(*F); 52781ad6265SDimitry Andric 52881ad6265SDimitry Andric // `checkForAllInstructions` is much more cheaper than going through all 52981ad6265SDimitry Andric // instructions, try it first. 53081ad6265SDimitry Andric 53181ad6265SDimitry Andric // The queue pointer is not needed if aperture regs is present. 53281ad6265SDimitry Andric if (!HasApertureRegs) { 53381ad6265SDimitry Andric bool UsedAssumedInformation = false; 53481ad6265SDimitry Andric A.checkForAllInstructions(CheckAddrSpaceCasts, *this, 53581ad6265SDimitry Andric {Instruction::AddrSpaceCast}, 53681ad6265SDimitry Andric UsedAssumedInformation); 53781ad6265SDimitry Andric } 53881ad6265SDimitry Andric 53981ad6265SDimitry Andric // If we found that we need the queue pointer, nothing else to do. 54081ad6265SDimitry Andric if (NeedsQueuePtr) 54181ad6265SDimitry Andric return true; 54281ad6265SDimitry Andric 54381ad6265SDimitry Andric if (!IsNonEntryFunc && HasApertureRegs) 54481ad6265SDimitry Andric return false; 54581ad6265SDimitry Andric 54681ad6265SDimitry Andric for (BasicBlock &BB : *F) { 54781ad6265SDimitry Andric for (Instruction &I : BB) { 54881ad6265SDimitry Andric for (const Use &U : I.operands()) { 54981ad6265SDimitry Andric if (const auto *C = dyn_cast<Constant>(U)) { 55081ad6265SDimitry Andric if (InfoCache.needsQueuePtr(C, *F)) 55181ad6265SDimitry Andric return true; 55281ad6265SDimitry Andric } 55381ad6265SDimitry Andric } 55481ad6265SDimitry Andric } 55581ad6265SDimitry Andric } 55681ad6265SDimitry Andric 55781ad6265SDimitry Andric return false; 55881ad6265SDimitry Andric } 55981ad6265SDimitry Andric 56081ad6265SDimitry Andric bool funcRetrievesMultigridSyncArg(Attributor &A) { 56181ad6265SDimitry Andric auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(); 562*bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 563*bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 56481ad6265SDimitry Andric } 56581ad6265SDimitry Andric 56681ad6265SDimitry Andric bool funcRetrievesHostcallPtr(Attributor &A) { 56781ad6265SDimitry Andric auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(); 568*bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 569*bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 570*bdd1243dSDimitry Andric } 571*bdd1243dSDimitry Andric 572*bdd1243dSDimitry Andric bool funcRetrievesDefaultQueue(Attributor &A) { 573*bdd1243dSDimitry Andric auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(); 574*bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 575*bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 576*bdd1243dSDimitry Andric } 577*bdd1243dSDimitry Andric 578*bdd1243dSDimitry Andric bool funcRetrievesCompletionAction(Attributor &A) { 579*bdd1243dSDimitry Andric auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(); 580*bdd1243dSDimitry Andric AA::RangeTy Range(Pos, 8); 581*bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 58281ad6265SDimitry Andric } 58381ad6265SDimitry Andric 58481ad6265SDimitry Andric bool funcRetrievesHeapPtr(Attributor &A) { 58581ad6265SDimitry Andric if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) 58681ad6265SDimitry Andric return false; 587*bdd1243dSDimitry Andric AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8); 588*bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 58981ad6265SDimitry Andric } 59081ad6265SDimitry Andric 59181ad6265SDimitry Andric bool funcRetrievesQueuePtr(Attributor &A) { 59281ad6265SDimitry Andric if (AMDGPU::getAmdhsaCodeObjectVersion() != 5) 59381ad6265SDimitry Andric return false; 594*bdd1243dSDimitry Andric AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8); 595*bdd1243dSDimitry Andric return funcRetrievesImplicitKernelArg(A, Range); 59681ad6265SDimitry Andric } 59781ad6265SDimitry Andric 598*bdd1243dSDimitry Andric bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) { 59981ad6265SDimitry Andric // Check if this is a call to the implicitarg_ptr builtin and it 60081ad6265SDimitry Andric // is used to retrieve the hostcall pointer. The implicit arg for 60181ad6265SDimitry Andric // hostcall is not used only if every use of the implicitarg_ptr 60281ad6265SDimitry Andric // is a load that clearly does not retrieve any byte of the 60381ad6265SDimitry Andric // hostcall pointer. We check this by tracing all the uses of the 60481ad6265SDimitry Andric // initial call to the implicitarg_ptr intrinsic. 60581ad6265SDimitry Andric auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) { 60681ad6265SDimitry Andric auto &Call = cast<CallBase>(I); 60781ad6265SDimitry Andric if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr) 60881ad6265SDimitry Andric return true; 60981ad6265SDimitry Andric 61081ad6265SDimitry Andric const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>( 61181ad6265SDimitry Andric *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); 61281ad6265SDimitry Andric 61381ad6265SDimitry Andric return PointerInfoAA.forallInterferingAccesses( 614*bdd1243dSDimitry Andric Range, [](const AAPointerInfo::Access &Acc, bool IsExact) { 61581ad6265SDimitry Andric return Acc.getRemoteInst()->isDroppable(); 61681ad6265SDimitry Andric }); 61781ad6265SDimitry Andric }; 61881ad6265SDimitry Andric 61981ad6265SDimitry Andric bool UsedAssumedInformation = false; 62081ad6265SDimitry Andric return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this, 62181ad6265SDimitry Andric UsedAssumedInformation); 62281ad6265SDimitry Andric } 623fcaf7f86SDimitry Andric 624fcaf7f86SDimitry Andric bool funcRetrievesLDSKernelId(Attributor &A) { 625fcaf7f86SDimitry Andric auto DoesNotRetrieve = [&](Instruction &I) { 626fcaf7f86SDimitry Andric auto &Call = cast<CallBase>(I); 627fcaf7f86SDimitry Andric return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id; 628fcaf7f86SDimitry Andric }; 629fcaf7f86SDimitry Andric bool UsedAssumedInformation = false; 630fcaf7f86SDimitry Andric return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this, 631fcaf7f86SDimitry Andric UsedAssumedInformation); 632fcaf7f86SDimitry Andric } 633fe6060f1SDimitry Andric }; 634fe6060f1SDimitry Andric 635fe6060f1SDimitry Andric AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, 636fe6060f1SDimitry Andric Attributor &A) { 637fe6060f1SDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 638fe6060f1SDimitry Andric return *new (A.Allocator) AAAMDAttributesFunction(IRP, A); 639fe6060f1SDimitry Andric llvm_unreachable("AAAMDAttributes is only valid for function position"); 640fe6060f1SDimitry Andric } 641fe6060f1SDimitry Andric 642349cc55cSDimitry Andric /// Propagate amdgpu-flat-work-group-size attribute. 643349cc55cSDimitry Andric struct AAAMDFlatWorkGroupSize 644349cc55cSDimitry Andric : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> { 645349cc55cSDimitry Andric using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>; 646349cc55cSDimitry Andric AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) 647349cc55cSDimitry Andric : Base(IRP, 32) {} 648349cc55cSDimitry Andric 649349cc55cSDimitry Andric /// See AbstractAttribute::getState(...). 650349cc55cSDimitry Andric IntegerRangeState &getState() override { return *this; } 651349cc55cSDimitry Andric const IntegerRangeState &getState() const override { return *this; } 652349cc55cSDimitry Andric 653349cc55cSDimitry Andric void initialize(Attributor &A) override { 654349cc55cSDimitry Andric Function *F = getAssociatedFunction(); 655349cc55cSDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 656349cc55cSDimitry Andric unsigned MinGroupSize, MaxGroupSize; 657349cc55cSDimitry Andric std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); 658349cc55cSDimitry Andric intersectKnown( 659349cc55cSDimitry Andric ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); 6600eae32dcSDimitry Andric 6610eae32dcSDimitry Andric if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) 6620eae32dcSDimitry Andric indicatePessimisticFixpoint(); 663349cc55cSDimitry Andric } 664349cc55cSDimitry Andric 665349cc55cSDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 666349cc55cSDimitry Andric ChangeStatus Change = ChangeStatus::UNCHANGED; 667349cc55cSDimitry Andric 668349cc55cSDimitry Andric auto CheckCallSite = [&](AbstractCallSite CS) { 669349cc55cSDimitry Andric Function *Caller = CS.getInstruction()->getFunction(); 670349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName() 671349cc55cSDimitry Andric << "->" << getAssociatedFunction()->getName() << '\n'); 672349cc55cSDimitry Andric 673349cc55cSDimitry Andric const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>( 674349cc55cSDimitry Andric *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); 675349cc55cSDimitry Andric 676349cc55cSDimitry Andric Change |= 677349cc55cSDimitry Andric clampStateAndIndicateChange(this->getState(), CallerInfo.getState()); 678349cc55cSDimitry Andric 679349cc55cSDimitry Andric return true; 680349cc55cSDimitry Andric }; 681349cc55cSDimitry Andric 682349cc55cSDimitry Andric bool AllCallSitesKnown = true; 683349cc55cSDimitry Andric if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) 684349cc55cSDimitry Andric return indicatePessimisticFixpoint(); 685349cc55cSDimitry Andric 686349cc55cSDimitry Andric return Change; 687349cc55cSDimitry Andric } 688349cc55cSDimitry Andric 689349cc55cSDimitry Andric ChangeStatus manifest(Attributor &A) override { 690349cc55cSDimitry Andric SmallVector<Attribute, 8> AttrList; 691349cc55cSDimitry Andric Function *F = getAssociatedFunction(); 692349cc55cSDimitry Andric LLVMContext &Ctx = F->getContext(); 693349cc55cSDimitry Andric 694349cc55cSDimitry Andric auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache()); 695349cc55cSDimitry Andric unsigned Min, Max; 696349cc55cSDimitry Andric std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); 697349cc55cSDimitry Andric 698349cc55cSDimitry Andric // Don't add the attribute if it's the implied default. 699349cc55cSDimitry Andric if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) 700349cc55cSDimitry Andric return ChangeStatus::UNCHANGED; 701349cc55cSDimitry Andric 702349cc55cSDimitry Andric SmallString<10> Buffer; 703349cc55cSDimitry Andric raw_svector_ostream OS(Buffer); 704349cc55cSDimitry Andric OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 705349cc55cSDimitry Andric 706349cc55cSDimitry Andric AttrList.push_back( 707349cc55cSDimitry Andric Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str())); 708349cc55cSDimitry Andric return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, 709349cc55cSDimitry Andric /* ForceReplace */ true); 710349cc55cSDimitry Andric } 711349cc55cSDimitry Andric 712349cc55cSDimitry Andric const std::string getAsStr() const override { 713349cc55cSDimitry Andric std::string Str; 714349cc55cSDimitry Andric raw_string_ostream OS(Str); 715349cc55cSDimitry Andric OS << "AMDFlatWorkGroupSize["; 716349cc55cSDimitry Andric OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; 717349cc55cSDimitry Andric OS << ']'; 718349cc55cSDimitry Andric return OS.str(); 719349cc55cSDimitry Andric } 720349cc55cSDimitry Andric 721349cc55cSDimitry Andric /// See AbstractAttribute::trackStatistics() 722349cc55cSDimitry Andric void trackStatistics() const override {} 723349cc55cSDimitry Andric 724349cc55cSDimitry Andric /// Create an abstract attribute view for the position \p IRP. 725349cc55cSDimitry Andric static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP, 726349cc55cSDimitry Andric Attributor &A); 727349cc55cSDimitry Andric 728349cc55cSDimitry Andric /// See AbstractAttribute::getName() 729349cc55cSDimitry Andric const std::string getName() const override { 730349cc55cSDimitry Andric return "AAAMDFlatWorkGroupSize"; 731349cc55cSDimitry Andric } 732349cc55cSDimitry Andric 733349cc55cSDimitry Andric /// See AbstractAttribute::getIdAddr() 734349cc55cSDimitry Andric const char *getIdAddr() const override { return &ID; } 735349cc55cSDimitry Andric 736349cc55cSDimitry Andric /// This function should return true if the type of the \p AA is 737349cc55cSDimitry Andric /// AAAMDFlatWorkGroupSize 738349cc55cSDimitry Andric static bool classof(const AbstractAttribute *AA) { 739349cc55cSDimitry Andric return (AA->getIdAddr() == &ID); 740349cc55cSDimitry Andric } 741349cc55cSDimitry Andric 742349cc55cSDimitry Andric /// Unique ID (due to the unique address) 743349cc55cSDimitry Andric static const char ID; 744349cc55cSDimitry Andric }; 745349cc55cSDimitry Andric 746349cc55cSDimitry Andric const char AAAMDFlatWorkGroupSize::ID = 0; 747349cc55cSDimitry Andric 748349cc55cSDimitry Andric AAAMDFlatWorkGroupSize & 749349cc55cSDimitry Andric AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP, 750349cc55cSDimitry Andric Attributor &A) { 751349cc55cSDimitry Andric if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) 752349cc55cSDimitry Andric return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A); 753349cc55cSDimitry Andric llvm_unreachable( 754349cc55cSDimitry Andric "AAAMDFlatWorkGroupSize is only valid for function position"); 755349cc55cSDimitry Andric } 756349cc55cSDimitry Andric 757fe6060f1SDimitry Andric class AMDGPUAttributor : public ModulePass { 758fe6060f1SDimitry Andric public: 759fe6060f1SDimitry Andric AMDGPUAttributor() : ModulePass(ID) {} 760fe6060f1SDimitry Andric 761fe6060f1SDimitry Andric /// doInitialization - Virtual method overridden by subclasses to do 762fe6060f1SDimitry Andric /// any necessary initialization before any pass is run. 763fe6060f1SDimitry Andric bool doInitialization(Module &) override { 764fe6060f1SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 765fe6060f1SDimitry Andric if (!TPC) 766fe6060f1SDimitry Andric report_fatal_error("TargetMachine is required"); 767fe6060f1SDimitry Andric 768fe6060f1SDimitry Andric TM = &TPC->getTM<TargetMachine>(); 769fe6060f1SDimitry Andric return false; 770fe6060f1SDimitry Andric } 771fe6060f1SDimitry Andric 772fe6060f1SDimitry Andric bool runOnModule(Module &M) override { 773fe6060f1SDimitry Andric SetVector<Function *> Functions; 774*bdd1243dSDimitry Andric AnalysisGetter AG(this); 775349cc55cSDimitry Andric for (Function &F : M) { 776349cc55cSDimitry Andric if (!F.isIntrinsic()) 777fe6060f1SDimitry Andric Functions.insert(&F); 778349cc55cSDimitry Andric } 779fe6060f1SDimitry Andric 780fe6060f1SDimitry Andric CallGraphUpdater CGUpdater; 781fe6060f1SDimitry Andric BumpPtrAllocator Allocator; 782fe6060f1SDimitry Andric AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); 783349cc55cSDimitry Andric DenseSet<const char *> Allowed( 784349cc55cSDimitry Andric {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, 785fcaf7f86SDimitry Andric &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, 786*bdd1243dSDimitry Andric &AAPointerInfo::ID, &AAPotentialConstantValues::ID}); 787349cc55cSDimitry Andric 78881ad6265SDimitry Andric AttributorConfig AC(CGUpdater); 78981ad6265SDimitry Andric AC.Allowed = &Allowed; 79081ad6265SDimitry Andric AC.IsModulePass = true; 79181ad6265SDimitry Andric AC.DefaultInitializeLiveInternals = false; 79281ad6265SDimitry Andric 79381ad6265SDimitry Andric Attributor A(Functions, InfoCache, AC); 794fe6060f1SDimitry Andric 795fe6060f1SDimitry Andric for (Function &F : M) { 796349cc55cSDimitry Andric if (!F.isIntrinsic()) { 797fe6060f1SDimitry Andric A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F)); 798349cc55cSDimitry Andric A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F)); 799349cc55cSDimitry Andric if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) { 800349cc55cSDimitry Andric A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F)); 801349cc55cSDimitry Andric } 802349cc55cSDimitry Andric } 803fe6060f1SDimitry Andric } 804fe6060f1SDimitry Andric 805fe6060f1SDimitry Andric ChangeStatus Change = A.run(); 806fe6060f1SDimitry Andric return Change == ChangeStatus::CHANGED; 807fe6060f1SDimitry Andric } 808fe6060f1SDimitry Andric 809*bdd1243dSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 810*bdd1243dSDimitry Andric AU.addRequired<CycleInfoWrapperPass>(); 811*bdd1243dSDimitry Andric } 812*bdd1243dSDimitry Andric 813fe6060f1SDimitry Andric StringRef getPassName() const override { return "AMDGPU Attributor"; } 814fe6060f1SDimitry Andric TargetMachine *TM; 815fe6060f1SDimitry Andric static char ID; 816fe6060f1SDimitry Andric }; 817349cc55cSDimitry Andric } // namespace 818fe6060f1SDimitry Andric 819fe6060f1SDimitry Andric char AMDGPUAttributor::ID = 0; 820fe6060f1SDimitry Andric 821fe6060f1SDimitry Andric Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); } 822*bdd1243dSDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, 823*bdd1243dSDimitry Andric false) 824*bdd1243dSDimitry Andric INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass); 825*bdd1243dSDimitry Andric INITIALIZE_PASS_END(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, 826*bdd1243dSDimitry Andric false) 827