//===-- AMDGPUMachineFunction.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUMemoryUtils.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

// Return the global named "llvm.amdgcn.<kernel>.dynlds" that stands for the
// kernel's dynamic LDS allocation, or null if no such global exists.
static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  SmallString<64> KernelDynLDSName("llvm.amdgcn.");
  KernelDynLDSName += F.getName();
  KernelDynLDSName += ".dynlds";
  return M->getNamedGlobal(KernelDynLDSName);
}

// Return true if any argument of F is a pointer into the local (LDS) address
// space.
static bool hasLDSKernelArgument(const Function &F) {
  for (const Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    if (auto *PtrTy = dyn_cast<PointerType>(ArgTy)) {
      if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
        return true;
    }
  }
  return false;
}

AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  // The second value, if present, is the maximum value that can be assigned.
  // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
  // during codegen.
  std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-lds-size", {0, UINT32_MAX}, true);

  // The two separate variables are only profitable when the LDS module
  // lowering pass is disabled. If graphics does not use dynamic LDS, this is
  // never profitable. Leaving cleanup for a later change.
  LDSSize = LDSSizeRange.first;
  StaticLDSSize = LDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";

  const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
  if (DynLdsGlobal || hasLDSKernelArgument(F))
    UsesDynamicLDS = true;
}

unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    if (AMDGPU::isNamedBarrier(GV)) {
      std::optional<unsigned> BarAddr = getLDSAbsoluteAddress(GV);
      if (!BarAddr)
        llvm_unreachable("named barrier should have an assigned address");
      Entry.first->second = BarAddr.value();
      return BarAddr.value();
    }

    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we
        // would have to pass the Function around or cache the attribute value.
        uint32_t ObjectEnd =
            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
        if (ObjectEnd > StaticLDSSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to Trailing, e.g. for aligning dynamic shared memory.
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  // TODO: Would be more consistent with the abs symbols to use a range
  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    if (ConstantInt *KnownSize =
            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
      uint64_t ZExt = KnownSize->getZExtValue();
      if (ZExt <= UINT32_MAX) {
        return ZExt;
      }
    }
  }
  return {};
}

std::optional<uint32_t>
AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return {};

  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
  if (!AbsSymRange)
    return {};

  if (const APInt *V = AbsSymRange->getSingleElement()) {
    std::optional<uint64_t> ZExt = V->tryZExtValue();
    if (ZExt && (*ZExt <= UINT32_MAX)) {
      return *ZExt;
    }
  }

  return {};
}

void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
                                           const GlobalVariable &GV) {
  const Module *M = F.getParent();
  const DataLayout &DL = M->getDataLayout();
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;

  // If there is a dynamic LDS variable associated with this function F, every
  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
  // map to the same address. This holds because no LDS is allocated after the
  // lowering pass if there are dynamic LDS variables present.
  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
  if (Dyn) {
    unsigned Offset = LDSSize; // return this?
    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
    if (!Expect || (Offset != *Expect)) {
      report_fatal_error("Inconsistent metadata on dynamic LDS variable");
    }
  }
}

void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
  UsesDynamicLDS = DynLDS;
}

bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }