1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUMachineFunction.h" 10 #include "AMDGPU.h" 11 #include "AMDGPUPerfHintAnalysis.h" 12 #include "AMDGPUSubtarget.h" 13 #include "llvm/CodeGen/MachineModuleInfo.h" 14 #include "llvm/IR/ConstantRange.h" 15 #include "llvm/IR/Constants.h" 16 #include "llvm/IR/Metadata.h" 17 #include "llvm/Target/TargetMachine.h" 18 19 using namespace llvm; 20 21 AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, 22 const AMDGPUSubtarget &ST) 23 : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), 24 IsModuleEntryFunction( 25 AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), 26 NoSignedZerosFPMath(false) { 27 28 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 29 // except reserved size is not correctly aligned. 30 31 Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 32 MemoryBound = MemBoundAttr.getValueAsBool(); 33 34 Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 35 WaveLimiter = WaveLimitAttr.getValueAsBool(); 36 37 // FIXME: How is this attribute supposed to interact with statically known 38 // global sizes? 39 StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 40 if (!S.empty()) 41 S.consumeInteger(0, GDSSize); 42 43 // Assume the attribute allocates before any known GDS globals. 44 StaticGDSSize = GDSSize; 45 46 CallingConv::ID CC = F.getCallingConv(); 47 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 48 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 49 50 // FIXME: Shouldn't be target specific 51 Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); 52 NoSignedZerosFPMath = 53 NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; 54 } 55 56 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 57 const GlobalVariable &GV, 58 Align Trailing) { 59 auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); 60 if (!Entry.second) 61 return Entry.first->second; 62 63 Align Alignment = 64 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 65 66 unsigned Offset; 67 if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 68 /// TODO: We should sort these to minimize wasted space due to alignment 69 /// padding. Currently the padding is decided by the first encountered use 70 /// during lowering. 71 Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 72 73 StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 74 75 // Align LDS size to trailing, e.g. for aligning dynamic shared memory 76 LDSSize = alignTo(StaticLDSSize, Trailing); 77 } else { 78 assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 79 "expected region address space"); 80 81 Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 82 StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 83 84 // FIXME: Apply alignment of dynamic GDS 85 GDSSize = StaticGDSSize; 86 } 87 88 Entry.first->second = Offset; 89 return Offset; 90 } 91 92 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds"; 93 94 static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) { 95 const Module *M = F.getParent(); 96 std::string KernelLDSName = "llvm.amdgcn.kernel."; 97 KernelLDSName += F.getName(); 98 KernelLDSName += ".lds"; 99 return M->getNamedGlobal(KernelLDSName); 100 } 101 102 static const GlobalVariable * 103 getKernelDynLDSGlobalFromFunction(const Function &F) { 104 const Module *M = F.getParent(); 105 std::string KernelDynLDSName = "llvm.amdgcn."; 106 KernelDynLDSName += F.getName(); 107 KernelDynLDSName += ".dynlds"; 108 return M->getNamedGlobal(KernelDynLDSName); 109 } 110 111 // This kernel calls no functions that require the module lds struct 112 static bool canElideModuleLDS(const Function &F) { 113 return F.hasFnAttribute("amdgpu-elide-module-lds"); 114 } 115 116 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) { 117 const Module *M = F.getParent(); 118 // This function is called before allocating any other LDS so that it can 119 // reliably put values at known addresses. Consequently, dynamic LDS, if 120 // present, will not yet have been allocated 121 122 assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated"); 123 124 if (isModuleEntryFunction()) { 125 126 // Pointer values start from zero, memory allocated per-kernel-launch 127 // Variables can be grouped into a module level struct and a struct per 128 // kernel function by AMDGPULowerModuleLDSPass. If that is done, they 129 // are allocated at statically computable addresses here. 130 // 131 // Address 0 132 // { 133 // llvm.amdgcn.module.lds 134 // } 135 // alignment padding 136 // { 137 // llvm.amdgcn.kernel.some-name.lds 138 // } 139 // other variables, e.g. dynamic lds, allocated after this call 140 141 const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName); 142 const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F); 143 const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); 144 145 if (GV && !canElideModuleLDS(F)) { 146 unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align()); 147 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV); 148 if (!Expect || (Offset != *Expect)) { 149 report_fatal_error("Inconsistent metadata on module LDS variable"); 150 } 151 } 152 153 if (KV) { 154 // The per-kernel offset is deterministic because it is allocated 155 // before any other non-module LDS variables. 156 unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align()); 157 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV); 158 if (!Expect || (Offset != *Expect)) { 159 report_fatal_error("Inconsistent metadata on kernel LDS variable"); 160 } 161 } 162 163 if (Dyn) { 164 // The dynamic LDS is deterministic because the per-kernel one has the 165 // maximum alignment of any reachable and all remaining LDS variables, 166 // if this is present, are themselves dynamic LDS and will be allocated 167 // at the same address. 168 setDynLDSAlign(F, *Dyn); 169 unsigned Offset = LDSSize; 170 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); 171 if (!Expect || (Offset != *Expect)) { 172 report_fatal_error("Inconsistent metadata on dynamic LDS variable"); 173 } 174 } 175 } 176 } 177 178 std::optional<uint32_t> 179 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 180 // TODO: Would be more consistent with the abs symbols to use a range 181 MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 182 if (MD && MD->getNumOperands() == 1) { 183 if (ConstantInt *KnownSize = 184 mdconst::extract<ConstantInt>(MD->getOperand(0))) { 185 uint64_t ZExt = KnownSize->getZExtValue(); 186 if (ZExt <= UINT32_MAX) { 187 return ZExt; 188 } 189 } 190 } 191 return {}; 192 } 193 194 std::optional<uint32_t> 195 AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { 196 if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) 197 return {}; 198 199 std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); 200 if (!AbsSymRange) 201 return {}; 202 203 if (const APInt *V = AbsSymRange->getSingleElement()) { 204 std::optional<uint64_t> ZExt = V->tryZExtValue(); 205 if (ZExt && (*ZExt <= UINT32_MAX)) { 206 return *ZExt; 207 } 208 } 209 210 return {}; 211 } 212 213 void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, 214 const GlobalVariable &GV) { 215 const Module *M = F.getParent(); 216 const DataLayout &DL = M->getDataLayout(); 217 assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 218 219 Align Alignment = 220 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 221 if (Alignment <= DynLDSAlign) 222 return; 223 224 LDSSize = alignTo(StaticLDSSize, Alignment); 225 DynLDSAlign = Alignment; 226 227 // If there is a dynamic LDS variable associated with this function F, every 228 // further dynamic LDS instance (allocated by calling setDynLDSAlign) must 229 // map to the same address. This holds because no LDS is allocated after the 230 // lowering pass if there are dynamic LDS variables present. 231 const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); 232 if (Dyn) { 233 unsigned Offset = LDSSize; // return this? 234 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); 235 if (!Expect || (Offset != *Expect)) { 236 report_fatal_error("Inconsistent metadata on dynamic LDS variable"); 237 } 238 } 239 } 240