//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"
#include <optional>

using namespace llvm;

AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(
          MF.getFunction().getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
      NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.
  const Function &F = MF.getFunction();

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
}

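// Illustrative note (not part of the original source): with a kernel carrying
// the IR attribute "amdgpu-gds-size"="256", the constructor above receives
// S == "256" and consumeInteger(0, GDSSize) parses it with an auto-detected
// radix, so GDSSize (and therefore StaticGDSSize) becomes 256. An absent or
// empty attribute leaves both members at their in-class defaults.
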
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align the total LDS size to Trailing, e.g. to align subsequently
    // allocated dynamic shared memory.
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";

bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
  StringRef Name = GV.getName();
  return (Name == ModuleLDSName) ||
         (Name.startswith("llvm.amdgcn.kernel.") && Name.endswith(".lds"));
}

const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
    const GlobalVariable &GV) {
  const Module &M = *GV.getParent();
  StringRef N(GV.getName());
  if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
    return M.getFunction(N);
  }
  return nullptr;
}

const GlobalVariable *
AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  std::string KernelLDSName = "llvm.amdgcn.kernel.";
  KernelLDSName += F.getName();
  KernelLDSName += ".lds";
  return M->getNamedGlobal(KernelLDSName);
}

// Returns true if this kernel is known to call no function that requires the
// module LDS struct, as recorded by the amdgpu-elide-module-lds attribute.
static bool canElideModuleLDS(const Function &F) {
  return F.hasFnAttribute("amdgpu-elide-module-lds");
}

unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
    const GlobalVariable &GV) {
  // module.lds, then alignment padding, then kernel.lds, then other variables
  // if any.

  assert(isKnownAddressLDSGlobal(GV));
  unsigned Offset = 0;

  if (GV.getName() == ModuleLDSName) {
    return 0;
  }

  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();

  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
  const Function *KF = getKernelLDSFunctionFromGlobal(GV);

  // Account for module.lds if it is allocated for this function.
  if (GVM && KF && !canElideModuleLDS(*KF)) {
    // The allocator aligns this to the variable's alignment, but the running
    // offset is still zero at this point.
    Offset += DL.getTypeAllocSize(GVM->getValueType());
  }

  // No dynamic LDS alignment is done by allocateModuleLDSGlobal.
  Offset = alignTo(
      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));

  return Offset;
}

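// Worked example (hypothetical sizes, not from the original source): if
// llvm.amdgcn.module.lds is a 40-byte struct and llvm.amdgcn.kernel.foo.lds
// requires 16-byte alignment, then for the kernel struct (with module LDS not
// elided for @foo) calculateKnownAddressOfLDSGlobal returns
// alignTo(40, 16) == 48, which matches the offset allocateLDSGlobal assigns
// when allocateKnownAddressLDSGlobal runs before any other LDS allocation.
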
void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
  const Module *M = F.getParent();

  // This function is called before allocating any other LDS so that it can
  // reliably put values at known addresses. Consequently, dynamic LDS, if
  // present, will not yet have been allocated.

  assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");

  if (isModuleEntryFunction()) {

    // Pointer values start from zero; memory is allocated per kernel launch.
    // Variables can be grouped into a module level struct and a struct per
    // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
    // are allocated at statically computable addresses here.
    //
    // Address 0
    // {
    //   llvm.amdgcn.module.lds
    // }
    // alignment padding
    // {
    //   llvm.amdgcn.kernel.some-name.lds
    // }
    // other variables, e.g. dynamic LDS, allocated after this call

    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
    const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);

    if (GV && !canElideModuleLDS(F)) {
      assert(isKnownAddressLDSGlobal(*GV));
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
             "Module LDS expected to be allocated before other LDS");
    }

    if (KV) {
      // The per-kernel offset is deterministic because it is allocated
      // before any other non-module LDS variables.
      assert(isKnownAddressLDSGlobal(*KV));
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
             "Kernel LDS expected to be immediately after module LDS");
    }
  }
}

std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
    if (KnownSize) {
      uint64_t V = KnownSize->getZExtValue();
      if (V <= UINT32_MAX) {
        return V;
      }
    }
  }
  return {};
}

void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
                                           const GlobalVariable &GV) {
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;
}
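
// Illustrative note (not from the original source): getLDSKernelIdMetadata
// above expects function-attached metadata of the form
//   define amdgpu_kernel void @k() !llvm.amdgcn.lds.kernel.id !0 { ... }
//   !0 = !{i32 7}
// and would return 7 for this hypothetical kernel; a missing node, a wrong
// operand count, or a value above UINT32_MAX yields std::nullopt.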