1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUMachineFunction.h" 10 #include "AMDGPU.h" 11 #include "AMDGPUPerfHintAnalysis.h" 12 #include "AMDGPUSubtarget.h" 13 #include "Utils/AMDGPUBaseInfo.h" 14 #include "llvm/CodeGen/MachineModuleInfo.h" 15 #include "llvm/IR/ConstantRange.h" 16 #include "llvm/IR/Constants.h" 17 #include "llvm/IR/Metadata.h" 18 #include "llvm/Target/TargetMachine.h" 19 20 using namespace llvm; 21 22 AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, 23 const AMDGPUSubtarget &ST) 24 : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), 25 IsModuleEntryFunction( 26 AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), 27 IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())), 28 NoSignedZerosFPMath(false) { 29 30 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 31 // except reserved size is not correctly aligned. 32 33 Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 34 MemoryBound = MemBoundAttr.getValueAsBool(); 35 36 Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 37 WaveLimiter = WaveLimitAttr.getValueAsBool(); 38 39 // FIXME: How is this attribute supposed to interact with statically known 40 // global sizes? 41 StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 42 if (!S.empty()) 43 S.consumeInteger(0, GDSSize); 44 45 // Assume the attribute allocates before any known GDS globals. 46 StaticGDSSize = GDSSize; 47 48 // Second value, if present, is the maximum value that can be assigned. 49 // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics 50 // during codegen. 51 std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( 52 F, "amdgpu-lds-size", {0, UINT32_MAX}, true); 53 54 // The two separate variables are only profitable when the LDS module lowering 55 // pass is disabled. If graphics does not use dynamic LDS, this is never 56 // profitable. Leaving cleanup for a later change. 57 LDSSize = LDSSizeRange.first; 58 StaticLDSSize = LDSSize; 59 60 CallingConv::ID CC = F.getCallingConv(); 61 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 62 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 63 64 // FIXME: Shouldn't be target specific 65 Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); 66 NoSignedZerosFPMath = 67 NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; 68 } 69 70 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 71 const GlobalVariable &GV, 72 Align Trailing) { 73 auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); 74 if (!Entry.second) 75 return Entry.first->second; 76 77 Align Alignment = 78 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 79 80 unsigned Offset; 81 if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 82 83 std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); 84 if (MaybeAbs) { 85 // Absolute address LDS variables that exist prior to the LDS lowering 86 // pass raise a fatal error in that pass. These failure modes are only 87 // reachable if that lowering pass is disabled or broken. If/when adding 88 // support for absolute addresses on user specified variables, the 89 // alignment check moves to the lowering pass and the frame calculation 90 // needs to take the user variables into consideration. 91 92 uint32_t ObjectStart = *MaybeAbs; 93 94 if (ObjectStart != alignTo(ObjectStart, Alignment)) { 95 report_fatal_error("Absolute address LDS variable inconsistent with " 96 "variable alignment"); 97 } 98 99 if (isModuleEntryFunction()) { 100 // If this is a module entry function, we can also sanity check against 101 // the static frame. Strictly it would be better to check against the 102 // attribute, i.e. that the variable is within the always-allocated 103 // section, and not within some other non-absolute-address object 104 // allocated here, but the extra error detection is minimal and we would 105 // have to pass the Function around or cache the attribute value. 106 uint32_t ObjectEnd = 107 ObjectStart + DL.getTypeAllocSize(GV.getValueType()); 108 if (ObjectEnd > StaticLDSSize) { 109 report_fatal_error( 110 "Absolute address LDS variable outside of static frame"); 111 } 112 } 113 114 Entry.first->second = ObjectStart; 115 return ObjectStart; 116 } 117 118 /// TODO: We should sort these to minimize wasted space due to alignment 119 /// padding. Currently the padding is decided by the first encountered use 120 /// during lowering. 121 Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 122 123 StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 124 125 // Align LDS size to trailing, e.g. for aligning dynamic shared memory 126 LDSSize = alignTo(StaticLDSSize, Trailing); 127 } else { 128 assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 129 "expected region address space"); 130 131 Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 132 StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 133 134 // FIXME: Apply alignment of dynamic GDS 135 GDSSize = StaticGDSSize; 136 } 137 138 Entry.first->second = Offset; 139 return Offset; 140 } 141 142 static const GlobalVariable * 143 getKernelDynLDSGlobalFromFunction(const Function &F) { 144 const Module *M = F.getParent(); 145 std::string KernelDynLDSName = "llvm.amdgcn."; 146 KernelDynLDSName += F.getName(); 147 KernelDynLDSName += ".dynlds"; 148 return M->getNamedGlobal(KernelDynLDSName); 149 } 150 151 std::optional<uint32_t> 152 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 153 // TODO: Would be more consistent with the abs symbols to use a range 154 MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 155 if (MD && MD->getNumOperands() == 1) { 156 if (ConstantInt *KnownSize = 157 mdconst::extract<ConstantInt>(MD->getOperand(0))) { 158 uint64_t ZExt = KnownSize->getZExtValue(); 159 if (ZExt <= UINT32_MAX) { 160 return ZExt; 161 } 162 } 163 } 164 return {}; 165 } 166 167 std::optional<uint32_t> 168 AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { 169 if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) 170 return {}; 171 172 std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); 173 if (!AbsSymRange) 174 return {}; 175 176 if (const APInt *V = AbsSymRange->getSingleElement()) { 177 std::optional<uint64_t> ZExt = V->tryZExtValue(); 178 if (ZExt && (*ZExt <= UINT32_MAX)) { 179 return *ZExt; 180 } 181 } 182 183 return {}; 184 } 185 186 void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, 187 const GlobalVariable &GV) { 188 const Module *M = F.getParent(); 189 const DataLayout &DL = M->getDataLayout(); 190 assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 191 192 Align Alignment = 193 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 194 if (Alignment <= DynLDSAlign) 195 return; 196 197 LDSSize = alignTo(StaticLDSSize, Alignment); 198 DynLDSAlign = Alignment; 199 200 // If there is a dynamic LDS variable associated with this function F, every 201 // further dynamic LDS instance (allocated by calling setDynLDSAlign) must 202 // map to the same address. This holds because no LDS is allocated after the 203 // lowering pass if there are dynamic LDS variables present. 204 const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); 205 if (Dyn) { 206 unsigned Offset = LDSSize; // return this? 207 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); 208 if (!Expect || (Offset != *Expect)) { 209 report_fatal_error("Inconsistent metadata on dynamic LDS variable"); 210 } 211 } 212 } 213