//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUMemoryUtils.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

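// Look up the named global "llvm.amdgcn.<kernel name>.dynlds" that the LDS
// lowering pass creates for kernels using dynamic LDS; returns nullptr if no
// such global exists for F.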
static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  SmallString<64> KernelDynLDSName("llvm.amdgcn.");
  KernelDynLDSName += F.getName();
  KernelDynLDSName += ".dynlds";
  return M->getNamedGlobal(KernelDynLDSName);
}

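// Return true if any formal argument of F is a pointer into the LDS (local)
// address space; the constructor below treats such kernels as potentially
// using dynamic LDS.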
static bool hasLDSKernelArgument(const Function &F) {
  for (const Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    if (auto *PtrTy = dyn_cast<PointerType>(ArgTy)) {
      if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
        return true;
    }
  }
  return false;
}

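// Initialize per-function state from the calling convention and function
// attributes ("amdgpu-memory-bound", "amdgpu-wave-limiter", "amdgpu-gds-size",
// "amdgpu-lds-size", "no-signed-zeros-fp-math").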
AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  // Second value, if present, is the maximum value that can be assigned.
  // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
  // during codegen.
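  // For illustration (values hypothetical): "amdgpu-lds-size"="256" records
  // 256 bytes of known LDS, while "amdgpu-lds-size"="256,65536" additionally
  // bounds assignable LDS at 65536 bytes.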
  std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-lds-size", {0, UINT32_MAX}, true);

  // The two separate variables are only profitable when the LDS module lowering
  // pass is disabled. If graphics does not use dynamic LDS, this is never
  // profitable. Leaving cleanup for a later change.
  LDSSize = LDSSizeRange.first;
  StaticLDSSize = LDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";

  const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
  if (DynLdsGlobal || hasLDSKernelArgument(F))
    UsesDynamicLDS = true;
}

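// Assign GV an offset within the kernel's LDS or GDS frame, caching the result
// in LocalMemoryObjects so repeated queries return the same offset. Trailing
// is the alignment applied to the end of the static LDS frame, e.g. so that
// dynamic shared memory placed after it starts suitably aligned.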
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    if (AMDGPU::isNamedBarrier(GV)) {
      std::optional<unsigned> BarAddr = getLDSAbsoluteAddress(GV);
      if (!BarAddr)
        llvm_unreachable("named barrier should have an assigned address");
      Entry.first->second = BarAddr.value();
      return BarAddr.value();
    }

    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we would
        // have to pass the Function around or cache the attribute value.
        uint32_t ObjectEnd =
            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
        if (ObjectEnd > StaticLDSSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

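// Extract the kernel id assigned by the LDS lowering pass from F's
// "llvm.amdgcn.lds.kernel.id" metadata, if present and representable in 32
// bits.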
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  // TODO: Would be more consistent with the abs symbols to use a range
  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    if (ConstantInt *KernelId =
            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
      uint64_t ZExt = KernelId->getZExtValue();
      if (ZExt <= UINT32_MAX) {
        return ZExt;
      }
    }
  }
  return {};
}

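// If GV is an LDS-address-space variable carrying absolute-symbol metadata
// that pins it to a single 32-bit address, return that address.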
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return {};

  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
  if (!AbsSymRange)
    return {};

  if (const APInt *V = AbsSymRange->getSingleElement()) {
    std::optional<uint64_t> ZExt = V->tryZExtValue();
    if (ZExt && (*ZExt <= UINT32_MAX)) {
      return *ZExt;
    }
  }

  return {};
}

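// Raise the dynamic LDS alignment for F to at least GV's alignment. GV must be
// a zero-sized (dynamic) LDS variable; realigning the end of the static frame
// moves the address at which dynamic LDS begins.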
void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
                                           const GlobalVariable &GV) {
  const Module *M = F.getParent();
  const DataLayout &DL = M->getDataLayout();
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;

  // If there is a dynamic LDS variable associated with this function F, every
  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
  // map to the same address. This holds because no LDS is allocated after the
  // lowering pass if there are dynamic LDS variables present.
  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
  if (Dyn) {
    unsigned Offset = LDSSize; // return this?
    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
    if (!Expect || (Offset != *Expect)) {
      report_fatal_error("Inconsistent metadata on dynamic LDS variable");
    }
  }
}

void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
  UsesDynamicLDS = DynLDS;
}

bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }
241