//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

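// The constructor below derives most of its state from function attributes and
// the calling convention. As a rough illustration (the kernel name and the
// attribute values are made up; only the attribute names come from the code
// below), a kernel feeding this path might carry IR along the lines of:
//
//   define amdgpu_kernel void @kern(ptr addrspace(1) %out) #0 { ... }
//   attributes #0 = { "amdgpu-memory-bound"="true" "amdgpu-wave-limiter"="true"
//                     "amdgpu-gds-size"="128" "no-signed-zeros-fp-math"="true" }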
AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      NoSignedZerosFPMath(false) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
}

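// allocateLDSGlobal below bump-allocates offsets for LDS/GDS globals. Worked
// example (illustrative numbers, not taken from the code): with StaticLDSSize
// currently 6, a 4-byte LDS global with ABI alignment 4 is placed at
// alignTo(6, 4) == 8, StaticLDSSize grows to 12, and LDSSize is then rounded
// up to the Trailing alignment so that dynamic shared memory can follow.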
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to Trailing, e.g. for aligning dynamic shared memory.
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";

static const GlobalVariable *getKernelLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  std::string KernelLDSName = "llvm.amdgcn.kernel.";
  KernelLDSName += F.getName();
  KernelLDSName += ".lds";
  return M->getNamedGlobal(KernelLDSName);
}

static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  std::string KernelDynLDSName = "llvm.amdgcn.";
  KernelDynLDSName += F.getName();
  KernelDynLDSName += ".dynlds";
  return M->getNamedGlobal(KernelDynLDSName);
}
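// For orientation (derived from the string construction above; the kernel name
// @foo is only an example): a kernel @foo is matched with the globals
// "llvm.amdgcn.kernel.foo.lds" and "llvm.amdgcn.foo.dynlds", alongside the
// shared "llvm.amdgcn.module.lds" struct produced by AMDGPULowerModuleLDSPass.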

// True if this kernel calls no function that requires the module LDS struct,
// as recorded by the amdgpu-elide-module-lds attribute.
static bool canElideModuleLDS(const Function &F) {
  return F.hasFnAttribute("amdgpu-elide-module-lds");
}

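// allocateKnownAddressLDSGlobal below pins the lowered LDS structs to fixed
// offsets. Worked example (the sizes and alignments are assumptions for
// illustration): a 12-byte llvm.amdgcn.module.lds at address 0 followed by a
// per-kernel struct with alignment 16 places the kernel struct at
// alignTo(12, 16) == 16, and each offset must agree with the variable's
// !absolute_symbol metadata or a fatal error is reported.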
void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
  const Module *M = F.getParent();
  // This function is called before allocating any other LDS so that it can
  // reliably put values at known addresses. Consequently, dynamic LDS, if
  // present, will not yet have been allocated.

  assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");

  if (isModuleEntryFunction()) {

    // Pointer values start from zero; memory is allocated per kernel launch.
    // Variables can be grouped into a module level struct and a struct per
    // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
    // are allocated at statically computable addresses here.
    //
    // Address 0
    // {
    //   llvm.amdgcn.module.lds
    // }
    // alignment padding
    // {
    //   llvm.amdgcn.kernel.some-name.lds
    // }
    // other variables, e.g. dynamic LDS, are allocated after this call

    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
    const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
    const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);

    if (GV && !canElideModuleLDS(F)) {
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*GV);
      if (!Expect || (Offset != *Expect)) {
        report_fatal_error("Inconsistent metadata on module LDS variable");
      }
    }

    if (KV) {
      // The per-kernel offset is deterministic because it is allocated
      // before any other non-module LDS variables.
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*KV);
      if (!Expect || (Offset != *Expect)) {
        report_fatal_error("Inconsistent metadata on kernel LDS variable");
      }
    }

    if (Dyn) {
      // The dynamic LDS offset is deterministic: the per-kernel struct has
      // the maximum alignment of any reachable LDS variable, and if this
      // global is present, all remaining LDS variables are themselves dynamic
      // LDS and will be allocated at the same address.
      setDynLDSAlign(F, *Dyn);
      unsigned Offset = LDSSize;
      std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
      if (!Expect || (Offset != *Expect)) {
        report_fatal_error("Inconsistent metadata on dynamic LDS variable");
      }
    }
  }
}

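// getLDSKernelIdMetadata below reads a single-operand i32 metadata node. As an
// illustration (the kernel name and id value are made up; the metadata kind
// comes from the code below), a tagged kernel might look like:
//
//   define amdgpu_kernel void @kern() !llvm.amdgcn.lds.kernel.id !0 { ... }
//   !0 = !{i32 7}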
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  // TODO: Would be more consistent with the abs symbols to use a range
  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    if (ConstantInt *KnownSize =
            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
      uint64_t ZExt = KnownSize->getZExtValue();
      if (ZExt <= UINT32_MAX) {
        return ZExt;
      }
    }
  }
  return {};
}

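// getLDSAbsoluteAddress below queries the absolute symbol range of an LDS
// global. For illustration (the variable layout and address are assumptions;
// the single-element range form is inferred from the code below), a pinned
// module struct could carry:
//
//   @llvm.amdgcn.module.lds = ... addrspace(3) ..., !absolute_symbol !0
//   !0 = !{i32 0, i32 1}   ; half-open range [0, 1) pins the address to 0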
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return {};

  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
  if (!AbsSymRange)
    return {};

  if (const APInt *V = AbsSymRange->getSingleElement()) {
    std::optional<uint64_t> ZExt = V->tryZExtValue();
    if (ZExt && (*ZExt <= UINT32_MAX)) {
      return *ZExt;
    }
  }

  return {};
}

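// setDynLDSAlign below only ever grows the dynamic LDS alignment. Worked
// example (the numbers are assumptions for illustration): with
// StaticLDSSize == 20 and DynLDSAlign == 4, a zero-sized dynamic LDS global
// with alignment 16 raises LDSSize to alignTo(20, 16) == 32 and DynLDSAlign to
// 16; a later global with alignment 8 changes nothing.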
void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
                                           const GlobalVariable &GV) {
  const Module *M = F.getParent();
  const DataLayout &DL = M->getDataLayout();
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;

  // If there is a dynamic LDS variable associated with this function F, every
  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
  // map to the same address. This holds because no LDS is allocated after the
  // lowering pass if there are dynamic LDS variables present.
  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
  if (Dyn) {
    unsigned Offset = LDSSize; // return this?
    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
    if (!Expect || (Offset != *Expect)) {
      report_fatal_error("Inconsistent metadata on dynamic LDS variable");
    }
  }
}