xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision 5272ae667dd5c628689097687f4679a8be29bc4d)
1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUMachineFunction.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
13 #include "Utils/AMDGPUBaseInfo.h"
14 #include "llvm/CodeGen/MachineModuleInfo.h"
15 #include "llvm/IR/ConstantRange.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/Metadata.h"
18 #include "llvm/Target/TargetMachine.h"
19 
20 using namespace llvm;
21 
22 AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
23                                              const AMDGPUSubtarget &ST)
24     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
25       IsModuleEntryFunction(
26           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
27       IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())),
28       NoSignedZerosFPMath(false) {
29 
30   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
31   // except reserved size is not correctly aligned.
32 
33   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
34   MemoryBound = MemBoundAttr.getValueAsBool();
35 
36   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
37   WaveLimiter = WaveLimitAttr.getValueAsBool();
38 
39   // FIXME: How is this attribute supposed to interact with statically known
40   // global sizes?
41   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
42   if (!S.empty())
43     S.consumeInteger(0, GDSSize);
44 
45   // Assume the attribute allocates before any known GDS globals.
46   StaticGDSSize = GDSSize;
47 
48   // Second value, if present, is the maximum value that can be assigned.
49   // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
50   // during codegen.
51   std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
52       F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
53 
54   // The two separate variables are only profitable when the LDS module lowering
55   // pass is disabled. If graphics does not use dynamic LDS, this is never
56   // profitable. Leaving cleanup for a later change.
57   LDSSize = LDSSizeRange.first;
58   StaticLDSSize = LDSSize;
59 
60   CallingConv::ID CC = F.getCallingConv();
61   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
62     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
63 
64   // FIXME: Shouldn't be target specific
65   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
66   NoSignedZerosFPMath =
67       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
68 }
69 
70 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
71                                                   const GlobalVariable &GV,
72                                                   Align Trailing) {
73   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
74   if (!Entry.second)
75     return Entry.first->second;
76 
77   Align Alignment =
78       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
79 
80   unsigned Offset;
81   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
82 
83     std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
84     if (MaybeAbs) {
85       // Absolute address LDS variables that exist prior to the LDS lowering
86       // pass raise a fatal error in that pass. These failure modes are only
87       // reachable if that lowering pass is disabled or broken. If/when adding
88       // support for absolute addresses on user specified variables, the
89       // alignment check moves to the lowering pass and the frame calculation
90       // needs to take the user variables into consideration.
91 
92       uint32_t ObjectStart = *MaybeAbs;
93 
94       if (ObjectStart != alignTo(ObjectStart, Alignment)) {
95         report_fatal_error("Absolute address LDS variable inconsistent with "
96                            "variable alignment");
97       }
98 
99       if (isModuleEntryFunction()) {
100         // If this is a module entry function, we can also sanity check against
101         // the static frame. Strictly it would be better to check against the
102         // attribute, i.e. that the variable is within the always-allocated
103         // section, and not within some other non-absolute-address object
104         // allocated here, but the extra error detection is minimal and we would
105         // have to pass the Function around or cache the attribute value.
106         uint32_t ObjectEnd =
107             ObjectStart + DL.getTypeAllocSize(GV.getValueType());
108         if (ObjectEnd > StaticLDSSize) {
109           report_fatal_error(
110               "Absolute address LDS variable outside of static frame");
111         }
112       }
113 
114       Entry.first->second = ObjectStart;
115       return ObjectStart;
116     }
117 
118     /// TODO: We should sort these to minimize wasted space due to alignment
119     /// padding. Currently the padding is decided by the first encountered use
120     /// during lowering.
121     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
122 
123     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
124 
125     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
126     LDSSize = alignTo(StaticLDSSize, Trailing);
127   } else {
128     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
129            "expected region address space");
130 
131     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
132     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
133 
134     // FIXME: Apply alignment of dynamic GDS
135     GDSSize = StaticGDSSize;
136   }
137 
138   Entry.first->second = Offset;
139   return Offset;
140 }
141 
142 static const GlobalVariable *
143 getKernelDynLDSGlobalFromFunction(const Function &F) {
144   const Module *M = F.getParent();
145   std::string KernelDynLDSName = "llvm.amdgcn.";
146   KernelDynLDSName += F.getName();
147   KernelDynLDSName += ".dynlds";
148   return M->getNamedGlobal(KernelDynLDSName);
149 }
150 
151 std::optional<uint32_t>
152 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
153   // TODO: Would be more consistent with the abs symbols to use a range
154   MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
155   if (MD && MD->getNumOperands() == 1) {
156     if (ConstantInt *KnownSize =
157             mdconst::extract<ConstantInt>(MD->getOperand(0))) {
158       uint64_t ZExt = KnownSize->getZExtValue();
159       if (ZExt <= UINT32_MAX) {
160         return ZExt;
161       }
162     }
163   }
164   return {};
165 }
166 
167 std::optional<uint32_t>
168 AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
169   if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
170     return {};
171 
172   std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
173   if (!AbsSymRange)
174     return {};
175 
176   if (const APInt *V = AbsSymRange->getSingleElement()) {
177     std::optional<uint64_t> ZExt = V->tryZExtValue();
178     if (ZExt && (*ZExt <= UINT32_MAX)) {
179       return *ZExt;
180     }
181   }
182 
183   return {};
184 }
185 
186 void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
187                                            const GlobalVariable &GV) {
188   const Module *M = F.getParent();
189   const DataLayout &DL = M->getDataLayout();
190   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
191 
192   Align Alignment =
193       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
194   if (Alignment <= DynLDSAlign)
195     return;
196 
197   LDSSize = alignTo(StaticLDSSize, Alignment);
198   DynLDSAlign = Alignment;
199 
200   // If there is a dynamic LDS variable associated with this function F, every
201   // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
202   // map to the same address. This holds because no LDS is allocated after the
203   // lowering pass if there are dynamic LDS variables present.
204   const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
205   if (Dyn) {
206     unsigned Offset = LDSSize; // return this?
207     std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
208     if (!Expect || (Offset != *Expect)) {
209       report_fatal_error("Inconsistent metadata on dynamic LDS variable");
210     }
211   }
212 }
213