xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision 67819a72c6ba39267effe8edfc1befddc3f3f2f9)
//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUMachineFunction.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
13 #include "llvm/CodeGen/MachineModuleInfo.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/Target/TargetMachine.h"
16 
17 using namespace llvm;
18 
19 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
20     : IsEntryFunction(AMDGPU::isEntryFunctionCC(
21                                   MF.getFunction().getCallingConv())),
22       IsModuleEntryFunction(
23           AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
24       NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
25   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
26 
27   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
28   // except reserved size is not correctly aligned.
29   const Function &F = MF.getFunction();
30 
31   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
32   MemoryBound = MemBoundAttr.getValueAsBool();
33 
34   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
35   WaveLimiter = WaveLimitAttr.getValueAsBool();
36 
37   // FIXME: How is this attribute supposed to interact with statically known
38   // global sizes?
39   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
40   if (!S.empty())
41     S.consumeInteger(0, GDSSize);
42 
43   // Assume the attribute allocates before any known GDS globals.
44   StaticGDSSize = GDSSize;
45 
46   CallingConv::ID CC = F.getCallingConv();
47   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
48     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
49 }
50 
51 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
52                                                   const GlobalVariable &GV,
53                                                   Align Trailing) {
54   auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
55   if (!Entry.second)
56     return Entry.first->second;
57 
58   Align Alignment =
59       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
60 
61   unsigned Offset;
62   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
63     /// TODO: We should sort these to minimize wasted space due to alignment
64     /// padding. Currently the padding is decided by the first encountered use
65     /// during lowering.
66     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
67 
68     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
69 
70     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
71     LDSSize = alignTo(StaticLDSSize, Trailing);
72   } else {
73     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
74            "expected region address space");
75 
76     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
77     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
78 
79     // FIXME: Apply alignment of dynamic GDS
80     GDSSize = StaticGDSSize;
81   }
82 
83   Entry.first->second = Offset;
84   return Offset;
85 }
86 
87 static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
88 
89 bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
90   auto name = GV.getName();
91   return (name == ModuleLDSName) ||
92          (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
93 }
94 
95 const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
96     const GlobalVariable &GV) {
97   const Module &M = *GV.getParent();
98   StringRef N(GV.getName());
99   if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
100     return M.getFunction(N);
101   }
102   return nullptr;
103 }
104 
105 const GlobalVariable *
106 AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
107   const Module *M = F.getParent();
108   std::string KernelLDSName = "llvm.amdgcn.kernel.";
109   KernelLDSName += F.getName();
110   KernelLDSName += ".lds";
111   return M->getNamedGlobal(KernelLDSName);
112 }
113 
114 // This kernel calls no functions that require the module lds struct
115 static bool canElideModuleLDS(const Function &F) {
116   return F.hasFnAttribute("amdgpu-elide-module-lds");
117 }
118 
119 unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
120     const GlobalVariable &GV) {
121   // module.lds, then alignment padding, then kernel.lds, then other variables
122   // if any
123 
124   assert(isKnownAddressLDSGlobal(GV));
125   unsigned Offset = 0;
126 
127   if (GV.getName() == ModuleLDSName) {
128     return 0;
129   }
130 
131   const Module *M = GV.getParent();
132   const DataLayout &DL = M->getDataLayout();
133 
134   const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
135   const Function *f = getKernelLDSFunctionFromGlobal(GV);
136 
137   // Account for module.lds if allocated for this function
138   if (GVM && f && !canElideModuleLDS(*f)) {
139     // allocator aligns this to var align, but it's zero to begin with
140     Offset += DL.getTypeAllocSize(GVM->getValueType());
141   }
142 
143   // No dynamic LDS alignment done by allocateModuleLDSGlobal
144   Offset = alignTo(
145       Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
146 
147   return Offset;
148 }
149 
150 void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
151   const Module *M = F.getParent();
152 
153   // This function is called before allocating any other LDS so that it can
154   // reliably put values at known addresses. Consequently, dynamic LDS, if
155   // present, will not yet have been allocated
156 
157   assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
158 
159   if (isModuleEntryFunction()) {
160 
161     // Pointer values start from zero, memory allocated per-kernel-launch
162     // Variables can be grouped into a module level struct and a struct per
163     // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
164     // are allocated at statically computable addresses here.
165     //
166     // Address 0
167     // {
168     //   llvm.amdgcn.module.lds
169     // }
170     // alignment padding
171     // {
172     //   llvm.amdgcn.kernel.some-name.lds
173     // }
174     // other variables, e.g. dynamic lds, allocated after this call
175 
176     const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
177     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
178 
179     if (GV && !canElideModuleLDS(F)) {
180       assert(isKnownAddressLDSGlobal(*GV));
181       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
182       (void)Offset;
183       assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
184              "Module LDS expected to be allocated before other LDS");
185     }
186 
187     if (KV) {
188       // The per-kernel offset is deterministic because it is allocated
189       // before any other non-module LDS variables.
190       assert(isKnownAddressLDSGlobal(*KV));
191       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
192       (void)Offset;
193       assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
194              "Kernel LDS expected to be immediately after module LDS");
195     }
196   }
197 }
198 
199 std::optional<uint32_t>
200 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
201   auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
202   if (MD && MD->getNumOperands() == 1) {
203     ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
204     if (KnownSize) {
205       uint64_t V = KnownSize->getZExtValue();
206       if (V <= UINT32_MAX) {
207         return V;
208       }
209     }
210   }
211   return {};
212 }
213 
214 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
215                                            const GlobalVariable &GV) {
216   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
217 
218   Align Alignment =
219       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
220   if (Alignment <= DynLDSAlign)
221     return;
222 
223   LDSSize = alignTo(StaticLDSSize, Alignment);
224   DynLDSAlign = Alignment;
225 }
226