xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h (revision 335620853117153e52ce54fe4e879f66aa23ff99)
//===-- AMDGPUMachineFunction.h -----------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
11 
12 #include "Utils/AMDGPUBaseInfo.h"
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/CodeGen/MachineFunction.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/GlobalVariable.h"
19 
20 namespace llvm {
21 
22 class AMDGPUSubtarget;
23 
24 class AMDGPUMachineFunction : public MachineFunctionInfo {
25   /// A map to keep track of local memory objects and their offsets within the
26   /// local memory space.
27   SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
28 
29 protected:
30   uint64_t ExplicitKernArgSize = 0; // Cache for this.
31   Align MaxKernArgAlign;        // Cache for this.
32 
33   /// Number of bytes in the LDS that are being used.
34   uint32_t LDSSize = 0;
35   uint32_t GDSSize = 0;
36 
37   /// Number of bytes in the LDS allocated statically. This field is only used
38   /// in the instruction selector and not part of the machine function info.
39   uint32_t StaticLDSSize = 0;
40   uint32_t StaticGDSSize = 0;
41 
42   /// Align for dynamic shared memory if any. Dynamic shared memory is
43   /// allocated directly after the static one, i.e., LDSSize. Need to pad
44   /// LDSSize to ensure that dynamic one is aligned accordingly.
45   /// The maximal alignment is updated during IR translation or lowering
46   /// stages.
47   Align DynLDSAlign;
48 
49   // Flag to check dynamic LDS usage by kernel.
50   bool UsesDynamicLDS = false;
51 
52   // Kernels + shaders. i.e. functions called by the hardware and not called
53   // by other functions.
54   bool IsEntryFunction = false;
55 
56   // Entry points called by other functions instead of directly by the hardware.
57   bool IsModuleEntryFunction = false;
58 
59   // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC.
60   bool IsChainFunction = false;
61 
62   bool NoSignedZerosFPMath = false;
63 
64   // Function may be memory bound.
65   bool MemoryBound = false;
66 
67   // Kernel may need limited waves per EU for better performance.
68   bool WaveLimiter = false;
69 
70   bool HasInitWholeWave = false;
71 
72 public:
73   AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST);
74 
75   uint64_t getExplicitKernArgSize() const {
76     return ExplicitKernArgSize;
77   }
78 
79   Align getMaxKernArgAlign() const { return MaxKernArgAlign; }
80 
81   uint32_t getLDSSize() const {
82     return LDSSize;
83   }
84 
85   uint32_t getGDSSize() const {
86     return GDSSize;
87   }
88 
89   bool isEntryFunction() const {
90     return IsEntryFunction;
91   }
92 
93   bool isModuleEntryFunction() const { return IsModuleEntryFunction; }
94 
95   bool isChainFunction() const { return IsChainFunction; }
96 
97   // The stack is empty upon entry to this function.
98   bool isBottomOfStack() const {
99     return isEntryFunction() || isChainFunction();
100   }
101 
102   bool hasNoSignedZerosFPMath() const {
103     return NoSignedZerosFPMath;
104   }
105 
106   bool isMemoryBound() const {
107     return MemoryBound;
108   }
109 
110   bool needsWaveLimiter() const {
111     return WaveLimiter;
112   }
113 
114   bool hasInitWholeWave() const { return HasInitWholeWave; }
115   void setInitWholeWave() { HasInitWholeWave = true; }
116 
117   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) {
118     return allocateLDSGlobal(DL, GV, DynLDSAlign);
119   }
120 
121   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV,
122                              Align Trailing);
123 
124   static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
125   static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
126 
127   Align getDynLDSAlign() const { return DynLDSAlign; }
128 
129   void setDynLDSAlign(const Function &F, const GlobalVariable &GV);
130 
131   void setUsesDynamicLDS(bool DynLDS);
132 
133   bool isDynamicLDSUsed() const;
134 };
135 
136 }
137 #endif
138