1 //===-- AMDGPUMachineFunctionInfo.h -------------------------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H 10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H 11 12 #include "Utils/AMDGPUBaseInfo.h" 13 #include "llvm/ADT/DenseMap.h" 14 #include "llvm/CodeGen/MachineFunction.h" 15 #include "llvm/IR/DataLayout.h" 16 #include "llvm/IR/Function.h" 17 #include "llvm/IR/GlobalValue.h" 18 #include "llvm/IR/GlobalVariable.h" 19 20 namespace llvm { 21 22 class AMDGPUSubtarget; 23 24 class AMDGPUMachineFunction : public MachineFunctionInfo { 25 /// A map to keep track of local memory objects and their offsets within the 26 /// local memory space. 27 SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects; 28 29 protected: 30 uint64_t ExplicitKernArgSize = 0; // Cache for this. 31 Align MaxKernArgAlign; // Cache for this. 32 33 /// Number of bytes in the LDS that are being used. 34 uint32_t LDSSize = 0; 35 uint32_t GDSSize = 0; 36 37 /// Number of bytes in the LDS allocated statically. This field is only used 38 /// in the instruction selector and not part of the machine function info. 39 uint32_t StaticLDSSize = 0; 40 uint32_t StaticGDSSize = 0; 41 42 /// Align for dynamic shared memory if any. Dynamic shared memory is 43 /// allocated directly after the static one, i.e., LDSSize. Need to pad 44 /// LDSSize to ensure that dynamic one is aligned accordingly. 45 /// The maximal alignment is updated during IR translation or lowering 46 /// stages. 47 Align DynLDSAlign; 48 49 // Kernels + shaders. i.e. functions called by the hardware and not called 50 // by other functions. 51 bool IsEntryFunction = false; 52 53 // Entry points called by other functions instead of directly by the hardware. 54 bool IsModuleEntryFunction = false; 55 56 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC. 57 bool IsChainFunction = false; 58 59 bool NoSignedZerosFPMath = false; 60 61 // Function may be memory bound. 62 bool MemoryBound = false; 63 64 // Kernel may need limited waves per EU for better performance. 65 bool WaveLimiter = false; 66 67 public: 68 AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); 69 70 uint64_t getExplicitKernArgSize() const { 71 return ExplicitKernArgSize; 72 } 73 74 Align getMaxKernArgAlign() const { return MaxKernArgAlign; } 75 76 uint32_t getLDSSize() const { 77 return LDSSize; 78 } 79 80 uint32_t getGDSSize() const { 81 return GDSSize; 82 } 83 84 bool isEntryFunction() const { 85 return IsEntryFunction; 86 } 87 88 bool isModuleEntryFunction() const { return IsModuleEntryFunction; } 89 90 bool isChainFunction() const { return IsChainFunction; } 91 92 // The stack is empty upon entry to this function. 93 bool isBottomOfStack() const { 94 return isEntryFunction() || isChainFunction(); 95 } 96 97 bool hasNoSignedZerosFPMath() const { 98 return NoSignedZerosFPMath; 99 } 100 101 bool isMemoryBound() const { 102 return MemoryBound; 103 } 104 105 bool needsWaveLimiter() const { 106 return WaveLimiter; 107 } 108 109 unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) { 110 return allocateLDSGlobal(DL, GV, DynLDSAlign); 111 } 112 113 unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV, 114 Align Trailing); 115 116 static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F); 117 static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV); 118 119 Align getDynLDSAlign() const { return DynLDSAlign; } 120 121 void setDynLDSAlign(const Function &F, const GlobalVariable &GV); 122 }; 123 124 } 125 #endif 126