xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h (revision 82d56013d7b633d116a93943de88e08335357a7c)
//===-- AMDGPUMachineFunction.h ---------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
87330f729Sjoerg 
97330f729Sjoerg #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
107330f729Sjoerg #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
117330f729Sjoerg 
12*82d56013Sjoerg #include "Utils/AMDGPUBaseInfo.h"
137330f729Sjoerg #include "llvm/ADT/DenseMap.h"
147330f729Sjoerg #include "llvm/CodeGen/MachineFunction.h"
157330f729Sjoerg 
167330f729Sjoerg namespace llvm {
177330f729Sjoerg 
187330f729Sjoerg class GCNSubtarget;
197330f729Sjoerg 
207330f729Sjoerg class AMDGPUMachineFunction : public MachineFunctionInfo {
217330f729Sjoerg   /// A map to keep track of local memory objects and their offsets within the
227330f729Sjoerg   /// local memory space.
237330f729Sjoerg   SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
247330f729Sjoerg 
257330f729Sjoerg protected:
26*82d56013Sjoerg   uint64_t ExplicitKernArgSize = 0; // Cache for this.
277330f729Sjoerg   Align MaxKernArgAlign;        // Cache for this.
287330f729Sjoerg 
297330f729Sjoerg   /// Number of bytes in the LDS that are being used.
30*82d56013Sjoerg   unsigned LDSSize = 0;
317330f729Sjoerg 
32*82d56013Sjoerg   /// Number of bytes in the LDS allocated statically. This field is only used
33*82d56013Sjoerg   /// in the instruction selector and not part of the machine function info.
34*82d56013Sjoerg   unsigned StaticLDSSize = 0;
35*82d56013Sjoerg 
36*82d56013Sjoerg   /// Align for dynamic shared memory if any. Dynamic shared memory is
37*82d56013Sjoerg   /// allocated directly after the static one, i.e., LDSSize. Need to pad
38*82d56013Sjoerg   /// LDSSize to ensure that dynamic one is aligned accordingly.
39*82d56013Sjoerg   /// The maximal alignment is updated during IR translation or lowering
40*82d56013Sjoerg   /// stages.
41*82d56013Sjoerg   Align DynLDSAlign;
42*82d56013Sjoerg 
43*82d56013Sjoerg   // State of MODE register, assumed FP mode.
44*82d56013Sjoerg   AMDGPU::SIModeRegisterDefaults Mode;
45*82d56013Sjoerg 
46*82d56013Sjoerg   // Kernels + shaders. i.e. functions called by the hardware and not called
477330f729Sjoerg   // by other functions.
48*82d56013Sjoerg   bool IsEntryFunction = false;
497330f729Sjoerg 
50*82d56013Sjoerg   // Entry points called by other functions instead of directly by the hardware.
51*82d56013Sjoerg   bool IsModuleEntryFunction = false;
52*82d56013Sjoerg 
53*82d56013Sjoerg   bool NoSignedZerosFPMath = false;
547330f729Sjoerg 
557330f729Sjoerg   // Function may be memory bound.
56*82d56013Sjoerg   bool MemoryBound = false;
577330f729Sjoerg 
587330f729Sjoerg   // Kernel may need limited waves per EU for better performance.
59*82d56013Sjoerg   bool WaveLimiter = false;
607330f729Sjoerg 
617330f729Sjoerg public:
627330f729Sjoerg   AMDGPUMachineFunction(const MachineFunction &MF);
637330f729Sjoerg 
getExplicitKernArgSize()647330f729Sjoerg   uint64_t getExplicitKernArgSize() const {
657330f729Sjoerg     return ExplicitKernArgSize;
667330f729Sjoerg   }
677330f729Sjoerg 
getMaxKernArgAlign()687330f729Sjoerg   unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); }
697330f729Sjoerg 
getLDSSize()707330f729Sjoerg   unsigned getLDSSize() const {
717330f729Sjoerg     return LDSSize;
727330f729Sjoerg   }
737330f729Sjoerg 
getMode()74*82d56013Sjoerg   AMDGPU::SIModeRegisterDefaults getMode() const {
75*82d56013Sjoerg     return Mode;
76*82d56013Sjoerg   }
77*82d56013Sjoerg 
isEntryFunction()787330f729Sjoerg   bool isEntryFunction() const {
797330f729Sjoerg     return IsEntryFunction;
807330f729Sjoerg   }
817330f729Sjoerg 
isModuleEntryFunction()82*82d56013Sjoerg   bool isModuleEntryFunction() const { return IsModuleEntryFunction; }
83*82d56013Sjoerg 
hasNoSignedZerosFPMath()847330f729Sjoerg   bool hasNoSignedZerosFPMath() const {
857330f729Sjoerg     return NoSignedZerosFPMath;
867330f729Sjoerg   }
877330f729Sjoerg 
isMemoryBound()887330f729Sjoerg   bool isMemoryBound() const {
897330f729Sjoerg     return MemoryBound;
907330f729Sjoerg   }
917330f729Sjoerg 
needsWaveLimiter()927330f729Sjoerg   bool needsWaveLimiter() const {
937330f729Sjoerg     return WaveLimiter;
947330f729Sjoerg   }
957330f729Sjoerg 
96*82d56013Sjoerg   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
97*82d56013Sjoerg   void allocateModuleLDSGlobal(const Module *M);
98*82d56013Sjoerg 
getDynLDSAlign()99*82d56013Sjoerg   Align getDynLDSAlign() const { return DynLDSAlign; }
100*82d56013Sjoerg 
101*82d56013Sjoerg   void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV);
1027330f729Sjoerg };
1037330f729Sjoerg 
1047330f729Sjoerg }
1057330f729Sjoerg #endif
106