xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision 055e4dce45c3f2194c0610db1cee6dedfa6040ab)
1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 
16 #include "AMDGPUArgumentUsageInfo.h"
17 #include "AMDGPUMachineFunction.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIInstrInfo.h"
20 #include "SIRegisterInfo.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/CodeGen/MIRYamlMapping.h"
26 #include "llvm/CodeGen/PseudoSourceValue.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include <array>
31 #include <cassert>
32 #include <utility>
33 #include <vector>
34 
35 namespace llvm {
36 
37 class MachineFrameInfo;
38 class MachineFunction;
39 class TargetRegisterClass;
40 
/// Pseudo source value representing memory accessed through an image
/// resource descriptor, so image accesses can be distinguished from other
/// memory by alias analysis. Keyed per resource value in
/// SIMachineFunctionInfo::getImagePSV().
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
  // TODO: Is the img rsrc useful?
  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  // Conservatively report that image memory may alias frame objects and
  // other memory accesses.
  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};
61 
/// Pseudo source value representing memory accessed through a buffer
/// resource descriptor, so buffer accesses can be distinguished from other
/// memory by alias analysis. Keyed per resource value in
/// SIMachineFunctionInfo::getBufferPSV().
class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
public:
  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
    PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }

  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most buffers, but we will start by
    // being conservative.
    return false;
  }

  // Conservatively report that buffer memory may alias frame objects and
  // other memory accesses.
  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};
81 
82 namespace yaml {
83 
/// Serializable mirror of llvm::SIMachineFunctionInfo used to round-trip
/// the target-specific function state through MIR YAML. Fields correspond
/// to the keys mapped in MappingTraits<SIMachineFunctionInfo> and are read
/// back by llvm::SIMachineFunctionInfo::initializeBaseYamlFields().
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;

  // Register names serialized as strings; defaults are the placeholder
  // registers meaning "not yet assigned", matching the in-memory defaults
  // of llvm::SIMachineFunctionInfo.
  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  SIMachineFunctionInfo() = default;
  // Construct the YAML mirror from the live machine function info; TRI is
  // needed to print register names.
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};
105 
/// YAML mapping for yaml::SIMachineFunctionInfo. All keys are optional;
/// each default here must stay in sync with the corresponding field
/// initializer so that default-valued fields are omitted from the output.
/// Note: the call order below determines the key order in serialized MIR.
template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
                       StringValue("$scratch_wave_offset_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
  }
};
126 
127 } // end namespace yaml
128 
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
///
/// It also carries all other per-function target state for the SI/GCN
/// backend: reserved scratch/stack registers, preloaded argument SGPRs and
/// VGPRs (ArgInfo), SGPR->VGPR spill bookkeeping, PS input masks, wave and
/// work-group size limits, and the recorded occupancy.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  // Register holding the computed TID, if any; AMDGPU::NoRegister (0) until
  // setTIDReg() is called (see hasCalculatedTID()).
  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  // Locations (registers/stack) of the preloaded function arguments.
  AMDGPUFunctionArgInfo ArgInfo;

  // State of MODE register, assumed FP mode.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Caches of pseudo source values keyed by the resource descriptor value,
  // so repeated getBufferPSV()/getImagePSV() calls return the same object.
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  // NOTE(review): these bitfields have no in-class initializers here —
  // presumably the constructor in the .cpp initializes them; verify.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  // Next free SGPR after the user SGPRs / after the preloaded SGPRs,
  // respectively; used when allocating the add*() inputs below.
  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  /// One lane of a VGPR holding a spilled SGPR subregister.
  struct SpilledReg {
    unsigned VGPR = 0;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}

    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != 0;}
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  /// Restore state from the YAML mirror (MIR parsing). Returns true on
  /// error — TODO confirm against the .cpp implementation.
  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);

  /// \returns the VGPR lanes assigned to the SGPR spilled at \p FrameIndex,
  /// or an empty array if that index has no SGPR->VGPR spill.
  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  AMDGPU::SIModeRegisterDefaults getMode() const {
    return Mode;
  }

  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

  // AMDGPU::NoRegister is 0, so an assigned TID register reads as nonzero.
  bool hasCalculatedTID() const { return TIDReg != 0; };
  unsigned getTIDReg() const { return TIDReg; };
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs. Each reserves the next user SGPR(s) for the given input
  // and returns the allocated register.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs. Each allocates the next system SGPR for the input and
  // returns it.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  // Assign a specific register rather than allocating the next system SGPR;
  // note this does not bump NumSystemSGPRs.
  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  // Queries for the feature bits set up by the constructor / add*() calls.
  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  // NOTE(review): dereferences the descriptor unconditionally — callers must
  // only ask for values that were actually preloaded.
  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  // For entry functions the frame offset register is kept identical to the
  // scratch wave offset register, so update both together.
  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  // PS input bookkeeping: PSInputAddr/PSInputEnable are bit masks indexed by
  // input number.
  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim' work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const;

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  /// \returns the cached buffer pseudo source value for \p BufferRsrc,
  /// creating it on first use.
  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  /// \returns the cached image pseudo source value for \p ImgRsrc, creating
  /// it on first use.
  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  // Memory-bound or wave-limited functions are capped at 4 waves.
  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  // Occupancy only ever decreases through this path.
  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  // Raise the recorded occupancy toward \p Limit, then clamp it back to
  // whatever MF actually allows.
  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};
684 
685 } // end namespace llvm
686 
687 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
688