xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision 8e8f8f43b043b1839973fcc28694ca8d220a2137)
1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
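/// \file
/// Declares SIMachineFunctionInfo, the per-function state kept for SI
/// targets, together with the pseudo source values it hands out for AMDGPU
/// buffer and image memory accesses.
//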
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
16 
17 #include "AMDGPUMachineFunction.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/CodeGen/PseudoSourceValue.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include <array>
24 #include <cassert>
25 #include <map>
26 #include <utility>
27 
28 namespace llvm {
29 
30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
31 public:
32   explicit AMDGPUImagePseudoSourceValue() :
33     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
34 
35   bool isConstant(const MachineFrameInfo *) const override {
36     // This should probably be true for most images, but we will start by being
37     // conservative.
38     return false;
39   }
40 
41   bool isAliased(const MachineFrameInfo *) const override {
42     // FIXME: If we ever change image intrinsics to accept fat pointers, then
43     // this could be true for some cases.
44     return false;
45   }
46 
47   bool mayAlias(const MachineFrameInfo*) const override {
48     // FIXME: If we ever change image intrinsics to accept fat pointers, then
49     // this could be true for some cases.
50     return false;
51   }
52 };
53 
54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
55 public:
56   explicit AMDGPUBufferPseudoSourceValue() :
57     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
58 
59   bool isConstant(const MachineFrameInfo *) const override {
60     // This should probably be true for most images, but we will start by being
61     // conservative.
62     return false;
63   }
64 
65   bool isAliased(const MachineFrameInfo *) const override {
66     // FIXME: If we ever change image intrinsics to accept fat pointers, then
67     // this could be true for some cases.
68     return false;
69   }
70 
71   bool mayAlias(const MachineFrameInfo*) const override {
72     // FIXME: If we ever change image intrinsics to accept fat pointers, then
73     // this could be true for some cases.
74     return false;
75   }
76 };
77 
78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
79 /// tells the hardware which interpolation parameters to load.
80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
81   // FIXME: This should be removed and getPreloadedValue moved here.
82   friend class SIRegisterInfo;
83 
84   unsigned TIDReg;
85 
86   // Registers that may be reserved for spilling purposes. These may be the same
87   // as the input registers.
88   unsigned ScratchRSrcReg;
89   unsigned ScratchWaveOffsetReg;
90 
91   // This is the current function's incremented size from the kernel's scratch
92   // wave offset register. For an entry function, this is exactly the same as
93   // the ScratchWaveOffsetReg.
94   unsigned FrameOffsetReg;
95 
96   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
97   unsigned StackPtrOffsetReg;
98 
99   // Input registers for non-HSA ABI
100   unsigned ImplicitBufferPtrUserSGPR;
101 
102   // Input registers setup for the HSA ABI.
103   // User SGPRs in allocation order.
104   unsigned PrivateSegmentBufferUserSGPR;
105   unsigned DispatchPtrUserSGPR;
106   unsigned QueuePtrUserSGPR;
107   unsigned KernargSegmentPtrUserSGPR;
108   unsigned DispatchIDUserSGPR;
109   unsigned FlatScratchInitUserSGPR;
110   unsigned PrivateSegmentSizeUserSGPR;
111   unsigned GridWorkGroupCountXUserSGPR;
112   unsigned GridWorkGroupCountYUserSGPR;
113   unsigned GridWorkGroupCountZUserSGPR;
114 
115   // System SGPRs in allocation order.
116   unsigned WorkGroupIDXSystemSGPR;
117   unsigned WorkGroupIDYSystemSGPR;
118   unsigned WorkGroupIDZSystemSGPR;
119   unsigned WorkGroupInfoSystemSGPR;
120   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
121 
122   // VGPR inputs. These are always v0, v1 and v2 for entry functions.
123   unsigned WorkItemIDXVGPR;
124   unsigned WorkItemIDYVGPR;
125   unsigned WorkItemIDZVGPR;
126 
127   // Graphics info.
128   unsigned PSInputAddr;
129   unsigned PSInputEnable;
130 
131   bool ReturnsVoid;
132 
133   // A pair of default/requested minimum/maximum flat work group sizes.
134   // Minimum - first, maximum - second.
135   std::pair<unsigned, unsigned> FlatWorkGroupSizes;
136 
137   // A pair of default/requested minimum/maximum number of waves per execution
138   // unit. Minimum - first, maximum - second.
139   std::pair<unsigned, unsigned> WavesPerEU;
140 
141   // Stack object indices for work group IDs.
142   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
143   // Stack object indices for work item IDs.
144   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
145 
146   AMDGPUBufferPseudoSourceValue BufferPSV;
147   AMDGPUImagePseudoSourceValue ImagePSV;
148 
149 private:
150   unsigned LDSWaveSpillSize;
151   unsigned ScratchOffsetReg;
152   unsigned NumUserSGPRs;
153   unsigned NumSystemSGPRs;
154 
155   bool HasSpilledSGPRs;
156   bool HasSpilledVGPRs;
157   bool HasNonSpillStackObjects;
158 
159   unsigned NumSpilledSGPRs;
160   unsigned NumSpilledVGPRs;
161 
162   // Feature bits required for inputs passed in user SGPRs.
163   bool PrivateSegmentBuffer : 1;
164   bool DispatchPtr : 1;
165   bool QueuePtr : 1;
166   bool KernargSegmentPtr : 1;
167   bool DispatchID : 1;
168   bool FlatScratchInit : 1;
169   bool GridWorkgroupCountX : 1;
170   bool GridWorkgroupCountY : 1;
171   bool GridWorkgroupCountZ : 1;
172 
173   // Feature bits required for inputs passed in system SGPRs.
174   bool WorkGroupIDX : 1; // Always initialized.
175   bool WorkGroupIDY : 1;
176   bool WorkGroupIDZ : 1;
177   bool WorkGroupInfo : 1;
178   bool PrivateSegmentWaveByteOffset : 1;
179 
180   bool WorkItemIDX : 1; // Always initialized.
181   bool WorkItemIDY : 1;
182   bool WorkItemIDZ : 1;
183 
184   // Private memory buffer
185   // Compute directly in sgpr[0:1]
186   // Other shaders indirect 64-bits at sgpr[0:1]
187   bool ImplicitBufferPtr : 1;
188 
189   // Pointer to where the ABI inserts special kernel arguments separate from the
190   // user arguments. This is an offset from the KernargSegmentPtr.
191   bool ImplicitArgPtr : 1;
192 
193   MCPhysReg getNextUserSGPR() const {
194     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
195     return AMDGPU::SGPR0 + NumUserSGPRs;
196   }
197 
198   MCPhysReg getNextSystemSGPR() const {
199     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
200   }
201 
202 public:
203   struct SpilledReg {
204     unsigned VGPR = AMDGPU::NoRegister;
205     int Lane = -1;
206 
207     SpilledReg() = default;
208     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
209 
210     bool hasLane() { return Lane != -1;}
211     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
212   };
213 
214   struct SGPRSpillVGPRCSR {
215     // VGPR used for SGPR spills
216     unsigned VGPR;
217 
218     // If the VGPR is a CSR, the stack slot used to save/restore it in the
219     // prolog/epilog.
220     Optional<int> FI;
221 
222     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) :
223       VGPR(V),
224       FI(F) {}
225   };
226 
227 private:
228   // SGPR->VGPR spilling support.
229   typedef std::pair<unsigned, unsigned> SpillRegMask;
230 
231   // Track VGPR + wave index for each subregister of the SGPR spilled to
232   // frameindex key.
233   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
234   unsigned NumVGPRSpillLanes = 0;
235   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
236 
237 public:
238 
239   SIMachineFunctionInfo(const MachineFunction &MF);
240 
241   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
242     auto I = SGPRToVGPRSpills.find(FrameIndex);
243     return (I == SGPRToVGPRSpills.end()) ?
244       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
245   }
246 
247   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
248     return SpillVGPRs;
249   }
250 
251   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
252   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
253 
254   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
255   unsigned getTIDReg() const { return TIDReg; };
256   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
257 
258   // Add user SGPRs.
259   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
260   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
261   unsigned addQueuePtr(const SIRegisterInfo &TRI);
262   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
263   unsigned addDispatchID(const SIRegisterInfo &TRI);
264   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
265   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
266 
267   // Add system SGPRs.
268   unsigned addWorkGroupIDX() {
269     WorkGroupIDXSystemSGPR = getNextSystemSGPR();
270     NumSystemSGPRs += 1;
271     return WorkGroupIDXSystemSGPR;
272   }
273 
274   unsigned addWorkGroupIDY() {
275     WorkGroupIDYSystemSGPR = getNextSystemSGPR();
276     NumSystemSGPRs += 1;
277     return WorkGroupIDYSystemSGPR;
278   }
279 
280   unsigned addWorkGroupIDZ() {
281     WorkGroupIDZSystemSGPR = getNextSystemSGPR();
282     NumSystemSGPRs += 1;
283     return WorkGroupIDZSystemSGPR;
284   }
285 
286   unsigned addWorkGroupInfo() {
287     WorkGroupInfoSystemSGPR = getNextSystemSGPR();
288     NumSystemSGPRs += 1;
289     return WorkGroupInfoSystemSGPR;
290   }
291 
292   unsigned addPrivateSegmentWaveByteOffset() {
293     PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
294     NumSystemSGPRs += 1;
295     return PrivateSegmentWaveByteOffsetSystemSGPR;
296   }
297 
298   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
299     PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
300   }
301 
302   bool hasPrivateSegmentBuffer() const {
303     return PrivateSegmentBuffer;
304   }
305 
306   bool hasDispatchPtr() const {
307     return DispatchPtr;
308   }
309 
310   bool hasQueuePtr() const {
311     return QueuePtr;
312   }
313 
314   bool hasKernargSegmentPtr() const {
315     return KernargSegmentPtr;
316   }
317 
318   bool hasDispatchID() const {
319     return DispatchID;
320   }
321 
322   bool hasFlatScratchInit() const {
323     return FlatScratchInit;
324   }
325 
326   bool hasGridWorkgroupCountX() const {
327     return GridWorkgroupCountX;
328   }
329 
330   bool hasGridWorkgroupCountY() const {
331     return GridWorkgroupCountY;
332   }
333 
334   bool hasGridWorkgroupCountZ() const {
335     return GridWorkgroupCountZ;
336   }
337 
338   bool hasWorkGroupIDX() const {
339     return WorkGroupIDX;
340   }
341 
342   bool hasWorkGroupIDY() const {
343     return WorkGroupIDY;
344   }
345 
346   bool hasWorkGroupIDZ() const {
347     return WorkGroupIDZ;
348   }
349 
350   bool hasWorkGroupInfo() const {
351     return WorkGroupInfo;
352   }
353 
354   bool hasPrivateSegmentWaveByteOffset() const {
355     return PrivateSegmentWaveByteOffset;
356   }
357 
358   bool hasWorkItemIDX() const {
359     return WorkItemIDX;
360   }
361 
362   bool hasWorkItemIDY() const {
363     return WorkItemIDY;
364   }
365 
366   bool hasWorkItemIDZ() const {
367     return WorkItemIDZ;
368   }
369 
370   bool hasImplicitArgPtr() const {
371     return ImplicitArgPtr;
372   }
373 
374   bool hasImplicitBufferPtr() const {
375     return ImplicitBufferPtr;
376   }
377 
378   unsigned getNumUserSGPRs() const {
379     return NumUserSGPRs;
380   }
381 
382   unsigned getNumPreloadedSGPRs() const {
383     return NumUserSGPRs + NumSystemSGPRs;
384   }
385 
386   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
387     return PrivateSegmentWaveByteOffsetSystemSGPR;
388   }
389 
390   /// \brief Returns the physical register reserved for use as the resource
391   /// descriptor for scratch accesses.
392   unsigned getScratchRSrcReg() const {
393     return ScratchRSrcReg;
394   }
395 
396   void setScratchRSrcReg(unsigned Reg) {
397     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
398     ScratchRSrcReg = Reg;
399   }
400 
401   unsigned getScratchWaveOffsetReg() const {
402     return ScratchWaveOffsetReg;
403   }
404 
405   unsigned getFrameOffsetReg() const {
406     return FrameOffsetReg;
407   }
408 
409   void setStackPtrOffsetReg(unsigned Reg) {
410     StackPtrOffsetReg = Reg;
411   }
412 
413   // Note the unset value for this is AMDGPU::SP_REG rather than
414   // NoRegister. This is mostly a workaround for MIR tests where state that
415   // can't be directly computed from the function is not preserved in serialized
416   // MIR.
417   unsigned getStackPtrOffsetReg() const {
418     return StackPtrOffsetReg;
419   }
420 
421   void setScratchWaveOffsetReg(unsigned Reg) {
422     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
423     ScratchWaveOffsetReg = Reg;
424     if (isEntryFunction())
425       FrameOffsetReg = ScratchWaveOffsetReg;
426   }
427 
428   unsigned getQueuePtrUserSGPR() const {
429     return QueuePtrUserSGPR;
430   }
431 
432   unsigned getImplicitBufferPtrUserSGPR() const {
433     return ImplicitBufferPtrUserSGPR;
434   }
435 
436   bool hasSpilledSGPRs() const {
437     return HasSpilledSGPRs;
438   }
439 
440   void setHasSpilledSGPRs(bool Spill = true) {
441     HasSpilledSGPRs = Spill;
442   }
443 
444   bool hasSpilledVGPRs() const {
445     return HasSpilledVGPRs;
446   }
447 
448   void setHasSpilledVGPRs(bool Spill = true) {
449     HasSpilledVGPRs = Spill;
450   }
451 
452   bool hasNonSpillStackObjects() const {
453     return HasNonSpillStackObjects;
454   }
455 
456   void setHasNonSpillStackObjects(bool StackObject = true) {
457     HasNonSpillStackObjects = StackObject;
458   }
459 
460   unsigned getNumSpilledSGPRs() const {
461     return NumSpilledSGPRs;
462   }
463 
464   unsigned getNumSpilledVGPRs() const {
465     return NumSpilledVGPRs;
466   }
467 
468   void addToSpilledSGPRs(unsigned num) {
469     NumSpilledSGPRs += num;
470   }
471 
472   void addToSpilledVGPRs(unsigned num) {
473     NumSpilledVGPRs += num;
474   }
475 
476   unsigned getPSInputAddr() const {
477     return PSInputAddr;
478   }
479 
480   unsigned getPSInputEnable() const {
481     return PSInputEnable;
482   }
483 
484   bool isPSInputAllocated(unsigned Index) const {
485     return PSInputAddr & (1 << Index);
486   }
487 
488   void markPSInputAllocated(unsigned Index) {
489     PSInputAddr |= 1 << Index;
490   }
491 
492   void markPSInputEnabled(unsigned Index) {
493     PSInputEnable |= 1 << Index;
494   }
495 
496   bool returnsVoid() const {
497     return ReturnsVoid;
498   }
499 
500   void setIfReturnsVoid(bool Value) {
501     ReturnsVoid = Value;
502   }
503 
504   /// \returns A pair of default/requested minimum/maximum flat work group sizes
505   /// for this function.
506   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
507     return FlatWorkGroupSizes;
508   }
509 
510   /// \returns Default/requested minimum flat work group size for this function.
511   unsigned getMinFlatWorkGroupSize() const {
512     return FlatWorkGroupSizes.first;
513   }
514 
515   /// \returns Default/requested maximum flat work group size for this function.
516   unsigned getMaxFlatWorkGroupSize() const {
517     return FlatWorkGroupSizes.second;
518   }
519 
520   /// \returns A pair of default/requested minimum/maximum number of waves per
521   /// execution unit.
522   std::pair<unsigned, unsigned> getWavesPerEU() const {
523     return WavesPerEU;
524   }
525 
526   /// \returns Default/requested minimum number of waves per execution unit.
527   unsigned getMinWavesPerEU() const {
528     return WavesPerEU.first;
529   }
530 
531   /// \returns Default/requested maximum number of waves per execution unit.
532   unsigned getMaxWavesPerEU() const {
533     return WavesPerEU.second;
534   }
535 
536   /// \returns Stack object index for \p Dim's work group ID.
537   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
538     assert(Dim < 3);
539     return DebuggerWorkGroupIDStackObjectIndices[Dim];
540   }
541 
542   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
543   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
544     assert(Dim < 3);
545     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
546   }
547 
548   /// \returns Stack object index for \p Dim's work item ID.
549   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
550     assert(Dim < 3);
551     return DebuggerWorkItemIDStackObjectIndices[Dim];
552   }
553 
554   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
555   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
556     assert(Dim < 3);
557     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
558   }
559 
560   /// \returns SGPR used for \p Dim's work group ID.
561   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
562     switch (Dim) {
563     case 0:
564       assert(hasWorkGroupIDX());
565       return WorkGroupIDXSystemSGPR;
566     case 1:
567       assert(hasWorkGroupIDY());
568       return WorkGroupIDYSystemSGPR;
569     case 2:
570       assert(hasWorkGroupIDZ());
571       return WorkGroupIDZSystemSGPR;
572     }
573     llvm_unreachable("unexpected dimension");
574   }
575 
576   /// \returns VGPR used for \p Dim' work item ID.
577   unsigned getWorkItemIDVGPR(unsigned Dim) const {
578     switch (Dim) {
579     case 0:
580       assert(hasWorkItemIDX());
581       return AMDGPU::VGPR0;
582     case 1:
583       assert(hasWorkItemIDY());
584       return AMDGPU::VGPR1;
585     case 2:
586       assert(hasWorkItemIDZ());
587       return AMDGPU::VGPR2;
588     }
589     llvm_unreachable("unexpected dimension");
590   }
591 
592   unsigned getLDSWaveSpillSize() const {
593     return LDSWaveSpillSize;
594   }
595 
596   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
597     return &BufferPSV;
598   }
599 
600   const AMDGPUImagePseudoSourceValue *getImagePSV() const {
601     return &ImagePSV;
602   }
603 };
604 
605 } // end namespace llvm
606 
607 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
608