xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision 678e111e11307b52ba1da0b5538cda9b8b4746cb)
1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
16 
17 #include "AMDGPUMachineFunction.h"
18 #include "SIRegisterInfo.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "llvm/CodeGen/PseudoSourceValue.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include <array>
24 #include <cassert>
25 #include <map>
26 #include <utility>
27 
28 namespace llvm {
29 
30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
31 public:
32   explicit AMDGPUImagePseudoSourceValue() :
33     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
34 
35   bool isConstant(const MachineFrameInfo *) const override {
36     // This should probably be true for most images, but we will start by being
37     // conservative.
38     return false;
39   }
40 
41   bool isAliased(const MachineFrameInfo *) const override {
42     // FIXME: If we ever change image intrinsics to accept fat pointers, then
43     // this could be true for some cases.
44     return false;
45   }
46 
47   bool mayAlias(const MachineFrameInfo*) const override {
48     // FIXME: If we ever change image intrinsics to accept fat pointers, then
49     // this could be true for some cases.
50     return false;
51   }
52 };
53 
54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
55 public:
56   explicit AMDGPUBufferPseudoSourceValue() :
57     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
58 
59   bool isConstant(const MachineFrameInfo *) const override {
60     // This should probably be true for most images, but we will start by being
61     // conservative.
62     return false;
63   }
64 
65   bool isAliased(const MachineFrameInfo *) const override {
66     // FIXME: If we ever change image intrinsics to accept fat pointers, then
67     // this could be true for some cases.
68     return false;
69   }
70 
71   bool mayAlias(const MachineFrameInfo*) const override {
72     // FIXME: If we ever change image intrinsics to accept fat pointers, then
73     // this could be true for some cases.
74     return false;
75   }
76 };
77 
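/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load. It also records
/// the user/system SGPR inputs assigned to the function and its register
/// spilling state.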
80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
81   // FIXME: This should be removed and getPreloadedValue moved here.
82   friend class SIRegisterInfo;
83 
84   unsigned TIDReg;
85 
86   // Registers that may be reserved for spilling purposes. These may be the same
87   // as the input registers.
88   unsigned ScratchRSrcReg;
89   unsigned ScratchWaveOffsetReg;
90 
91   // Input registers for non-HSA ABI
92   unsigned PrivateMemoryPtrUserSGPR;
93 
94   // Input registers setup for the HSA ABI.
95   // User SGPRs in allocation order.
96   unsigned PrivateSegmentBufferUserSGPR;
97   unsigned DispatchPtrUserSGPR;
98   unsigned QueuePtrUserSGPR;
99   unsigned KernargSegmentPtrUserSGPR;
100   unsigned DispatchIDUserSGPR;
101   unsigned FlatScratchInitUserSGPR;
102   unsigned PrivateSegmentSizeUserSGPR;
103   unsigned GridWorkGroupCountXUserSGPR;
104   unsigned GridWorkGroupCountYUserSGPR;
105   unsigned GridWorkGroupCountZUserSGPR;
106 
107   // System SGPRs in allocation order.
108   unsigned WorkGroupIDXSystemSGPR;
109   unsigned WorkGroupIDYSystemSGPR;
110   unsigned WorkGroupIDZSystemSGPR;
111   unsigned WorkGroupInfoSystemSGPR;
112   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
113 
114   // Graphics info.
115   unsigned PSInputAddr;
116   bool ReturnsVoid;
117 
118   // A pair of default/requested minimum/maximum flat work group sizes.
119   // Minimum - first, maximum - second.
120   std::pair<unsigned, unsigned> FlatWorkGroupSizes;
121 
122   // A pair of default/requested minimum/maximum number of waves per execution
123   // unit. Minimum - first, maximum - second.
124   std::pair<unsigned, unsigned> WavesPerEU;
125 
126   // Stack object indices for work group IDs.
127   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
128   // Stack object indices for work item IDs.
129   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
130 
131   AMDGPUBufferPseudoSourceValue BufferPSV;
132   AMDGPUImagePseudoSourceValue ImagePSV;
133 
134 public:
135   // FIXME: Make private
136   unsigned LDSWaveSpillSize;
137   unsigned PSInputEna;
138 
139 
140   unsigned ScratchOffsetReg;
141   unsigned NumUserSGPRs;
142   unsigned NumSystemSGPRs;
143 
144 private:
145   bool HasSpilledSGPRs;
146   bool HasSpilledVGPRs;
147   bool HasNonSpillStackObjects;
148 
149   unsigned NumSpilledSGPRs;
150   unsigned NumSpilledVGPRs;
151 
152   // Feature bits required for inputs passed in user SGPRs.
153   bool PrivateSegmentBuffer : 1;
154   bool DispatchPtr : 1;
155   bool QueuePtr : 1;
156   bool KernargSegmentPtr : 1;
157   bool DispatchID : 1;
158   bool FlatScratchInit : 1;
159   bool GridWorkgroupCountX : 1;
160   bool GridWorkgroupCountY : 1;
161   bool GridWorkgroupCountZ : 1;
162 
163   // Feature bits required for inputs passed in system SGPRs.
164   bool WorkGroupIDX : 1; // Always initialized.
165   bool WorkGroupIDY : 1;
166   bool WorkGroupIDZ : 1;
167   bool WorkGroupInfo : 1;
168   bool PrivateSegmentWaveByteOffset : 1;
169 
170   bool WorkItemIDX : 1; // Always initialized.
171   bool WorkItemIDY : 1;
172   bool WorkItemIDZ : 1;
173 
174   // Private memory buffer
175   // Compute directly in sgpr[0:1]
176   // Other shaders indirect 64-bits at sgpr[0:1]
177   bool PrivateMemoryInputPtr : 1;
178 
179   MCPhysReg getNextUserSGPR() const {
180     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
181     return AMDGPU::SGPR0 + NumUserSGPRs;
182   }
183 
184   MCPhysReg getNextSystemSGPR() const {
185     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
186   }
187 
188 public:
189   struct SpilledReg {
190     unsigned VGPR = AMDGPU::NoRegister;
191     int Lane = -1;
192 
193     SpilledReg() = default;
194     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
195 
196     bool hasLane() { return Lane != -1;}
197     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
198   };
199 
200 private:
201   // SGPR->VGPR spilling support.
202   typedef std::pair<unsigned, unsigned> SpillRegMask;
203 
204   // Track VGPR + wave index for each subregister of the SGPR spilled to
205   // frameindex key.
206   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
207   unsigned NumVGPRSpillLanes = 0;
208   SmallVector<unsigned, 2> SpillVGPRs;
209 
210 public:
211 
212   SIMachineFunctionInfo(const MachineFunction &MF);
213 
214   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
215     auto I = SGPRToVGPRSpills.find(FrameIndex);
216     return (I == SGPRToVGPRSpills.end()) ?
217       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
218   }
219 
220   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
221   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
222 
223   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
224   unsigned getTIDReg() const { return TIDReg; };
225   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
226 
227   // Add user SGPRs.
228   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
229   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
230   unsigned addQueuePtr(const SIRegisterInfo &TRI);
231   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
232   unsigned addDispatchID(const SIRegisterInfo &TRI);
233   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
234   unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
235 
236   // Add system SGPRs.
237   unsigned addWorkGroupIDX() {
238     WorkGroupIDXSystemSGPR = getNextSystemSGPR();
239     NumSystemSGPRs += 1;
240     return WorkGroupIDXSystemSGPR;
241   }
242 
243   unsigned addWorkGroupIDY() {
244     WorkGroupIDYSystemSGPR = getNextSystemSGPR();
245     NumSystemSGPRs += 1;
246     return WorkGroupIDYSystemSGPR;
247   }
248 
249   unsigned addWorkGroupIDZ() {
250     WorkGroupIDZSystemSGPR = getNextSystemSGPR();
251     NumSystemSGPRs += 1;
252     return WorkGroupIDZSystemSGPR;
253   }
254 
255   unsigned addWorkGroupInfo() {
256     WorkGroupInfoSystemSGPR = getNextSystemSGPR();
257     NumSystemSGPRs += 1;
258     return WorkGroupInfoSystemSGPR;
259   }
260 
261   unsigned addPrivateSegmentWaveByteOffset() {
262     PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
263     NumSystemSGPRs += 1;
264     return PrivateSegmentWaveByteOffsetSystemSGPR;
265   }
266 
267   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
268     PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
269   }
270 
271   bool hasPrivateSegmentBuffer() const {
272     return PrivateSegmentBuffer;
273   }
274 
275   bool hasDispatchPtr() const {
276     return DispatchPtr;
277   }
278 
279   bool hasQueuePtr() const {
280     return QueuePtr;
281   }
282 
283   bool hasKernargSegmentPtr() const {
284     return KernargSegmentPtr;
285   }
286 
287   bool hasDispatchID() const {
288     return DispatchID;
289   }
290 
291   bool hasFlatScratchInit() const {
292     return FlatScratchInit;
293   }
294 
295   bool hasGridWorkgroupCountX() const {
296     return GridWorkgroupCountX;
297   }
298 
299   bool hasGridWorkgroupCountY() const {
300     return GridWorkgroupCountY;
301   }
302 
303   bool hasGridWorkgroupCountZ() const {
304     return GridWorkgroupCountZ;
305   }
306 
307   bool hasWorkGroupIDX() const {
308     return WorkGroupIDX;
309   }
310 
311   bool hasWorkGroupIDY() const {
312     return WorkGroupIDY;
313   }
314 
315   bool hasWorkGroupIDZ() const {
316     return WorkGroupIDZ;
317   }
318 
319   bool hasWorkGroupInfo() const {
320     return WorkGroupInfo;
321   }
322 
323   bool hasPrivateSegmentWaveByteOffset() const {
324     return PrivateSegmentWaveByteOffset;
325   }
326 
327   bool hasWorkItemIDX() const {
328     return WorkItemIDX;
329   }
330 
331   bool hasWorkItemIDY() const {
332     return WorkItemIDY;
333   }
334 
335   bool hasWorkItemIDZ() const {
336     return WorkItemIDZ;
337   }
338 
339   bool hasPrivateMemoryInputPtr() const {
340     return PrivateMemoryInputPtr;
341   }
342 
343   unsigned getNumUserSGPRs() const {
344     return NumUserSGPRs;
345   }
346 
347   unsigned getNumPreloadedSGPRs() const {
348     return NumUserSGPRs + NumSystemSGPRs;
349   }
350 
351   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
352     return PrivateSegmentWaveByteOffsetSystemSGPR;
353   }
354 
355   /// \brief Returns the physical register reserved for use as the resource
356   /// descriptor for scratch accesses.
357   unsigned getScratchRSrcReg() const {
358     return ScratchRSrcReg;
359   }
360 
361   void setScratchRSrcReg(unsigned Reg) {
362     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
363     ScratchRSrcReg = Reg;
364   }
365 
366   unsigned getScratchWaveOffsetReg() const {
367     return ScratchWaveOffsetReg;
368   }
369 
370   void setScratchWaveOffsetReg(unsigned Reg) {
371     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
372     ScratchWaveOffsetReg = Reg;
373   }
374 
375   unsigned getQueuePtrUserSGPR() const {
376     return QueuePtrUserSGPR;
377   }
378 
379   unsigned getPrivateMemoryPtrUserSGPR() const {
380     return PrivateMemoryPtrUserSGPR;
381   }
382 
383   bool hasSpilledSGPRs() const {
384     return HasSpilledSGPRs;
385   }
386 
387   void setHasSpilledSGPRs(bool Spill = true) {
388     HasSpilledSGPRs = Spill;
389   }
390 
391   bool hasSpilledVGPRs() const {
392     return HasSpilledVGPRs;
393   }
394 
395   void setHasSpilledVGPRs(bool Spill = true) {
396     HasSpilledVGPRs = Spill;
397   }
398 
399   bool hasNonSpillStackObjects() const {
400     return HasNonSpillStackObjects;
401   }
402 
403   void setHasNonSpillStackObjects(bool StackObject = true) {
404     HasNonSpillStackObjects = StackObject;
405   }
406 
407   unsigned getNumSpilledSGPRs() const {
408     return NumSpilledSGPRs;
409   }
410 
411   unsigned getNumSpilledVGPRs() const {
412     return NumSpilledVGPRs;
413   }
414 
415   void addToSpilledSGPRs(unsigned num) {
416     NumSpilledSGPRs += num;
417   }
418 
419   void addToSpilledVGPRs(unsigned num) {
420     NumSpilledVGPRs += num;
421   }
422 
423   unsigned getPSInputAddr() const {
424     return PSInputAddr;
425   }
426 
427   bool isPSInputAllocated(unsigned Index) const {
428     return PSInputAddr & (1 << Index);
429   }
430 
431   void markPSInputAllocated(unsigned Index) {
432     PSInputAddr |= 1 << Index;
433   }
434 
435   bool returnsVoid() const {
436     return ReturnsVoid;
437   }
438 
439   void setIfReturnsVoid(bool Value) {
440     ReturnsVoid = Value;
441   }
442 
443   /// \returns A pair of default/requested minimum/maximum flat work group sizes
444   /// for this function.
445   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
446     return FlatWorkGroupSizes;
447   }
448 
449   /// \returns Default/requested minimum flat work group size for this function.
450   unsigned getMinFlatWorkGroupSize() const {
451     return FlatWorkGroupSizes.first;
452   }
453 
454   /// \returns Default/requested maximum flat work group size for this function.
455   unsigned getMaxFlatWorkGroupSize() const {
456     return FlatWorkGroupSizes.second;
457   }
458 
459   /// \returns A pair of default/requested minimum/maximum number of waves per
460   /// execution unit.
461   std::pair<unsigned, unsigned> getWavesPerEU() const {
462     return WavesPerEU;
463   }
464 
465   /// \returns Default/requested minimum number of waves per execution unit.
466   unsigned getMinWavesPerEU() const {
467     return WavesPerEU.first;
468   }
469 
470   /// \returns Default/requested maximum number of waves per execution unit.
471   unsigned getMaxWavesPerEU() const {
472     return WavesPerEU.second;
473   }
474 
475   /// \returns Stack object index for \p Dim's work group ID.
476   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
477     assert(Dim < 3);
478     return DebuggerWorkGroupIDStackObjectIndices[Dim];
479   }
480 
481   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
482   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
483     assert(Dim < 3);
484     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
485   }
486 
487   /// \returns Stack object index for \p Dim's work item ID.
488   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
489     assert(Dim < 3);
490     return DebuggerWorkItemIDStackObjectIndices[Dim];
491   }
492 
493   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
494   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
495     assert(Dim < 3);
496     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
497   }
498 
499   /// \returns SGPR used for \p Dim's work group ID.
500   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
501     switch (Dim) {
502     case 0:
503       assert(hasWorkGroupIDX());
504       return WorkGroupIDXSystemSGPR;
505     case 1:
506       assert(hasWorkGroupIDY());
507       return WorkGroupIDYSystemSGPR;
508     case 2:
509       assert(hasWorkGroupIDZ());
510       return WorkGroupIDZSystemSGPR;
511     }
512     llvm_unreachable("unexpected dimension");
513   }
514 
515   /// \returns VGPR used for \p Dim' work item ID.
516   unsigned getWorkItemIDVGPR(unsigned Dim) const {
517     switch (Dim) {
518     case 0:
519       assert(hasWorkItemIDX());
520       return AMDGPU::VGPR0;
521     case 1:
522       assert(hasWorkItemIDY());
523       return AMDGPU::VGPR1;
524     case 2:
525       assert(hasWorkItemIDZ());
526       return AMDGPU::VGPR2;
527     }
528     llvm_unreachable("unexpected dimension");
529   }
530 
531   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
532     return &BufferPSV;
533   }
534 
535   const AMDGPUImagePseudoSourceValue *getImagePSV() const {
536     return &ImagePSV;
537   }
538 };
539 
540 } // end namespace llvm
541 
542 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
543