xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision e15855d9e3b7ffe828781b63ac332ec7f504875a)
1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
16 
17 #include "AMDGPUMachineFunction.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/CodeGen/PseudoSourceValue.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include <array>
24 #include <cassert>
25 #include <map>
26 #include <utility>
27 
28 namespace llvm {
29 
30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
31 public:
32   explicit AMDGPUImagePseudoSourceValue() :
33     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
34 
35   bool isConstant(const MachineFrameInfo *) const override {
36     // This should probably be true for most images, but we will start by being
37     // conservative.
38     return false;
39   }
40 
41   bool isAliased(const MachineFrameInfo *) const override {
42     // FIXME: If we ever change image intrinsics to accept fat pointers, then
43     // this could be true for some cases.
44     return false;
45   }
46 
47   bool mayAlias(const MachineFrameInfo*) const override {
48     // FIXME: If we ever change image intrinsics to accept fat pointers, then
49     // this could be true for some cases.
50     return false;
51   }
52 };
53 
54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
55 public:
56   explicit AMDGPUBufferPseudoSourceValue() :
57     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
58 
59   bool isConstant(const MachineFrameInfo *) const override {
60     // This should probably be true for most images, but we will start by being
61     // conservative.
62     return false;
63   }
64 
65   bool isAliased(const MachineFrameInfo *) const override {
66     // FIXME: If we ever change image intrinsics to accept fat pointers, then
67     // this could be true for some cases.
68     return false;
69   }
70 
71   bool mayAlias(const MachineFrameInfo*) const override {
72     // FIXME: If we ever change image intrinsics to accept fat pointers, then
73     // this could be true for some cases.
74     return false;
75   }
76 };
77 
78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
79 /// tells the hardware which interpolation parameters to load.
80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
81   // FIXME: This should be removed and getPreloadedValue moved here.
82   friend class SIRegisterInfo;
83 
84   unsigned TIDReg;
85 
86   // Registers that may be reserved for spilling purposes. These may be the same
87   // as the input registers.
88   unsigned ScratchRSrcReg;
89   unsigned ScratchWaveOffsetReg;
90 
91   // This is the current function's incremented size from the kernel's scratch
92   // wave offset register. For an entry function, this is exactly the same as
93   // the ScratchWaveOffsetReg.
94   unsigned FrameOffsetReg;
95 
96   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
97   unsigned StackPtrOffsetReg;
98 
99   // Input registers for non-HSA ABI
100   unsigned ImplicitBufferPtrUserSGPR;
101 
102   // Input registers setup for the HSA ABI.
103   // User SGPRs in allocation order.
104   unsigned PrivateSegmentBufferUserSGPR;
105   unsigned DispatchPtrUserSGPR;
106   unsigned QueuePtrUserSGPR;
107   unsigned KernargSegmentPtrUserSGPR;
108   unsigned DispatchIDUserSGPR;
109   unsigned FlatScratchInitUserSGPR;
110   unsigned PrivateSegmentSizeUserSGPR;
111   unsigned GridWorkGroupCountXUserSGPR;
112   unsigned GridWorkGroupCountYUserSGPR;
113   unsigned GridWorkGroupCountZUserSGPR;
114 
115   // System SGPRs in allocation order.
116   unsigned WorkGroupIDXSystemSGPR;
117   unsigned WorkGroupIDYSystemSGPR;
118   unsigned WorkGroupIDZSystemSGPR;
119   unsigned WorkGroupInfoSystemSGPR;
120   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
121 
122   // VGPR inputs. These are always v0, v1 and v2 for entry functions.
123   unsigned WorkItemIDXVGPR;
124   unsigned WorkItemIDYVGPR;
125   unsigned WorkItemIDZVGPR;
126 
127   // Graphics info.
128   unsigned PSInputAddr;
129   unsigned PSInputEnable;
130 
131   bool ReturnsVoid;
132 
133   // A pair of default/requested minimum/maximum flat work group sizes.
134   // Minimum - first, maximum - second.
135   std::pair<unsigned, unsigned> FlatWorkGroupSizes;
136 
137   // A pair of default/requested minimum/maximum number of waves per execution
138   // unit. Minimum - first, maximum - second.
139   std::pair<unsigned, unsigned> WavesPerEU;
140 
141   // Stack object indices for work group IDs.
142   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
143   // Stack object indices for work item IDs.
144   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
145 
146   AMDGPUBufferPseudoSourceValue BufferPSV;
147   AMDGPUImagePseudoSourceValue ImagePSV;
148 
149 private:
150   unsigned LDSWaveSpillSize;
151   unsigned ScratchOffsetReg;
152   unsigned NumUserSGPRs;
153   unsigned NumSystemSGPRs;
154 
155   bool HasSpilledSGPRs;
156   bool HasSpilledVGPRs;
157   bool HasNonSpillStackObjects;
158 
159   unsigned NumSpilledSGPRs;
160   unsigned NumSpilledVGPRs;
161 
162   // Feature bits required for inputs passed in user SGPRs.
163   bool PrivateSegmentBuffer : 1;
164   bool DispatchPtr : 1;
165   bool QueuePtr : 1;
166   bool KernargSegmentPtr : 1;
167   bool DispatchID : 1;
168   bool FlatScratchInit : 1;
169   bool GridWorkgroupCountX : 1;
170   bool GridWorkgroupCountY : 1;
171   bool GridWorkgroupCountZ : 1;
172 
173   // Feature bits required for inputs passed in system SGPRs.
174   bool WorkGroupIDX : 1; // Always initialized.
175   bool WorkGroupIDY : 1;
176   bool WorkGroupIDZ : 1;
177   bool WorkGroupInfo : 1;
178   bool PrivateSegmentWaveByteOffset : 1;
179 
180   bool WorkItemIDX : 1; // Always initialized.
181   bool WorkItemIDY : 1;
182   bool WorkItemIDZ : 1;
183 
184   // Private memory buffer
185   // Compute directly in sgpr[0:1]
186   // Other shaders indirect 64-bits at sgpr[0:1]
187   bool ImplicitBufferPtr : 1;
188 
189   MCPhysReg getNextUserSGPR() const {
190     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
191     return AMDGPU::SGPR0 + NumUserSGPRs;
192   }
193 
194   MCPhysReg getNextSystemSGPR() const {
195     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
196   }
197 
198 public:
199   struct SpilledReg {
200     unsigned VGPR = AMDGPU::NoRegister;
201     int Lane = -1;
202 
203     SpilledReg() = default;
204     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
205 
206     bool hasLane() { return Lane != -1;}
207     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
208   };
209 
210 private:
211   // SGPR->VGPR spilling support.
212   typedef std::pair<unsigned, unsigned> SpillRegMask;
213 
214   // Track VGPR + wave index for each subregister of the SGPR spilled to
215   // frameindex key.
216   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
217   unsigned NumVGPRSpillLanes = 0;
218   SmallVector<unsigned, 2> SpillVGPRs;
219 
220 public:
221 
222   SIMachineFunctionInfo(const MachineFunction &MF);
223 
224   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
225     auto I = SGPRToVGPRSpills.find(FrameIndex);
226     return (I == SGPRToVGPRSpills.end()) ?
227       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
228   }
229 
230   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
231   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
232 
233   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
234   unsigned getTIDReg() const { return TIDReg; };
235   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
236 
237   // Add user SGPRs.
238   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
239   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
240   unsigned addQueuePtr(const SIRegisterInfo &TRI);
241   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
242   unsigned addDispatchID(const SIRegisterInfo &TRI);
243   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
244   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
245 
246   // Add system SGPRs.
247   unsigned addWorkGroupIDX() {
248     WorkGroupIDXSystemSGPR = getNextSystemSGPR();
249     NumSystemSGPRs += 1;
250     return WorkGroupIDXSystemSGPR;
251   }
252 
253   unsigned addWorkGroupIDY() {
254     WorkGroupIDYSystemSGPR = getNextSystemSGPR();
255     NumSystemSGPRs += 1;
256     return WorkGroupIDYSystemSGPR;
257   }
258 
259   unsigned addWorkGroupIDZ() {
260     WorkGroupIDZSystemSGPR = getNextSystemSGPR();
261     NumSystemSGPRs += 1;
262     return WorkGroupIDZSystemSGPR;
263   }
264 
265   unsigned addWorkGroupInfo() {
266     WorkGroupInfoSystemSGPR = getNextSystemSGPR();
267     NumSystemSGPRs += 1;
268     return WorkGroupInfoSystemSGPR;
269   }
270 
271   unsigned addPrivateSegmentWaveByteOffset() {
272     PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
273     NumSystemSGPRs += 1;
274     return PrivateSegmentWaveByteOffsetSystemSGPR;
275   }
276 
277   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
278     PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
279   }
280 
281   bool hasPrivateSegmentBuffer() const {
282     return PrivateSegmentBuffer;
283   }
284 
285   bool hasDispatchPtr() const {
286     return DispatchPtr;
287   }
288 
289   bool hasQueuePtr() const {
290     return QueuePtr;
291   }
292 
293   bool hasKernargSegmentPtr() const {
294     return KernargSegmentPtr;
295   }
296 
297   bool hasDispatchID() const {
298     return DispatchID;
299   }
300 
301   bool hasFlatScratchInit() const {
302     return FlatScratchInit;
303   }
304 
305   bool hasGridWorkgroupCountX() const {
306     return GridWorkgroupCountX;
307   }
308 
309   bool hasGridWorkgroupCountY() const {
310     return GridWorkgroupCountY;
311   }
312 
313   bool hasGridWorkgroupCountZ() const {
314     return GridWorkgroupCountZ;
315   }
316 
317   bool hasWorkGroupIDX() const {
318     return WorkGroupIDX;
319   }
320 
321   bool hasWorkGroupIDY() const {
322     return WorkGroupIDY;
323   }
324 
325   bool hasWorkGroupIDZ() const {
326     return WorkGroupIDZ;
327   }
328 
329   bool hasWorkGroupInfo() const {
330     return WorkGroupInfo;
331   }
332 
333   bool hasPrivateSegmentWaveByteOffset() const {
334     return PrivateSegmentWaveByteOffset;
335   }
336 
337   bool hasWorkItemIDX() const {
338     return WorkItemIDX;
339   }
340 
341   bool hasWorkItemIDY() const {
342     return WorkItemIDY;
343   }
344 
345   bool hasWorkItemIDZ() const {
346     return WorkItemIDZ;
347   }
348 
349   bool hasImplicitBufferPtr() const {
350     return ImplicitBufferPtr;
351   }
352 
353   unsigned getNumUserSGPRs() const {
354     return NumUserSGPRs;
355   }
356 
357   unsigned getNumPreloadedSGPRs() const {
358     return NumUserSGPRs + NumSystemSGPRs;
359   }
360 
361   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
362     return PrivateSegmentWaveByteOffsetSystemSGPR;
363   }
364 
365   /// \brief Returns the physical register reserved for use as the resource
366   /// descriptor for scratch accesses.
367   unsigned getScratchRSrcReg() const {
368     return ScratchRSrcReg;
369   }
370 
371   void setScratchRSrcReg(unsigned Reg) {
372     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
373     ScratchRSrcReg = Reg;
374   }
375 
376   unsigned getScratchWaveOffsetReg() const {
377     return ScratchWaveOffsetReg;
378   }
379 
380   unsigned getFrameOffsetReg() const {
381     return FrameOffsetReg;
382   }
383 
384   void setStackPtrOffsetReg(unsigned Reg) {
385     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
386     StackPtrOffsetReg = Reg;
387   }
388 
389   unsigned getStackPtrOffsetReg() const {
390     return StackPtrOffsetReg;
391   }
392 
393   void setScratchWaveOffsetReg(unsigned Reg) {
394     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
395     ScratchWaveOffsetReg = Reg;
396     if (isEntryFunction())
397       FrameOffsetReg = ScratchWaveOffsetReg;
398   }
399 
400   unsigned getQueuePtrUserSGPR() const {
401     return QueuePtrUserSGPR;
402   }
403 
404   unsigned getImplicitBufferPtrUserSGPR() const {
405     return ImplicitBufferPtrUserSGPR;
406   }
407 
408   bool hasSpilledSGPRs() const {
409     return HasSpilledSGPRs;
410   }
411 
412   void setHasSpilledSGPRs(bool Spill = true) {
413     HasSpilledSGPRs = Spill;
414   }
415 
416   bool hasSpilledVGPRs() const {
417     return HasSpilledVGPRs;
418   }
419 
420   void setHasSpilledVGPRs(bool Spill = true) {
421     HasSpilledVGPRs = Spill;
422   }
423 
424   bool hasNonSpillStackObjects() const {
425     return HasNonSpillStackObjects;
426   }
427 
428   void setHasNonSpillStackObjects(bool StackObject = true) {
429     HasNonSpillStackObjects = StackObject;
430   }
431 
432   unsigned getNumSpilledSGPRs() const {
433     return NumSpilledSGPRs;
434   }
435 
436   unsigned getNumSpilledVGPRs() const {
437     return NumSpilledVGPRs;
438   }
439 
440   void addToSpilledSGPRs(unsigned num) {
441     NumSpilledSGPRs += num;
442   }
443 
444   void addToSpilledVGPRs(unsigned num) {
445     NumSpilledVGPRs += num;
446   }
447 
448   unsigned getPSInputAddr() const {
449     return PSInputAddr;
450   }
451 
452   unsigned getPSInputEnable() const {
453     return PSInputEnable;
454   }
455 
456   bool isPSInputAllocated(unsigned Index) const {
457     return PSInputAddr & (1 << Index);
458   }
459 
460   void markPSInputAllocated(unsigned Index) {
461     PSInputAddr |= 1 << Index;
462   }
463 
464   void markPSInputEnabled(unsigned Index) {
465     PSInputEnable |= 1 << Index;
466   }
467 
468   bool returnsVoid() const {
469     return ReturnsVoid;
470   }
471 
472   void setIfReturnsVoid(bool Value) {
473     ReturnsVoid = Value;
474   }
475 
476   /// \returns A pair of default/requested minimum/maximum flat work group sizes
477   /// for this function.
478   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
479     return FlatWorkGroupSizes;
480   }
481 
482   /// \returns Default/requested minimum flat work group size for this function.
483   unsigned getMinFlatWorkGroupSize() const {
484     return FlatWorkGroupSizes.first;
485   }
486 
487   /// \returns Default/requested maximum flat work group size for this function.
488   unsigned getMaxFlatWorkGroupSize() const {
489     return FlatWorkGroupSizes.second;
490   }
491 
492   /// \returns A pair of default/requested minimum/maximum number of waves per
493   /// execution unit.
494   std::pair<unsigned, unsigned> getWavesPerEU() const {
495     return WavesPerEU;
496   }
497 
498   /// \returns Default/requested minimum number of waves per execution unit.
499   unsigned getMinWavesPerEU() const {
500     return WavesPerEU.first;
501   }
502 
503   /// \returns Default/requested maximum number of waves per execution unit.
504   unsigned getMaxWavesPerEU() const {
505     return WavesPerEU.second;
506   }
507 
508   /// \returns Stack object index for \p Dim's work group ID.
509   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
510     assert(Dim < 3);
511     return DebuggerWorkGroupIDStackObjectIndices[Dim];
512   }
513 
514   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
515   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
516     assert(Dim < 3);
517     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
518   }
519 
520   /// \returns Stack object index for \p Dim's work item ID.
521   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
522     assert(Dim < 3);
523     return DebuggerWorkItemIDStackObjectIndices[Dim];
524   }
525 
526   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
527   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
528     assert(Dim < 3);
529     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
530   }
531 
532   /// \returns SGPR used for \p Dim's work group ID.
533   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
534     switch (Dim) {
535     case 0:
536       assert(hasWorkGroupIDX());
537       return WorkGroupIDXSystemSGPR;
538     case 1:
539       assert(hasWorkGroupIDY());
540       return WorkGroupIDYSystemSGPR;
541     case 2:
542       assert(hasWorkGroupIDZ());
543       return WorkGroupIDZSystemSGPR;
544     }
545     llvm_unreachable("unexpected dimension");
546   }
547 
548   /// \returns VGPR used for \p Dim' work item ID.
549   unsigned getWorkItemIDVGPR(unsigned Dim) const {
550     switch (Dim) {
551     case 0:
552       assert(hasWorkItemIDX());
553       return AMDGPU::VGPR0;
554     case 1:
555       assert(hasWorkItemIDY());
556       return AMDGPU::VGPR1;
557     case 2:
558       assert(hasWorkItemIDZ());
559       return AMDGPU::VGPR2;
560     }
561     llvm_unreachable("unexpected dimension");
562   }
563 
564   unsigned getLDSWaveSpillSize() const {
565     return LDSWaveSpillSize;
566   }
567 
568   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
569     return &BufferPSV;
570   }
571 
572   const AMDGPUImagePseudoSourceValue *getImagePSV() const {
573     return &ImagePSV;
574   }
575 };
576 
577 } // end namespace llvm
578 
579 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
580