xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision 59e128266c9de11ba334450e1c3b7101155bd55b)
1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
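/// \file
/// Declares SIMachineFunctionInfo, the per-function code generation state for
/// SI targets, together with the buffer and image pseudo source values it owns.
//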
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
16 
17 #include "AMDGPUMachineFunction.h"
18 #include "AMDGPUArgumentUsageInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <array>
28 #include <cassert>
29 #include <utility>
30 #include <vector>
31 
32 namespace llvm {
33 
34 class MachineFrameInfo;
35 class MachineFunction;
36 class TargetRegisterClass;
37 
38 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
39 public:
40   explicit AMDGPUImagePseudoSourceValue() :
41     PseudoSourceValue(PseudoSourceValue::TargetCustom) {}
42 
43   bool isConstant(const MachineFrameInfo *) const override {
44     // This should probably be true for most images, but we will start by being
45     // conservative.
46     return false;
47   }
48 
49   bool isAliased(const MachineFrameInfo *) const override {
50     // FIXME: If we ever change image intrinsics to accept fat pointers, then
51     // this could be true for some cases.
52     return false;
53   }
54 
55   bool mayAlias(const MachineFrameInfo *) const override {
56     // FIXME: If we ever change image intrinsics to accept fat pointers, then
57     // this could be true for some cases.
58     return false;
59   }
60 };
61 
62 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
63 public:
64   explicit AMDGPUBufferPseudoSourceValue() :
65     PseudoSourceValue(PseudoSourceValue::TargetCustom) {}
66 
67   bool isConstant(const MachineFrameInfo *) const override {
68     // This should probably be true for most images, but we will start by being
69     // conservative.
70     return false;
71   }
72 
73   bool isAliased(const MachineFrameInfo *) const override {
74     // FIXME: If we ever change image intrinsics to accept fat pointers, then
75     // this could be true for some cases.
76     return false;
77   }
78 
79   bool mayAlias(const MachineFrameInfo *) const override {
80     // FIXME: If we ever change image intrinsics to accept fat pointers, then
81     // this could be true for some cases.
82     return false;
83   }
84 };
85 
86 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
87 /// tells the hardware which interpolation parameters to load.
88 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
89   // FIXME: This should be removed and getPreloadedValue moved here.
90   friend class SIRegisterInfo;
91 
92   unsigned TIDReg = AMDGPU::NoRegister;
93 
94   // Registers that may be reserved for spilling purposes. These may be the same
95   // as the input registers.
96   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
97   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
98 
99   // This is the current function's incremented size from the kernel's scratch
100   // wave offset register. For an entry function, this is exactly the same as
101   // the ScratchWaveOffsetReg.
102   unsigned FrameOffsetReg = AMDGPU::FP_REG;
103 
104   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
105   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
106 
107   AMDGPUFunctionArgInfo ArgInfo;
108 
109   // Graphics info.
110   unsigned PSInputAddr = 0;
111   unsigned PSInputEnable = 0;
112 
113   bool ReturnsVoid = true;
114 
115   // A pair of default/requested minimum/maximum flat work group sizes.
116   // Minimum - first, maximum - second.
117   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
118 
119   // A pair of default/requested minimum/maximum number of waves per execution
120   // unit. Minimum - first, maximum - second.
121   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
122 
123   // Stack object indices for work group IDs.
124   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
125 
126   // Stack object indices for work item IDs.
127   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
128 
129   AMDGPUBufferPseudoSourceValue BufferPSV;
130   AMDGPUImagePseudoSourceValue ImagePSV;
131 
132 private:
133   unsigned LDSWaveSpillSize = 0;
134   unsigned ScratchOffsetReg;
135   unsigned NumUserSGPRs = 0;
136   unsigned NumSystemSGPRs = 0;
137 
138   bool HasSpilledSGPRs = false;
139   bool HasSpilledVGPRs = false;
140   bool HasNonSpillStackObjects = false;
141 
142   unsigned NumSpilledSGPRs = 0;
143   unsigned NumSpilledVGPRs = 0;
144 
145   // Feature bits required for inputs passed in user SGPRs.
146   bool PrivateSegmentBuffer : 1;
147   bool DispatchPtr : 1;
148   bool QueuePtr : 1;
149   bool KernargSegmentPtr : 1;
150   bool DispatchID : 1;
151   bool FlatScratchInit : 1;
152   bool GridWorkgroupCountX : 1;
153   bool GridWorkgroupCountY : 1;
154   bool GridWorkgroupCountZ : 1;
155 
156   // Feature bits required for inputs passed in system SGPRs.
157   bool WorkGroupIDX : 1; // Always initialized.
158   bool WorkGroupIDY : 1;
159   bool WorkGroupIDZ : 1;
160   bool WorkGroupInfo : 1;
161   bool PrivateSegmentWaveByteOffset : 1;
162 
163   bool WorkItemIDX : 1; // Always initialized.
164   bool WorkItemIDY : 1;
165   bool WorkItemIDZ : 1;
166 
167   // Private memory buffer
168   // Compute directly in sgpr[0:1]
169   // Other shaders indirect 64-bits at sgpr[0:1]
170   bool ImplicitBufferPtr : 1;
171 
172   // Pointer to where the ABI inserts special kernel arguments separate from the
173   // user arguments. This is an offset from the KernargSegmentPtr.
174   bool ImplicitArgPtr : 1;
175 
176   MCPhysReg getNextUserSGPR() const {
177     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
178     return AMDGPU::SGPR0 + NumUserSGPRs;
179   }
180 
181   MCPhysReg getNextSystemSGPR() const {
182     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
183   }
184 
185 public:
186   struct SpilledReg {
187     unsigned VGPR = AMDGPU::NoRegister;
188     int Lane = -1;
189 
190     SpilledReg() = default;
191     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
192 
193     bool hasLane() { return Lane != -1;}
194     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
195   };
196 
197   struct SGPRSpillVGPRCSR {
198     // VGPR used for SGPR spills
199     unsigned VGPR;
200 
201     // If the VGPR is a CSR, the stack slot used to save/restore it in the
202     // prolog/epilog.
203     Optional<int> FI;
204 
205     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
206   };
207 
208 private:
209   // SGPR->VGPR spilling support.
210   using SpillRegMask = std::pair<unsigned, unsigned>;
211 
212   // Track VGPR + wave index for each subregister of the SGPR spilled to
213   // frameindex key.
214   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
215   unsigned NumVGPRSpillLanes = 0;
216   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
217 
218 public:
219   SIMachineFunctionInfo(const MachineFunction &MF);
220 
221   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
222     auto I = SGPRToVGPRSpills.find(FrameIndex);
223     return (I == SGPRToVGPRSpills.end()) ?
224       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
225   }
226 
227   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
228     return SpillVGPRs;
229   }
230 
231   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
232   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
233 
234   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
235   unsigned getTIDReg() const { return TIDReg; }
236   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
237 
238   // Add user SGPRs.
239   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
240   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
241   unsigned addQueuePtr(const SIRegisterInfo &TRI);
242   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
243   unsigned addDispatchID(const SIRegisterInfo &TRI);
244   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
245   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
246 
247   // Add system SGPRs.
248   unsigned addWorkGroupIDX() {
249     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
250     NumSystemSGPRs += 1;
251     return ArgInfo.WorkGroupIDX.getRegister();
252   }
253 
254   unsigned addWorkGroupIDY() {
255     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
256     NumSystemSGPRs += 1;
257     return ArgInfo.WorkGroupIDY.getRegister();
258   }
259 
260   unsigned addWorkGroupIDZ() {
261     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
262     NumSystemSGPRs += 1;
263     return ArgInfo.WorkGroupIDZ.getRegister();
264   }
265 
266   unsigned addWorkGroupInfo() {
267     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
268     NumSystemSGPRs += 1;
269     return ArgInfo.WorkGroupInfo.getRegister();
270   }
271 
272   // Add special VGPR inputs
273   void setWorkItemIDX(ArgDescriptor Arg) {
274     ArgInfo.WorkItemIDX = Arg;
275   }
276 
277   void setWorkItemIDY(ArgDescriptor Arg) {
278     ArgInfo.WorkItemIDY = Arg;
279   }
280 
281   void setWorkItemIDZ(ArgDescriptor Arg) {
282     ArgInfo.WorkItemIDZ = Arg;
283   }
284 
285   unsigned addPrivateSegmentWaveByteOffset() {
286     ArgInfo.PrivateSegmentWaveByteOffset
287       = ArgDescriptor::createRegister(getNextSystemSGPR());
288     NumSystemSGPRs += 1;
289     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
290   }
291 
292   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
293     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
294   }
295 
296   bool hasPrivateSegmentBuffer() const {
297     return PrivateSegmentBuffer;
298   }
299 
300   bool hasDispatchPtr() const {
301     return DispatchPtr;
302   }
303 
304   bool hasQueuePtr() const {
305     return QueuePtr;
306   }
307 
308   bool hasKernargSegmentPtr() const {
309     return KernargSegmentPtr;
310   }
311 
312   bool hasDispatchID() const {
313     return DispatchID;
314   }
315 
316   bool hasFlatScratchInit() const {
317     return FlatScratchInit;
318   }
319 
320   bool hasGridWorkgroupCountX() const {
321     return GridWorkgroupCountX;
322   }
323 
324   bool hasGridWorkgroupCountY() const {
325     return GridWorkgroupCountY;
326   }
327 
328   bool hasGridWorkgroupCountZ() const {
329     return GridWorkgroupCountZ;
330   }
331 
332   bool hasWorkGroupIDX() const {
333     return WorkGroupIDX;
334   }
335 
336   bool hasWorkGroupIDY() const {
337     return WorkGroupIDY;
338   }
339 
340   bool hasWorkGroupIDZ() const {
341     return WorkGroupIDZ;
342   }
343 
344   bool hasWorkGroupInfo() const {
345     return WorkGroupInfo;
346   }
347 
348   bool hasPrivateSegmentWaveByteOffset() const {
349     return PrivateSegmentWaveByteOffset;
350   }
351 
352   bool hasWorkItemIDX() const {
353     return WorkItemIDX;
354   }
355 
356   bool hasWorkItemIDY() const {
357     return WorkItemIDY;
358   }
359 
360   bool hasWorkItemIDZ() const {
361     return WorkItemIDZ;
362   }
363 
364   bool hasImplicitArgPtr() const {
365     return ImplicitArgPtr;
366   }
367 
368   bool hasImplicitBufferPtr() const {
369     return ImplicitBufferPtr;
370   }
371 
372   AMDGPUFunctionArgInfo &getArgInfo() {
373     return ArgInfo;
374   }
375 
376   const AMDGPUFunctionArgInfo &getArgInfo() const {
377     return ArgInfo;
378   }
379 
380   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
381   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
382     return ArgInfo.getPreloadedValue(Value);
383   }
384 
385   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
386     return ArgInfo.getPreloadedValue(Value).first->getRegister();
387   }
388 
389   unsigned getNumUserSGPRs() const {
390     return NumUserSGPRs;
391   }
392 
393   unsigned getNumPreloadedSGPRs() const {
394     return NumUserSGPRs + NumSystemSGPRs;
395   }
396 
397   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
398     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
399   }
400 
401   /// \brief Returns the physical register reserved for use as the resource
402   /// descriptor for scratch accesses.
403   unsigned getScratchRSrcReg() const {
404     return ScratchRSrcReg;
405   }
406 
407   void setScratchRSrcReg(unsigned Reg) {
408     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
409     ScratchRSrcReg = Reg;
410   }
411 
412   unsigned getScratchWaveOffsetReg() const {
413     return ScratchWaveOffsetReg;
414   }
415 
416   unsigned getFrameOffsetReg() const {
417     return FrameOffsetReg;
418   }
419 
420   void setStackPtrOffsetReg(unsigned Reg) {
421     StackPtrOffsetReg = Reg;
422   }
423 
424   // Note the unset value for this is AMDGPU::SP_REG rather than
425   // NoRegister. This is mostly a workaround for MIR tests where state that
426   // can't be directly computed from the function is not preserved in serialized
427   // MIR.
428   unsigned getStackPtrOffsetReg() const {
429     return StackPtrOffsetReg;
430   }
431 
432   void setScratchWaveOffsetReg(unsigned Reg) {
433     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
434     ScratchWaveOffsetReg = Reg;
435     if (isEntryFunction())
436       FrameOffsetReg = ScratchWaveOffsetReg;
437   }
438 
439   unsigned getQueuePtrUserSGPR() const {
440     return ArgInfo.QueuePtr.getRegister();
441   }
442 
443   unsigned getImplicitBufferPtrUserSGPR() const {
444     return ArgInfo.ImplicitBufferPtr.getRegister();
445   }
446 
447   bool hasSpilledSGPRs() const {
448     return HasSpilledSGPRs;
449   }
450 
451   void setHasSpilledSGPRs(bool Spill = true) {
452     HasSpilledSGPRs = Spill;
453   }
454 
455   bool hasSpilledVGPRs() const {
456     return HasSpilledVGPRs;
457   }
458 
459   void setHasSpilledVGPRs(bool Spill = true) {
460     HasSpilledVGPRs = Spill;
461   }
462 
463   bool hasNonSpillStackObjects() const {
464     return HasNonSpillStackObjects;
465   }
466 
467   void setHasNonSpillStackObjects(bool StackObject = true) {
468     HasNonSpillStackObjects = StackObject;
469   }
470 
471   unsigned getNumSpilledSGPRs() const {
472     return NumSpilledSGPRs;
473   }
474 
475   unsigned getNumSpilledVGPRs() const {
476     return NumSpilledVGPRs;
477   }
478 
479   void addToSpilledSGPRs(unsigned num) {
480     NumSpilledSGPRs += num;
481   }
482 
483   void addToSpilledVGPRs(unsigned num) {
484     NumSpilledVGPRs += num;
485   }
486 
487   unsigned getPSInputAddr() const {
488     return PSInputAddr;
489   }
490 
491   unsigned getPSInputEnable() const {
492     return PSInputEnable;
493   }
494 
495   bool isPSInputAllocated(unsigned Index) const {
496     return PSInputAddr & (1 << Index);
497   }
498 
499   void markPSInputAllocated(unsigned Index) {
500     PSInputAddr |= 1 << Index;
501   }
502 
503   void markPSInputEnabled(unsigned Index) {
504     PSInputEnable |= 1 << Index;
505   }
506 
507   bool returnsVoid() const {
508     return ReturnsVoid;
509   }
510 
511   void setIfReturnsVoid(bool Value) {
512     ReturnsVoid = Value;
513   }
514 
515   /// \returns A pair of default/requested minimum/maximum flat work group sizes
516   /// for this function.
517   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
518     return FlatWorkGroupSizes;
519   }
520 
521   /// \returns Default/requested minimum flat work group size for this function.
522   unsigned getMinFlatWorkGroupSize() const {
523     return FlatWorkGroupSizes.first;
524   }
525 
526   /// \returns Default/requested maximum flat work group size for this function.
527   unsigned getMaxFlatWorkGroupSize() const {
528     return FlatWorkGroupSizes.second;
529   }
530 
531   /// \returns A pair of default/requested minimum/maximum number of waves per
532   /// execution unit.
533   std::pair<unsigned, unsigned> getWavesPerEU() const {
534     return WavesPerEU;
535   }
536 
537   /// \returns Default/requested minimum number of waves per execution unit.
538   unsigned getMinWavesPerEU() const {
539     return WavesPerEU.first;
540   }
541 
542   /// \returns Default/requested maximum number of waves per execution unit.
543   unsigned getMaxWavesPerEU() const {
544     return WavesPerEU.second;
545   }
546 
547   /// \returns Stack object index for \p Dim's work group ID.
548   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
549     assert(Dim < 3);
550     return DebuggerWorkGroupIDStackObjectIndices[Dim];
551   }
552 
553   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
554   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
555     assert(Dim < 3);
556     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
557   }
558 
559   /// \returns Stack object index for \p Dim's work item ID.
560   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
561     assert(Dim < 3);
562     return DebuggerWorkItemIDStackObjectIndices[Dim];
563   }
564 
565   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
566   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
567     assert(Dim < 3);
568     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
569   }
570 
571   /// \returns SGPR used for \p Dim's work group ID.
572   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
573     switch (Dim) {
574     case 0:
575       assert(hasWorkGroupIDX());
576       return ArgInfo.WorkGroupIDX.getRegister();
577     case 1:
578       assert(hasWorkGroupIDY());
579       return ArgInfo.WorkGroupIDY.getRegister();
580     case 2:
581       assert(hasWorkGroupIDZ());
582       return ArgInfo.WorkGroupIDZ.getRegister();
583     }
584     llvm_unreachable("unexpected dimension");
585   }
586 
587   /// \returns VGPR used for \p Dim' work item ID.
588   unsigned getWorkItemIDVGPR(unsigned Dim) const {
589     switch (Dim) {
590     case 0:
591       assert(hasWorkItemIDX());
592       return AMDGPU::VGPR0;
593     case 1:
594       assert(hasWorkItemIDY());
595       return AMDGPU::VGPR1;
596     case 2:
597       assert(hasWorkItemIDZ());
598       return AMDGPU::VGPR2;
599     }
600     llvm_unreachable("unexpected dimension");
601   }
602 
603   unsigned getLDSWaveSpillSize() const {
604     return LDSWaveSpillSize;
605   }
606 
607   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
608     return &BufferPSV;
609   }
610 
611   const AMDGPUImagePseudoSourceValue *getImagePSV() const {
612     return &ImagePSV;
613   }
614 };
615 
616 } // end namespace llvm
617 
618 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
619