xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision aa6fb4c45e01348159d565c17a77118dc57332e5)
1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 
16 #include "AMDGPUArgumentUsageInfo.h"
17 #include "AMDGPUMachineFunction.h"
18 #include "SIInstrInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/CodeGen/PseudoSourceValue.h"
26 #include "llvm/CodeGen/TargetInstrInfo.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include <array>
30 #include <cassert>
31 #include <utility>
32 #include <vector>
33 
34 namespace llvm {
35 
36 class MachineFrameInfo;
37 class MachineFunction;
38 class TargetRegisterClass;
39 
40 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
41 public:
42   // TODO: Is the img rsrc useful?
43   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
44     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
45 
46   bool isConstant(const MachineFrameInfo *) const override {
47     // This should probably be true for most images, but we will start by being
48     // conservative.
49     return false;
50   }
51 
52   bool isAliased(const MachineFrameInfo *) const override {
53     return true;
54   }
55 
56   bool mayAlias(const MachineFrameInfo *) const override {
57     return true;
58   }
59 };
60 
61 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
62 public:
63   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
64     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
65 
66   bool isConstant(const MachineFrameInfo *) const override {
67     // This should probably be true for most images, but we will start by being
68     // conservative.
69     return false;
70   }
71 
72   bool isAliased(const MachineFrameInfo *) const override {
73     return true;
74   }
75 
76   bool mayAlias(const MachineFrameInfo *) const override {
77     return true;
78   }
79 };
80 
81 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
82 /// tells the hardware which interpolation parameters to load.
83 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
84   unsigned TIDReg = AMDGPU::NoRegister;
85 
86   // Registers that may be reserved for spilling purposes. These may be the same
87   // as the input registers.
88   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
89   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
90 
91   // This is the current function's incremented size from the kernel's scratch
92   // wave offset register. For an entry function, this is exactly the same as
93   // the ScratchWaveOffsetReg.
94   unsigned FrameOffsetReg = AMDGPU::FP_REG;
95 
96   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
97   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
98 
99   AMDGPUFunctionArgInfo ArgInfo;
100 
101   // Graphics info.
102   unsigned PSInputAddr = 0;
103   unsigned PSInputEnable = 0;
104 
105   /// Number of bytes of arguments this function has on the stack. If the callee
106   /// is expected to restore the argument stack this should be a multiple of 16,
107   /// all usable during a tail call.
108   ///
109   /// The alternative would forbid tail call optimisation in some cases: if we
110   /// want to transfer control from a function with 8-bytes of stack-argument
111   /// space to a function with 16-bytes then misalignment of this value would
112   /// make a stack adjustment necessary, which could not be undone by the
113   /// callee.
114   unsigned BytesInStackArgArea = 0;
115 
116   bool ReturnsVoid = true;
117 
118   // A pair of default/requested minimum/maximum flat work group sizes.
119   // Minimum - first, maximum - second.
120   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
121 
122   // A pair of default/requested minimum/maximum number of waves per execution
123   // unit. Minimum - first, maximum - second.
124   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
125 
126   DenseMap<const Value *,
127            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
128   DenseMap<const Value *,
129            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
130 
131 private:
132   unsigned LDSWaveSpillSize = 0;
133   unsigned NumUserSGPRs = 0;
134   unsigned NumSystemSGPRs = 0;
135 
136   bool HasSpilledSGPRs = false;
137   bool HasSpilledVGPRs = false;
138   bool HasNonSpillStackObjects = false;
139   bool IsStackRealigned = false;
140 
141   unsigned NumSpilledSGPRs = 0;
142   unsigned NumSpilledVGPRs = 0;
143 
144   // Feature bits required for inputs passed in user SGPRs.
145   bool PrivateSegmentBuffer : 1;
146   bool DispatchPtr : 1;
147   bool QueuePtr : 1;
148   bool KernargSegmentPtr : 1;
149   bool DispatchID : 1;
150   bool FlatScratchInit : 1;
151 
152   // Feature bits required for inputs passed in system SGPRs.
153   bool WorkGroupIDX : 1; // Always initialized.
154   bool WorkGroupIDY : 1;
155   bool WorkGroupIDZ : 1;
156   bool WorkGroupInfo : 1;
157   bool PrivateSegmentWaveByteOffset : 1;
158 
159   bool WorkItemIDX : 1; // Always initialized.
160   bool WorkItemIDY : 1;
161   bool WorkItemIDZ : 1;
162 
163   // Private memory buffer
164   // Compute directly in sgpr[0:1]
165   // Other shaders indirect 64-bits at sgpr[0:1]
166   bool ImplicitBufferPtr : 1;
167 
168   // Pointer to where the ABI inserts special kernel arguments separate from the
169   // user arguments. This is an offset from the KernargSegmentPtr.
170   bool ImplicitArgPtr : 1;
171 
172   // The hard-wired high half of the address of the global information table
173   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
174   // current hardware only allows a 16 bit value.
175   unsigned GITPtrHigh;
176 
177   unsigned HighBitsOf32BitAddress;
178 
179   // Current recorded maximum possible occupancy.
180   unsigned Occupancy;
181 
182   MCPhysReg getNextUserSGPR() const;
183 
184   MCPhysReg getNextSystemSGPR() const;
185 
186 public:
187   struct SpilledReg {
188     unsigned VGPR = 0;
189     int Lane = -1;
190 
191     SpilledReg() = default;
192     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
193 
194     bool hasLane() { return Lane != -1;}
195     bool hasReg() { return VGPR != 0;}
196   };
197 
198   struct SGPRSpillVGPRCSR {
199     // VGPR used for SGPR spills
200     unsigned VGPR;
201 
202     // If the VGPR is a CSR, the stack slot used to save/restore it in the
203     // prolog/epilog.
204     Optional<int> FI;
205 
206     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
207   };
208 
209 private:
210   // SGPR->VGPR spilling support.
211   using SpillRegMask = std::pair<unsigned, unsigned>;
212 
213   // Track VGPR + wave index for each subregister of the SGPR spilled to
214   // frameindex key.
215   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
216   unsigned NumVGPRSpillLanes = 0;
217   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
218 
219 public:
220   SIMachineFunctionInfo(const MachineFunction &MF);
221 
222   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
223     auto I = SGPRToVGPRSpills.find(FrameIndex);
224     return (I == SGPRToVGPRSpills.end()) ?
225       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
226   }
227 
228   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
229     return SpillVGPRs;
230   }
231 
232   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
233   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
234 
235   bool hasCalculatedTID() const { return TIDReg != 0; };
236   unsigned getTIDReg() const { return TIDReg; };
237   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
238 
239   unsigned getBytesInStackArgArea() const {
240     return BytesInStackArgArea;
241   }
242 
243   void setBytesInStackArgArea(unsigned Bytes) {
244     BytesInStackArgArea = Bytes;
245   }
246 
247   // Add user SGPRs.
248   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
249   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
250   unsigned addQueuePtr(const SIRegisterInfo &TRI);
251   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
252   unsigned addDispatchID(const SIRegisterInfo &TRI);
253   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
254   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
255 
256   // Add system SGPRs.
257   unsigned addWorkGroupIDX() {
258     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
259     NumSystemSGPRs += 1;
260     return ArgInfo.WorkGroupIDX.getRegister();
261   }
262 
263   unsigned addWorkGroupIDY() {
264     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
265     NumSystemSGPRs += 1;
266     return ArgInfo.WorkGroupIDY.getRegister();
267   }
268 
269   unsigned addWorkGroupIDZ() {
270     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
271     NumSystemSGPRs += 1;
272     return ArgInfo.WorkGroupIDZ.getRegister();
273   }
274 
275   unsigned addWorkGroupInfo() {
276     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
277     NumSystemSGPRs += 1;
278     return ArgInfo.WorkGroupInfo.getRegister();
279   }
280 
281   // Add special VGPR inputs
282   void setWorkItemIDX(ArgDescriptor Arg) {
283     ArgInfo.WorkItemIDX = Arg;
284   }
285 
286   void setWorkItemIDY(ArgDescriptor Arg) {
287     ArgInfo.WorkItemIDY = Arg;
288   }
289 
290   void setWorkItemIDZ(ArgDescriptor Arg) {
291     ArgInfo.WorkItemIDZ = Arg;
292   }
293 
294   unsigned addPrivateSegmentWaveByteOffset() {
295     ArgInfo.PrivateSegmentWaveByteOffset
296       = ArgDescriptor::createRegister(getNextSystemSGPR());
297     NumSystemSGPRs += 1;
298     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
299   }
300 
301   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
302     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
303   }
304 
305   bool hasPrivateSegmentBuffer() const {
306     return PrivateSegmentBuffer;
307   }
308 
309   bool hasDispatchPtr() const {
310     return DispatchPtr;
311   }
312 
313   bool hasQueuePtr() const {
314     return QueuePtr;
315   }
316 
317   bool hasKernargSegmentPtr() const {
318     return KernargSegmentPtr;
319   }
320 
321   bool hasDispatchID() const {
322     return DispatchID;
323   }
324 
325   bool hasFlatScratchInit() const {
326     return FlatScratchInit;
327   }
328 
329   bool hasWorkGroupIDX() const {
330     return WorkGroupIDX;
331   }
332 
333   bool hasWorkGroupIDY() const {
334     return WorkGroupIDY;
335   }
336 
337   bool hasWorkGroupIDZ() const {
338     return WorkGroupIDZ;
339   }
340 
341   bool hasWorkGroupInfo() const {
342     return WorkGroupInfo;
343   }
344 
345   bool hasPrivateSegmentWaveByteOffset() const {
346     return PrivateSegmentWaveByteOffset;
347   }
348 
349   bool hasWorkItemIDX() const {
350     return WorkItemIDX;
351   }
352 
353   bool hasWorkItemIDY() const {
354     return WorkItemIDY;
355   }
356 
357   bool hasWorkItemIDZ() const {
358     return WorkItemIDZ;
359   }
360 
361   bool hasImplicitArgPtr() const {
362     return ImplicitArgPtr;
363   }
364 
365   bool hasImplicitBufferPtr() const {
366     return ImplicitBufferPtr;
367   }
368 
369   AMDGPUFunctionArgInfo &getArgInfo() {
370     return ArgInfo;
371   }
372 
373   const AMDGPUFunctionArgInfo &getArgInfo() const {
374     return ArgInfo;
375   }
376 
377   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
378   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
379     return ArgInfo.getPreloadedValue(Value);
380   }
381 
382   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
383     return ArgInfo.getPreloadedValue(Value).first->getRegister();
384   }
385 
386   unsigned getGITPtrHigh() const {
387     return GITPtrHigh;
388   }
389 
390   unsigned get32BitAddressHighBits() const {
391     return HighBitsOf32BitAddress;
392   }
393 
394   unsigned getNumUserSGPRs() const {
395     return NumUserSGPRs;
396   }
397 
398   unsigned getNumPreloadedSGPRs() const {
399     return NumUserSGPRs + NumSystemSGPRs;
400   }
401 
402   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
403     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
404   }
405 
406   /// Returns the physical register reserved for use as the resource
407   /// descriptor for scratch accesses.
408   unsigned getScratchRSrcReg() const {
409     return ScratchRSrcReg;
410   }
411 
412   void setScratchRSrcReg(unsigned Reg) {
413     assert(Reg != 0 && "Should never be unset");
414     ScratchRSrcReg = Reg;
415   }
416 
417   unsigned getScratchWaveOffsetReg() const {
418     return ScratchWaveOffsetReg;
419   }
420 
421   unsigned getFrameOffsetReg() const {
422     return FrameOffsetReg;
423   }
424 
425   void setStackPtrOffsetReg(unsigned Reg) {
426     assert(Reg != 0 && "Should never be unset");
427     StackPtrOffsetReg = Reg;
428   }
429 
430   // Note the unset value for this is AMDGPU::SP_REG rather than
431   // NoRegister. This is mostly a workaround for MIR tests where state that
432   // can't be directly computed from the function is not preserved in serialized
433   // MIR.
434   unsigned getStackPtrOffsetReg() const {
435     return StackPtrOffsetReg;
436   }
437 
438   void setScratchWaveOffsetReg(unsigned Reg) {
439     assert(Reg != 0 && "Should never be unset");
440     ScratchWaveOffsetReg = Reg;
441     if (isEntryFunction())
442       FrameOffsetReg = ScratchWaveOffsetReg;
443   }
444 
445   unsigned getQueuePtrUserSGPR() const {
446     return ArgInfo.QueuePtr.getRegister();
447   }
448 
449   unsigned getImplicitBufferPtrUserSGPR() const {
450     return ArgInfo.ImplicitBufferPtr.getRegister();
451   }
452 
453   bool hasSpilledSGPRs() const {
454     return HasSpilledSGPRs;
455   }
456 
457   void setHasSpilledSGPRs(bool Spill = true) {
458     HasSpilledSGPRs = Spill;
459   }
460 
461   bool hasSpilledVGPRs() const {
462     return HasSpilledVGPRs;
463   }
464 
465   void setHasSpilledVGPRs(bool Spill = true) {
466     HasSpilledVGPRs = Spill;
467   }
468 
469   bool hasNonSpillStackObjects() const {
470     return HasNonSpillStackObjects;
471   }
472 
473   void setHasNonSpillStackObjects(bool StackObject = true) {
474     HasNonSpillStackObjects = StackObject;
475   }
476 
477   bool isStackRealigned() const {
478     return IsStackRealigned;
479   }
480 
481   void setIsStackRealigned(bool Realigned = true) {
482     IsStackRealigned = Realigned;
483   }
484 
485   unsigned getNumSpilledSGPRs() const {
486     return NumSpilledSGPRs;
487   }
488 
489   unsigned getNumSpilledVGPRs() const {
490     return NumSpilledVGPRs;
491   }
492 
493   void addToSpilledSGPRs(unsigned num) {
494     NumSpilledSGPRs += num;
495   }
496 
497   void addToSpilledVGPRs(unsigned num) {
498     NumSpilledVGPRs += num;
499   }
500 
501   unsigned getPSInputAddr() const {
502     return PSInputAddr;
503   }
504 
505   unsigned getPSInputEnable() const {
506     return PSInputEnable;
507   }
508 
509   bool isPSInputAllocated(unsigned Index) const {
510     return PSInputAddr & (1 << Index);
511   }
512 
513   void markPSInputAllocated(unsigned Index) {
514     PSInputAddr |= 1 << Index;
515   }
516 
517   void markPSInputEnabled(unsigned Index) {
518     PSInputEnable |= 1 << Index;
519   }
520 
521   bool returnsVoid() const {
522     return ReturnsVoid;
523   }
524 
525   void setIfReturnsVoid(bool Value) {
526     ReturnsVoid = Value;
527   }
528 
529   /// \returns A pair of default/requested minimum/maximum flat work group sizes
530   /// for this function.
531   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
532     return FlatWorkGroupSizes;
533   }
534 
535   /// \returns Default/requested minimum flat work group size for this function.
536   unsigned getMinFlatWorkGroupSize() const {
537     return FlatWorkGroupSizes.first;
538   }
539 
540   /// \returns Default/requested maximum flat work group size for this function.
541   unsigned getMaxFlatWorkGroupSize() const {
542     return FlatWorkGroupSizes.second;
543   }
544 
545   /// \returns A pair of default/requested minimum/maximum number of waves per
546   /// execution unit.
547   std::pair<unsigned, unsigned> getWavesPerEU() const {
548     return WavesPerEU;
549   }
550 
551   /// \returns Default/requested minimum number of waves per execution unit.
552   unsigned getMinWavesPerEU() const {
553     return WavesPerEU.first;
554   }
555 
556   /// \returns Default/requested maximum number of waves per execution unit.
557   unsigned getMaxWavesPerEU() const {
558     return WavesPerEU.second;
559   }
560 
561   /// \returns SGPR used for \p Dim's work group ID.
562   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
563     switch (Dim) {
564     case 0:
565       assert(hasWorkGroupIDX());
566       return ArgInfo.WorkGroupIDX.getRegister();
567     case 1:
568       assert(hasWorkGroupIDY());
569       return ArgInfo.WorkGroupIDY.getRegister();
570     case 2:
571       assert(hasWorkGroupIDZ());
572       return ArgInfo.WorkGroupIDZ.getRegister();
573     }
574     llvm_unreachable("unexpected dimension");
575   }
576 
577   /// \returns VGPR used for \p Dim' work item ID.
578   unsigned getWorkItemIDVGPR(unsigned Dim) const;
579 
580   unsigned getLDSWaveSpillSize() const {
581     return LDSWaveSpillSize;
582   }
583 
584   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
585                                                     const Value *BufferRsrc) {
586     assert(BufferRsrc);
587     auto PSV = BufferPSVs.try_emplace(
588       BufferRsrc,
589       llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
590     return PSV.first->second.get();
591   }
592 
593   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
594                                                   const Value *ImgRsrc) {
595     assert(ImgRsrc);
596     auto PSV = ImagePSVs.try_emplace(
597       ImgRsrc,
598       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
599     return PSV.first->second.get();
600   }
601 
602   unsigned getOccupancy() const {
603     return Occupancy;
604   }
605 
606   unsigned getMinAllowedOccupancy() const {
607     if (!isMemoryBound() && !needsWaveLimiter())
608       return Occupancy;
609     return (Occupancy < 4) ? Occupancy : 4;
610   }
611 
612   void limitOccupancy(const MachineFunction &MF);
613 
614   void limitOccupancy(unsigned Limit) {
615     if (Occupancy > Limit)
616       Occupancy = Limit;
617   }
618 
619   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
620     if (Occupancy < Limit)
621       Occupancy = Limit;
622     limitOccupancy(MF);
623   }
624 };
625 
626 } // end namespace llvm
627 
628 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
629