xref: /llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (revision 2946cd701067404b99c39fb29dc9c74bd7193eb3)
1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 
16 #include "AMDGPUArgumentUsageInfo.h"
17 #include "AMDGPUMachineFunction.h"
18 #include "SIInstrInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/CodeGen/PseudoSourceValue.h"
26 #include "llvm/CodeGen/TargetInstrInfo.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include <array>
30 #include <cassert>
31 #include <utility>
32 #include <vector>
33 
34 namespace llvm {
35 
36 class MachineFrameInfo;
37 class MachineFunction;
38 class TargetRegisterClass;
39 
40 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
41 public:
42   // TODO: Is the img rsrc useful?
43   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
44     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
45 
46   bool isConstant(const MachineFrameInfo *) const override {
47     // This should probably be true for most images, but we will start by being
48     // conservative.
49     return false;
50   }
51 
52   bool isAliased(const MachineFrameInfo *) const override {
53     return true;
54   }
55 
56   bool mayAlias(const MachineFrameInfo *) const override {
57     return true;
58   }
59 };
60 
61 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
62 public:
63   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
64     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
65 
66   bool isConstant(const MachineFrameInfo *) const override {
67     // This should probably be true for most images, but we will start by being
68     // conservative.
69     return false;
70   }
71 
72   bool isAliased(const MachineFrameInfo *) const override {
73     return true;
74   }
75 
76   bool mayAlias(const MachineFrameInfo *) const override {
77     return true;
78   }
79 };
80 
81 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
82 /// tells the hardware which interpolation parameters to load.
83 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
84   unsigned TIDReg = AMDGPU::NoRegister;
85 
86   // Registers that may be reserved for spilling purposes. These may be the same
87   // as the input registers.
88   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
89   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
90 
91   // This is the current function's incremented size from the kernel's scratch
92   // wave offset register. For an entry function, this is exactly the same as
93   // the ScratchWaveOffsetReg.
94   unsigned FrameOffsetReg = AMDGPU::FP_REG;
95 
96   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
97   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
98 
99   AMDGPUFunctionArgInfo ArgInfo;
100 
101   // Graphics info.
102   unsigned PSInputAddr = 0;
103   unsigned PSInputEnable = 0;
104 
105   /// Number of bytes of arguments this function has on the stack. If the callee
106   /// is expected to restore the argument stack this should be a multiple of 16,
107   /// all usable during a tail call.
108   ///
109   /// The alternative would forbid tail call optimisation in some cases: if we
110   /// want to transfer control from a function with 8-bytes of stack-argument
111   /// space to a function with 16-bytes then misalignment of this value would
112   /// make a stack adjustment necessary, which could not be undone by the
113   /// callee.
114   unsigned BytesInStackArgArea = 0;
115 
116   bool ReturnsVoid = true;
117 
118   // A pair of default/requested minimum/maximum flat work group sizes.
119   // Minimum - first, maximum - second.
120   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
121 
122   // A pair of default/requested minimum/maximum number of waves per execution
123   // unit. Minimum - first, maximum - second.
124   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
125 
126   // Stack object indices for work group IDs.
127   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
128 
129   // Stack object indices for work item IDs.
130   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
131 
132   DenseMap<const Value *,
133            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
134   DenseMap<const Value *,
135            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
136 
137 private:
138   unsigned LDSWaveSpillSize = 0;
139   unsigned NumUserSGPRs = 0;
140   unsigned NumSystemSGPRs = 0;
141 
142   bool HasSpilledSGPRs = false;
143   bool HasSpilledVGPRs = false;
144   bool HasNonSpillStackObjects = false;
145   bool IsStackRealigned = false;
146 
147   unsigned NumSpilledSGPRs = 0;
148   unsigned NumSpilledVGPRs = 0;
149 
150   // Feature bits required for inputs passed in user SGPRs.
151   bool PrivateSegmentBuffer : 1;
152   bool DispatchPtr : 1;
153   bool QueuePtr : 1;
154   bool KernargSegmentPtr : 1;
155   bool DispatchID : 1;
156   bool FlatScratchInit : 1;
157 
158   // Feature bits required for inputs passed in system SGPRs.
159   bool WorkGroupIDX : 1; // Always initialized.
160   bool WorkGroupIDY : 1;
161   bool WorkGroupIDZ : 1;
162   bool WorkGroupInfo : 1;
163   bool PrivateSegmentWaveByteOffset : 1;
164 
165   bool WorkItemIDX : 1; // Always initialized.
166   bool WorkItemIDY : 1;
167   bool WorkItemIDZ : 1;
168 
169   // Private memory buffer
170   // Compute directly in sgpr[0:1]
171   // Other shaders indirect 64-bits at sgpr[0:1]
172   bool ImplicitBufferPtr : 1;
173 
174   // Pointer to where the ABI inserts special kernel arguments separate from the
175   // user arguments. This is an offset from the KernargSegmentPtr.
176   bool ImplicitArgPtr : 1;
177 
178   // The hard-wired high half of the address of the global information table
179   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
180   // current hardware only allows a 16 bit value.
181   unsigned GITPtrHigh;
182 
183   unsigned HighBitsOf32BitAddress;
184 
185   // Current recorded maximum possible occupancy.
186   unsigned Occupancy;
187 
188   MCPhysReg getNextUserSGPR() const;
189 
190   MCPhysReg getNextSystemSGPR() const;
191 
192 public:
193   struct SpilledReg {
194     unsigned VGPR = 0;
195     int Lane = -1;
196 
197     SpilledReg() = default;
198     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
199 
200     bool hasLane() { return Lane != -1;}
201     bool hasReg() { return VGPR != 0;}
202   };
203 
204   struct SGPRSpillVGPRCSR {
205     // VGPR used for SGPR spills
206     unsigned VGPR;
207 
208     // If the VGPR is a CSR, the stack slot used to save/restore it in the
209     // prolog/epilog.
210     Optional<int> FI;
211 
212     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
213   };
214 
215 private:
216   // SGPR->VGPR spilling support.
217   using SpillRegMask = std::pair<unsigned, unsigned>;
218 
219   // Track VGPR + wave index for each subregister of the SGPR spilled to
220   // frameindex key.
221   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
222   unsigned NumVGPRSpillLanes = 0;
223   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
224 
225 public:
226   SIMachineFunctionInfo(const MachineFunction &MF);
227 
228   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
229     auto I = SGPRToVGPRSpills.find(FrameIndex);
230     return (I == SGPRToVGPRSpills.end()) ?
231       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
232   }
233 
234   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
235     return SpillVGPRs;
236   }
237 
238   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
239   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
240 
241   bool hasCalculatedTID() const { return TIDReg != 0; };
242   unsigned getTIDReg() const { return TIDReg; };
243   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
244 
245   unsigned getBytesInStackArgArea() const {
246     return BytesInStackArgArea;
247   }
248 
249   void setBytesInStackArgArea(unsigned Bytes) {
250     BytesInStackArgArea = Bytes;
251   }
252 
253   // Add user SGPRs.
254   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
255   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
256   unsigned addQueuePtr(const SIRegisterInfo &TRI);
257   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
258   unsigned addDispatchID(const SIRegisterInfo &TRI);
259   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
260   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
261 
262   // Add system SGPRs.
263   unsigned addWorkGroupIDX() {
264     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
265     NumSystemSGPRs += 1;
266     return ArgInfo.WorkGroupIDX.getRegister();
267   }
268 
269   unsigned addWorkGroupIDY() {
270     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
271     NumSystemSGPRs += 1;
272     return ArgInfo.WorkGroupIDY.getRegister();
273   }
274 
275   unsigned addWorkGroupIDZ() {
276     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
277     NumSystemSGPRs += 1;
278     return ArgInfo.WorkGroupIDZ.getRegister();
279   }
280 
281   unsigned addWorkGroupInfo() {
282     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
283     NumSystemSGPRs += 1;
284     return ArgInfo.WorkGroupInfo.getRegister();
285   }
286 
287   // Add special VGPR inputs
288   void setWorkItemIDX(ArgDescriptor Arg) {
289     ArgInfo.WorkItemIDX = Arg;
290   }
291 
292   void setWorkItemIDY(ArgDescriptor Arg) {
293     ArgInfo.WorkItemIDY = Arg;
294   }
295 
296   void setWorkItemIDZ(ArgDescriptor Arg) {
297     ArgInfo.WorkItemIDZ = Arg;
298   }
299 
300   unsigned addPrivateSegmentWaveByteOffset() {
301     ArgInfo.PrivateSegmentWaveByteOffset
302       = ArgDescriptor::createRegister(getNextSystemSGPR());
303     NumSystemSGPRs += 1;
304     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
305   }
306 
307   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
308     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
309   }
310 
311   bool hasPrivateSegmentBuffer() const {
312     return PrivateSegmentBuffer;
313   }
314 
315   bool hasDispatchPtr() const {
316     return DispatchPtr;
317   }
318 
319   bool hasQueuePtr() const {
320     return QueuePtr;
321   }
322 
323   bool hasKernargSegmentPtr() const {
324     return KernargSegmentPtr;
325   }
326 
327   bool hasDispatchID() const {
328     return DispatchID;
329   }
330 
331   bool hasFlatScratchInit() const {
332     return FlatScratchInit;
333   }
334 
335   bool hasWorkGroupIDX() const {
336     return WorkGroupIDX;
337   }
338 
339   bool hasWorkGroupIDY() const {
340     return WorkGroupIDY;
341   }
342 
343   bool hasWorkGroupIDZ() const {
344     return WorkGroupIDZ;
345   }
346 
347   bool hasWorkGroupInfo() const {
348     return WorkGroupInfo;
349   }
350 
351   bool hasPrivateSegmentWaveByteOffset() const {
352     return PrivateSegmentWaveByteOffset;
353   }
354 
355   bool hasWorkItemIDX() const {
356     return WorkItemIDX;
357   }
358 
359   bool hasWorkItemIDY() const {
360     return WorkItemIDY;
361   }
362 
363   bool hasWorkItemIDZ() const {
364     return WorkItemIDZ;
365   }
366 
367   bool hasImplicitArgPtr() const {
368     return ImplicitArgPtr;
369   }
370 
371   bool hasImplicitBufferPtr() const {
372     return ImplicitBufferPtr;
373   }
374 
375   AMDGPUFunctionArgInfo &getArgInfo() {
376     return ArgInfo;
377   }
378 
379   const AMDGPUFunctionArgInfo &getArgInfo() const {
380     return ArgInfo;
381   }
382 
383   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
384   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
385     return ArgInfo.getPreloadedValue(Value);
386   }
387 
388   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
389     return ArgInfo.getPreloadedValue(Value).first->getRegister();
390   }
391 
392   unsigned getGITPtrHigh() const {
393     return GITPtrHigh;
394   }
395 
396   unsigned get32BitAddressHighBits() const {
397     return HighBitsOf32BitAddress;
398   }
399 
400   unsigned getNumUserSGPRs() const {
401     return NumUserSGPRs;
402   }
403 
404   unsigned getNumPreloadedSGPRs() const {
405     return NumUserSGPRs + NumSystemSGPRs;
406   }
407 
408   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
409     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
410   }
411 
412   /// Returns the physical register reserved for use as the resource
413   /// descriptor for scratch accesses.
414   unsigned getScratchRSrcReg() const {
415     return ScratchRSrcReg;
416   }
417 
418   void setScratchRSrcReg(unsigned Reg) {
419     assert(Reg != 0 && "Should never be unset");
420     ScratchRSrcReg = Reg;
421   }
422 
423   unsigned getScratchWaveOffsetReg() const {
424     return ScratchWaveOffsetReg;
425   }
426 
427   unsigned getFrameOffsetReg() const {
428     return FrameOffsetReg;
429   }
430 
431   void setStackPtrOffsetReg(unsigned Reg) {
432     assert(Reg != 0 && "Should never be unset");
433     StackPtrOffsetReg = Reg;
434   }
435 
436   // Note the unset value for this is AMDGPU::SP_REG rather than
437   // NoRegister. This is mostly a workaround for MIR tests where state that
438   // can't be directly computed from the function is not preserved in serialized
439   // MIR.
440   unsigned getStackPtrOffsetReg() const {
441     return StackPtrOffsetReg;
442   }
443 
444   void setScratchWaveOffsetReg(unsigned Reg) {
445     assert(Reg != 0 && "Should never be unset");
446     ScratchWaveOffsetReg = Reg;
447     if (isEntryFunction())
448       FrameOffsetReg = ScratchWaveOffsetReg;
449   }
450 
451   unsigned getQueuePtrUserSGPR() const {
452     return ArgInfo.QueuePtr.getRegister();
453   }
454 
455   unsigned getImplicitBufferPtrUserSGPR() const {
456     return ArgInfo.ImplicitBufferPtr.getRegister();
457   }
458 
459   bool hasSpilledSGPRs() const {
460     return HasSpilledSGPRs;
461   }
462 
463   void setHasSpilledSGPRs(bool Spill = true) {
464     HasSpilledSGPRs = Spill;
465   }
466 
467   bool hasSpilledVGPRs() const {
468     return HasSpilledVGPRs;
469   }
470 
471   void setHasSpilledVGPRs(bool Spill = true) {
472     HasSpilledVGPRs = Spill;
473   }
474 
475   bool hasNonSpillStackObjects() const {
476     return HasNonSpillStackObjects;
477   }
478 
479   void setHasNonSpillStackObjects(bool StackObject = true) {
480     HasNonSpillStackObjects = StackObject;
481   }
482 
483   bool isStackRealigned() const {
484     return IsStackRealigned;
485   }
486 
487   void setIsStackRealigned(bool Realigned = true) {
488     IsStackRealigned = Realigned;
489   }
490 
491   unsigned getNumSpilledSGPRs() const {
492     return NumSpilledSGPRs;
493   }
494 
495   unsigned getNumSpilledVGPRs() const {
496     return NumSpilledVGPRs;
497   }
498 
499   void addToSpilledSGPRs(unsigned num) {
500     NumSpilledSGPRs += num;
501   }
502 
503   void addToSpilledVGPRs(unsigned num) {
504     NumSpilledVGPRs += num;
505   }
506 
507   unsigned getPSInputAddr() const {
508     return PSInputAddr;
509   }
510 
511   unsigned getPSInputEnable() const {
512     return PSInputEnable;
513   }
514 
515   bool isPSInputAllocated(unsigned Index) const {
516     return PSInputAddr & (1 << Index);
517   }
518 
519   void markPSInputAllocated(unsigned Index) {
520     PSInputAddr |= 1 << Index;
521   }
522 
523   void markPSInputEnabled(unsigned Index) {
524     PSInputEnable |= 1 << Index;
525   }
526 
527   bool returnsVoid() const {
528     return ReturnsVoid;
529   }
530 
531   void setIfReturnsVoid(bool Value) {
532     ReturnsVoid = Value;
533   }
534 
535   /// \returns A pair of default/requested minimum/maximum flat work group sizes
536   /// for this function.
537   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
538     return FlatWorkGroupSizes;
539   }
540 
541   /// \returns Default/requested minimum flat work group size for this function.
542   unsigned getMinFlatWorkGroupSize() const {
543     return FlatWorkGroupSizes.first;
544   }
545 
546   /// \returns Default/requested maximum flat work group size for this function.
547   unsigned getMaxFlatWorkGroupSize() const {
548     return FlatWorkGroupSizes.second;
549   }
550 
551   /// \returns A pair of default/requested minimum/maximum number of waves per
552   /// execution unit.
553   std::pair<unsigned, unsigned> getWavesPerEU() const {
554     return WavesPerEU;
555   }
556 
557   /// \returns Default/requested minimum number of waves per execution unit.
558   unsigned getMinWavesPerEU() const {
559     return WavesPerEU.first;
560   }
561 
562   /// \returns Default/requested maximum number of waves per execution unit.
563   unsigned getMaxWavesPerEU() const {
564     return WavesPerEU.second;
565   }
566 
567   /// \returns Stack object index for \p Dim's work group ID.
568   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
569     assert(Dim < 3);
570     return DebuggerWorkGroupIDStackObjectIndices[Dim];
571   }
572 
573   /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
574   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
575     assert(Dim < 3);
576     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
577   }
578 
579   /// \returns Stack object index for \p Dim's work item ID.
580   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
581     assert(Dim < 3);
582     return DebuggerWorkItemIDStackObjectIndices[Dim];
583   }
584 
585   /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
586   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
587     assert(Dim < 3);
588     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
589   }
590 
591   /// \returns SGPR used for \p Dim's work group ID.
592   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
593     switch (Dim) {
594     case 0:
595       assert(hasWorkGroupIDX());
596       return ArgInfo.WorkGroupIDX.getRegister();
597     case 1:
598       assert(hasWorkGroupIDY());
599       return ArgInfo.WorkGroupIDY.getRegister();
600     case 2:
601       assert(hasWorkGroupIDZ());
602       return ArgInfo.WorkGroupIDZ.getRegister();
603     }
604     llvm_unreachable("unexpected dimension");
605   }
606 
607   /// \returns VGPR used for \p Dim' work item ID.
608   unsigned getWorkItemIDVGPR(unsigned Dim) const;
609 
610   unsigned getLDSWaveSpillSize() const {
611     return LDSWaveSpillSize;
612   }
613 
614   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
615                                                     const Value *BufferRsrc) {
616     assert(BufferRsrc);
617     auto PSV = BufferPSVs.try_emplace(
618       BufferRsrc,
619       llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
620     return PSV.first->second.get();
621   }
622 
623   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
624                                                   const Value *ImgRsrc) {
625     assert(ImgRsrc);
626     auto PSV = ImagePSVs.try_emplace(
627       ImgRsrc,
628       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
629     return PSV.first->second.get();
630   }
631 
632   unsigned getOccupancy() const {
633     return Occupancy;
634   }
635 
636   unsigned getMinAllowedOccupancy() const {
637     if (!isMemoryBound() && !needsWaveLimiter())
638       return Occupancy;
639     return (Occupancy < 4) ? Occupancy : 4;
640   }
641 
642   void limitOccupancy(const MachineFunction &MF);
643 
644   void limitOccupancy(unsigned Limit) {
645     if (Occupancy > Limit)
646       Occupancy = Limit;
647   }
648 
649   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
650     if (Occupancy < Limit)
651       Occupancy = Limit;
652     limitOccupancy(MF);
653   }
654 };
655 
656 } // end namespace llvm
657 
658 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
659