xref: /llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h (revision 6360652e9f5b5975d71c619abd981f102eeccec8)
1 //===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIRegisterInfo
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
16 
17 #include "llvm/ADT/BitVector.h"
18 
19 #define GET_REGINFO_HEADER
20 #include "AMDGPUGenRegisterInfo.inc"
21 
22 #include "SIDefines.h"
23 
24 namespace llvm {
25 
26 class GCNSubtarget;
27 class LiveIntervals;
28 class LiveRegUnits;
29 class RegisterBank;
30 struct SGPRSpillBuilder;
31 
32 class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
33 private:
34   const GCNSubtarget &ST;
35   bool SpillSGPRToVGPR;
36   bool isWave32;
37   BitVector RegPressureIgnoredUnits;
38 
39   /// Sub reg indexes for getRegSplitParts.
40   /// First index represents subreg size from 1 to 16 DWORDs.
41   /// The inner vector is sorted by bit offset.
42   /// Provided a register can be fully split with given subregs,
43   /// all elements of the inner vector combined give a full lane mask.
44   static std::array<std::vector<int16_t>, 16> RegSplitParts;
45 
46   // Table representing sub reg of given width and offset.
47   // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
48   // Second index is 32 different dword offsets.
49   static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
50 
51   void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
52 
53 public:
54   SIRegisterInfo(const GCNSubtarget &ST);
55 
56   struct SpilledReg {
57     Register VGPR;
58     int Lane = -1;
59 
60     SpilledReg() = default;
61     SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
62 
63     bool hasLane() { return Lane != -1; }
64     bool hasReg() { return VGPR != 0; }
65   };
66 
67   /// \returns the sub reg enum value for the given \p Channel
68   /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
69   static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
70 
71   bool spillSGPRToVGPR() const {
72     return SpillSGPRToVGPR;
73   }
74 
75   /// Return the largest available SGPR aligned to \p Align for the register
76   /// class \p RC.
77   MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
78                                      const unsigned Align,
79                                      const TargetRegisterClass *RC) const;
80 
81   /// Return the end register initially reserved for the scratch buffer in case
82   /// spilling is needed.
83   MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
84 
85   /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
86   /// of waves per execution unit required for the function \p MF.
87   std::pair<unsigned, unsigned>
88   getMaxNumVectorRegs(const MachineFunction &MF) const;
89 
90   BitVector getReservedRegs(const MachineFunction &MF) const override;
91   bool isAsmClobberable(const MachineFunction &MF,
92                         MCRegister PhysReg) const override;
93 
94   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
95   const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
96   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
97                                        CallingConv::ID) const override;
98   const uint32_t *getNoPreservedMask() const override;
99 
100   // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
101   // conventions are free to use certain VGPRs without saving and restoring any
102   // lanes (not even inactive ones).
103   static bool isChainScratchRegister(Register VGPR);
104 
105   // Stack access is very expensive. CSRs are also the high registers, and we
106   // want to minimize the number of used registers.
107   unsigned getCSRFirstUseCost() const override {
108     return 100;
109   }
110 
111   const TargetRegisterClass *
112   getLargestLegalSuperClass(const TargetRegisterClass *RC,
113                             const MachineFunction &MF) const override;
114 
115   Register getFrameRegister(const MachineFunction &MF) const override;
116 
117   bool hasBasePointer(const MachineFunction &MF) const;
118   Register getBaseRegister() const;
119 
120   bool shouldRealignStack(const MachineFunction &MF) const override;
121   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
122 
123   bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
124   bool requiresFrameIndexReplacementScavenging(
125     const MachineFunction &MF) const override;
126   bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
127 
128   int64_t getScratchInstrOffset(const MachineInstr *MI) const;
129 
130   int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
131                                    int Idx) const override;
132 
133   bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
134 
135   Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
136                                         int64_t Offset) const override;
137 
138   void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
139                          int64_t Offset) const override;
140 
141   bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
142                           int64_t Offset) const override;
143 
144   const TargetRegisterClass *getPointerRegClass(
145     const MachineFunction &MF, unsigned Kind = 0) const override;
146 
147   /// Returns a legal register class to copy a register in the specified class
148   /// to or from. If it is possible to copy the register directly without using
149   /// a cross register class copy, return the specified RC. Returns NULL if it
150   /// is not possible to copy between two registers of the specified class.
151   const TargetRegisterClass *
152   getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
153 
154   void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
155                                bool IsLoad, bool IsKill = true) const;
156 
157   /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
158   /// free VGPR lane to spill.
159   bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
160                  SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
161                  bool OnlyToVGPR = false,
162                  bool SpillToPhysVGPRLane = false) const;
163 
164   bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
165                    SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
166                    bool OnlyToVGPR = false,
167                    bool SpillToPhysVGPRLane = false) const;
168 
169   bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
170                           MachineBasicBlock &RestoreMBB, Register SGPR,
171                           RegScavenger *RS) const;
172 
173   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
174                            unsigned FIOperandNum,
175                            RegScavenger *RS) const override;
176 
177   bool eliminateSGPRToVGPRSpillFrameIndex(
178       MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
179       SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
180       bool SpillToPhysVGPRLane = false) const;
181 
182   StringRef getRegAsmName(MCRegister Reg) const override;
183 
184   // Pseudo regs are not allowed
185   unsigned getHWRegIndex(MCRegister Reg) const {
186     return getEncodingValue(Reg) & 0xff;
187   }
188 
189   LLVM_READONLY
190   const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;
191 
192   LLVM_READONLY
193   const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
194 
195   LLVM_READONLY
196   const TargetRegisterClass *
197   getVectorSuperClassForBitWidth(unsigned BitWidth) const;
198 
199   LLVM_READONLY
200   static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
201 
202   /// \returns true if this class contains only SGPR registers
203   static bool isSGPRClass(const TargetRegisterClass *RC) {
204     return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
205   }
206 
207   /// \returns true if this class ID contains only SGPR registers
208   bool isSGPRClassID(unsigned RCID) const {
209     return isSGPRClass(getRegClass(RCID));
210   }
211 
212   bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
213   bool isSGPRPhysReg(Register Reg) const {
214     return isSGPRClass(getPhysRegBaseClass(Reg));
215   }
216 
217   /// \returns true if this class contains only VGPR registers
218   static bool isVGPRClass(const TargetRegisterClass *RC) {
219     return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
220   }
221 
222   /// \returns true if this class contains only AGPR registers
223   static bool isAGPRClass(const TargetRegisterClass *RC) {
224     return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
225   }
226 
227   /// \returns true only if this class contains both VGPR and AGPR registers
228   bool isVectorSuperClass(const TargetRegisterClass *RC) const {
229     return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
230   }
231 
232   /// \returns true only if this class contains both VGPR and SGPR registers
233   bool isVSSuperClass(const TargetRegisterClass *RC) const {
234     return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
235   }
236 
237   /// \returns true if this class contains VGPR registers.
238   static bool hasVGPRs(const TargetRegisterClass *RC) {
239     return RC->TSFlags & SIRCFlags::HasVGPR;
240   }
241 
242   /// \returns true if this class contains AGPR registers.
243   static bool hasAGPRs(const TargetRegisterClass *RC) {
244     return RC->TSFlags & SIRCFlags::HasAGPR;
245   }
246 
247   /// \returns true if this class contains SGPR registers.
248   static bool hasSGPRs(const TargetRegisterClass *RC) {
249     return RC->TSFlags & SIRCFlags::HasSGPR;
250   }
251 
252   /// \returns true if this class contains any vector registers.
253   static bool hasVectorRegisters(const TargetRegisterClass *RC) {
254     return hasVGPRs(RC) || hasAGPRs(RC);
255   }
256 
257   /// \returns A VGPR reg class with the same width as \p SRC
258   const TargetRegisterClass *
259   getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
260 
261   /// \returns An AGPR reg class with the same width as \p SRC
262   const TargetRegisterClass *
263   getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
264 
265   /// \returns A SGPR reg class with the same width as \p SRC
266   const TargetRegisterClass *
267   getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
268 
269   /// Returns a register class which is compatible with \p SuperRC, such that a
270   /// subregister exists with class \p SubRC with subregister index \p
271   /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
272   /// a register tuple), return null.
273   const TargetRegisterClass *
274   getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
275                            const TargetRegisterClass *SubRC,
276                            unsigned SubIdx) const;
277 
278   bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
279                             unsigned DefSubReg,
280                             const TargetRegisterClass *SrcRC,
281                             unsigned SrcSubReg) const override;
282 
283   /// \returns True if operands defined with this operand type can accept
284   /// a literal constant (i.e. any 32-bit immediate).
285   bool opCanUseLiteralConstant(unsigned OpType) const;
286 
287   /// \returns True if operands defined with this operand type can accept
288   /// an inline constant. i.e. An integer value in the range (-16, 64) or
289   /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
290   bool opCanUseInlineConstant(unsigned OpType) const;
291 
292   MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
293                                 const TargetRegisterClass *RC,
294                                 const MachineFunction &MF,
295                                 bool ReserveHighestVGPR = false) const;
296 
297   const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
298                                                Register Reg) const;
299   const TargetRegisterClass *
300   getRegClassForOperandReg(const MachineRegisterInfo &MRI,
301                            const MachineOperand &MO) const;
302 
303   bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
304   bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
305   bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
306     return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
307   }
308 
309   // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
310   // (such as VCC) which hold a wave-wide vector of boolean values. Examining
311   // just the register class is not suffcient; it needs to be combined with a
312   // value type. The next predicate isUniformReg() does this correctly.
313   bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
314     return !isSGPRClass(RC);
315   }
316 
317   bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
318                     Register Reg) const override;
319 
320   ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
321                                      unsigned EltSize) const;
322 
323   bool shouldCoalesce(MachineInstr *MI,
324                       const TargetRegisterClass *SrcRC,
325                       unsigned SubReg,
326                       const TargetRegisterClass *DstRC,
327                       unsigned DstSubReg,
328                       const TargetRegisterClass *NewRC,
329                       LiveIntervals &LIS) const override;
330 
331   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
332                                MachineFunction &MF) const override;
333 
334   unsigned getRegPressureSetLimit(const MachineFunction &MF,
335                                   unsigned Idx) const override;
336 
337   const int *getRegUnitPressureSets(unsigned RegUnit) const override;
338 
339   MCRegister getReturnAddressReg(const MachineFunction &MF) const;
340 
341   const TargetRegisterClass *
342   getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
343 
344   const TargetRegisterClass *
345   getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
346     return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
347   }
348 
349   const TargetRegisterClass *
350   getConstrainedRegClassForOperand(const MachineOperand &MO,
351                                  const MachineRegisterInfo &MRI) const override;
352 
353   const TargetRegisterClass *getBoolRC() const {
354     return isWave32 ? &AMDGPU::SReg_32RegClass
355                     : &AMDGPU::SReg_64RegClass;
356   }
357 
358   const TargetRegisterClass *getWaveMaskRegClass() const {
359     return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
360                     : &AMDGPU::SReg_64_XEXECRegClass;
361   }
362 
363   // Return the appropriate register class to use for 64-bit VGPRs for the
364   // subtarget.
365   const TargetRegisterClass *getVGPR64Class() const;
366 
367   MCRegister getVCC() const;
368 
369   MCRegister getExec() const;
370 
371   const TargetRegisterClass *getRegClass(unsigned RCID) const;
372 
373   // Find reaching register definition
374   MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
375                                 MachineInstr &Use,
376                                 MachineRegisterInfo &MRI,
377                                 LiveIntervals *LIS) const;
378 
379   const uint32_t *getAllVGPRRegMask() const;
380   const uint32_t *getAllAGPRRegMask() const;
381   const uint32_t *getAllVectorRegMask() const;
382   const uint32_t *getAllAllocatableSRegMask() const;
383 
384   // \returns number of 32 bit registers covered by a \p LM
385   static unsigned getNumCoveredRegs(LaneBitmask LM) {
386     // The assumption is that every lo16 subreg is an even bit and every hi16
387     // is an adjacent odd bit or vice versa.
388     uint64_t Mask = LM.getAsInteger();
389     uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
390     Mask = (Even >> 1) | Mask;
391     uint64_t Odd = Mask & 0x5555555555555555ULL;
392     return llvm::popcount(Odd);
393   }
394 
395   // \returns a DWORD offset of a \p SubReg
396   unsigned getChannelFromSubReg(unsigned SubReg) const {
397     return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
398   }
399 
400   // \returns a DWORD size of a \p SubReg
401   unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
402     return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
403   }
404 
405   // For a given 16 bit \p Reg \returns a 32 bit register holding it.
406   // \returns \p Reg otherwise.
407   MCPhysReg get32BitRegister(MCPhysReg Reg) const;
408 
409   // Returns true if a given register class is properly aligned for
410   // the subtarget.
411   bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
412 
413   // Given \p RC returns corresponding aligned register class if required
414   // by the subtarget.
415   const TargetRegisterClass *
416   getProperlyAlignedRC(const TargetRegisterClass *RC) const;
417 
418   /// Return all SGPR128 which satisfy the waves per execution unit requirement
419   /// of the subtarget.
420   ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
421 
422   /// Return all SGPR64 which satisfy the waves per execution unit requirement
423   /// of the subtarget.
424   ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
425 
426   /// Return all SGPR32 which satisfy the waves per execution unit requirement
427   /// of the subtarget.
428   ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
429 
430   // Insert spill or restore instructions.
431   // When lowering spill pseudos, the RegScavenger should be set.
432   // For creating spill instructions during frame lowering, where no scavenger
433   // is available, LiveUnits can be used.
434   void buildSpillLoadStore(MachineBasicBlock &MBB,
435                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
436                            unsigned LoadStoreOp, int Index, Register ValueReg,
437                            bool ValueIsKill, MCRegister ScratchOffsetReg,
438                            int64_t InstrOffset, MachineMemOperand *MMO,
439                            RegScavenger *RS,
440                            LiveRegUnits *LiveUnits = nullptr) const;
441 
442   // Return alignment in register file of first register in a register tuple.
443   unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
444     return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
445   }
446 
447   // Check if register class RC has required alignment.
448   bool isRegClassAligned(const TargetRegisterClass *RC,
449                          unsigned AlignNumBits) const {
450     assert(AlignNumBits != 0);
451     unsigned RCAlign = getRegClassAlignmentNumBits(RC);
452     return RCAlign == AlignNumBits ||
453            (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
454   }
455 
456   // Return alignment of a SubReg relative to start of a register in RC class.
457   // No check if the subreg is supported by the current RC is made.
458   unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
459                                      unsigned SubReg) const;
460 
461   // \returns a number of registers of a given \p RC used in a function.
462   // Does not go inside function calls.
463   unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
464                               const TargetRegisterClass &RC) const;
465 
466   std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
467     return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
468                              : std::optional<uint8_t>{};
469   }
470 
471   SmallVector<StringLiteral>
472   getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
473 };
474 
475 namespace AMDGPU {
476 /// Get the size in bits of a register from the register class \p RC.
477 unsigned getRegBitWidth(const TargetRegisterClass &RC);
478 } // namespace AMDGPU
479 
480 } // End namespace llvm
481 
482 #endif
483