//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;

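// Illustrative sketch (not part of this header's interface): how a pass
// might set and later test these flags on a load's memory operands.
// `LoadMI` is a hypothetical MachineInstr known to be a clobber-free
// uniform load.
//
//   for (MachineMemOperand *MMO : LoadMI.memoperands())
//     MMO->setFlags(MONoClobber);
//   bool IsNoClobber =
//       (*LoadMI.memoperands_begin())->getFlags() & MONoClobber;
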
/// Utility to store a worklist of machine instructions.
struct SIInstrWorklist {
  SIInstrWorklist() = default;

  void insert(MachineInstr *MI);

  MachineInstr *top() const {
    const auto *iter = InstrList.begin();
    return *iter;
  }

  void erase_top() {
    const auto *iter = InstrList.begin();
    InstrList.erase(iter);
  }

  bool empty() const { return InstrList.empty(); }

  void clear() {
    InstrList.clear();
    DeferredList.clear();
  }

  bool isDeferred(MachineInstr *MI);

  SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }

private:
  /// InstrList contains the MachineInstrs.
  SetVector<MachineInstr *> InstrList;
  /// DeferredList holds specific MachineInstrs that the insert method sets
  /// aside for later processing.
  SetVector<MachineInstr *> DeferredList;
};

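// Illustrative driver-loop sketch (simplified from how moveToVALU consumes
// this worklist; TII and MDT are assumed to be in scope):
//
//   SIInstrWorklist Worklist;
//   Worklist.insert(&FirstInst);
//   while (!Worklist.empty()) {
//     MachineInstr &Inst = *Worklist.top();
//     Worklist.erase_top();
//     TII->moveToVALUImpl(Worklist, MDT, Inst); // may enqueue more work
//   }
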
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate is encoded as the negated value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

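  // Because inverse predicates are encoded as negated values, inverting a
  // predicate is plain arithmetic negation. Illustrative helper (for
  // exposition only, not a member of this class):
  //
  //   static BranchPredicate invertPredicate(BranchPredicate P) {
  //     return static_cast<BranchPredicate>(-static_cast<int>(P));
  //   }
  //
  // e.g. -SCC_TRUE == SCC_FALSE and -EXECZ == EXECNZ.
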
  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;

private:
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

  bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                  StringRef &ErrInfo) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;
  bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
                     const MachineOperand *fromMO, unsigned toIdx,
                     const MachineOperand *toMO) const;
  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

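  // Illustrative sketch: these flags are attached to global-address machine
  // operands when building PC-relative addressing sequences. Hypothetical
  // builder code (GV and MIB assumed in scope):
  //
  //   MIB.addGlobalAddress(GV, /*Offset=*/0, SIInstrInfo::MO_GOTPCREL32_LO);
  //
  // which is printed as symbol@gotpcrel32@lo and lowers to the
  // R_AMDGPU_GOTPCREL32_LO relocation, per the table above.
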
  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool isGlobalMemoryObject(const MachineInstr *MI) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc, bool RenamableDest = false,
                   bool RenamableSrc = false) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  void storeRegToStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  void loadRegFromStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
      int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig,
                     const TargetRegisterInfo &TRI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form,
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

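  // Illustrative pre-RA expansion (MIR sketch; register names are
  // hypothetical and operand lists are elided):
  //
  //   %dst:vreg_64 = V_MOV_B64_DPP_PSEUDO %src:vreg_64, ...
  //
  // becomes
  //
  //   %lo:vgpr_32 = V_MOV_B32_dpp %src.sub0, ...
  //   %hi:vgpr_32 = V_MOV_B32_dpp %src.sub1, ...
  //   %dst:vreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1
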
  // Returns an opcode that can be used to move a value to a \p DstRC
  // register.  If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint16_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
  }

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  static bool isLDSDMA(const MachineInstr &MI) {
    return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
  }

  bool isLDSDMA(uint16_t Opcode) {
    return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
  }

  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR are a special case of
  // SGPR spills to VGPRs: they are SGPR spills, but implemented with VALU
  // instructions. They therefore need an explicit check, since classifying
  // them by the Spill bit plus the instruction type alone misclassifies
  // them.
  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(MI) && isVALU(MI));
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(Opcode) && isVALU(Opcode));
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(MI) && isSALU(MI));
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(Opcode) && isSALU(Opcode));
  }

  bool isSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  static bool isSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Spill;
  }

  static bool isWWMRegSpillOpcode(uint16_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  // Most SOPK instructions treat the immediate as a signed 16-bit value;
  // however, some use it as unsigned.
  static bool sopkIsZext(unsigned Opcode) {
    return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
           Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
           Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
           Opcode == AMDGPU::S_GETREG_B32;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }

  // Check whether the opcode starts a barrier. Pre-gfx12 this is just
  // S_BARRIER; with the addition of S_BARRIER_SIGNAL* / S_BARRIER_WAIT, the
  // barrier start is any of the S_BARRIER_SIGNAL* opcodes.
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
           Opcode == AMDGPU::S_BARRIER_LEAVE ||
           Opcode == AMDGPU::S_BARRIER_LEAVE_IMM ||
           Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
  }

  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool isIGLP(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_BARRIER ||
           Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }

  bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }

  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    default:
      return Opcode;
    }
  }

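  // Illustrative use (a hedged sketch in the spirit of SIInsertWaitcnts):
  // promoting a compiler-inserted "soft" waitcnt that turns out to be
  // required into its hard form, assuming TII and MI are in scope:
  //
  //   MI.setDesc(TII->get(getNonSoftWaitcntOpcode(MI.getOpcode())));
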
  bool isWaitcnt(unsigned Opcode) const {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }

  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());
                        });
  }

  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// This function is used to determine if an instruction can be safely
  /// executed under EXEC = 0 without hardware error, indeterminate results,
  /// and/or visible effects on future vector execution or outside the shader.
  /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
  /// used in removing branches over short EXEC = 0 sequences.
  /// As such it embeds certain assumptions which may not apply to every case
  /// of EXEC = 0 execution.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const;

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate
  // value that will not require an additional 4 bytes; this function assumes
  // that it will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if \p UseMO, if substituted with \p DefMO in \p MI, would
  /// be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }

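  // Illustrative sketch: a folding pass deciding whether an immediate can be
  // encoded inline or needs a 32-bit literal slot (TII, MI, OpIdx, and ImmOp
  // are assumed to be in scope):
  //
  //   bool NeedsLiteral = !TII->isInlineConstant(MI, OpIdx, ImmOp);
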
  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI,
                       int OpIdx) const {
    return usesConstantBus(MRI, MI.getOperand(OpIdx),
                           MI.getDesc().operands()[OpIdx]);
  }

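  // Illustrative sketch: counting constant-bus uses across the source
  // operands of a VOP3, in the spirit of legalizeOperandsVOP3 (Src0Idx etc.
  // are hypothetical named-operand indices):
  //
  //   unsigned ConstantBusCount = 0;
  //   for (int Idx : {Src0Idx, Src1Idx, Src2Idx})
  //     if (Idx != -1 && usesConstantBus(MRI, MI, Idx))
  //       ++ConstantBusCount;
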
  /// Return true if this instruction has any modifiers.
  ///  e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands to
  /// infer the correct register class.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand \p OpNo of the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIdx operand of this instruction by inserting
  /// a MOV.  For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO would be a legal operand if it were the operand \p OpIdx
  /// of \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description or operand index.
  /// The operand-index version performs more legality checks.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;
  bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
                         const MachineOperand &MO) const;
  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to an SGPR. The desired register
  /// class for the dst register (\p DstRC) can be optionally supplied. This
  /// function can only be used when it is known that the value in \p SrcReg
  /// is the same across all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI,
                              const TargetRegisterClass *DstRC = nullptr) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction.  This function may create new
  /// instructions and control-flow around \p MI.  If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change the SADDR form of a FLAT \p Inst to its VADDR form if the saddr
  /// operand was moved to a VGPR. \returns true if it succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode.  This function will also move the users of MachineInstrs
  /// in the \p Worklist to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Build instructions that simulate the behavior of an `s_trap 2`
  /// instruction for hardware (namely, gfx11) that runs in PRIV=1 mode,
  /// where s_trap is interpreted as a nop.
  MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
                                         MachineBasicBlock &MBB,
                                         MachineInstr &MI,
                                         const DebugLoc &DL) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p OperandName. If \p MI does not have such
  /// an operand, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get the required immediate operand named \p OpName.
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }

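  // Illustrative use, with a name from the generated AMDGPU::OpName table:
  //
  //   int64_t Offset = TII->getNamedImmOperand(MI, AMDGPU::OpName::offset);
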
  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// The caller must add the source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

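  // Illustrative use; the caller supplies the two source operands
  // (Src0/Src1 are hypothetical registers):
  //
  //   TII->getAddNoCarry(MBB, I, DL, DestReg)
  //       .addReg(Src0)
  //       .addReg(Src1);
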
1393   static bool isKillTerminator(unsigned Opcode);
1394   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1395 
1396   bool isLegalMUBUFImmOffset(unsigned Imm) const;
1397 
1398   static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);
1399 
1400   bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1401                         Align Alignment = Align(4)) const;
1402 
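  // Illustrative sketch: legalizing a MUBUF offset that may not fit in the
  // immediate field. `TII` and `Off` are assumed names.
  //
  //   uint32_t SOffset, ImmOffset;
  //   if (TII->splitMUBUFOffset(Off, SOffset, ImmOffset)) {
  //     // ImmOffset now satisfies TII->isLegalMUBUFImmOffset(ImmOffset);
  //     // SOffset is the remainder to materialize in the soffset operand.
  //   }
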
1403   /// Returns true if \p Offset is legal for the subtarget as the offset to a
1404   /// FLAT encoded instruction accessing \p AddrSpace with the given
1405   /// \p FlatVariant.
1406   bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1407                          uint64_t FlatVariant) const;
1408 
1409   /// Split \p COffsetVal into {immediate offset field, remainder offset}
1410   /// values.
1411   std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1412                                               unsigned AddrSpace,
1413                                               uint64_t FlatVariant) const;
1414 
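  // Illustrative sketch: when a FLAT offset is out of range, split it and
  // encode only the in-range part. `TII`, `COffsetVal`, `AS`, and `Variant`
  // are assumed names.
  //
  //   if (!TII->isLegalFLATOffset(COffsetVal, AS, Variant)) {
  //     auto [ImmField, Remainder] = TII->splitFlatOffset(COffsetVal, AS, Variant);
  //     // ImmField goes in the instruction's offset field; Remainder must be
  //     // folded into the address computation.
  //   }
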
1415   /// Returns true if negative offsets are allowed for the given \p FlatVariant.
1416   bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
1417 
1418   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
1419   /// Return -1 if the target-specific opcode for the pseudo instruction does
1420   /// not exist. If Opcode is not a pseudo instruction, return it unchanged.
1421   int pseudoToMCOpcode(int Opcode) const;
1422 
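  // Illustrative sketch: lowering to MC must bail out on opcodes that have no
  // hardware encoding for the current subtarget. `MI` is an assumed name.
  //
  //   int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
  //   if (MCOp == -1) {
  //     // no valid hardware instruction for this subtarget
  //   }
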
1423   /// \brief Check if this instruction should only be used by assembler.
1424   /// Return true if this opcode should not be used by codegen.
1425   bool isAsmOnlyOpcode(int MCOp) const;
1426 
1427   const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
1428                                          const TargetRegisterInfo *TRI,
1429                                          const MachineFunction &MF)
1430     const override;
1431 
1432   void fixImplicitOperands(MachineInstr &MI) const;
1433 
1434   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1435                                       ArrayRef<unsigned> Ops,
1436                                       MachineBasicBlock::iterator InsertPt,
1437                                       int FrameIndex,
1438                                       LiveIntervals *LIS = nullptr,
1439                                       VirtRegMap *VRM = nullptr) const override;
1440 
1441   unsigned getInstrLatency(const InstrItineraryData *ItinData,
1442                            const MachineInstr &MI,
1443                            unsigned *PredCost = nullptr) const override;
1444 
1445   InstructionUniformity
1446   getInstructionUniformity(const MachineInstr &MI) const override final;
1447 
1448   InstructionUniformity
1449   getGenericInstructionUniformity(const MachineInstr &MI) const;
1450 
1451   const MIRFormatter *getMIRFormatter() const override {
1452     if (!Formatter)
1453       Formatter = std::make_unique<AMDGPUMIRFormatter>();
1454     return Formatter.get();
1455   }
1456 
1457   static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1458 
1459   const TargetSchedModel &getSchedModel() const { return SchedModel; }
1460 
1461   // Enforce even register alignment of operand \p OpName if required by the
1462   // target. This is used when an operand is a 32-bit register but must be
1463   // even-aligned regardless.
1464   void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1465 };
1466 
1467 /// \brief Returns true if the reg:subreg pair \p P is of register class \p TRC
1468 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1469                          const TargetRegisterClass &TRC,
1470                          MachineRegisterInfo &MRI) {
1471   auto *RC = MRI.getRegClass(P.Reg);
1472   if (!P.SubReg)
1473     return RC == &TRC;
1474   auto *TRI = MRI.getTargetRegisterInfo();
1475   return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1476 }
1477 
1478 /// \brief Create RegSubRegPair from a register MachineOperand
1479 inline
1480 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1481   assert(O.isReg());
1482   return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1483 }
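
// Illustrative sketch: combining the two helpers above to test whether an
// operand reads a 32-bit VGPR. `MI` and `MRI` are assumed names.
//
//   TargetInstrInfo::RegSubRegPair P = getRegSubRegPair(MI.getOperand(1));
//   if (isOfRegClass(P, AMDGPU::VGPR_32RegClass, MRI)) {
//     // the operand's reg:subreg pair is a 32-bit VGPR
//   }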
1484 
1485 /// \brief Return the SubReg component from REG_SEQUENCE
1486 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1487                                                     unsigned SubReg);
1488 
1489 /// \brief Return the defining instruction for a given reg:subreg pair,
1490 /// skipping copy-like instructions and subreg-manipulation pseudos.
1491 /// Following another subreg of a reg:subreg pair isn't supported.
1492 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1493                                MachineRegisterInfo &MRI);
1494 
1495 /// \brief Return true if EXEC may be modified between the def of \p VReg at
1496 /// \p DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1497 /// attempt to track between blocks.
1498 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1499                                 Register VReg,
1500                                 const MachineInstr &DefMI,
1501                                 const MachineInstr &UseMI);
1502 
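// Illustrative sketch: VALU results depend on the EXEC mask, so a def may only
// be folded into a use if EXEC cannot change in between. `MRI`, `Reg`,
// `DefMI`, and `UseMI` are assumed names.
//
//   if (!execMayBeModifiedBeforeUse(MRI, Reg, DefMI, UseMI)) {
//     // safe to fold/rematerialize the def at the use point
//   }
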
1503 /// \brief Return true if EXEC may be modified between the def of \p VReg at
1504 /// \p DefMI and all its uses. Should be run on SSA. Currently does not
1505 /// attempt to track between blocks.
1506 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1507                                    Register VReg,
1508                                    const MachineInstr &DefMI);
1509 
1510 namespace AMDGPU {
1511 
1512   LLVM_READONLY
1513   int getVOPe64(uint16_t Opcode);
1514 
1515   LLVM_READONLY
1516   int getVOPe32(uint16_t Opcode);
1517 
1518   LLVM_READONLY
1519   int getSDWAOp(uint16_t Opcode);
1520 
1521   LLVM_READONLY
1522   int getDPPOp32(uint16_t Opcode);
1523 
1524   LLVM_READONLY
1525   int getDPPOp64(uint16_t Opcode);
1526 
1527   LLVM_READONLY
1528   int getBasicFromSDWAOp(uint16_t Opcode);
1529 
1530   LLVM_READONLY
1531   int getCommuteRev(uint16_t Opcode);
1532 
1533   LLVM_READONLY
1534   int getCommuteOrig(uint16_t Opcode);
1535 
1536   LLVM_READONLY
1537   int getAddr64Inst(uint16_t Opcode);
1538 
1539   /// Check if \p Opcode is an Addr64 opcode.
1540   ///
1541   /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
1542   LLVM_READONLY
1543   int getIfAddr64Inst(uint16_t Opcode);
1544 
1545   LLVM_READONLY
1546   int getSOPKOp(uint16_t Opcode);
1547 
1548   /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
1549   /// of a VADDR form.
1550   LLVM_READONLY
1551   int getGlobalSaddrOp(uint16_t Opcode);
1552 
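  // Illustrative sketch: a pass that proves the address is uniform can switch
  // a FLAT global instruction from its VADDR form to the SADDR form. `MI` and
  // `TII` are assumed names; the operand list must be rewritten to match.
  //
  //   int NewOpc = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
  //   if (NewOpc != -1)
  //     MI.setDesc(TII->get(NewOpc));
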
1553   /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
1554   /// of a SADDR form.
1555   LLVM_READONLY
1556   int getGlobalVaddrOp(uint16_t Opcode);
1557 
1558   LLVM_READONLY
1559   int getVCMPXNoSDstOp(uint16_t Opcode);
1560 
1561   /// \returns ST form with only immediate offset of a FLAT Scratch instruction
1562   /// given an \p Opcode of an SS (SADDR) form.
1563   LLVM_READONLY
1564   int getFlatScratchInstSTfromSS(uint16_t Opcode);
1565 
1566   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1567   /// of an SVS (SADDR + VADDR) form.
1568   LLVM_READONLY
1569   int getFlatScratchInstSVfromSVS(uint16_t Opcode);
1570 
1571   /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
1572   /// of an SV (VADDR) form.
1573   LLVM_READONLY
1574   int getFlatScratchInstSSfromSV(uint16_t Opcode);
1575 
1576   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1577   /// of an SS (SADDR) form.
1578   LLVM_READONLY
1579   int getFlatScratchInstSVfromSS(uint16_t Opcode);
1580 
1581   /// \returns earlyclobber version of a MAC MFMA if it exists.
1582   LLVM_READONLY
1583   int getMFMAEarlyClobberOp(uint16_t Opcode);
1584 
1585   /// \returns Version of an MFMA instruction which uses AGPRs for srcC and
1586   /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst.
1587   LLVM_READONLY
1588   int getMFMASrcCVDstAGPROp(uint16_t Opcode);
1589 
1590   /// \returns v_cmpx version of a v_cmp instruction.
1591   LLVM_READONLY
1592   int getVCMPXOpFromVCMP(uint16_t Opcode);
1593 
1594   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
1595   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
1596   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
1597   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
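
  // Illustrative sketch: these fields compose words 2-3 of a buffer resource
  // descriptor. The element-size and index-stride values below are placeholder
  // assumptions for the example, not values mandated by this header.
  //
  //   uint64_t Rsrc23 = RSRC_DATA_FORMAT |
  //                     (UINT64_C(1) << RSRC_ELEMENT_SIZE_SHIFT) |
  //                     (UINT64_C(1) << RSRC_INDEX_STRIDE_SHIFT) |
  //                     RSRC_TID_ENABLE;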
1598 
1599 } // end namespace AMDGPU
1600 
1601 namespace AMDGPU {
1602 enum AsmComments {
1603   // For SGPR to VGPR spill instructions.
1604   SGPR_SPILL = MachineInstr::TAsmComments
1605 };
1606 } // namespace AMDGPU
1607 
1608 namespace SI {
1609 namespace KernelInputOffsets {
1610 
1611 /// Offsets in bytes from the start of the input buffer
1612 enum Offsets {
1613   NGROUPS_X = 0,
1614   NGROUPS_Y = 4,
1615   NGROUPS_Z = 8,
1616   GLOBAL_SIZE_X = 12,
1617   GLOBAL_SIZE_Y = 16,
1618   GLOBAL_SIZE_Z = 20,
1619   LOCAL_SIZE_X = 24,
1620   LOCAL_SIZE_Y = 28,
1621   LOCAL_SIZE_Z = 32
1622 };
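
// Illustrative sketch: the offsets are dword-aligned, so a dword index into
// the input buffer can be derived directly, e.g.:
//
//   constexpr unsigned LocalSizeYDword = LOCAL_SIZE_Y / 4; // == 28 / 4 == 7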
1623 
1624 } // end namespace KernelInputOffsets
1625 } // end namespace SI
1626 
1627 } // end namespace llvm
1628 
1629 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1630