1 //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 /// \file
12 ///
13 /// This file contains the definition of the AMDGPU ISA disassembler.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18 
19 #include "Disassembler/AMDGPUDisassembler.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIDefines.h"
22 #include "SIRegisterInfo.h"
23 #include "TargetInfo/AMDGPUTargetInfo.h"
24 #include "Utils/AMDGPUAsmUtils.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm-c/DisassemblerTypes.h"
27 #include "llvm/BinaryFormat/ELF.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCDecoderOps.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/MC/MCRegisterInfo.h"
34 #include "llvm/MC/MCSubtargetInfo.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "amdgpu-disassembler"
41 
42 #define SGPR_MAX                                                               \
43   (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
44                  : AMDGPU::EncValues::SGPR_MAX_SI)
45 
46 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
47 
48 AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
49                                        MCContext &Ctx, MCInstrInfo const *MCII)
50     : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
51       MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
52       CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
53   // ToDo: AMDGPUDisassembler supports only VI ISA.
54   if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
55     report_fatal_error("Disassembly not yet supported for subtarget");
56 
57   for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
58     createConstantSymbolExpr(Symbol, Code);
59 
60   UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
61   UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
62   UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
63 }
64 
65 void AMDGPUDisassembler::setABIVersion(unsigned Version) {
66   CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
67 }
68 
69 inline static MCDisassembler::DecodeStatus
70 addOperand(MCInst &Inst, const MCOperand& Opnd) {
71   Inst.addOperand(Opnd);
72   return Opnd.isValid() ?
73     MCDisassembler::Success :
74     MCDisassembler::Fail;
75 }
76 
77 static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
78                                 uint16_t NameIdx) {
79   int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
80   if (OpIdx != -1) {
81     auto I = MI.begin();
82     std::advance(I, OpIdx);
83     MI.insert(I, Op);
84   }
85   return OpIdx;
86 }
87 
88 static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
89                                        uint64_t Addr,
90                                        const MCDisassembler *Decoder) {
91   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
92 
93   // Our branches take a simm16, but we need two extra bits to account for the
94   // factor of 4.
95   APInt SignedOffset(18, Imm * 4, true);
96   int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
97 
98   if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
99     return MCDisassembler::Success;
100   return addOperand(Inst, MCOperand::createImm(Imm));
101 }
102 
103 static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
104                                      const MCDisassembler *Decoder) {
105   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
106   int64_t Offset;
107   if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
108     Offset = SignExtend64<24>(Imm);
109   } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
110     Offset = Imm & 0xFFFFF;
111   } else { // GFX9+ supports 21-bit signed offsets.
112     Offset = SignExtend64<21>(Imm);
113   }
114   return addOperand(Inst, MCOperand::createImm(Offset));
115 }
116 
117 static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
118                                   const MCDisassembler *Decoder) {
119   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
120   return addOperand(Inst, DAsm->decodeBoolReg(Val));
121 }
122 
123 static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
124                                        uint64_t Addr,
125                                        const MCDisassembler *Decoder) {
126   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
127   return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
128 }
129 
130 static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
131                                  const MCDisassembler *Decoder) {
132   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
133   return addOperand(Inst, DAsm->decodeDpp8FI(Val));
134 }
135 
136 #define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
137   static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
138                                         uint64_t /*Addr*/,                     \
139                                         const MCDisassembler *Decoder) {       \
140     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
141     return addOperand(Inst, DAsm->DecoderName(Imm));                           \
142   }
143 
144 // Decoder for registers; decodes directly using RegClassID. Imm (8-bit) is
145 // the register number. Used by VGPR-only and AGPR-only operands.
146 #define DECODE_OPERAND_REG_8(RegClass)                                         \
147   static DecodeStatus Decode##RegClass##RegisterClass(                         \
148       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
149       const MCDisassembler *Decoder) {                                         \
150     assert(Imm < (1 << 8) && "8-bit encoding");                                \
151     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
152     return addOperand(                                                         \
153         Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
154   }
155 
156 #define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
157                      ImmWidth)                                                 \
158   static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
159                            const MCDisassembler *Decoder) {                    \
160     assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
161     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
162     return addOperand(Inst,                                                    \
163                       DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
164                                         MandatoryLiteral, ImmWidth));          \
165   }
166 
167 static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
168                                 AMDGPUDisassembler::OpWidthTy OpWidth,
169                                 unsigned Imm, unsigned EncImm,
170                                 bool MandatoryLiteral, unsigned ImmWidth,
171                                 AMDGPU::OperandSemantics Sema,
172                                 const MCDisassembler *Decoder) {
173   assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
174   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
175   return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
176                                             ImmWidth, Sema));
177 }
178 
179 // Decoder for registers. Imm (7-bit) is the register number; uses decodeSrcOp
180 // to get the register class. Used by SGPR-only operands.
181 #define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
182   DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
183 
184 // Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
185 // Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
186 // Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
187 // Used by AV_ register classes (AGPR-only or VGPR-only register operands).
188 template <AMDGPUDisassembler::OpWidthTy OpWidth>
189 static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
190                                const MCDisassembler *Decoder) {
191   return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
192                      false, 0, AMDGPU::OperandSemantics::INT, Decoder);
193 }
194 
195 // Decoder for Src(9-bit encoding) registers only.
196 template <AMDGPUDisassembler::OpWidthTy OpWidth>
197 static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
198                                   uint64_t /* Addr */,
199                                   const MCDisassembler *Decoder) {
200   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
201                      AMDGPU::OperandSemantics::INT, Decoder);
202 }
203 
204 // Decoder for Src (9-bit encoding) AGPR; the register number is encoded in 9
205 // bits. Set Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp.
206 // Registers only.
207 template <AMDGPUDisassembler::OpWidthTy OpWidth>
208 static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
209                                 const MCDisassembler *Decoder) {
210   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
211                      AMDGPU::OperandSemantics::INT, Decoder);
212 }
213 
214 // Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
215 // and Imm{9} is acc. Registers only.
216 template <AMDGPUDisassembler::OpWidthTy OpWidth>
217 static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
218                                   uint64_t /* Addr */,
219                                   const MCDisassembler *Decoder) {
220   return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
221                      AMDGPU::OperandSemantics::INT, Decoder);
222 }
223 
224 // Decoder for RegisterOperands using the 9-bit Src encoding. The operand can be
225 // a register from RegClass or an immediate. Registers outside RegClass are
226 // still decoded, and InstPrinter will report a warning. An immediate is decoded
227 // into a constant of size ImmWidth, which should match the immediate width used
228 // by OperandType (important for floating-point types).
229 template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
230           unsigned OperandSemantics>
231 static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
232                                        uint64_t /* Addr */,
233                                        const MCDisassembler *Decoder) {
234   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
235                      (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
236 }
237 
238 // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239 // and decode using 'enum10' from decodeSrcOp.
240 template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
241           unsigned OperandSemantics>
242 static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
243                                         uint64_t /* Addr */,
244                                         const MCDisassembler *Decoder) {
245   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
246                      (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
247 }
248 
249 template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
250           unsigned OperandSemantics>
251 static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
252                                                uint64_t /* Addr */,
253                                                const MCDisassembler *Decoder) {
254   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
255                      (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
256 }
257 
258 // Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
259 // when RegisterClass is used as an operand. Most often used for destination
260 // operands.
261 
262 DECODE_OPERAND_REG_8(VGPR_32)
263 DECODE_OPERAND_REG_8(VGPR_32_Lo128)
264 DECODE_OPERAND_REG_8(VReg_64)
265 DECODE_OPERAND_REG_8(VReg_96)
266 DECODE_OPERAND_REG_8(VReg_128)
267 DECODE_OPERAND_REG_8(VReg_256)
268 DECODE_OPERAND_REG_8(VReg_288)
269 DECODE_OPERAND_REG_8(VReg_352)
270 DECODE_OPERAND_REG_8(VReg_384)
271 DECODE_OPERAND_REG_8(VReg_512)
272 DECODE_OPERAND_REG_8(VReg_1024)
273 
274 DECODE_OPERAND_REG_7(SReg_32, OPW32)
275 DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
276 DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
277 DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
278 DECODE_OPERAND_REG_7(SReg_64, OPW64)
279 DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
280 DECODE_OPERAND_REG_7(SReg_96, OPW96)
281 DECODE_OPERAND_REG_7(SReg_128, OPW128)
282 DECODE_OPERAND_REG_7(SReg_256, OPW256)
283 DECODE_OPERAND_REG_7(SReg_512, OPW512)
284 
285 DECODE_OPERAND_REG_8(AGPR_32)
286 DECODE_OPERAND_REG_8(AReg_64)
287 DECODE_OPERAND_REG_8(AReg_128)
288 DECODE_OPERAND_REG_8(AReg_256)
289 DECODE_OPERAND_REG_8(AReg_512)
290 DECODE_OPERAND_REG_8(AReg_1024)
291 
292 static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
293                                                uint64_t /*Addr*/,
294                                                const MCDisassembler *Decoder) {
295   assert(isUInt<10>(Imm) && "10-bit encoding expected");
296   assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
297 
298   bool IsHi = Imm & (1 << 9);
299   unsigned RegIdx = Imm & 0xff;
300   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
301   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
302 }
303 
304 static DecodeStatus
305 DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
306                                  const MCDisassembler *Decoder) {
307   assert(isUInt<8>(Imm) && "8-bit encoding expected");
308 
309   bool IsHi = Imm & (1 << 7);
310   unsigned RegIdx = Imm & 0x7f;
311   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
312   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
313 }
314 
315 static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
316                                                 uint64_t /*Addr*/,
317                                                 const MCDisassembler *Decoder) {
318   assert(isUInt<9>(Imm) && "9-bit encoding expected");
319 
320   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
321   bool IsVGPR = Imm & (1 << 8);
322   if (IsVGPR) {
323     bool IsHi = Imm & (1 << 7);
324     unsigned RegIdx = Imm & 0x7f;
325     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
326   }
327   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
328                                                    Imm & 0xFF, false, 16));
329 }
330 
331 static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
332                                           uint64_t /*Addr*/,
333                                           const MCDisassembler *Decoder) {
334   assert(isUInt<10>(Imm) && "10-bit encoding expected");
335 
336   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
337   bool IsVGPR = Imm & (1 << 8);
338   if (IsVGPR) {
339     bool IsHi = Imm & (1 << 9);
340     unsigned RegIdx = Imm & 0xff;
341     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
342   }
343   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
344                                                    Imm & 0xFF, false, 16));
345 }
346 
347 static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
348                                          uint64_t Addr,
349                                          const MCDisassembler *Decoder) {
350   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
351   return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
352 }
353 
354 static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
355                                           uint64_t Addr, const void *Decoder) {
356   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
357   return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
358 }
359 
360 static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
361                           const MCRegisterInfo *MRI) {
362   if (OpIdx < 0)
363     return false;
364 
365   const MCOperand &Op = Inst.getOperand(OpIdx);
366   if (!Op.isReg())
367     return false;
368 
369   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
370   auto Reg = Sub ? Sub : Op.getReg();
371   return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
372 }
373 
374 static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
375                                  AMDGPUDisassembler::OpWidthTy Opw,
376                                  const MCDisassembler *Decoder) {
377   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
378   if (!DAsm->isGFX90A()) {
379     Imm &= 511;
380   } else {
381     // If an atomic has both vdata and vdst, their register classes are tied.
382     // The bit is decoded along with the vdst, the first operand. We need to
383     // change the register class to AGPR if vdst was AGPR.
384     // If a DS instruction has both data0 and data1, their register classes
385     // are also tied.
386     unsigned Opc = Inst.getOpcode();
387     uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
388     uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
389                                                         : AMDGPU::OpName::vdata;
390     const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
391     int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
392     if ((int)Inst.getNumOperands() == DataIdx) {
393       int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
394       if (IsAGPROperand(Inst, DstIdx, MRI))
395         Imm |= 512;
396     }
397 
398     if (TSFlags & SIInstrFlags::DS) {
399       int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
400       if ((int)Inst.getNumOperands() == Data2Idx &&
401           IsAGPROperand(Inst, DataIdx, MRI))
402         Imm |= 512;
403     }
404   }
405   return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
406 }
407 
408 template <AMDGPUDisassembler::OpWidthTy Opw>
409 static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
410                                  uint64_t /* Addr */,
411                                  const MCDisassembler *Decoder) {
412   return decodeAVLdSt(Inst, Imm, Opw, Decoder);
413 }
414 
415 static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
416                                            uint64_t Addr,
417                                            const MCDisassembler *Decoder) {
418   assert(Imm < (1 << 9) && "9-bit encoding");
419   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
420   return addOperand(Inst,
421                     DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
422                                       AMDGPU::OperandSemantics::FP64));
423 }
424 
425 #define DECODE_SDWA(DecName) \
426 DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
427 
428 DECODE_SDWA(Src32)
429 DECODE_SDWA(Src16)
430 DECODE_SDWA(VopcDst)
431 
432 static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
433                                      uint64_t /* Addr */,
434                                      const MCDisassembler *Decoder) {
435   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
436   return addOperand(Inst, DAsm->decodeVersionImm(Imm));
437 }
438 
439 #include "AMDGPUGenDisassemblerTables.inc"
440 
441 //===----------------------------------------------------------------------===//
442 //
443 //===----------------------------------------------------------------------===//
444 
445 template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
446   assert(Bytes.size() >= sizeof(T));
447   const auto Res =
448       support::endian::read<T, llvm::endianness::little>(Bytes.data());
449   Bytes = Bytes.slice(sizeof(T));
450   return Res;
451 }
452 
453 static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
454   assert(Bytes.size() >= 12);
455   uint64_t Lo =
456       support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
457   Bytes = Bytes.slice(8);
458   uint64_t Hi =
459       support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
460   Bytes = Bytes.slice(4);
461   return DecoderUInt128(Lo, Hi);
462 }
463 
464 DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
465                                                 ArrayRef<uint8_t> Bytes_,
466                                                 uint64_t Address,
467                                                 raw_ostream &CS) const {
468   unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
469   Bytes = Bytes_.slice(0, MaxInstBytesNum);
470 
471   // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
472   // there are fewer bytes left). This will be overridden on success.
473   Size = std::min((size_t)4, Bytes_.size());
474 
475   do {
476     // ToDo: it would be better to switch on the encoding length using some
477     // bit predicate, but it is unknown yet, so try everything we can.
478 
479     // Try to decode DPP and SDWA first to resolve the conflict with VOP1 and
480     // VOP2 encodings.
481     if (isGFX11Plus() && Bytes.size() >= 12) {
482       DecoderUInt128 DecW = eat12Bytes(Bytes);
483 
484       if (isGFX11() &&
485           tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
486                         DecW, Address, CS))
487         break;
488 
489       if (isGFX12() &&
490           tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
491                         DecW, Address, CS))
492         break;
493 
494       if (isGFX12() &&
495           tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
496         break;
497 
498       // Reinitialize Bytes
499       Bytes = Bytes_.slice(0, MaxInstBytesNum);
500     }
501 
502     if (Bytes.size() >= 8) {
503       const uint64_t QW = eatBytes<uint64_t>(Bytes);
504 
505       if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
506           tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
507         break;
508 
509       if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
510           tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
511         break;
512 
513       // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
514       // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
515       // table first so we print the correct name.
516       if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
517           tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
518         break;
519 
520       if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
521           tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
522         break;
523 
524       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
525           tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
526         break;
527 
528       if ((isVI() || isGFX9()) &&
529           tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
530         break;
531 
532       if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
533         break;
534 
535       if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
536         break;
537 
538       if (isGFX12() &&
539           tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
540                         Address, CS))
541         break;
542 
543       if (isGFX11() &&
544           tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
545                         Address, CS))
546         break;
547 
548       if (isGFX11() &&
549           tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
550         break;
551 
552       if (isGFX12() &&
553           tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
554         break;
555 
556       // Reinitialize Bytes
557       Bytes = Bytes_.slice(0, MaxInstBytesNum);
558     }
559 
560     // Try to decode a 32-bit instruction.
561     if (Bytes.size() >= 4) {
562       const uint32_t DW = eatBytes<uint32_t>(Bytes);
563 
564       if ((isVI() || isGFX9()) &&
565           tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
566         break;
567 
568       if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
569         break;
570 
571       if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
572         break;
573 
574       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
575           tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
576         break;
577 
578       if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
579           tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
580         break;
581 
582       if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
583         break;
584 
585       if (isGFX11() &&
586           tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
587                         Address, CS))
588         break;
589 
590       if (isGFX12() &&
591           tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
592                         Address, CS))
593         break;
594     }
595 
596     return MCDisassembler::Fail;
597   } while (false);
598 
599   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
600     if (isMacDPP(MI))
601       convertMacDPPInst(MI);
602 
603     if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
604       convertVOP3PDPPInst(MI);
605     else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
606              AMDGPU::isVOPC64DPP(MI.getOpcode()))
607       convertVOPCDPPInst(MI); // Special VOP3 case
608     else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
609              -1)
610       convertDPP8Inst(MI);
611     else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
612       convertVOP3DPPInst(MI); // Regular VOP3 case
613   }
614 
615   if (AMDGPU::isMAC(MI.getOpcode())) {
616     // Insert dummy unused src2_modifiers.
617     insertNamedMCOperand(MI, MCOperand::createImm(0),
618                          AMDGPU::OpName::src2_modifiers);
619   }
620 
621   if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
622       MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
623     // Insert dummy unused src2_modifiers.
624     insertNamedMCOperand(MI, MCOperand::createImm(0),
625                          AMDGPU::OpName::src2_modifiers);
626   }
627 
628   if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
629       !AMDGPU::hasGDS(STI)) {
630     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
631   }
632 
633   if (MCII->get(MI.getOpcode()).TSFlags &
634       (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
635     int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
636                                              AMDGPU::OpName::cpol);
637     if (CPolPos != -1) {
638       unsigned CPol =
639           (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
640               AMDGPU::CPol::GLC : 0;
641       if (MI.getNumOperands() <= (unsigned)CPolPos) {
642         insertNamedMCOperand(MI, MCOperand::createImm(CPol),
643                              AMDGPU::OpName::cpol);
644       } else if (CPol) {
645         MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
646       }
647     }
648   }
649 
650   if ((MCII->get(MI.getOpcode()).TSFlags &
651        (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
652       (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
653     // GFX90A lost TFE; its place is occupied by ACC.
654     int TFEOpIdx =
655         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
656     if (TFEOpIdx != -1) {
657       auto TFEIter = MI.begin();
658       std::advance(TFEIter, TFEOpIdx);
659       MI.insert(TFEIter, MCOperand::createImm(0));
660     }
661   }
662 
663   if (MCII->get(MI.getOpcode()).TSFlags &
664       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
665     int SWZOpIdx =
666         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
667     if (SWZOpIdx != -1) {
668       auto SWZIter = MI.begin();
669       std::advance(SWZIter, SWZOpIdx);
670       MI.insert(SWZIter, MCOperand::createImm(0));
671     }
672   }
673 
674   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
675     int VAddr0Idx =
676         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
677     int RsrcIdx =
678         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
679     unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
680     if (VAddr0Idx >= 0 && NSAArgs > 0) {
681       unsigned NSAWords = (NSAArgs + 3) / 4;
682       if (Bytes.size() < 4 * NSAWords)
683         return MCDisassembler::Fail;
684       for (unsigned i = 0; i < NSAArgs; ++i) {
685         const unsigned VAddrIdx = VAddr0Idx + 1 + i;
686         auto VAddrRCID =
687             MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
688         MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
689       }
690       Bytes = Bytes.slice(4 * NSAWords);
691     }
692 
693     convertMIMGInst(MI);
694   }
695 
696   if (MCII->get(MI.getOpcode()).TSFlags &
697       (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
698     convertMIMGInst(MI);
699 
700   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
701     convertEXPInst(MI);
702 
703   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
704     convertVINTERPInst(MI);
705 
706   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
707     convertSDWAInst(MI);
708 
709   int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
710                                               AMDGPU::OpName::vdst_in);
711   if (VDstIn_Idx != -1) {
712     int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
713                            MCOI::OperandConstraint::TIED_TO);
714     if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
715          !MI.getOperand(VDstIn_Idx).isReg() ||
716          MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
717       if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
718         MI.erase(&MI.getOperand(VDstIn_Idx));
719       insertNamedMCOperand(MI,
720         MCOperand::createReg(MI.getOperand(Tied).getReg()),
721         AMDGPU::OpName::vdst_in);
722     }
723   }
724 
725   int ImmLitIdx =
726       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
727   bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
728   if (ImmLitIdx != -1 && !IsSOPK)
729     convertFMAanyK(MI, ImmLitIdx);
730 
731   Size = MaxInstBytesNum - Bytes.size();
732   return MCDisassembler::Success;
733 }
734 
735 void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
736   if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
737     // The MCInst still has these fields even though they are no longer encoded
738     // in the GFX11 instruction.
739     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
740     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
741   }
742 }
743 
744 void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
745   if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
746       MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
747       MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
748       MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
749       MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
750       MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
751       MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
752       MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
753     // The MCInst has this field even though it is not directly encoded in
754     // the instruction.
755     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
756   }
757 }
758 
759 void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
760   if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
761       STI.hasFeature(AMDGPU::FeatureGFX10)) {
762     if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
763       // VOPC - insert clamp
764       insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
765   } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
766     int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
767     if (SDst != -1) {
768       // VOPC - insert VCC register as sdst
769       insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
770                            AMDGPU::OpName::sdst);
771     } else {
772       // VOP1/2 - insert omod if present in instruction
773       insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
774     }
775   }
776 }
777 
778 struct VOPModifiers {
779   unsigned OpSel = 0;
780   unsigned OpSelHi = 0;
781   unsigned NegLo = 0;
782   unsigned NegHi = 0;
783 };
784 
785 // Reconstruct values of VOP3/VOP3P operands such as op_sel.
786 // Note that these values do not affect disassembler output,
787 // so this is only necessary for consistency with src_modifiers.
788 static VOPModifiers collectVOPModifiers(const MCInst &MI,
789                                         bool IsVOP3P = false) {
790   VOPModifiers Modifiers;
791   unsigned Opc = MI.getOpcode();
792   const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
793                         AMDGPU::OpName::src1_modifiers,
794                         AMDGPU::OpName::src2_modifiers};
795   for (int J = 0; J < 3; ++J) {
796     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
797     if (OpIdx == -1)
798       continue;
799 
800     unsigned Val = MI.getOperand(OpIdx).getImm();
801 
802     Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
803     if (IsVOP3P) {
804       Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
805       Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
806       Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
807     } else if (J == 0) {
808       Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
809     }
810   }
811 
812   return Modifiers;
813 }
814 
815 // Instructions decode the op_sel/suffix bits into the src_modifier
816 // operands. Copy those bits into the src operands for true16 VGPRs.
817 void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
818   const unsigned Opc = MI.getOpcode();
819   const MCRegisterClass &ConversionRC =
820       MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
821   constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
822       {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
823         SISrcMods::OP_SEL_0},
824        {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
825         SISrcMods::OP_SEL_0},
826        {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
827         SISrcMods::OP_SEL_0},
828        {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
829         SISrcMods::DST_OP_SEL}}};
830   for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
831     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
832     int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
833     if (OpIdx == -1 || OpModsIdx == -1)
834       continue;
835     MCOperand &Op = MI.getOperand(OpIdx);
836     if (!Op.isReg())
837       continue;
838     if (!ConversionRC.contains(Op.getReg()))
839       continue;
840     unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
841     const MCOperand &OpMods = MI.getOperand(OpModsIdx);
842     unsigned ModVal = OpMods.getImm();
843     if (ModVal & OpSelMask) { // isHi
844       unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
845       Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
846     }
847   }
848 }
849 
850 // MAC opcodes have special old and src2 operands.
851 // src2 is tied to dst, while old is not tied (but assumed to be).
852 bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
853   constexpr int DST_IDX = 0;
854   auto Opcode = MI.getOpcode();
855   const auto &Desc = MCII->get(Opcode);
856   auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
857 
858   if (OldIdx != -1 && Desc.getOperandConstraint(
859                           OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
860     assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
861     assert(Desc.getOperandConstraint(
862                AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
863                MCOI::OperandConstraint::TIED_TO) == DST_IDX);
864     (void)DST_IDX;
865     return true;
866   }
867 
868   return false;
869 }
870 
871 // Create dummy old operand and insert dummy unused src2_modifiers
872 void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
873   assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
874   insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
875   insertNamedMCOperand(MI, MCOperand::createImm(0),
876                        AMDGPU::OpName::src2_modifiers);
877 }
878 
879 void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
880   unsigned Opc = MI.getOpcode();
881 
882   int VDstInIdx =
883       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
884   if (VDstInIdx != -1)
885     insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
886 
887   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
888   if (MI.getNumOperands() < DescNumOps &&
889       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
890     convertTrue16OpSel(MI);
891     auto Mods = collectVOPModifiers(MI);
892     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
893                          AMDGPU::OpName::op_sel);
894   } else {
895     // Insert dummy unused src modifiers.
896     if (MI.getNumOperands() < DescNumOps &&
897         AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
898       insertNamedMCOperand(MI, MCOperand::createImm(0),
899                            AMDGPU::OpName::src0_modifiers);
900 
901     if (MI.getNumOperands() < DescNumOps &&
902         AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
903       insertNamedMCOperand(MI, MCOperand::createImm(0),
904                            AMDGPU::OpName::src1_modifiers);
905   }
906 }
907 
908 void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
909   convertTrue16OpSel(MI);
910 
911   int VDstInIdx =
912       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
913   if (VDstInIdx != -1)
914     insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
915 
916   unsigned Opc = MI.getOpcode();
917   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
918   if (MI.getNumOperands() < DescNumOps &&
919       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
920     auto Mods = collectVOPModifiers(MI);
921     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
922                          AMDGPU::OpName::op_sel);
923   }
924 }
925 
926 // Note that before gfx10, the MIMG encoding provided no information about
927 // VADDR size. Consequently, decoded instructions always show the address as if
928 // it had 1 dword, which may not actually be the case.
929 void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
930   auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
931 
932   int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
933                                            AMDGPU::OpName::vdst);
934 
935   int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
936                                             AMDGPU::OpName::vdata);
937   int VAddr0Idx =
938       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
939   int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
940                                                   : AMDGPU::OpName::rsrc;
941   int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
942   int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
943                                             AMDGPU::OpName::dmask);
944 
945   int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
946                                             AMDGPU::OpName::tfe);
947   int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
948                                             AMDGPU::OpName::d16);
949 
950   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
951   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
952       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
953 
954   assert(VDataIdx != -1);
955   if (BaseOpcode->BVH) {
956     // Add A16 operand for intersect_ray instructions
957     addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
958     return;
959   }
960 
961   bool IsAtomic = (VDstIdx != -1);
962   bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
963   bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
964   bool IsNSA = false;
965   bool IsPartialNSA = false;
966   unsigned AddrSize = Info->VAddrDwords;
967 
968   if (isGFX10Plus()) {
969     unsigned DimIdx =
970         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
971     int A16Idx =
972         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
973     const AMDGPU::MIMGDimInfo *Dim =
974         AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
975     const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
976 
977     AddrSize =
978         AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
979 
980     // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
981     // VIMAGE insts other than BVH never use vaddr4.
982     IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
983             Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
984             Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
985     if (!IsNSA) {
986       if (!IsVSample && AddrSize > 12)
987         AddrSize = 16;
988     } else {
989       if (AddrSize > Info->VAddrDwords) {
990         if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
991           // The NSA encoding does not contain enough operands for the
992           // combination of base opcode / dimension. Should this be an error?
993           return;
994         }
995         IsPartialNSA = true;
996       }
997     }
998   }
999 
1000   unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1001   unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1002 
1003   bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1004   if (D16 && AMDGPU::hasPackedD16(STI)) {
1005     DstSize = (DstSize + 1) / 2;
1006   }
1007 
1008   if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1009     DstSize += 1;
1010 
1011   if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1012     return;
1013 
1014   int NewOpcode =
1015       AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1016   if (NewOpcode == -1)
1017     return;
1018 
1019   // Widen the register to the correct number of enabled channels.
1020   unsigned NewVdata = AMDGPU::NoRegister;
1021   if (DstSize != Info->VDataDwords) {
1022     auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1023 
1024     // Get first subregister of VData
1025     unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
1026     unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1027     Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1028 
1029     NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1030                                        &MRI.getRegClass(DataRCID));
1031     if (NewVdata == AMDGPU::NoRegister) {
1032       // It's possible to encode this such that the low register + enabled
1033       // components exceeds the register count.
1034       return;
1035     }
1036   }
1037 
1038   // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1039   // If using partial NSA on GFX11+, widen the last address register.
1040   int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1041   unsigned NewVAddrSA = AMDGPU::NoRegister;
1042   if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1043       AddrSize != Info->VAddrDwords) {
1044     unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1045     unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1046     VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1047 
1048     auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1049     NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1050                                         &MRI.getRegClass(AddrRCID));
1051     if (!NewVAddrSA)
1052       return;
1053   }
1054 
1055   MI.setOpcode(NewOpcode);
1056 
1057   if (NewVdata != AMDGPU::NoRegister) {
1058     MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1059 
1060     if (IsAtomic) {
1061       // Atomic operations have an additional operand (a copy of data)
1062       MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1063     }
1064   }
1065 
1066   if (NewVAddrSA) {
1067     MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1068   } else if (IsNSA) {
1069     assert(AddrSize <= Info->VAddrDwords);
1070     MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1071              MI.begin() + VAddr0Idx + Info->VAddrDwords);
1072   }
1073 }
1074 
1075 // Opsel and neg bits are used both in src_modifiers and in standalone operands.
1076 // The autogenerated decoder only adds them to src_modifiers, so manually add
1077 // the bits to the other operands.
1078 void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1079   unsigned Opc = MI.getOpcode();
1080   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1081   auto Mods = collectVOPModifiers(MI, true);
1082 
1083   if (MI.getNumOperands() < DescNumOps &&
1084       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1085     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1086 
1087   if (MI.getNumOperands() < DescNumOps &&
1088       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1089     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1090                          AMDGPU::OpName::op_sel);
1091   if (MI.getNumOperands() < DescNumOps &&
1092       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1093     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1094                          AMDGPU::OpName::op_sel_hi);
1095   if (MI.getNumOperands() < DescNumOps &&
1096       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1097     insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1098                          AMDGPU::OpName::neg_lo);
1099   if (MI.getNumOperands() < DescNumOps &&
1100       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1101     insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1102                          AMDGPU::OpName::neg_hi);
1103 }
1104 
1105 // Create dummy old operand and insert optional operands
1106 void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1107   unsigned Opc = MI.getOpcode();
1108   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1109 
1110   if (MI.getNumOperands() < DescNumOps &&
1111       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1112     insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1113 
1114   if (MI.getNumOperands() < DescNumOps &&
1115       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1116     insertNamedMCOperand(MI, MCOperand::createImm(0),
1117                          AMDGPU::OpName::src0_modifiers);
1118 
1119   if (MI.getNumOperands() < DescNumOps &&
1120       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1121     insertNamedMCOperand(MI, MCOperand::createImm(0),
1122                          AMDGPU::OpName::src1_modifiers);
1123 }
1124 
1125 void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
1126   assert(HasLiteral && "Should have decoded a literal");
1127   const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1128   unsigned DescNumOps = Desc.getNumOperands();
1129   insertNamedMCOperand(MI, MCOperand::createImm(Literal),
1130                        AMDGPU::OpName::immDeferred);
1131   assert(DescNumOps == MI.getNumOperands());
1132   for (unsigned I = 0; I < DescNumOps; ++I) {
1133     auto &Op = MI.getOperand(I);
1134     auto OpType = Desc.operands()[I].OperandType;
1135     bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1136                          OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
1137     if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1138         IsDeferredOp)
1139       Op.setImm(Literal);
1140   }
1141 }
1142 
1143 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1144   return getContext().getRegisterInfo()->
1145     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1146 }
1147 
1148 inline
1149 MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1150                                          const Twine& ErrMsg) const {
1151   *CommentStream << "Error: " + ErrMsg;
1152 
1153   // ToDo: add support for error operands to MCInst.h
1154   // return MCOperand::createError(V);
1155   return MCOperand();
1156 }
1157 
1158 inline
1159 MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1160   return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
1161 }
1162 
1163 inline
1164 MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1165                                                unsigned Val) const {
1166   const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1167   if (Val >= RegCl.getNumRegs())
1168     return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1169                            ": unknown register " + Twine(Val));
1170   return createRegOperand(RegCl.getRegister(Val));
1171 }
1172 
1173 inline
1174 MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1175                                                 unsigned Val) const {
1176   // ToDo: SI/CI have 104 SGPRs, VI has 102.
1177   // Valery: here we accept as much as we can and let the assembler sort it out.
1178   int shift = 0;
1179   switch (SRegClassID) {
1180   case AMDGPU::SGPR_32RegClassID:
1181   case AMDGPU::TTMP_32RegClassID:
1182     break;
1183   case AMDGPU::SGPR_64RegClassID:
1184   case AMDGPU::TTMP_64RegClassID:
1185     shift = 1;
1186     break;
1187   case AMDGPU::SGPR_96RegClassID:
1188   case AMDGPU::TTMP_96RegClassID:
1189   case AMDGPU::SGPR_128RegClassID:
1190   case AMDGPU::TTMP_128RegClassID:
1191   // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1192   // this bundle?
1193   case AMDGPU::SGPR_256RegClassID:
1194   case AMDGPU::TTMP_256RegClassID:
1195   // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1196   // this bundle?
1197   case AMDGPU::SGPR_288RegClassID:
1198   case AMDGPU::TTMP_288RegClassID:
1199   case AMDGPU::SGPR_320RegClassID:
1200   case AMDGPU::TTMP_320RegClassID:
1201   case AMDGPU::SGPR_352RegClassID:
1202   case AMDGPU::TTMP_352RegClassID:
1203   case AMDGPU::SGPR_384RegClassID:
1204   case AMDGPU::TTMP_384RegClassID:
1205   case AMDGPU::SGPR_512RegClassID:
1206   case AMDGPU::TTMP_512RegClassID:
1207     shift = 2;
1208     break;
1209   // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1210   // this bundle?
1211   default:
1212     llvm_unreachable("unhandled register class");
1213   }
1214 
1215   if (Val % (1 << shift)) {
1216     *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1217                    << ": scalar reg isn't aligned " << Val;
1218   }
1219 
1220   return createRegOperand(SRegClassID, Val >> shift);
1221 }
1222 
1223 MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1224                                                   bool IsHi) const {
1225   unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1226   return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1227 }
1228 
1229 // Decode literals for instructions that always have a literal in the encoding.
1230 MCOperand
1231 AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1232   if (HasLiteral) {
1233     assert(
1234         AMDGPU::hasVOPD(STI) &&
1235         "Should only decode multiple kimm with VOPD, check VSrc operand types");
1236     if (Literal != Val)
1237       return errOperand(Val, "More than one unique literal is illegal");
1238   }
1239   HasLiteral = true;
1240   Literal = Val;
1241   return MCOperand::createImm(Literal);
1242 }
1243 
1244 MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
1245   // For now, all literal constants are assumed to be unsigned integers.
1246   // ToDo: deal with signed/unsigned 64-bit integer constants
1247   // ToDo: deal with float/double constants
1248   if (!HasLiteral) {
1249     if (Bytes.size() < 4) {
1250       return errOperand(0, "cannot read literal, inst bytes left " +
1251                         Twine(Bytes.size()));
1252     }
1253     HasLiteral = true;
1254     Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1255     if (ExtendFP64)
1256       Literal64 <<= 32;
1257   }
1258   return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
1259 }
1260 
1261 MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1262   using namespace AMDGPU::EncValues;
1263 
1264   assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1265   return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1266     (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1267     (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1268       // Cast prevents negative overflow.
1269 }
1270 
1271 static int64_t getInlineImmVal32(unsigned Imm) {
1272   switch (Imm) {
1273   case 240:
1274     return llvm::bit_cast<uint32_t>(0.5f);
1275   case 241:
1276     return llvm::bit_cast<uint32_t>(-0.5f);
1277   case 242:
1278     return llvm::bit_cast<uint32_t>(1.0f);
1279   case 243:
1280     return llvm::bit_cast<uint32_t>(-1.0f);
1281   case 244:
1282     return llvm::bit_cast<uint32_t>(2.0f);
1283   case 245:
1284     return llvm::bit_cast<uint32_t>(-2.0f);
1285   case 246:
1286     return llvm::bit_cast<uint32_t>(4.0f);
1287   case 247:
1288     return llvm::bit_cast<uint32_t>(-4.0f);
1289   case 248: // 1 / (2 * PI)
1290     return 0x3e22f983;
1291   default:
1292     llvm_unreachable("invalid fp inline imm");
1293   }
1294 }
1295 
1296 static int64_t getInlineImmVal64(unsigned Imm) {
1297   switch (Imm) {
1298   case 240:
1299     return llvm::bit_cast<uint64_t>(0.5);
1300   case 241:
1301     return llvm::bit_cast<uint64_t>(-0.5);
1302   case 242:
1303     return llvm::bit_cast<uint64_t>(1.0);
1304   case 243:
1305     return llvm::bit_cast<uint64_t>(-1.0);
1306   case 244:
1307     return llvm::bit_cast<uint64_t>(2.0);
1308   case 245:
1309     return llvm::bit_cast<uint64_t>(-2.0);
1310   case 246:
1311     return llvm::bit_cast<uint64_t>(4.0);
1312   case 247:
1313     return llvm::bit_cast<uint64_t>(-4.0);
1314   case 248: // 1 / (2 * PI)
1315     return 0x3fc45f306dc9c882;
1316   default:
1317     llvm_unreachable("invalid fp inline imm");
1318   }
1319 }
1320 
1321 static int64_t getInlineImmValF16(unsigned Imm) {
1322   switch (Imm) {
1323   case 240:
1324     return 0x3800;
1325   case 241:
1326     return 0xB800;
1327   case 242:
1328     return 0x3C00;
1329   case 243:
1330     return 0xBC00;
1331   case 244:
1332     return 0x4000;
1333   case 245:
1334     return 0xC000;
1335   case 246:
1336     return 0x4400;
1337   case 247:
1338     return 0xC400;
1339   case 248: // 1 / (2 * PI)
1340     return 0x3118;
1341   default:
1342     llvm_unreachable("invalid fp inline imm");
1343   }
1344 }
1345 
1346 static int64_t getInlineImmValBF16(unsigned Imm) {
1347   switch (Imm) {
1348   case 240:
1349     return 0x3F00;
1350   case 241:
1351     return 0xBF00;
1352   case 242:
1353     return 0x3F80;
1354   case 243:
1355     return 0xBF80;
1356   case 244:
1357     return 0x4000;
1358   case 245:
1359     return 0xC000;
1360   case 246:
1361     return 0x4080;
1362   case 247:
1363     return 0xC080;
1364   case 248: // 1 / (2 * PI)
1365     return 0x3E22;
1366   default:
1367     llvm_unreachable("invalid fp inline imm");
1368   }
1369 }
1370 
1371 static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1372   return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
1373                                                   : getInlineImmValF16(Imm);
1374 }
1375 
1376 MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1377                                             AMDGPU::OperandSemantics Sema) {
1378   assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
1379          Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1380 
1381   // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1382   // ImmWidth 0 is the default case where the operand should not allow immediates.
1383   // The Imm value is still decoded into a 32-bit immediate operand; the
1384   // instruction printer will use it to print a verbose error message.
1385   switch (ImmWidth) {
1386   case 0:
1387   case 32:
1388     return MCOperand::createImm(getInlineImmVal32(Imm));
1389   case 64:
1390     return MCOperand::createImm(getInlineImmVal64(Imm));
1391   case 16:
1392     return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
1393   default:
1394     llvm_unreachable("implement me");
1395   }
1396 }
1397 
1398 unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
1399   using namespace AMDGPU;
1400 
1401   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1402   switch (Width) {
1403   default: // fall through
1404   case OPW32:
1405   case OPW16:
1406   case OPWV216:
1407     return VGPR_32RegClassID;
1408   case OPW64:
1409   case OPWV232: return VReg_64RegClassID;
1410   case OPW96: return VReg_96RegClassID;
1411   case OPW128: return VReg_128RegClassID;
1412   case OPW160: return VReg_160RegClassID;
1413   case OPW256: return VReg_256RegClassID;
1414   case OPW288: return VReg_288RegClassID;
1415   case OPW320: return VReg_320RegClassID;
1416   case OPW352: return VReg_352RegClassID;
1417   case OPW384: return VReg_384RegClassID;
1418   case OPW512: return VReg_512RegClassID;
1419   case OPW1024: return VReg_1024RegClassID;
1420   }
1421 }
1422 
1423 unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1424   using namespace AMDGPU;
1425 
1426   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1427   switch (Width) {
1428   default: // fall through
1429   case OPW32:
1430   case OPW16:
1431   case OPWV216:
1432     return AGPR_32RegClassID;
1433   case OPW64:
1434   case OPWV232: return AReg_64RegClassID;
1435   case OPW96: return AReg_96RegClassID;
1436   case OPW128: return AReg_128RegClassID;
1437   case OPW160: return AReg_160RegClassID;
1438   case OPW256: return AReg_256RegClassID;
1439   case OPW288: return AReg_288RegClassID;
1440   case OPW320: return AReg_320RegClassID;
1441   case OPW352: return AReg_352RegClassID;
1442   case OPW384: return AReg_384RegClassID;
1443   case OPW512: return AReg_512RegClassID;
1444   case OPW1024: return AReg_1024RegClassID;
1445   }
1446 }
1447 
1448 
1449 unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
1450   using namespace AMDGPU;
1451 
1452   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1453   switch (Width) {
1454   default: // fall through
1455   case OPW32:
1456   case OPW16:
1457   case OPWV216:
1458     return SGPR_32RegClassID;
1459   case OPW64:
1460   case OPWV232: return SGPR_64RegClassID;
1461   case OPW96: return SGPR_96RegClassID;
1462   case OPW128: return SGPR_128RegClassID;
1463   case OPW160: return SGPR_160RegClassID;
1464   case OPW256: return SGPR_256RegClassID;
1465   case OPW288: return SGPR_288RegClassID;
1466   case OPW320: return SGPR_320RegClassID;
1467   case OPW352: return SGPR_352RegClassID;
1468   case OPW384: return SGPR_384RegClassID;
1469   case OPW512: return SGPR_512RegClassID;
1470   }
1471 }
1472 
1473 unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
1474   using namespace AMDGPU;
1475 
1476   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1477   switch (Width) {
1478   default: // fall through
1479   case OPW32:
1480   case OPW16:
1481   case OPWV216:
1482     return TTMP_32RegClassID;
1483   case OPW64:
1484   case OPWV232: return TTMP_64RegClassID;
1485   case OPW128: return TTMP_128RegClassID;
1486   case OPW256: return TTMP_256RegClassID;
1487   case OPW288: return TTMP_288RegClassID;
1488   case OPW320: return TTMP_320RegClassID;
1489   case OPW352: return TTMP_352RegClassID;
1490   case OPW384: return TTMP_384RegClassID;
1491   case OPW512: return TTMP_512RegClassID;
1492   }
1493 }
1494 
1495 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1496   using namespace AMDGPU::EncValues;
1497 
1498   unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1499   unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1500 
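       // Return a zero-based index into the trap temporary (TTMP) registers, or -1
       // if Val lies outside the TTMP encoding range for this subtarget.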
1501   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1502 }
1503 
1504 MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
1505                                           bool MandatoryLiteral,
1506                                           unsigned ImmWidth,
1507                                           AMDGPU::OperandSemantics Sema) const {
1508   using namespace AMDGPU::EncValues;
1509 
1510   assert(Val < 1024); // enum10
1511 
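       // In the 10-bit source encoding, bit 9 selects the accumulator (AGPR) file;
       // the low 9 bits carry the operand value.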
1512   bool IsAGPR = Val & 512;
1513   Val &= 511;
1514 
1515   if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1516     return createRegOperand(IsAGPR ? getAgprClassId(Width)
1517                                    : getVgprClassId(Width), Val - VGPR_MIN);
1518   }
1519   return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1520                             Sema);
1521 }
1522 
1523 MCOperand
1524 AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
1525                                        bool MandatoryLiteral, unsigned ImmWidth,
1526                                        AMDGPU::OperandSemantics Sema) const {
1527   // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
1528   // decoded earlier.
1529   assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1530   using namespace AMDGPU::EncValues;
1531 
1532   if (Val <= SGPR_MAX) {
1533     // "SGPR_MIN <= Val" is always true and causes compilation warning.
1534     static_assert(SGPR_MIN == 0);
1535     return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1536   }
1537 
1538   int TTmpIdx = getTTmpIdx(Val);
1539   if (TTmpIdx >= 0) {
1540     return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1541   }
1542 
1543   if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1544     return decodeIntImmed(Val);
1545 
1546   if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1547     return decodeFPImmed(ImmWidth, Val, Sema);
1548 
1549   if (Val == LITERAL_CONST) {
1550     if (MandatoryLiteral)
1551       // Keep a sentinel value for deferred setting
1552       return MCOperand::createImm(LITERAL_CONST);
1553     else
1554       return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
1555   }
1556 
1557   switch (Width) {
1558   case OPW32:
1559   case OPW16:
1560   case OPWV216:
1561     return decodeSpecialReg32(Val);
1562   case OPW64:
1563   case OPWV232:
1564     return decodeSpecialReg64(Val);
1565   default:
1566     llvm_unreachable("unexpected immediate type");
1567   }
1568 }
1569 
1570 // Bit 0 of DstY isn't stored in the instruction, because it's always the
1571 // opposite of bit 0 of DstX.
1572 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1573                                                unsigned Val) const {
1574   int VDstXInd =
1575       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1576   assert(VDstXInd != -1);
1577   assert(Inst.getOperand(VDstXInd).isReg());
1578   unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
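       // Recover bit 0 of DstY as the complement of the encoded bit 0 of DstX.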
1579   Val |= ~XDstReg & 1;
1580   auto Width = llvm::AMDGPUDisassembler::OPW32;
1581   return createRegOperand(getVgprClassId(Width), Val);
1582 }
1583 
1584 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1585   using namespace AMDGPU;
1586 
1587   switch (Val) {
1588   // clang-format off
1589   case 102: return createRegOperand(FLAT_SCR_LO);
1590   case 103: return createRegOperand(FLAT_SCR_HI);
1591   case 104: return createRegOperand(XNACK_MASK_LO);
1592   case 105: return createRegOperand(XNACK_MASK_HI);
1593   case 106: return createRegOperand(VCC_LO);
1594   case 107: return createRegOperand(VCC_HI);
1595   case 108: return createRegOperand(TBA_LO);
1596   case 109: return createRegOperand(TBA_HI);
1597   case 110: return createRegOperand(TMA_LO);
1598   case 111: return createRegOperand(TMA_HI);
1599   case 124:
1600     return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1601   case 125:
1602     return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1603   case 126: return createRegOperand(EXEC_LO);
1604   case 127: return createRegOperand(EXEC_HI);
1605   case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1606   case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1607   case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1608   case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1609   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1610   case 251: return createRegOperand(SRC_VCCZ);
1611   case 252: return createRegOperand(SRC_EXECZ);
1612   case 253: return createRegOperand(SRC_SCC);
1613   case 254: return createRegOperand(LDS_DIRECT);
1614   default: break;
1615     // clang-format on
1616   }
1617   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1618 }
1619 
1620 MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1621   using namespace AMDGPU;
1622 
1623   switch (Val) {
1624   case 102: return createRegOperand(FLAT_SCR);
1625   case 104: return createRegOperand(XNACK_MASK);
1626   case 106: return createRegOperand(VCC);
1627   case 108: return createRegOperand(TBA);
1628   case 110: return createRegOperand(TMA);
1629   case 124:
1630     if (isGFX11Plus())
1631       return createRegOperand(SGPR_NULL);
1632     break;
1633   case 125:
1634     if (!isGFX11Plus())
1635       return createRegOperand(SGPR_NULL);
1636     break;
1637   case 126: return createRegOperand(EXEC);
1638   case 235: return createRegOperand(SRC_SHARED_BASE);
1639   case 236: return createRegOperand(SRC_SHARED_LIMIT);
1640   case 237: return createRegOperand(SRC_PRIVATE_BASE);
1641   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1642   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1643   case 251: return createRegOperand(SRC_VCCZ);
1644   case 252: return createRegOperand(SRC_EXECZ);
1645   case 253: return createRegOperand(SRC_SCC);
1646   default: break;
1647   }
1648   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1649 }
1650 
1651 MCOperand
1652 AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1653                                   unsigned ImmWidth,
1654                                   AMDGPU::OperandSemantics Sema) const {
1655   using namespace AMDGPU::SDWA;
1656   using namespace AMDGPU::EncValues;
1657 
1658   if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1659       STI.hasFeature(AMDGPU::FeatureGFX10)) {
1660     // XXX: the cast to int is needed to avoid a compiler warning that the
1661     // comparison with unsigned is always true.
1662     if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1663         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1664       return createRegOperand(getVgprClassId(Width),
1665                               Val - SDWA9EncValues::SRC_VGPR_MIN);
1666     }
1667     if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1668         Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1669                               : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1670       return createSRegOperand(getSgprClassId(Width),
1671                                Val - SDWA9EncValues::SRC_SGPR_MIN);
1672     }
1673     if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1674         Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1675       return createSRegOperand(getTtmpClassId(Width),
1676                                Val - SDWA9EncValues::SRC_TTMP_MIN);
1677     }
1678 
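         // Non-register SDWA9 encodings reuse the regular scalar operand value
         // space offset by SRC_SGPR_MIN; strip the offset before interpreting
         // inline constants and special registers.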
1679     const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1680 
1681     if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1682       return decodeIntImmed(SVal);
1683 
1684     if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1685       return decodeFPImmed(ImmWidth, SVal, Sema);
1686 
1687     return decodeSpecialReg32(SVal);
1688   } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1689     return createRegOperand(getVgprClassId(Width), Val);
1690   }
1691   llvm_unreachable("unsupported target");
1692 }
1693 
1694 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1695   return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
1696 }
1697 
1698 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1699   return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
1700 }
1701 
1702 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1703   using namespace AMDGPU::SDWA;
1704 
1705   assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1706           STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1707          "SDWAVopcDst should be present only on GFX9+");
1708 
1709   bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1710 
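       // If the VCC_MASK bit is set the destination is an explicit SGPR, TTMP, or
       // special register; otherwise it is implicitly VCC (VCC_LO in wave32).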
1711   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1712     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1713 
1714     int TTmpIdx = getTTmpIdx(Val);
1715     if (TTmpIdx >= 0) {
1716       auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1717       return createSRegOperand(TTmpClsId, TTmpIdx);
1718     } else if (Val > SGPR_MAX) {
1719       return IsWave64 ? decodeSpecialReg64(Val)
1720                       : decodeSpecialReg32(Val);
1721     } else {
1722       return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1723     }
1724   } else {
1725     return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1726   }
1727 }
1728 
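     // Boolean (lane mask) operands occupy an SGPR pair in wave64 and a single
     // SGPR in wave32, so choose the operand width from the wavefront size.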
1729 MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1730   return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
1731              ? decodeSrcOp(OPW64, Val)
1732              : decodeSrcOp(OPW32, Val);
1733 }
1734 
1735 MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
1736   return decodeSrcOp(OPW32, Val);
1737 }
1738 
1739 MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
1740   if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
1741     return MCOperand();
1742   return MCOperand::createImm(Val);
1743 }
1744 
1745 MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
1746   using VersionField = AMDGPU::EncodingField<7, 0>;
1747   using W64Bit = AMDGPU::EncodingBit<13>;
1748   using W32Bit = AMDGPU::EncodingBit<14>;
1749   using MDPBit = AMDGPU::EncodingBit<15>;
1750   using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
1751 
1752   auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1753 
1754   // Decode into a plain immediate if any unused bits are raised.
1755   if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1756     return MCOperand::createImm(Imm);
1757 
1758   const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1759   auto I = find_if(Versions,
1760                    [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1761                      return V.Code == Version;
1762                    });
1763   MCContext &Ctx = getContext();
1764   const MCExpr *E;
1765   if (I == Versions.end())
1766     E = MCConstantExpr::create(Version, Ctx);
1767   else
1768     E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1769 
1770   if (W64)
1771     E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1772   if (W32)
1773     E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1774   if (MDP)
1775     E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1776 
1777   return MCOperand::createExpr(E);
1778 }
1779 
1780 bool AMDGPUDisassembler::isVI() const {
1781   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1782 }
1783 
1784 bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1785 
1786 bool AMDGPUDisassembler::isGFX90A() const {
1787   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1788 }
1789 
1790 bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1791 
1792 bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1793 
1794 bool AMDGPUDisassembler::isGFX10Plus() const {
1795   return AMDGPU::isGFX10Plus(STI);
1796 }
1797 
1798 bool AMDGPUDisassembler::isGFX11() const {
1799   return STI.hasFeature(AMDGPU::FeatureGFX11);
1800 }
1801 
1802 bool AMDGPUDisassembler::isGFX11Plus() const {
1803   return AMDGPU::isGFX11Plus(STI);
1804 }
1805 
1806 bool AMDGPUDisassembler::isGFX12() const {
1807   return STI.hasFeature(AMDGPU::FeatureGFX12);
1808 }
1809 
1810 bool AMDGPUDisassembler::isGFX12Plus() const {
1811   return AMDGPU::isGFX12Plus(STI);
1812 }
1813 
1814 bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1815   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1816 }
1817 
1818 bool AMDGPUDisassembler::hasKernargPreload() const {
1819   return AMDGPU::hasKernargPreload(STI);
1820 }
1821 
1822 //===----------------------------------------------------------------------===//
1823 // AMDGPU specific symbol handling
1824 //===----------------------------------------------------------------------===//
1825 
1826 /// Build a string describing the reserved bit range specified by Mask with
1827 /// offset BaseBytes, for use in error comments. Mask must be a single
1828 /// contiguous range of 1s surrounded by zeros. The format is meant to align
1829 /// with the tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1830 static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1831   SmallString<32> Result;
1832   raw_svector_ostream S(Result);
1833 
1834   int TrailingZeros = llvm::countr_zero(Mask);
1835   int PopCount = llvm::popcount(Mask);
1836 
1837   if (PopCount == 1) {
1838     S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1839   } else {
1840     S << "bits in range ("
1841       << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1842       << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1843   }
1844 
1845   return Result;
1846 }
1847 
1848 #define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1849 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
1850   do {                                                                         \
1851     KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
1852   } while (0)
1853 #define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
1854   do {                                                                         \
1855     KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
1856              << GET_FIELD(MASK) << '\n';                                       \
1857   } while (0)
1858 
1859 #define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
1860   do {                                                                         \
1861     if (FourByteBuffer & (MASK)) {                                             \
1862       return createStringError(std::errc::invalid_argument,                    \
1863                                "kernel descriptor " DESC                       \
1864                                " reserved %s set" MSG,                         \
1865                                getBitRangeFromMask((MASK), 0).c_str());        \
1866     }                                                                          \
1867   } while (0)
1868 
1869 #define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1870 #define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
1871   CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1872 #define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
1873   CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1874 #define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
1875   CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1876 
1877 // NOLINTNEXTLINE(readability-identifier-naming)
1878 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1879     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1880   using namespace amdhsa;
1881   StringRef Indent = "\t";
1882 
1883   // We cannot accurately backward compute #VGPRs used from
1884   // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1885   // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1886   // simply calculate the inverse of what the assembler does.
1887 
1888   uint32_t GranulatedWorkitemVGPRCount =
1889       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1890 
1891   uint32_t NextFreeVGPR =
1892       (GranulatedWorkitemVGPRCount + 1) *
1893       AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1894 
1895   KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1896 
1897   // We cannot backward compute values used to calculate
1898   // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
1899   // directives can't be computed:
1900   // .amdhsa_reserve_vcc
1901   // .amdhsa_reserve_flat_scratch
1902   // .amdhsa_reserve_xnack_mask
1903   // They take their respective default values if not specified in the assembly.
1904   //
1905   // GRANULATED_WAVEFRONT_SGPR_COUNT
1906   //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1907   //
1908   // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1909   // are set to 0. So while disassembling we consider that:
1910   //
1911   // GRANULATED_WAVEFRONT_SGPR_COUNT
1912   //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1913   //
1914   // The disassembler cannot recover the original values of those 3 directives.
1915 
1916   uint32_t GranulatedWavefrontSGPRCount =
1917       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1918 
1919   if (isGFX10Plus())
1920     CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1921                             "must be zero on gfx10+");
1922 
1923   uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1924                           AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1925 
1926   KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1927   if (!hasArchitectedFlatScratch())
1928     KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1929   KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1930   KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1931 
1932   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1933 
1934   PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1935                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1936   PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1937                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1938   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1939                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1940   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1941                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1942 
1943   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1944 
1945   if (!isGFX12Plus())
1946     PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1947                     COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1948 
1949   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1950 
1951   if (!isGFX12Plus())
1952     PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1953                     COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1954 
1955   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1956   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1957 
1958   if (isGFX9Plus())
1959     PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1960 
1961   if (!isGFX9Plus())
1962     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1963                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1964 
1965   CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1966 
1967   if (!isGFX10Plus())
1968     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1969                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1970 
1971   if (isGFX10Plus()) {
1972     PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1973                     COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1974     PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1975     PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1976   }
1977 
1978   if (isGFX12Plus())
1979     PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1980                     COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1981 
1982   return true;
1983 }
1984 
1985 // NOLINTNEXTLINE(readability-identifier-naming)
1986 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1987     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1988   using namespace amdhsa;
1989   StringRef Indent = "\t";
1990   if (hasArchitectedFlatScratch())
1991     PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1992                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1993   else
1994     PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1995                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1996   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1997                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1998   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1999                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2000   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2001                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2002   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2003                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2004   PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2005                   COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2006 
2007   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2008   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2009   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2010 
2011   PRINT_DIRECTIVE(
2012       ".amdhsa_exception_fp_ieee_invalid_op",
2013       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2014   PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2015                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2016   PRINT_DIRECTIVE(
2017       ".amdhsa_exception_fp_ieee_div_zero",
2018       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2019   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2020                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2021   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2022                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2023   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2024                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2025   PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2026                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2027 
2028   CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2029 
2030   return true;
2031 }
2032 
2033 // NOLINTNEXTLINE(readability-identifier-naming)
2034 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2035     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2036   using namespace amdhsa;
2037   StringRef Indent = "\t";
2038   if (isGFX90A()) {
2039     KdStream << Indent << ".amdhsa_accum_offset "
2040              << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2041              << '\n';
2042 
2043     PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2044 
2045     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2046                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2047     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2048                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2049   } else if (isGFX10Plus()) {
2050     // Bits [0-3].
2051     if (!isGFX12Plus()) {
2052       if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2053         PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2054                         COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2055       } else {
2056         PRINT_PSEUDO_DIRECTIVE_COMMENT(
2057             "SHARED_VGPR_COUNT",
2058             COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2059       }
2060     } else {
2061       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2062                                    "COMPUTE_PGM_RSRC3",
2063                                    "must be zero on gfx12+");
2064     }
2065 
2066     // Bits [4-11].
2067     if (isGFX11()) {
2068       PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2069                                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2070       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2071                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2072       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2073                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2074     } else if (isGFX12Plus()) {
2075       PRINT_PSEUDO_DIRECTIVE_COMMENT(
2076           "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2077     } else {
2078       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2079                                    "COMPUTE_PGM_RSRC3",
2080                                    "must be zero on gfx10");
2081     }
2082 
2083     // Bits [12].
2084     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2085                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2086 
2087     // Bits [13].
2088     if (isGFX12Plus()) {
2089       PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2090                                      COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2091     } else {
2092       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2093                                    "COMPUTE_PGM_RSRC3",
2094                                    "must be zero on gfx10 or gfx11");
2095     }
2096 
2097     // Bits [14-30].
2098     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2099                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2100 
2101     // Bits [31].
2102     if (isGFX11Plus()) {
2103       PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2104                                      COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2105     } else {
2106       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2107                                    "COMPUTE_PGM_RSRC3",
2108                                    "must be zero on gfx10");
2109     }
2110   } else if (FourByteBuffer) {
2111     return createStringError(
2112         std::errc::invalid_argument,
2113         "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2114   }
2115   return true;
2116 }
2117 #undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2118 #undef PRINT_DIRECTIVE
2119 #undef GET_FIELD
2120 #undef CHECK_RESERVED_BITS_IMPL
2121 #undef CHECK_RESERVED_BITS
2122 #undef CHECK_RESERVED_BITS_MSG
2123 #undef CHECK_RESERVED_BITS_DESC
2124 #undef CHECK_RESERVED_BITS_DESC_MSG
2125 
2126 /// Create an error object to return from onSymbolStart for reserved kernel
2127 /// descriptor bits being set.
2128 static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2129                                        const char *Msg = "") {
2130   return createStringError(
2131       std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2132       getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2133 }
2134 
2135 /// Create an error object to return from onSymbolStart for reserved kernel
2136 /// descriptor bytes being set.
2137 static Error createReservedKDBytesError(unsigned BaseInBytes,
2138                                         unsigned WidthInBytes) {
2139   // Create an error comment in the same format as the "Kernel Descriptor"
2140   // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2141   return createStringError(
2142       std::errc::invalid_argument,
2143       "kernel descriptor reserved bits in range (%u:%u) set",
2144       (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2145 }
2146 
2147 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2148     DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2149     raw_string_ostream &KdStream) const {
2150 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
2151   do {                                                                         \
2152     KdStream << Indent << DIRECTIVE " "                                        \
2153              << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
2154   } while (0)
2155 
2156   uint16_t TwoByteBuffer = 0;
2157   uint32_t FourByteBuffer = 0;
2158 
2159   StringRef ReservedBytes;
2160   StringRef Indent = "\t";
2161 
2162   assert(Bytes.size() == 64);
2163   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2164 
2165   switch (Cursor.tell()) {
2166   case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2167     FourByteBuffer = DE.getU32(Cursor);
2168     KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2169              << '\n';
2170     return true;
2171 
2172   case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2173     FourByteBuffer = DE.getU32(Cursor);
2174     KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2175              << FourByteBuffer << '\n';
2176     return true;
2177 
2178   case amdhsa::KERNARG_SIZE_OFFSET:
2179     FourByteBuffer = DE.getU32(Cursor);
2180     KdStream << Indent << ".amdhsa_kernarg_size "
2181              << FourByteBuffer << '\n';
2182     return true;
2183 
2184   case amdhsa::RESERVED0_OFFSET:
2185     // 4 reserved bytes, must be 0.
2186     ReservedBytes = DE.getBytes(Cursor, 4);
2187     for (int I = 0; I < 4; ++I) {
2188       if (ReservedBytes[I] != 0)
2189         return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2190     }
2191     return true;
2192 
2193   case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2194     // KERNEL_CODE_ENTRY_BYTE_OFFSET
2195     // So far no directive controls this for Code Object V3, so simply skip for
2196     // disassembly.
2197     DE.skip(Cursor, 8);
2198     return true;
2199 
2200   case amdhsa::RESERVED1_OFFSET:
2201     // 20 reserved bytes, must be 0.
2202     ReservedBytes = DE.getBytes(Cursor, 20);
2203     for (int I = 0; I < 20; ++I) {
2204       if (ReservedBytes[I] != 0)
2205         return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2206     }
2207     return true;
2208 
2209   case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2210     FourByteBuffer = DE.getU32(Cursor);
2211     return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2212 
2213   case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2214     FourByteBuffer = DE.getU32(Cursor);
2215     return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2216 
2217   case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2218     FourByteBuffer = DE.getU32(Cursor);
2219     return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2220 
2221   case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2222     using namespace amdhsa;
2223     TwoByteBuffer = DE.getU16(Cursor);
2224 
2225     if (!hasArchitectedFlatScratch())
2226       PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2227                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2228     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2229                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2230     PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2231                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2232     PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2233                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2234     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2235                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2236     if (!hasArchitectedFlatScratch())
2237       PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2238                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2239     PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2240                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2241 
2242     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2243       return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2244                                        amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2245 
2246     // Reserved for GFX9
2247     if (isGFX9() &&
2248         (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2249       return createReservedKDBitsError(
2250           KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2251           amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2252     } else if (isGFX10Plus()) {
2253       PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2254                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2255     }
2256 
2257     if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2258       PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2259                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2260 
2261     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2262       return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2263                                        amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2264     }
2265 
2266     return true;
2267 
2268   case amdhsa::KERNARG_PRELOAD_OFFSET:
2269     using namespace amdhsa;
2270     TwoByteBuffer = DE.getU16(Cursor);
2271     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2272       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2273                       KERNARG_PRELOAD_SPEC_LENGTH);
2274     }
2275 
2276     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2277       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2278                       KERNARG_PRELOAD_SPEC_OFFSET);
2279     }
2280     return true;
2281 
2282   case amdhsa::RESERVED3_OFFSET:
2283     // 4 bytes from here are reserved, must be 0.
2284     ReservedBytes = DE.getBytes(Cursor, 4);
2285     for (int I = 0; I < 4; ++I) {
2286       if (ReservedBytes[I] != 0)
2287         return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2288     }
2289     return true;
2290 
2291   default:
2292     llvm_unreachable("Unhandled index. Case statements cover everything.");
2293     return true;
2294   }
2295 #undef PRINT_DIRECTIVE
2296 }
2297 
2298 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2299     StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2300 
2301   // CP microcode requires the kernel descriptor to be 64-byte aligned.
2302   if (Bytes.size() != 64 || KdAddress % 64 != 0)
2303     return createStringError(std::errc::invalid_argument,
2304                              "kernel descriptor must be 64-byte aligned");
2305 
2306   // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2307   // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2308   // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2309   // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2310   // when required.
2311   if (isGFX10Plus()) {
2312     uint16_t KernelCodeProperties =
2313         support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2314                                 llvm::endianness::little);
2315     EnableWavefrontSize32 =
2316         AMDHSA_BITS_GET(KernelCodeProperties,
2317                         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2318   }
2319 
2320   std::string Kd;
2321   raw_string_ostream KdStream(Kd);
2322   KdStream << ".amdhsa_kernel " << KdName << '\n';
2323 
2324   DataExtractor::Cursor C(0);
2325   while (C && C.tell() < Bytes.size()) {
2326     Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2327 
2328     cantFail(C.takeError());
2329 
2330     if (!Res)
2331       return Res;
2332   }
2333   KdStream << ".end_amdhsa_kernel\n";
2334   outs() << KdStream.str();
2335   return true;
2336 }
2337 
2338 Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2339                                                  uint64_t &Size,
2340                                                  ArrayRef<uint8_t> Bytes,
2341                                                  uint64_t Address) const {
2342   // Right now only the kernel descriptor needs to be handled.
2343   // We ignore all other symbols for target-specific handling.
2344   // TODO:
2345   // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2346   // Object V2 and V3 when symbols are marked protected.
2347 
2348   // amd_kernel_code_t for Code Object V2.
2349   if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2350     Size = 256;
2351     return createStringError(std::errc::invalid_argument,
2352                              "code object v2 is not supported");
2353   }
2354 
2355   // Code Object V3 kernel descriptors.
2356   StringRef Name = Symbol.Name;
2357   if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2358     Size = 64; // Size = 64 regardless of success or failure.
2359     return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2360   }
2361 
2362   return false;
2363 }
2364 
2365 const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2366                                                            int64_t Val) {
2367   MCContext &Ctx = getContext();
2368   MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2369   assert(!Sym->isVariable());
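       // Equate the symbol to an absolute constant so decoded operands can refer to
       // it symbolically.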
2370   Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2371   return MCSymbolRefExpr::create(Sym, Ctx);
2372 }
2373 
2374 //===----------------------------------------------------------------------===//
2375 // AMDGPUSymbolizer
2376 //===----------------------------------------------------------------------===//
2377 
2378 // Try to find the symbol name for the specified label.
2379 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2380     MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2381     uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2382     uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2383 
2384   if (!IsBranch) {
2385     return false;
2386   }
2387 
2388   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2389   if (!Symbols)
2390     return false;
2391 
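       // Look for an untyped (STT_NOTYPE) symbol defined exactly at the branch
       // target address.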
2392   auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2393     return Val.Addr == static_cast<uint64_t>(Value) &&
2394            Val.Type == ELF::STT_NOTYPE;
2395   });
2396   if (Result != Symbols->end()) {
2397     auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2398     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2399     Inst.addOperand(MCOperand::createExpr(Add));
2400     return true;
2401   }
2402   // Add to list of referenced addresses, so caller can synthesize a label.
2403   ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2404   return false;
2405 }
2406 
2407 void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2408                                                        int64_t Value,
2409                                                        uint64_t Address) {
2410   llvm_unreachable("unimplemented");
2411 }
2412 
2413 //===----------------------------------------------------------------------===//
2414 // Initialization
2415 //===----------------------------------------------------------------------===//
2416 
2417 static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2418                               LLVMOpInfoCallback /*GetOpInfo*/,
2419                               LLVMSymbolLookupCallback /*SymbolLookUp*/,
2420                               void *DisInfo,
2421                               MCContext *Ctx,
2422                               std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2423   return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2424 }
2425 
2426 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2427                                                 const MCSubtargetInfo &STI,
2428                                                 MCContext &Ctx) {
2429   return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2430 }
2431 
2432 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2433   TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2434                                          createAMDGPUDisassembler);
2435   TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2436                                        createAMDGPUSymbolizer);
2437 }
2438