//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx,
                                       MCInstrInfo const *MCII) :
  MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
  TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
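  // For example, Imm = 0xffff encodes simm16 = -1, i.e. a byte offset of -4,
  // so the computed target below is Addr + 4 - 4 = Addr.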
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isVI()) {         // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else {                    // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
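    // e.g. Imm = 0x1FFFFF decodes to -1 here, but to 0xFFFFF on VI.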
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VGPR_32_Lo128)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)
DECODE_OPERAND_REG(VReg_256)
DECODE_OPERAND_REG(VReg_288)
DECODE_OPERAND_REG(VReg_352)
DECODE_OPERAND_REG(VReg_384)
DECODE_OPERAND_REG(VReg_512)
DECODE_OPERAND_REG(VReg_1024)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

DECODE_OPERAND_REG(AGPR_32)
DECODE_OPERAND_REG(AReg_64)
DECODE_OPERAND_REG(AReg_128)
DECODE_OPERAND_REG(AReg_256)
DECODE_OPERAND_REG(AReg_512)
DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)
DECODE_OPERAND_REG(AV_128)
DECODE_OPERAND_REG(AVDst_128)
DECODE_OPERAND_REG(AVDst_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm));
}

static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr,
                                        const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr,
                                        const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
}

static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
}

static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
}

static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
}

static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
}

static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
}

static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
}

static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus
decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
}

static DecodeStatus
decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
                                             AMDGPUDisassembler::OpWidthTy Opw,
                                             const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The AGPR bit is decoded along with the vdst (the first operand), so we
    // need to switch the data operand's register class to AGPR if vdst was
    // AGPR. If a DS instruction has both data0 and data1, their register
    // classes are tied as well.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
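  // In decodeSrcOp's value space, VGPR encodings occupy [256, 511]; bit 9
  // (512) selects the AGPR file instead (see the AReg decoders above).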
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

static DecodeStatus
DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW32, Decoder);
}

static DecodeStatus
DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW64, Decoder);
}

static DecodeStatus
DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW96, Decoder);
}

static DecodeStatus
DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                              const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW128, Decoder);
}

static DecodeStatus
DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                              const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW160,
                                  Decoder);
}

static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

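// Read a 96-bit GFX11 encoding as two little-endian pieces: the low 64 bits
// first, then the high 32 bits.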
static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo = support::endian::read<uint64_t, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi = support::endian::read<uint32_t, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}

// The disassembler is greedy, so we need to check the FI operand value to
// avoid parsing a dpp8 instruction when the required literal is not set. For
// dpp16, the autogenerated decoder checks the dpp literal.
static bool isValidDPP8(const MCInst &MI) {
  using namespace llvm::AMDGPU::DPP;
  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
  assert(FiIdx != -1);
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);
      Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear
      Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address);
      if (Res) {
        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
          convertVOP3PDPPInst(MI);
        else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
          convertVOPCDPPInst(MI); // Special VOP3 case
        else {
          assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
          convertVOP3DPPInst(MI); // Regular VOP3 case
        }
        break;
      }
      Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
      if (Res)
        break;
    }
    // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
        if (Res) {
          if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
              == -1)
            break;
          if (convertDPP8Inst(MI) == MCDisassembler::Success)
            break;
          MI = MCInst(); // clear
        }
      }

      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;
      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address);
      if (Res) {
        if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
          convertVOPCDPPInst(MI);
        break;
      }

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address);
      if (Res)
        break;
    }

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
      if (Res) break;
    }

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address);
    if (Res) break;

    if (Bytes.size() < 4) break;
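    // Fall back to the 64-bit tables: splice the dword we already consumed
    // together with the next one.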
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]) {
      Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address);
      if (Res)
        break;
    }

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
      if (Res)
        break;
    }

    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address);
    if (Res)
      break;

    Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address);
  } while (false);

  if (Res && AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
          (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
             (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
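      // Each extra NSA dword carries up to four more VGPR address bytes.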
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        for (unsigned i = 0; i < NSAArgs; ++i) {
          const unsigned VAddrIdx = VAddr0Idx + 1 + i;
          auto VAddrRCID = MCII->get(MI.getOpcode()).OpInfo[VAddrIdx].RegClass;
          MI.insert(MI.begin() + VAddrIdx,
                    createRegOperand(VAddrRCID, Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
    Res = convertEXPInst(MI);

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
    Res = convertVINTERPInst(MI);

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

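  // Some opcodes have a tied vdst_in operand; if the decoded copy is missing
  // or does not match the operand it is tied to, rebuild it from the tied
  // operand's register.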
  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (Res && ImmLitIdx != -1 && !IsSOPK)
    Res = convertFMAanyK(MI, ImmLitIdx);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX11]) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
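// For example, if src1_modifiers has OP_SEL_0 set, bit 1 of the reassembled
// op_sel immediate below is set.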
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

// We must check FI == literal to reject non-genuine dpp8 instructions, and we
// must first add the optional MI operands so that FI can be checked.
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
    convertVOP3PDPPInst(MI);
  } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(Opc)) {
    convertVOPCDPPInst(MI);
  } else {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    unsigned DescNumOps = MCII->get(Opc).getNumOperands();
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
      auto Mods = collectVOPModifiers(MI);
      insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                           AMDGPU::OpName::op_sel);
    } else {
      // Insert dummy unused src modifiers.
      if (MI.getNumOperands() < DescNumOps &&
          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
        insertNamedMCOperand(MI, MCOperand::createImm(0),
                             AMDGPU::OpName::src0_modifiers);

      if (MI.getNumOperands() < DescNumOps &&
          AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
        insertNamedMCOperand(MI, MCOperand::createImm(0),
                             AMDGPU::OpName::src1_modifiers);
    }
  }
  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}

DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  if (isMacDPP(MI))
    convertMacDPPInst(MI);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
  return MCDisassembler::Success;
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it had 1 dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16))
      addOperand(MI, MCOperand::createImm(1));
    return MCDisassembler::Success;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
  bool IsNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
    if (!IsNSA) {
      if (AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        // The NSA encoding does not contain enough operands for the combination
        // of base opcode / dimension. Should this be an error?
        return MCDisassembler::Success;
      }
    }
  }

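  // e.g. DMask = 0b1011 enables three channels, so a non-gather, non-d16 load
  // writes three dwords.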
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return MCDisassembler::Success;
    }
  }

  // If not using NSA on GFX10+, widen address register to correct size.
  unsigned NewVAddr0 = AMDGPU::NoRegister;
  if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) {
    unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
    unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
    VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;

    auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
    NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
                                        &MRI.getRegClass(AddrRCID));
    if (NewVAddr0 == AMDGPU::NoRegister)
      return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddr0 != AMDGPU::NoRegister) {
    MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}

// Opsel and neg bits are used in src_modifiers and standalone operands. The
// autogenerated decoder only adds to src_modifiers, so manually add the bits
// to the other operands.
DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);

  return MCDisassembler::Success;
}

// Create dummy old operand and insert optional operands
DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
                                                int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.OpInfo[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

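  // e.g. for SGPR_64, an encoded Val of 4 selects the aligned pair s[4:5]
  // (index 2 within the class).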
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const {
  return decodeSrcOp(OPWV232, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32_Lo128(unsigned Val) const {
  return createRegOperand(AMDGPU::VGPR_32_Lo128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
  return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_288(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_288RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_320(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_320RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_352(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_352RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_384(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_384RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const {
  using namespace AMDGPU::EncValues;
  assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
  return decodeSrcOp(OPW128, Val | IS_VGPR);
}

MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const {
  using namespace AMDGPU::EncValues;
  assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
  return decodeSrcOp(OPW512, Val | IS_VGPR);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_288(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_288RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_320(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_320RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_352(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_352RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_384(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_384RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The TableGen-generated disassembler doesn't care about operand types,
  // leaving only the register class, so an SSrc_32 operand turns into SReg_32;
  // therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // The TableGen-generated disassembler doesn't care about operand types,
  // leaving only the register class, so an SSrc_32 operand turns into SReg_32;
  // therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_288(unsigned Val) const {
  return decodeDstOp(OPW288, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_320(unsigned Val) const {
  return decodeDstOp(OPW320, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_352(unsigned Val) const {
  return decodeDstOp(OPW352, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_384(unsigned Val) const {
  return decodeDstOp(OPW384, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

1369 // Decode literals for instructions that always have a literal in the encoding.
1370 MCOperand
1371 AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1372   if (HasLiteral) {
1373     assert(
1374         AMDGPU::hasVOPD(STI) &&
1375         "Should only decode multiple kimm with VOPD, check VSrc operand types");
1376     if (Literal != Val)
1377       return errOperand(Val, "More than one unique literal is illegal");
1378   }
1379   HasLiteral = true;
1380   Literal = Val;
1381   return MCOperand::createImm(Literal);
1382 }
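// Illustrative example: in a VOPD dual-issue pair where both halves use a
// literal, e.g. "v_dual_mov_b32 v0, 0x1234 :: v_dual_mov_b32 v1, 0x1234", both
// operands decode against the same trailing dword, so this function is called
// twice with equal values and succeeds; two different literal values would
// take the errOperand() path above.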
1383 
1384 MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
1385   // For now all literal constants are assumed to be unsigned 32-bit integers.
1386   // ToDo: deal with signed/unsigned 64-bit integer constants
1387   // ToDo: deal with float/double constants
1388   if (!HasLiteral) {
1389     if (Bytes.size() < 4) {
1390       return errOperand(0, "cannot read literal, inst bytes left " +
1391                         Twine(Bytes.size()));
1392     }
1393     HasLiteral = true;
1394     Literal = eatBytes<uint32_t>(Bytes);
1395   }
1396   return MCOperand::createImm(Literal);
1397 }
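// Worked example: for "v_mov_b32 v0, 0xdeadbeef" the literal dword follows the
// instruction word, so the first literal operand consumes four bytes from
// Bytes via eatBytes<uint32_t>() and caches the value; later literal operands
// of the same instruction reuse the cached Literal instead of reading again.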
1398 
1399 MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1400   using namespace AMDGPU::EncValues;
1401 
1402   assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1403   // The int64_t casts prevent unsigned wraparound in the negative case.
1404   return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1405     (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1406     (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1407 }
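// Sample mappings of the inline-integer encoding handled above:
//   decodeIntImmed(INLINE_INTEGER_C_MIN)              --> imm 0
//   decodeIntImmed(INLINE_INTEGER_C_POSITIVE_MAX)     --> imm 64
//   decodeIntImmed(INLINE_INTEGER_C_POSITIVE_MAX + 1) --> imm -1
//   decodeIntImmed(INLINE_INTEGER_C_MAX)              --> imm -16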
1408 
1409 static int64_t getInlineImmVal32(unsigned Imm) {
1410   switch (Imm) {
1411   case 240:
1412     return FloatToBits(0.5f);
1413   case 241:
1414     return FloatToBits(-0.5f);
1415   case 242:
1416     return FloatToBits(1.0f);
1417   case 243:
1418     return FloatToBits(-1.0f);
1419   case 244:
1420     return FloatToBits(2.0f);
1421   case 245:
1422     return FloatToBits(-2.0f);
1423   case 246:
1424     return FloatToBits(4.0f);
1425   case 247:
1426     return FloatToBits(-4.0f);
1427   case 248: // 1 / (2 * PI)
1428     return 0x3e22f983;
1429   default:
1430     llvm_unreachable("invalid fp inline imm");
1431   }
1432 }
1433 
1434 static int64_t getInlineImmVal64(unsigned Imm) {
1435   switch (Imm) {
1436   case 240:
1437     return DoubleToBits(0.5);
1438   case 241:
1439     return DoubleToBits(-0.5);
1440   case 242:
1441     return DoubleToBits(1.0);
1442   case 243:
1443     return DoubleToBits(-1.0);
1444   case 244:
1445     return DoubleToBits(2.0);
1446   case 245:
1447     return DoubleToBits(-2.0);
1448   case 246:
1449     return DoubleToBits(4.0);
1450   case 247:
1451     return DoubleToBits(-4.0);
1452   case 248: // 1 / (2 * PI)
1453     return 0x3fc45f306dc9c882;
1454   default:
1455     llvm_unreachable("invalid fp inline imm");
1456   }
1457 }
1458 
1459 static int64_t getInlineImmVal16(unsigned Imm) {
1460   switch (Imm) {
1461   case 240:
1462     return 0x3800;
1463   case 241:
1464     return 0xB800;
1465   case 242:
1466     return 0x3C00;
1467   case 243:
1468     return 0xBC00;
1469   case 244:
1470     return 0x4000;
1471   case 245:
1472     return 0xC000;
1473   case 246:
1474     return 0x4400;
1475   case 247:
1476     return 0xC400;
1477   case 248: // 1 / (2 * PI)
1478     return 0x3118;
1479   default:
1480     llvm_unreachable("invalid fp inline imm");
1481   }
1482 }
1483 
1484 MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
1485   assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
1486       && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1487 
1488   // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1489   switch (Width) {
1490   case OPW32:
1491   case OPW128: // splat constants
1492   case OPW512:
1493   case OPW1024:
1494   case OPWV232:
1495     return MCOperand::createImm(getInlineImmVal32(Imm));
1496   case OPW64:
1497   case OPW256:
1498     return MCOperand::createImm(getInlineImmVal64(Imm));
1499   case OPW16:
1500   case OPWV216:
1501     return MCOperand::createImm(getInlineImmVal16(Imm));
1502   default:
1503     llvm_unreachable("implement me");
1504   }
1505 }
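// Sample mappings: the immediate returned above is the raw bit pattern at the
// operand's element width, e.g. for encoding 242 (1.0):
//   decodeFPImmed(OPW32, 242) --> 0x3f800000
//   decodeFPImmed(OPW64, 242) --> 0x3ff0000000000000
//   decodeFPImmed(OPW16, 242) --> 0x3c00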
1506 
1507 unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
1508   using namespace AMDGPU;
1509 
1510   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1511   switch (Width) {
1512   default: // fall through
1513   case OPW32:
1514   case OPW16:
1515   case OPWV216:
1516     return VGPR_32RegClassID;
1517   case OPW64:
1518   case OPWV232: return VReg_64RegClassID;
1519   case OPW96: return VReg_96RegClassID;
1520   case OPW128: return VReg_128RegClassID;
1521   case OPW160: return VReg_160RegClassID;
1522   case OPW256: return VReg_256RegClassID;
1523   case OPW288: return VReg_288RegClassID;
1524   case OPW320: return VReg_320RegClassID;
1525   case OPW352: return VReg_352RegClassID;
1526   case OPW384: return VReg_384RegClassID;
1527   case OPW512: return VReg_512RegClassID;
1528   case OPW1024: return VReg_1024RegClassID;
1529   }
1530 }
1531 
1532 unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1533   using namespace AMDGPU;
1534 
1535   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1536   switch (Width) {
1537   default: // fall through
1538   case OPW32:
1539   case OPW16:
1540   case OPWV216:
1541     return AGPR_32RegClassID;
1542   case OPW64:
1543   case OPWV232: return AReg_64RegClassID;
1544   case OPW96: return AReg_96RegClassID;
1545   case OPW128: return AReg_128RegClassID;
1546   case OPW160: return AReg_160RegClassID;
1547   case OPW256: return AReg_256RegClassID;
1548   case OPW288: return AReg_288RegClassID;
1549   case OPW320: return AReg_320RegClassID;
1550   case OPW352: return AReg_352RegClassID;
1551   case OPW384: return AReg_384RegClassID;
1552   case OPW512: return AReg_512RegClassID;
1553   case OPW1024: return AReg_1024RegClassID;
1554   }
1555 }
1556
1558 unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
1559   using namespace AMDGPU;
1560 
1561   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1562   switch (Width) {
1563   default: // fall through
1564   case OPW32:
1565   case OPW16:
1566   case OPWV216:
1567     return SGPR_32RegClassID;
1568   case OPW64:
1569   case OPWV232: return SGPR_64RegClassID;
1570   case OPW96: return SGPR_96RegClassID;
1571   case OPW128: return SGPR_128RegClassID;
1572   case OPW160: return SGPR_160RegClassID;
1573   case OPW256: return SGPR_256RegClassID;
1574   case OPW288: return SGPR_288RegClassID;
1575   case OPW320: return SGPR_320RegClassID;
1576   case OPW352: return SGPR_352RegClassID;
1577   case OPW384: return SGPR_384RegClassID;
1578   case OPW512: return SGPR_512RegClassID;
1579   }
1580 }
1581 
1582 unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
1583   using namespace AMDGPU;
1584 
1585   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1586   switch (Width) {
1587   default: // fall through
1588   case OPW32:
1589   case OPW16:
1590   case OPWV216:
1591     return TTMP_32RegClassID;
1592   case OPW64:
1593   case OPWV232: return TTMP_64RegClassID;
1594   case OPW128: return TTMP_128RegClassID;
1595   case OPW256: return TTMP_256RegClassID;
1596   case OPW288: return TTMP_288RegClassID;
1597   case OPW320: return TTMP_320RegClassID;
1598   case OPW352: return TTMP_352RegClassID;
1599   case OPW384: return TTMP_384RegClassID;
1600   case OPW512: return TTMP_512RegClassID;
1601   }
1602 }
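// Illustrative pairing of the width helpers above: a 64-bit operand resolves
// to VReg_64RegClassID via getVgprClassId(OPW64), AReg_64RegClassID via
// getAgprClassId(OPW64), SGPR_64RegClassID via getSgprClassId(OPW64), and
// TTMP_64RegClassID via getTtmpClassId(OPW64).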
1603 
1604 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1605   using namespace AMDGPU::EncValues;
1606 
1607   unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1608   unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1609 
1610   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1611 }
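// Example: on GFX9+ the ttmp registers start at encoding TTMP_GFX9PLUS_MIN, so
// getTTmpIdx(TTMP_GFX9PLUS_MIN) == 0 (ttmp0), while any value outside
// [TTmpMin, TTmpMax] returns -1, meaning "not a ttmp".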
1612 
1613 MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
1614                                           bool MandatoryLiteral) const {
1615   using namespace AMDGPU::EncValues;
1616 
1617   assert(Val < 1024); // enum10
1618 
1619   bool IsAGPR = Val & 512;
1620   Val &= 511;
1621 
1622   if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1623     return createRegOperand(IsAGPR ? getAgprClassId(Width)
1624                                    : getVgprClassId(Width), Val - VGPR_MIN);
1625   }
1626   if (Val <= SGPR_MAX) {
1627     // "SGPR_MIN <= Val" is always true and would cause a compilation warning.
1628     static_assert(SGPR_MIN == 0);
1629     return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1630   }
1631 
1632   int TTmpIdx = getTTmpIdx(Val);
1633   if (TTmpIdx >= 0) {
1634     return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1635   }
1636 
1637   if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1638     return decodeIntImmed(Val);
1639 
1640   if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1641     return decodeFPImmed(Width, Val);
1642 
1643   if (Val == LITERAL_CONST) {
1644     if (MandatoryLiteral)
1645       // Keep a sentinel value for deferred setting
1646       return MCOperand::createImm(LITERAL_CONST);
1647     else
1648       return decodeLiteralConstant();
1649   }
1650 
1651   switch (Width) {
1652   case OPW32:
1653   case OPW16:
1654   case OPWV216:
1655     return decodeSpecialReg32(Val);
1656   case OPW64:
1657   case OPWV232:
1658     return decodeSpecialReg64(Val);
1659   default:
1660     llvm_unreachable("unexpected immediate type");
1661   }
1662 }
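// Worked examples of the decode order above:
//   Val == VGPR_MIN      --> v0 (or a0 when the AGPR bit, bit 9, is set)
//   Val == SGPR_MIN      --> s0
//   Val == 242           --> inline constant 1.0 at the requested width
//   Val == LITERAL_CONST --> literal read from the instruction stream, or a
//                            sentinel immediate when MandatoryLiteral is set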
1663 
1664 MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
1665   using namespace AMDGPU::EncValues;
1666 
1667   assert(Val < 128);
1668   assert(Width == OPW256 || Width == OPW512);
1669 
1670   if (Val <= SGPR_MAX) {
1671     // "SGPR_MIN <= Val" is always true and would cause a compilation warning.
1672     static_assert(SGPR_MIN == 0);
1673     return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1674   }
1675 
1676   int TTmpIdx = getTTmpIdx(Val);
1677   if (TTmpIdx >= 0) {
1678     return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1679   }
1680 
1681   llvm_unreachable("unknown dst register");
1682 }
1683 
1684 // Bit 0 of DstY isn't stored in the instruction, because it's always the
1685 // opposite of bit 0 of DstX.
1686 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1687                                                unsigned Val) const {
1688   int VDstXInd =
1689       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1690   assert(VDstXInd != -1);
1691   assert(Inst.getOperand(VDstXInd).isReg());
1692   unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1693   Val |= ~XDstReg & 1;
1694   auto Width = llvm::AMDGPUDisassembler::OPW32;
1695   return createRegOperand(getVgprClassId(Width), Val);
1696 }
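// Example: if vdstX decoded to an even VGPR such as v2 (encoding bit 0 == 0),
// the OR above forces bit 0 of DstY on, so an encoded Val of 4 becomes 5 and
// decodes as v5; an odd vdstX conversely yields an even DstY register.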
1697 
1698 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1699   using namespace AMDGPU;
1700 
1701   switch (Val) {
1702   // clang-format off
1703   case 102: return createRegOperand(FLAT_SCR_LO);
1704   case 103: return createRegOperand(FLAT_SCR_HI);
1705   case 104: return createRegOperand(XNACK_MASK_LO);
1706   case 105: return createRegOperand(XNACK_MASK_HI);
1707   case 106: return createRegOperand(VCC_LO);
1708   case 107: return createRegOperand(VCC_HI);
1709   case 108: return createRegOperand(TBA_LO);
1710   case 109: return createRegOperand(TBA_HI);
1711   case 110: return createRegOperand(TMA_LO);
1712   case 111: return createRegOperand(TMA_HI);
1713   case 124:
1714     return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1715   case 125:
1716     return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1717   case 126: return createRegOperand(EXEC_LO);
1718   case 127: return createRegOperand(EXEC_HI);
1719   case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1720   case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1721   case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1722   case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1723   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1724   case 251: return createRegOperand(SRC_VCCZ);
1725   case 252: return createRegOperand(SRC_EXECZ);
1726   case 253: return createRegOperand(SRC_SCC);
1727   case 254: return createRegOperand(LDS_DIRECT);
1728   default: break;
1729     // clang-format on
1730   }
1731   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1732 }
1733 
1734 MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1735   using namespace AMDGPU;
1736 
1737   switch (Val) {
1738   case 102: return createRegOperand(FLAT_SCR);
1739   case 104: return createRegOperand(XNACK_MASK);
1740   case 106: return createRegOperand(VCC);
1741   case 108: return createRegOperand(TBA);
1742   case 110: return createRegOperand(TMA);
1743   case 124:
1744     if (isGFX11Plus())
1745       return createRegOperand(SGPR_NULL);
1746     break;
1747   case 125:
1748     if (!isGFX11Plus())
1749       return createRegOperand(SGPR_NULL);
1750     break;
1751   case 126: return createRegOperand(EXEC);
1752   case 235: return createRegOperand(SRC_SHARED_BASE);
1753   case 236: return createRegOperand(SRC_SHARED_LIMIT);
1754   case 237: return createRegOperand(SRC_PRIVATE_BASE);
1755   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1756   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1757   case 251: return createRegOperand(SRC_VCCZ);
1758   case 252: return createRegOperand(SRC_EXECZ);
1759   case 253: return createRegOperand(SRC_SCC);
1760   default: break;
1761   }
1762   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1763 }
1764 
1765 MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
1766                                             const unsigned Val) const {
1767   using namespace AMDGPU::SDWA;
1768   using namespace AMDGPU::EncValues;
1769 
1770   if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
1771       STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
1772     // XXX: The cast to int avoids the warning "comparison with unsigned is
1773     // always true", since SDWA9EncValues::SRC_VGPR_MIN is 0.
1774     if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1775         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1776       return createRegOperand(getVgprClassId(Width),
1777                               Val - SDWA9EncValues::SRC_VGPR_MIN);
1778     }
1779     if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1780         Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1781                               : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1782       return createSRegOperand(getSgprClassId(Width),
1783                                Val - SDWA9EncValues::SRC_SGPR_MIN);
1784     }
1785     if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1786         Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1787       return createSRegOperand(getTtmpClassId(Width),
1788                                Val - SDWA9EncValues::SRC_TTMP_MIN);
1789     }
1790 
1791     const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1792 
1793     if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1794       return decodeIntImmed(SVal);
1795 
1796     if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1797       return decodeFPImmed(Width, SVal);
1798 
1799     return decodeSpecialReg32(SVal);
1800   } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
1801     return createRegOperand(getVgprClassId(Width), Val);
1802   }
1803   llvm_unreachable("unsupported target");
1804 }
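// Example (GFX9/GFX10 SDWA9 encodings): Val in [SRC_VGPR_MIN, SRC_VGPR_MAX]
// decodes directly as a VGPR, while Val == SDWA9EncValues::SRC_SGPR_MIN + 242
// falls past the register ranges and, after the SVal rebasing above, decodes
// as the inline constant 1.0.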
1805 
1806 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1807   return decodeSDWASrc(OPW16, Val);
1808 }
1809 
1810 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1811   return decodeSDWASrc(OPW32, Val);
1812 }
1813 
1814 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1815   using namespace AMDGPU::SDWA;
1816 
1817   assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
1818           STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
1819          "SDWAVopcDst should be present only on GFX9+");
1820 
1821   bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];
1822 
1823   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1824     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1825 
1826     int TTmpIdx = getTTmpIdx(Val);
1827     if (TTmpIdx >= 0) {
1828       auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1829       return createSRegOperand(TTmpClsId, TTmpIdx);
1830     } else if (Val > SGPR_MAX) {
1831       return IsWave64 ? decodeSpecialReg64(Val)
1832                       : decodeSpecialReg32(Val);
1833     } else {
1834       return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1835     }
1836   } else {
1837     return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1838   }
1839 }
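// Example: with VOPC_DST_VCC_MASK clear, the destination is the implicit VCC
// (wave64) or VCC_LO (wave32); with it set, an encoded SGPR value of 4 yields
// s[4:5] in wave64 mode and s4 in wave32 mode.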
1840 
1841 MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1842   return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
1843     decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
1844 }
1845 
1846 bool AMDGPUDisassembler::isVI() const {
1847   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
1848 }
1849 
1850 bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1851 
1852 bool AMDGPUDisassembler::isGFX90A() const {
1853   return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
1854 }
1855 
1856 bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1857 
1858 bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1859 
1860 bool AMDGPUDisassembler::isGFX10Plus() const {
1861   return AMDGPU::isGFX10Plus(STI);
1862 }
1863 
1864 bool AMDGPUDisassembler::isGFX11() const {
1865   return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
1866 }
1867 
1868 bool AMDGPUDisassembler::isGFX11Plus() const {
1869   return AMDGPU::isGFX11Plus(STI);
1870 }
1871
1873 bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1874   return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1875 }
1876 
1877 //===----------------------------------------------------------------------===//
1878 // AMDGPU specific symbol handling
1879 //===----------------------------------------------------------------------===//
1880 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
1881   do {                                                                         \
1882     KdStream << Indent << DIRECTIVE " "                                        \
1883              << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
1884   } while (0)
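// Example expansion: PRINT_DIRECTIVE(".amdhsa_ieee_mode",
// COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) masks the field out of FourByteBuffer,
// shifts it down by COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE_SHIFT, and prints a
// line such as "\t.amdhsa_ieee_mode 1".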
1885 
1886 // NOLINTNEXTLINE(readability-identifier-naming)
1887 MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1888     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1889   using namespace amdhsa;
1890   StringRef Indent = "\t";
1891 
1892   // We cannot accurately backward compute #VGPRs used from
1893   // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1894   // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1895   // simply calculate the inverse of what the assembler does.
1896 
1897   uint32_t GranulatedWorkitemVGPRCount =
1898       (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
1899       COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
1900 
1901   uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
1902                           AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
1903 
1904   KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
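  // Worked example (granule values are subtarget-dependent): with a VGPR
  // encoding granule of 4, GRANULATED_WORKITEM_VGPR_COUNT == 3 prints
  // ".amdhsa_next_free_vgpr 16", i.e. (3 + 1) * 4.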
1905 
1906   // We cannot backward compute values used to calculate
1907   // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
1908   // following directives can't be computed:
1909   // .amdhsa_reserve_vcc
1910   // .amdhsa_reserve_flat_scratch
1911   // .amdhsa_reserve_xnack_mask
1912   // They take their respective default values if not specified in the assembly.
1913   //
1914   // GRANULATED_WAVEFRONT_SGPR_COUNT
1915   //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1916   //
1917   // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1918   // are set to 0. So while disassembling we consider that:
1919   //
1920   // GRANULATED_WAVEFRONT_SGPR_COUNT
1921   //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1922   //
1923   // The disassembler cannot recover the original values of those 3 directives.
1924 
1925   uint32_t GranulatedWavefrontSGPRCount =
1926       (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
1927       COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
1928 
1929   if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
1930     return MCDisassembler::Fail;
1931 
1932   uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1933                           AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1934 
1935   KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1936   if (!hasArchitectedFlatScratch())
1937     KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1938   KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1939   KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << '\n';
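  // Worked example (the SGPR encoding granule is likewise subtarget-dependent):
  // with a granule of 8, GRANULATED_WAVEFRONT_SGPR_COUNT == 1 prints
  // ".amdhsa_next_free_sgpr 16", i.e. (1 + 1) * 8, with the vcc/flat_scratch/
  // xnack_mask reservations folded in as zeros as described above.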
1940 
1941   if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
1942     return MCDisassembler::Fail;
1943 
1944   PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1945                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1946   PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1947                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1948   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1949                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1950   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1951                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1952 
1953   if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
1954     return MCDisassembler::Fail;
1955 
1956   PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
1957 
1958   if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
1959     return MCDisassembler::Fail;
1960 
1961   PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
1962 
1963   if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
1964     return MCDisassembler::Fail;
1965 
1966   if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
1967     return MCDisassembler::Fail;
1968 
1969   PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
1970 
1971   if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
1972     return MCDisassembler::Fail;
1973 
1974   if (isGFX10Plus()) {
1975     PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1976                     COMPUTE_PGM_RSRC1_WGP_MODE);
1977     PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
1978     PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
1979   }
1980   return MCDisassembler::Success;
1981 }
1982 
1983 // NOLINTNEXTLINE(readability-identifier-naming)
1984 MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1985     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1986   using namespace amdhsa;
1987   StringRef Indent = "\t";
1988   if (hasArchitectedFlatScratch())
1989     PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1990                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1991   else
1992     PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1993                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1994   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1995                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1996   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1997                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1998   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1999                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2000   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2001                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2002   PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2003                   COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2004 
2005   if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
2006     return MCDisassembler::Fail;
2007 
2008   if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
2009     return MCDisassembler::Fail;
2010 
2011   if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
2012     return MCDisassembler::Fail;
2013 
2014   PRINT_DIRECTIVE(
2015       ".amdhsa_exception_fp_ieee_invalid_op",
2016       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2017   PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2018                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2019   PRINT_DIRECTIVE(
2020       ".amdhsa_exception_fp_ieee_div_zero",
2021       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2022   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2023                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2024   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2025                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2026   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2027                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2028   PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2029                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2030 
2031   if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
2032     return MCDisassembler::Fail;
2033 
2034   return MCDisassembler::Success;
2035 }
2036 
2037 #undef PRINT_DIRECTIVE
2038 
2039 MCDisassembler::DecodeStatus
2040 AMDGPUDisassembler::decodeKernelDescriptorDirective(
2041     DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2042     raw_string_ostream &KdStream) const {
2043 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
2044   do {                                                                         \
2045     KdStream << Indent << DIRECTIVE " "                                        \
2046              << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
2047   } while (0)
2048 
2049   uint16_t TwoByteBuffer = 0;
2050   uint32_t FourByteBuffer = 0;
2051 
2052   StringRef ReservedBytes;
2053   StringRef Indent = "\t";
2054 
2055   assert(Bytes.size() == 64);
2056   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2057 
2058   switch (Cursor.tell()) {
2059   case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2060     FourByteBuffer = DE.getU32(Cursor);
2061     KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2062              << '\n';
2063     return MCDisassembler::Success;
2064 
2065   case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2066     FourByteBuffer = DE.getU32(Cursor);
2067     KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2068              << FourByteBuffer << '\n';
2069     return MCDisassembler::Success;
2070 
2071   case amdhsa::KERNARG_SIZE_OFFSET:
2072     FourByteBuffer = DE.getU32(Cursor);
2073     KdStream << Indent << ".amdhsa_kernarg_size "
2074              << FourByteBuffer << '\n';
2075     return MCDisassembler::Success;
2076 
2077   case amdhsa::RESERVED0_OFFSET:
2078     // 4 reserved bytes, must be 0.
2079     ReservedBytes = DE.getBytes(Cursor, 4);
2080     for (int I = 0; I < 4; ++I) {
2081       if (ReservedBytes[I] != 0) {
2082         return MCDisassembler::Fail;
2083       }
2084     }
2085     return MCDisassembler::Success;
2086 
2087   case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2088     // KERNEL_CODE_ENTRY_BYTE_OFFSET
2089     // So far no directive controls this for Code Object V3, so simply skip for
2090     // disassembly.
2091     DE.skip(Cursor, 8);
2092     return MCDisassembler::Success;
2093 
2094   case amdhsa::RESERVED1_OFFSET:
2095     // 20 reserved bytes, must be 0.
2096     ReservedBytes = DE.getBytes(Cursor, 20);
2097     for (int I = 0; I < 20; ++I) {
2098       if (ReservedBytes[I] != 0) {
2099         return MCDisassembler::Fail;
2100       }
2101     }
2102     return MCDisassembler::Success;
2103 
2104   case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2105     // COMPUTE_PGM_RSRC3
2106     //  - Only set for GFX10; GFX6-9 require this to be 0.
2107     //  - Currently no directives directly control this.
2108     FourByteBuffer = DE.getU32(Cursor);
2109     if (!isGFX10Plus() && FourByteBuffer) {
2110       return MCDisassembler::Fail;
2111     }
2112     return MCDisassembler::Success;
2113 
2114   case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2115     FourByteBuffer = DE.getU32(Cursor);
2116     if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
2117         MCDisassembler::Fail) {
2118       return MCDisassembler::Fail;
2119     }
2120     return MCDisassembler::Success;
2121 
2122   case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2123     FourByteBuffer = DE.getU32(Cursor);
2124     if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
2125         MCDisassembler::Fail) {
2126       return MCDisassembler::Fail;
2127     }
2128     return MCDisassembler::Success;
2129 
2130   case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2131     using namespace amdhsa;
2132     TwoByteBuffer = DE.getU16(Cursor);
2133 
2134     if (!hasArchitectedFlatScratch())
2135       PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2136                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2137     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2138                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2139     PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2140                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2141     PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2142                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2143     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2144                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2145     if (!hasArchitectedFlatScratch())
2146       PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2147                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2148     PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2149                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2150 
2151     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2152       return MCDisassembler::Fail;
2153 
2154     // Reserved for GFX9
2155     if (isGFX9() &&
2156         (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2157       return MCDisassembler::Fail;
2158     } else if (isGFX10Plus()) {
2159       PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2160                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2161     }
2162 
2163     if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
2164       PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2165                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2166 
2167     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
2168       return MCDisassembler::Fail;
2169 
2170     return MCDisassembler::Success;
2171 
2172   case amdhsa::RESERVED2_OFFSET:
2173     // 6 bytes from here are reserved, must be 0.
2174     ReservedBytes = DE.getBytes(Cursor, 6);
2175     for (int I = 0; I < 6; ++I) {
2176       if (ReservedBytes[I] != 0)
2177         return MCDisassembler::Fail;
2178     }
2179     return MCDisassembler::Success;
2180 
2181   default:
2182     llvm_unreachable("Unhandled index. Case statements cover everything.");
2183     return MCDisassembler::Fail;
2184   }
2185 #undef PRINT_DIRECTIVE
2186 }
2187 
2188 MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
2189     StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2190   // CP microcode requires the kernel descriptor to be 64-byte aligned.
2191   if (Bytes.size() != 64 || KdAddress % 64 != 0)
2192     return MCDisassembler::Fail;
2193 
2194   std::string Kd;
2195   raw_string_ostream KdStream(Kd);
2196   KdStream << ".amdhsa_kernel " << KdName << '\n';
2197 
2198   DataExtractor::Cursor C(0);
2199   while (C && C.tell() < Bytes.size()) {
2200     MCDisassembler::DecodeStatus Status =
2201         decodeKernelDescriptorDirective(C, Bytes, KdStream);
2202 
2203     cantFail(C.takeError());
2204 
2205     if (Status == MCDisassembler::Fail)
2206       return MCDisassembler::Fail;
2207   }
2208   KdStream << ".end_amdhsa_kernel\n";
2209   outs() << KdStream.str();
2210   return MCDisassembler::Success;
2211 }
2212 
2213 std::optional<MCDisassembler::DecodeStatus>
2214 AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
2215                                   ArrayRef<uint8_t> Bytes, uint64_t Address,
2216                                   raw_ostream &CStream) const {
2217   // Right now only the kernel descriptor needs to be handled; all other
2218   // symbols are ignored for target-specific handling.
2219   // TODO:
2220   // Fix the spurious symbol issue for AMDGPU kernels. It exists for both
2221   // Code Object V2 and V3 when symbols are marked protected.
2222 
2223   // amd_kernel_code_t for Code Object V2.
2224   if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2225     Size = 256;
2226     return MCDisassembler::Fail;
2227   }
2228 
2229   // Code Object V3 kernel descriptors.
2230   StringRef Name = Symbol.Name;
2231   if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
2232     Size = 64; // Size = 64 regardless of success or failure.
2233     return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2234   }
2235   return std::nullopt;
2236 }
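// Example: for a Code Object V3 ELF symbol "foo.kd" of type STT_OBJECT, the
// 64-byte descriptor blob is decoded and printed as an ".amdhsa_kernel foo ...
// .end_amdhsa_kernel" block; symbols matching neither case return std::nullopt
// so generic disassembly proceeds.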
2237 
2238 //===----------------------------------------------------------------------===//
2239 // AMDGPUSymbolizer
2240 //===----------------------------------------------------------------------===//
2241 
2242 // Try to find the symbol name for the specified label.
2243 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2244     MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2245     uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2246     uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2247 
2248   if (!IsBranch) {
2249     return false;
2250   }
2251 
2252   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2253   if (!Symbols)
2254     return false;
2255 
2256   auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2257     return Val.Addr == static_cast<uint64_t>(Value) &&
2258            Val.Type == ELF::STT_NOTYPE;
2259   });
2260   if (Result != Symbols->end()) {
2261     auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2262     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2263     Inst.addOperand(MCOperand::createExpr(Add));
2264     return true;
2265   }
2266   // Add to list of referenced addresses, so caller can synthesize a label.
2267   ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2268   return false;
2269 }
2270 
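// Example: a branch whose computed target is 0x100 gets its operand replaced
// by a symbol expression when some STT_NOTYPE symbol in the section sits at
// address 0x100; otherwise the address is recorded in ReferencedAddresses so
// the caller can synthesize a label.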
2271 void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2272                                                        int64_t Value,
2273                                                        uint64_t Address) {
2274   llvm_unreachable("unimplemented");
2275 }
2276 
2277 //===----------------------------------------------------------------------===//
2278 // Initialization
2279 //===----------------------------------------------------------------------===//
2280 
2281 static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2282                               LLVMOpInfoCallback /*GetOpInfo*/,
2283                               LLVMSymbolLookupCallback /*SymbolLookUp*/,
2284                               void *DisInfo,
2285                               MCContext *Ctx,
2286                               std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2287   return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2288 }
2289 
2290 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2291                                                 const MCSubtargetInfo &STI,
2292                                                 MCContext &Ctx) {
2293   return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2294 }
2295 
2296 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2297   TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2298                                          createAMDGPUDisassembler);
2299   TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2300                                        createAMDGPUSymbolizer);
2301 }
2302