//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx,
                                       MCInstrInfo const *MCII) :
  MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
  TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
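  // The target is relative to the address of the instruction following the
  // branch, hence the "+ 4" in the computation below.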
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isVI()) {         // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else {                    // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)
DECODE_OPERAND_REG(VReg_256)
DECODE_OPERAND_REG(VReg_512)
DECODE_OPERAND_REG(VReg_1024)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

DECODE_OPERAND_REG(AGPR_32)
DECODE_OPERAND_REG(AReg_64)
DECODE_OPERAND_REG(AReg_128)
DECODE_OPERAND_REG(AReg_256)
DECODE_OPERAND_REG(AReg_512)
DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)
DECODE_OPERAND_REG(AV_128)
DECODE_OPERAND_REG(AV_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm));
}

static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr,
                                        const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr,
                                        const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
}

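// The AReg decoders below OR bit 9 (value 512) into the operand value; in the
// 10-bit source-operand encoding consumed by decodeSrcOp, that bit selects
// the accumulator (AGPR) bank.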
static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
}

static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
}

static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
}

static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
}

static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
}

static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
}

static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus
decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
}

static DecodeStatus
decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
                                             AMDGPUDisassembler::OpWidthTy Opw,
                                             const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The AGPR bit is decoded along with vdst, the first operand, so we need
    // to change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are tied as well.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
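  // OR in bit 8 (value 256) so the 8-bit field decodes as a VGPR/AGPR rather
  // than an SGPR or inline constant (VGPR_MIN == 256 in the src encoding).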
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

static DecodeStatus
DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW32, Decoder);
}

static DecodeStatus
DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW64, Decoder);
}

static DecodeStatus
DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW96, Decoder);
}

static DecodeStatus
DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
                              const MCDisassembler *Decoder) {
  return decodeOperand_AVLdSt_Any(Inst, Imm,
                                  AMDGPUDisassembler::OPW128, Decoder);
}

static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

// The disassembler is greedy, so we need to check the FI operand value to
// avoid parsing a dpp8 instruction when the correct literal is not set. For
// dpp16, the autogenerated decoder checks the dpp literal itself.
static bool isValidDPP8(const MCInst &MI) {
  using namespace llvm::AMDGPU::DPP;
  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
  assert(FiIdx != -1);
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to switch on encoding length using some bit
    // predicate, but none is known yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
        if (Res) {
          if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
              == -1)
            break;
          if (convertDPP8Inst(MI) == MCDisassembler::Success)
            break;
          MI = MCInst(); // clear
        }
      }

      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;

      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address);
      if (Res)
        break;
    }

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
      if (Res) break;
    }

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
    if (Res) break;

    if (Bytes.size() < 4) break;
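    // Combine the dword already consumed with the next one to form the full
    // 64-bit encoding.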
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
      if (Res)
        break;
    }

    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
          (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
             (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) {
    // GFX90A lost TFE, its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
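    // In the GFX10 NSA (non-sequential address) encoding, each extra VADDR
    // register is encoded as one byte in the dwords following the
    // instruction, padded out to a whole number of dwords.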
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        for (unsigned i = 0; i < NSAArgs; ++i) {
          MI.insert(MI.begin() + VAddr0Idx + 1 + i,
                    decodeOperand_VGPR_32(Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  if (Res && ImmLitIdx != -1)
    Res = convertFMAanyK(MI, ImmLitIdx);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

// We must check FI == literal to reject non-genuine dpp8 instructions; the
// optional MI operands must be added first so that FI can be checked.
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  // Insert dummy unused src modifiers.
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);

  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}

// Note that before gfx10 the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it were one dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
      addOperand(MI, MCOperand::createImm(1));
    }
    return MCDisassembler::Success;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
  bool IsNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
    if (!IsNSA) {
      if (AddrSize > 8)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        // The NSA encoding does not contain enough operands for the combination
        // of base opcode / dimension. Should this be an error?
        return MCDisassembler::Success;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
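    // With packed D16, two 16-bit channels share one dword of the destination.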
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return MCDisassembler::Success;
    }
  }

  unsigned NewVAddr0 = AMDGPU::NoRegister;
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
    unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
    VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;

    auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
    NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
                                        &MRI.getRegClass(AddrRCID));
    if (NewVAddr0 == AMDGPU::NoRegister)
      return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddr0 != AMDGPU::NoRegister) {
    MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
                                                int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  assert(DescNumOps == MI.getNumOperands());
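  // Replace the LITERAL_CONST sentinel left by deferred-operand decoding with
  // the literal value that was actually read from the instruction stream.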
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.OpInfo[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI has 102.
  // Valery: here we accept as much as we can and let the assembler sort it out.
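  // The encoded value counts in units of 32-bit registers; the shift converts
  // it into an index into the wider register class.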
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const {
  return decodeSrcOp(OPWV232, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
  return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_512(unsigned Val) const {
  return decodeSrcOp(OPW512, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The table-gen'erated disassembler doesn't care about operand types, only
  // register classes, so an SSrc_32 operand turns into SReg_32; therefore we
  // accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // The table-gen'erated disassembler doesn't care about operand types, only
  // register classes, so an SSrc_32 operand turns into SReg_32; therefore we
  // accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now, all literal constants are assumed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants.
  // ToDo: deal with float/double constants.
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
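  // Encodings up to INLINE_INTEGER_C_POSITIVE_MAX map to 0, 1, 2, ... in
  // ascending order; the remaining encodings map to -1, -2, ... in order.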
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
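  // IEEE-754 half-precision bit patterns for the same constants as the f32
  // and f64 tables above (0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0).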
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
  case OPW128: // splat constants
  case OPW512:
  case OPW1024:
  case OPWV232:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
  case OPW256:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10
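  // Bit 9 of the 10-bit encoding selects the accumulator (AGPR) bank; the low
  // nine bits follow the ordinary source-operand encoding.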

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0, "");
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant();
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0, "");
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124:
    return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 124:
    if (isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(SGPR_NULL);
    break;
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    // XXX: the cast to int is needed to avoid a tautological-compare warning
    // ("comparison with unsigned is always true").
1416     if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1417         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1418       return createRegOperand(getVgprClassId(Width),
1419                               Val - SDWA9EncValues::SRC_VGPR_MIN);
1420     }
1421     if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1422         Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1423                               : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1424       return createSRegOperand(getSgprClassId(Width),
1425                                Val - SDWA9EncValues::SRC_SGPR_MIN);
1426     }
1427     if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1428         Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1429       return createSRegOperand(getTtmpClassId(Width),
1430                                Val - SDWA9EncValues::SRC_TTMP_MIN);
1431     }
1432 
1433     const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1434 
1435     if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1436       return decodeIntImmed(SVal);
1437 
1438     if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1439       return decodeFPImmed(Width, SVal);
1440 
1441     return decodeSpecialReg32(SVal);
1442   } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
1443     return createRegOperand(getVgprClassId(Width), Val);
1444   }
1445   llvm_unreachable("unsupported target");
1446 }
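// Worked example (symbolic, since the concrete SDWA9EncValues constants are
// defined elsewhere): an encoded Val of SRC_SGPR_MIN + 5 falls in the SGPR
// range and decodes to SGPR5 of the class selected by Width, while a Val
// past the register ranges is rebased to SVal = Val - SRC_SGPR_MIN and then
// interpreted as an inline constant (decodeIntImmed / decodeFPImmed) or a
// special 32-bit register.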

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
          STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    }
    if (Val > SGPR_MAX)
      return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
    return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
  }
  return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
}
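// Worked example: with VOPC_DST_VCC_MASK clear, the destination is the
// implicit VCC (VCC_LO in wave32). With the bit set, the remaining SGPR
// field selects an explicit destination: a TTMP index, a special register
// when the value exceeds SGPR_MAX, or otherwise an SGPR pair in wave64 and
// a single SGPR in wave32.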

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
    decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
  } while (0)
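// For example, PRINT_DIRECTIVE(".amdhsa_ieee_mode",
// COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) expands (via the MASK##_SHIFT token
// paste) to print the field extracted as
//   (FourByteBuffer & COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) >>
//       COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE_SHIFT
// so every directive below relies on a matching *_SHIFT constant existing.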

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately recover the number of VGPRs used from
  // GRANULATED_WORKITEM_VGPR_COUNT; we only need the reassembled binary to
  // encode the same GRANULATED_WORKITEM_VGPR_COUNT, so we simply invert the
  // computation the assembler performs.

  uint32_t GranulatedWorkitemVGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;

  uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
                          AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
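  // Worked example of the inversion above (the granule value is
  // illustrative; the real one comes from getVGPREncodingGranule): with a
  // granule of 4, an encoded GRANULATED_WORKITEM_VGPR_COUNT of 5 prints
  // ".amdhsa_next_free_vgpr 24", since (5 + 1) * 4 == 24.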

  // We cannot backward compute the values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT, so the original values for the
  // following directives can't be recovered:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the
  // assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from
  // NEXT_FREE_SGPR are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3
  // directives.

  uint32_t GranulatedWavefrontSGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;

  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
    return MCDisassembler::Fail;

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << '\n';
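  // Worked example (the granule value is illustrative; the real one comes
  // from getSGPREncodingGranule): with a granule of 8 and an encoded
  // GRANULATED_WAVEFRONT_SGPR_COUNT of 3, this prints
  // ".amdhsa_next_free_sgpr 32", since (3 + 1) * 8 == 32, while the three
  // reserve directives are pinned to 0 as described above.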

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
    return MCDisassembler::Fail;

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
  }
  return MCDisassembler::Success;
}

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
    return MCDisassembler::Fail;

  return MCDisassembler::Success;
}

#undef PRINT_DIRECTIVE

MCDisassembler::DecodeStatus
AMDGPUDisassembler::decodeKernelDescriptorDirective(
    DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
    raw_string_ostream &KdStream) const {
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)

  uint16_t TwoByteBuffer = 0;
  uint32_t FourByteBuffer = 0;

  StringRef ReservedBytes;
  StringRef Indent = "\t";

  assert(Bytes.size() == 64);
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
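  // The switch below dispatches on the cursor's byte offset within the
  // 64-byte descriptor: each case consumes exactly its field's bytes and
  // returns, so the caller's loop in decodeKernelDescriptor walks the
  // descriptor field by field from offset 0 to 64.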

  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return MCDisassembler::Success;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return MCDisassembler::Success;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return MCDisassembler::Success;

  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0) {
        return MCDisassembler::Fail;
      }
    }
    return MCDisassembler::Success;

  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
    // KERNEL_CODE_ENTRY_BYTE_OFFSET
    // So far no directive controls this for Code Object V3, so it is simply
    // skipped during disassembly.
    DE.skip(Cursor, 8);
    return MCDisassembler::Success;

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0) {
        return MCDisassembler::Fail;
      }
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    // COMPUTE_PGM_RSRC3
    //  - Only set on GFX10+; GFX6-9 require this field to be 0.
    //  - Currently no directives directly control this.
    FourByteBuffer = DE.getU32(Cursor);
    if (!isGFX10Plus() && FourByteBuffer) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
        MCDisassembler::Fail) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
        MCDisassembler::Fail) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return MCDisassembler::Fail;

    // Reserved for GFX9
    if (isGFX9() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return MCDisassembler::Fail;
    } else if (isGFX10Plus()) {
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    }

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
      return MCDisassembler::Fail;

    return MCDisassembler::Success;

  case amdhsa::RESERVED2_OFFSET:
    // 6 bytes from here are reserved, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 6);
    for (int I = 0; I < 6; ++I) {
      if (ReservedBytes[I] != 0)
        return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  default:
    llvm_unreachable("Unhandled index. Case statements cover everything.");
    return MCDisassembler::Fail;
  }
#undef PRINT_DIRECTIVE
}

MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
  // CP microcode requires the kernel descriptor to be 64-byte aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return MCDisassembler::Fail;

  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    MCDisassembler::DecodeStatus Status =
        decodeKernelDescriptorDirective(C, Bytes, KdStream);

    cantFail(C.takeError());

    if (Status == MCDisassembler::Fail)
      return MCDisassembler::Fail;
  }
  KdStream << ".end_amdhsa_kernel\n";
  outs() << KdStream.str();
  return MCDisassembler::Success;
}
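// For a valid descriptor this emits a reassemblable block of the form
// (directive set abbreviated):
//
//   .amdhsa_kernel <KdName>
//       .amdhsa_group_segment_fixed_size <n>
//       ...
//   .end_amdhsa_kernel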

Optional<MCDisassembler::DecodeStatus>
AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                                  ArrayRef<uint8_t> Bytes, uint64_t Address,
                                  raw_ostream &CStream) const {
  // Right now only the kernel descriptor needs target-specific handling;
  // all other symbols are ignored.
  // TODO:
  // Fix the spurious symbol issue for AMDGPU kernels. It exists for both
  // Code Object V2 and V3 when symbols are marked protected.

  // amd_kernel_code_t for Code Object V2.
  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
    Size = 256;
    return MCDisassembler::Fail;
  }

  // Code Object V3 kernel descriptors.
  StringRef Name = Symbol.Name;
  if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
    Size = 64; // Size = 64 regardless of success or failure.
    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
  }
  return None;
}
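// Example: an STT_OBJECT symbol named "foo.kd" is decoded as the kernel
// descriptor for kernel "foo" (the ".kd" suffix is dropped), while a V2
// STT_AMDGPU_HSA_KERNEL symbol only fixes Size to the 256-byte
// amd_kernel_code_t and reports failure.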

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find the symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
    return Val.Addr == static_cast<uint64_t>(Value) &&
           Val.Type == ELF::STT_NOTYPE;
  });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  // Add to the list of referenced addresses so the caller can synthesize a
  // label.
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}
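// With these hooks registered, the standard MC tools pick up the GCN
// disassembler automatically; for example (invocation is illustrative, not
// verified against a specific LLVM release):
//
//   llvm-mc --disassemble --triple=amdgcn-amd-amdhsa -mcpu=gfx900 enc.txt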