//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx,
                                       MCInstrInfo const *MCII) :
  MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
  TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
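
// Worked example (illustrative only, not from the original source): a branch
// encoded with Imm = 3 at Addr = 0x100 gives SignedOffset = 12 and a symbolic
// target of 0x100 + 4 + 12 = 0x110, while Imm = 0xFFFF (simm16 -1) wraps to
// -4 in the 18-bit APInt and resolves to 0x100, the branch's own address.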

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm,
                                     uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isVI()) {         // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else {                    // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
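
// For illustration: a raw field of 0x1FFFFF decodes as 0xFFFFF (the maximum
// 20-bit unsigned offset) on VI, but sign-extends to -1 on GFX9+, where
// bit 20 acts as the sign bit.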

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
                                  uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                       unsigned Imm, \
                                       uint64_t /*Addr*/, \
                                       const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
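
// As an example of the expansion, DECODE_OPERAND_REG(VGPR_32) defines a
// static DecodeVGPR_32RegisterClass() that forwards its immediate to
// AMDGPUDisassembler::decodeOperand_VGPR_32(); the tablegen-generated tables
// in AMDGPUGenDisassemblerTables.inc refer to these Decode* names.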

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)
DECODE_OPERAND_REG(VReg_256)
DECODE_OPERAND_REG(VReg_512)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

DECODE_OPERAND_REG(AGPR_32)
DECODE_OPERAND_REG(AReg_128)
DECODE_OPERAND_REG(AReg_512)
DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

static DecodeStatus decodeOperand_VS_16(MCInst &Inst,
                                        unsigned Imm,
                                        uint64_t Addr,
                                        const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VS_32(MCInst &Inst,
                                        unsigned Imm,
                                        uint64_t Addr,
                                        const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
}

static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
}
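
// Note on the 'Imm | 512' above: bit 9 of the 10-bit source encoding selects
// the accumulator file, so forcing it makes decodeSrcOp produce an AGPR
// instead of a VGPR (see the 'Val & 512' test in decodeSrcOp below).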

static DecodeStatus decodeOperand_AReg_512(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst,
                                            unsigned Imm,
                                            uint64_t Addr,
                                            const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
}

static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,
                                          unsigned Imm,
                                          uint64_t Addr,
                                          const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
}

static DecodeStatus decodeOperand_VGPR_32(MCInst &Inst,
                                          unsigned Imm,
                                          uint64_t Addr,
                                          const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

static bool isValidDPP8(const MCInst &MI) {
  using namespace llvm::AMDGPU::DPP;
  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
  assert(FiIdx != -1);
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  unsigned MaxInstBytesNum =
      std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to select the encoding length from some bit
    // predicate, but none is known yet, so try every table we have.

    // Try to decode DPP and SDWA first to resolve the conflict with VOP1 and
    // VOP2 encodings.
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
        if (Res) {
          if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
              == -1)
            break;
          if (convertDPP8Inst(MI) == MCDisassembler::Success)
            break;
          MI = MCInst(); // clear
        }
      }

      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;

      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 opcodes for FMA variants. Try to decode using this
      // special table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes, as the DPP64 attempt may have eaten too much.
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction.
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
      if (Res) break;
    }

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
    if (Res) break;

    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
                        (SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) &&
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) {
    insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
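    // Illustrative example: with 5 NSA address arguments, NSAWords below is
    // (5 + 3) / 4 = 2, and each extra VGPR index is then read from one byte
    // of those trailing dwords.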
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        for (unsigned i = 0; i < NSAArgs; ++i) {
          MI.insert(MI.begin() + VAddr0Idx + 1 + i,
                    decodeOperand_VGPR_32(Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  // If the opcode was not recognized, we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  // Insert dummy unused src modifiers.
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);

  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it were a single dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  if (DMaskIdx == -1 || TFEIdx == -1) { // intersect_ray
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
      assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa ||
             MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
             MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
             MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
      addOperand(MI, MCOperand::createImm(1));
    }
    return MCDisassembler::Success;
  }

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  bool IsNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
        AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());

    AddrSize = BaseOpcode->NumExtraArgs +
               (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
               (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
               (BaseOpcode->LodOrClampOrMip ? 1 : 0);
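    // Hypothetical example: for a 2D sample with gradients (image_sample_d),
    // Dim->NumGradients = 4 and Dim->NumCoords = 2, giving AddrSize = 6,
    // which the non-NSA padding below rounds up to 8.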
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
    if (!IsNSA) {
      if (AddrSize > 8)
        AddrSize = 16;
      else if (AddrSize > 4)
        AddrSize = 8;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        // The NSA encoding does not contain enough operands for the combination
        // of base opcode / dimension. Should this be an error?
        return MCDisassembler::Success;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }
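
  // For example, DMask = 0b1011 enables three channels, so DstSize = 3; a
  // gather4 always returns four dwords, and packed D16 halves the count,
  // rounding up.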

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                        DstSize, AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return MCDisassembler::Success;
    }
  }

  unsigned NewVAddr0 = AMDGPU::NoRegister;
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
    unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
    VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;

    auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
    NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
                                        &MRI.getRegClass(AddrRCID));
    if (NewVAddr0 == AMDGPU::NoRegister)
      return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddr0 != AMDGPU::NoRegister) {
    MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI has 102.
  // Valery: here we accept as much as we can and let the assembler sort it
  // out.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
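
// Example: for AMDGPU::SGPR_64RegClassID, shift = 1, so an encoded Val of 4
// selects register pair index 2, i.e. s[4:5]; an odd Val would trigger the
// alignment warning above.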

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
  return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The table-gen generated disassembler doesn't care about operand types,
  // leaving only the register class, so an SSrc_32 operand turns into an
  // SReg_32; therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // The table-gen generated disassembler doesn't care about operand types,
  // leaving only the register class, so an SSrc_32 operand turns into an
  // SReg_32; therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are assumed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  // The cast to int64_t prevents negative overflow.
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
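
// Sketch of the resulting mapping, assuming the usual encoding values
// (INLINE_INTEGER_C_MIN = 128, INLINE_INTEGER_C_POSITIVE_MAX = 192):
// Imm = 128 -> 0, 129 -> 1, ..., 192 -> 64, 193 -> -1, ..., 208 -> -16.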

static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
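
// The 16-bit cases above are IEEE half-precision bit patterns: 0x3800 is 0.5,
// 0x3C00 is 1.0, 0x4000 is 2.0, and so on, with 0x3118 approximating
// 1/(2*pi).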

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
  case OPW128: // splat constants
  case OPW512:
  case OPW1024:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64: return AReg_64RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}

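// A rough sketch of the source-operand value ranges handled below (bit 9,
// when set, selects the accumulator file): values up to SGPR_MAX are SGPRs,
// the TTMP window maps to trap temporaries, 128..208 are inline integers,
// 240..248 are inline floats, 255 requests a trailing literal, and 256..511
// are VGPRs; anything else falls through to the special registers.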
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 125: return createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 125: return createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    // XXX: the cast to int is needed to avoid a tautological-compare warning
    // (the comparison with unsigned would always be true).
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
          STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val)
                      : decodeSpecialReg32(Val);
    } else {
      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
    }
  } else {
    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
  }
}

MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
    decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
  } while (0)
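
// For instance, PRINT_DIRECTIVE(".amdhsa_ieee_mode",
// COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE) masks out the field, shifts it right by
// COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE_SHIFT, and emits a line such as
// "\t.amdhsa_ieee_mode 1".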

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately backward compute #VGPRs used from
  // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
  // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
  // simply calculate the inverse of what the assembler does.

  uint32_t GranulatedWorkitemVGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;

  uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
                          AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
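
  // Illustrative round trip: with a VGPR encoding granule of 4, an encoded
  // GRANULATED_WORKITEM_VGPR_COUNT of 5 prints ".amdhsa_next_free_vgpr 24",
  // and assembling that value granulates back to 5.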

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  // We cannot backward compute values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
  // following directives can't be computed:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the
  // assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3 directives.

  uint32_t GranulatedWavefrontSGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;

  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
    return MCDisassembler::Fail;

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << '\n';

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
    return MCDisassembler::Fail;

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
  }
  return MCDisassembler::Success;
}

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  PRINT_DIRECTIVE(
      ".amdhsa_system_sgpr_private_segment_wavefront_offset",
      COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
    return MCDisassembler::Fail;

  return MCDisassembler::Success;
}

#undef PRINT_DIRECTIVE
1396 
1397 MCDisassembler::DecodeStatus
1398 AMDGPUDisassembler::decodeKernelDescriptorDirective(
1399     DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
1400     raw_string_ostream &KdStream) const {
1401 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
1402   do {                                                                         \
1403     KdStream << Indent << DIRECTIVE " "                                        \
1404              << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
1405   } while (0)
1406 
1407   uint16_t TwoByteBuffer = 0;
1408   uint32_t FourByteBuffer = 0;
1409   uint64_t EightByteBuffer = 0;
1410 
1411   StringRef ReservedBytes;
1412   StringRef Indent = "\t";
1413 
1414   assert(Bytes.size() == 64);
1415   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
1416 
  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return MCDisassembler::Success;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return MCDisassembler::Success;

  case amdhsa::RESERVED0_OFFSET:
    // 8 reserved bytes, must be 0.
    EightByteBuffer = DE.getU64(Cursor);
    if (EightByteBuffer) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
    // KERNEL_CODE_ENTRY_BYTE_OFFSET
    // No directive controls this for Code Object V3 so far, so simply skip it
    // during disassembly.
    DE.skip(Cursor, 8);
    return MCDisassembler::Success;

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0) {
        return MCDisassembler::Fail;
      }
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    // COMPUTE_PGM_RSRC3
    //  - Only set for GFX10; must be 0 for GFX6-9.
    //  - Currently no directives directly control this.
    FourByteBuffer = DE.getU32(Cursor);
    if (!isGFX10Plus() && FourByteBuffer) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
        MCDisassembler::Fail) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
        MCDisassembler::Fail) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return MCDisassembler::Fail;

    // ENABLE_WAVEFRONT_SIZE32 is reserved on GFX9 and must be 0; on GFX10+
    // it corresponds to .amdhsa_wavefront_size32.
    if (isGFX9() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return MCDisassembler::Fail;
    } else if (isGFX10Plus()) {
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    }

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
      return MCDisassembler::Fail;

    return MCDisassembler::Success;

  case amdhsa::RESERVED2_OFFSET:
    // 6 bytes from here are reserved, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 6);
    for (int I = 0; I < 6; ++I) {
      if (ReservedBytes[I] != 0)
        return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  default:
    llvm_unreachable("Unhandled index. Case statements cover everything.");
    return MCDisassembler::Fail;
  }
#undef PRINT_DIRECTIVE
}

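// Decodes the 64-byte kernel descriptor at KdAddress and prints it to stdout
// as a block of assembler directives, e.g. (kernel name and field values are
// illustrative):
//
//   .amdhsa_kernel my_kernel
//           .amdhsa_group_segment_fixed_size 0
//           ...
//   .end_amdhsa_kernel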
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
  // CP microcode requires the kernel descriptor to be 64-byte aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return MCDisassembler::Fail;

  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    MCDisassembler::DecodeStatus Status =
        decodeKernelDescriptorDirective(C, Bytes, KdStream);

    cantFail(C.takeError());

    if (Status == MCDisassembler::Fail)
      return MCDisassembler::Fail;
  }
  KdStream << ".end_amdhsa_kernel\n";
  outs() << KdStream.str();
  return MCDisassembler::Success;
}

Optional<MCDisassembler::DecodeStatus>
AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                                  ArrayRef<uint8_t> Bytes, uint64_t Address,
                                  raw_ostream &CStream) const {
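  // Returning None means the symbol needs no target-specific handling; Size
  // is set for recognized symbols even when decoding them fails.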
  // Right now only the kernel descriptor needs to be handled; all other
  // symbols are ignored for target-specific handling.
  // TODO:
  // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
  // Object V2 and V3 when symbols are marked protected.

  // amd_kernel_code_t for Code Object V2.
  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
    Size = 256;
    return MCDisassembler::Fail;
  }

  // Code Object V3 kernel descriptors.
  StringRef Name = Symbol.Name;
  if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
    Size = 64; // Size = 64 regardless of success or failure.
    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
  }
  return None;
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find the symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

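  // A branch target is symbolized only if an untyped (STT_NOTYPE) label
  // exists at exactly the target address.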
  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
                             [Value](const SymbolInfoTy& Val) {
                                return Val.Addr == static_cast<uint64_t>(Value)
                                    && Val.Type == ELF::STT_NOTYPE;
                             });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

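// The DisInfo pointer supplied by the disassembler client is forwarded
// opaquely; AMDGPUSymbolizer expects it to point at the client's
// SectionSymbolsTy (see tryAddingSymbolicOperand above).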
static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}