//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

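// The highest addressable source SGPR encoding differs by generation: GFX10+
// exposes more SGPR encodings than SI..GFX9 (see AMDGPU::EncValues).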
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx,
                                       MCInstrInfo const *MCII) :
  MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
  TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

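// Insert Op at the position of the operand named NameIdx, if the opcode has
// such an operand; returns that operand index, or -1 if the name is absent.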
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

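// The SOPP branch offset is a signed 16-bit count of dwords relative to the
// instruction following the branch. As a worked example (values chosen purely
// for illustration): Imm = 0xFFFF (simm16 = -1) at Addr = 0x100 resolves to
// 0x100 + 4 + (-1 * 4) = 0x100, i.e. a branch to itself.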
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

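// For example, Imm = 0x1FFFFF decodes to an offset of -1 on GFX9+, since
// SignExtend64<21>(0x1FFFFF) == -1, while VI keeps the full 20-bit unsigned
// value.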
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isVI()) {         // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else {                    // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

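// Each DECODE_OPERAND_REG(RC) below expands to a static wrapper with the
// signature the TableGen'erated decoder tables expect; for instance,
// DECODE_OPERAND_REG(VGPR_32) defines DecodeVGPR_32RegisterClass(), which
// forwards to AMDGPUDisassembler::decodeOperand_VGPR_32().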
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)
DECODE_OPERAND_REG(VReg_256)
DECODE_OPERAND_REG(VReg_512)
DECODE_OPERAND_REG(VReg_1024)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

DECODE_OPERAND_REG(AGPR_32)
DECODE_OPERAND_REG(AReg_64)
DECODE_OPERAND_REG(AReg_128)
DECODE_OPERAND_REG(AReg_256)
DECODE_OPERAND_REG(AReg_512)
DECODE_OPERAND_REG(AReg_1024)
DECODE_OPERAND_REG(AV_32)
DECODE_OPERAND_REG(AV_64)
DECODE_OPERAND_REG(AV_128)
DECODE_OPERAND_REG(AV_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm));
}

static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr,
                                        const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm,
                                        uint64_t Addr,
                                        const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
}

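// The AReg decoders below OR in bit 9 (Imm | 512) so that decodeSrcOp()
// selects the accumulator (AGPR) register file rather than ordinary VGPRs.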
static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
}

static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
}

static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
}

static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
}

static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
}

static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
}

static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
}

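// kimm operands always carry a literal in the encoding; the shared literal
// slot is decoded via decodeMandatoryLiteralConstant() defined further below.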
static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus
decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
}

static DecodeStatus
decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
                             const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(
      Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
}

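// Returns true if operand OpIdx of Inst is an AGPR; for register tuples the
// check is performed on the first subregister.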
static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
                                             AMDGPUDisassembler::OpWidthTy Opw,
                                             const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The AGPR bit is decoded along with the vdst, the first operand, so we
    // need to change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

335 
336 static DecodeStatus
337 DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
338                              const MCDisassembler *Decoder) {
339   return decodeOperand_AVLdSt_Any(Inst, Imm,
340                                   AMDGPUDisassembler::OPW32, Decoder);
341 }
342 
343 static DecodeStatus
344 DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
345                              const MCDisassembler *Decoder) {
346   return decodeOperand_AVLdSt_Any(Inst, Imm,
347                                   AMDGPUDisassembler::OPW64, Decoder);
348 }
349 
350 static DecodeStatus
351 DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
352                              const MCDisassembler *Decoder) {
353   return decodeOperand_AVLdSt_Any(Inst, Imm,
354                                   AMDGPUDisassembler::OPW96, Decoder);
355 }
356 
357 static DecodeStatus
358 DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
359                               const MCDisassembler *Decoder) {
360   return decodeOperand_AVLdSt_Any(Inst, Imm,
361                                   AMDGPUDisassembler::OPW128, Decoder);
362 }
363 
364 static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm,
365                                           uint64_t Addr,
366                                           const MCDisassembler *Decoder) {
367   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
368   return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
369 }
370 
371 #define DECODE_SDWA(DecName) \
372 DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
373 
374 DECODE_SDWA(Src32)
375 DECODE_SDWA(Src16)
376 DECODE_SDWA(VopcDst)
377 
378 #include "AMDGPUGenDisassemblerTables.inc"
379 
380 //===----------------------------------------------------------------------===//
381 //
382 //===----------------------------------------------------------------------===//
383 
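// Read a little-endian value of type T from the front of Bytes and advance
// the ArrayRef past the bytes that were consumed.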
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

// The disassembler is greedy, so we must check the FI operand value to avoid
// parsing a DPP8 instruction when the required literal is not set. For DPP16
// the autogenerated decoder checks the DPP literal itself.
static bool isValidDPP8(const MCInst &MI) {
  using namespace llvm::AMDGPU::DPP;
  int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
  assert(FiIdx != -1);
  if ((unsigned)FiIdx >= MI.getNumOperands())
    return false;
  unsigned Fi = MI.getOperand(FiIdx).getImm();
  return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
}


DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to switch on the encoding length using some bit
    // predicate, but no such predicate is known yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
        Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
        if (Res) {
          if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
              == -1)
            break;
          if (convertDPP8Inst(MI) == MCDisassembler::Success)
            break;
          MI = MCInst(); // clear
        }
      }

      Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
      if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
        break;

      MI = MCInst(); // clear

      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
      Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address);
      if (Res)
        break;
    }

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
      Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
      if (Res) break;
    }

    Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
    if (Res) break;

    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;

    if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
      Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
      if (Res)
        break;
    }

    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
              MI.getOpcode() == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F64_e64_gfx90a ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
              MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
          (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
             (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) {
    // GFX90A lost TFE; its encoding slot is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags &
              (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

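  // GFX10 NSA (non-sequential address) MIMG encodings carry the extra VGPR
  // address operands in trailing instruction dwords; decode them here before
  // the generic MIMG fixups run.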
  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords) {
        Res = MCDisassembler::Fail;
      } else {
        for (unsigned i = 0; i < NSAArgs; ++i) {
          MI.insert(MI.begin() + VAddr0Idx + 1 + i,
                    decodeOperand_VGPR_32(Bytes[i]));
        }
        Bytes = Bytes.slice(4 * NSAWords);
      }
    }

    if (Res)
      Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  if (Res && ImmLitIdx != -1)
    Res = convertFMAanyK(MI, ImmLitIdx);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

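// SDWA encodings omit some operands that the MCInstrDesc expects; reinstate
// them based on the subtarget: a clamp modifier on GFX9/GFX10 VOPC, and an
// implicit VCC sdst or an omod modifier on VI.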
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

// We must check that FI matches the DPP8 literal to reject instructions that
// are not genuine DPP8, and the optional MI operands must be added first so
// that FI can be checked.
DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  // Insert dummy unused src modifiers.
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);

  return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it were one dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
      addOperand(MI, MCOperand::createImm(1));
    }
    return MCDisassembler::Success;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
  bool IsNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
    if (!IsNSA) {
      if (AddrSize > 8)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        // The NSA encoding does not contain enough operands for the combination
        // of base opcode / dimension. Should this be an error?
        return MCDisassembler::Success;
      }
    }
  }

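  // Each bit set in dmask enables one result channel; gather4 instructions
  // always return four channels, and packed D16 halves the dword count.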
  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return MCDisassembler::Success;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
  if (NewOpcode == -1)
    return MCDisassembler::Success;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return MCDisassembler::Success;
    }
  }

  unsigned NewVAddr0 = AMDGPU::NoRegister;
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
    unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
    VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;

    auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
    NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
                                        &MRI.getRegClass(AddrRCID));
    if (NewVAddr0 == AMDGPU::NoRegister)
      return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddr0 != AMDGPU::NoRegister) {
    MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }

  return MCDisassembler::Success;
}

DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
                                                int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.OpInfo[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const {
  return decodeSrcOp(OPWV232, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some operands that would ordinarily be VSrc_32 are restricted to
  // VGPR_32, so clear the extra high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
  return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
  return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_AV_512(unsigned Val) const {
  return decodeSrcOp(OPW512, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The TableGen'erated disassembler doesn't care about operand types, keeping
  // only the register class, so an SSrc_32 operand turns into SReg_32;
  // therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // The TableGen'erated disassembler doesn't care about operand types, keeping
  // only the register class, so an SSrc_32 operand turns into SReg_32;
  // therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

// Decode literals for instructions which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

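// Inline integer constants occupy one contiguous range of the operand
// encoding: values from INLINE_INTEGER_C_MIN up to the positive maximum map
// to 0..64, and the values above that map to -1..-16 (hence the reversed
// subtraction in the second arm below).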
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}

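// Inline float constants 240..247 encode +/-0.5, +/-1.0, +/-2.0 and +/-4.0,
// and 248 encodes 1/(2*pi); the helpers below return the raw bit pattern of
// each value at the requested width.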
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
  case OPW128: // splat constants
  case OPW512:
  case OPW1024:
  case OPWV232:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
  case OPW256:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fallthrough
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64:
  case OPWV232: return VReg_64RegClassID;
  case OPW96: return VReg_96RegClassID;
  case OPW128: return VReg_128RegClassID;
  case OPW160: return VReg_160RegClassID;
  case OPW256: return VReg_256RegClassID;
  case OPW512: return VReg_512RegClassID;
  case OPW1024: return VReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fallthrough
  case OPW32:
  case OPW16:
  case OPWV216:
    return AGPR_32RegClassID;
  case OPW64:
  case OPWV232: return AReg_64RegClassID;
  case OPW96: return AReg_96RegClassID;
  case OPW128: return AReg_128RegClassID;
  case OPW160: return AReg_160RegClassID;
  case OPW256: return AReg_256RegClassID;
  case OPW512: return AReg_512RegClassID;
  case OPW1024: return AReg_1024RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fallthrough
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64:
  case OPWV232: return SGPR_64RegClassID;
  case OPW96: return SGPR_96RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW160: return SGPR_160RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fallthrough
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64:
  case OPWV232: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}

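// Decode a source operand from its enum encoding. The value space is laid out
// as follows (see AMDGPU::EncValues): SGPRs at the bottom, then TTMPs, inline
// integer constants, inline float constants, the literal-constant marker,
// special registers, and finally VGPRs from VGPR_MIN upward; on subtargets
// with accumulators, bit 9 redirects the VGPR range to AGPRs.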
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
                                          bool MandatoryLiteral) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 1024); // enum10

  bool IsAGPR = Val & 512;
  Val &= 511;

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0, "");
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST) {
    if (MandatoryLiteral)
      // Keep a sentinel value for deferred setting
      return MCOperand::createImm(LITERAL_CONST);
    else
      return decodeLiteralConstant();
  }

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
  case OPWV232:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0, "");
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 125: return createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 125: return createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(SRC_VCCZ);
  case 252: return createRegOperand(SRC_EXECZ);
  case 253: return createRegOperand(SRC_SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    // The cast to int avoids a tautological "comparison with unsigned is
    // always true" warning, since SRC_VGPR_MIN is zero.
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

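    // Past the VGPR/SGPR/TTMP register ranges, the remaining encodings are
    // the ordinary scalar-source values (inline constants and special
    // registers) biased by SRC_SGPR_MIN; remove the bias and reuse the common
    // decoders.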
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
          STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];

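  // VOPC_DST_VCC_MASK selects between the implicit VCC destination and an
  // explicitly encoded scalar destination; the low bits (VOPC_DST_SGPR_MASK)
  // then hold the SGPR/TTMP/special-register encoding.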
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
      return createSRegOperand(TTmpClsId, TTmpIdx);
    } else if (Val > SGPR_MAX) {
      return IsWave64 ? decodeSpecialReg64(Val)
                      : decodeSpecialReg32(Val);
    } else {
      return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
    }
  } else {
    return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
  }
}

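// Boolean (lane-mask) operands occupy a 64-bit SGPR pair in wave64 mode and a
// single 32-bit SGPR in wave32 mode.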
MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
  return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
    decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
}

//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
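// Each field constant below has a companion *_SHIFT constant, which the
// token-pasted MASK##_SHIFT relies on. For example, a two-bit field at bit
// offset 12 would have MASK == 0x3000 and MASK_SHIFT == 12, so the macro
// prints (FourByteBuffer & 0x3000) >> 12 after the directive name.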
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';           \
  } while (0)

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately recover the original #VGPRs from
  // GRANULATED_WORKITEM_VGPR_COUNT; we only need the reassembled binary to
  // encode the same GRANULATED_WORKITEM_VGPR_COUNT, so we simply invert the
  // computation the assembler performs.

  uint32_t GranulatedWorkitemVGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;

  uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
                          AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
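  // For example, assuming a VGPR encoding granule of 4, an encoded count of 3
  // decodes to .amdhsa_next_free_vgpr 16, and the assembler granulates 16
  // back to an encoded count of 3, so the field round-trips exactly.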

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  // We cannot backward compute the values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT, so the original values for the following
  // directives can't be recovered:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3 directives.
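  // A worked example, assuming an SGPR encoding granule of 8: a kernel built
  // with .amdhsa_next_free_sgpr 12 plus 2 SGPRs reserved for VCC granulates
  // 14 up to 16 and encodes a count of 1. We decode that back as
  // .amdhsa_next_free_sgpr 16 with all reservations 0, which re-encodes to
  // the same count of 1.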

  uint32_t GranulatedWavefrontSGPRCount =
      (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
      COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;

  if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
    return MCDisassembler::Fail;

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << '\n';

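  // Fields with no corresponding .amdhsa_* directive must be zero; a nonzero
  // value could not be reproduced by reassembling our output, so decoding
  // fails instead.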
  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
    return MCDisassembler::Fail;

  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);

  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
    return MCDisassembler::Fail;

  if (isGFX10Plus()) {
    PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                    COMPUTE_PGM_RSRC1_WGP_MODE);
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
  }
  return MCDisassembler::Success;
}

// NOLINTNEXTLINE(readability-identifier-naming)
MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
    return MCDisassembler::Fail;

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
    return MCDisassembler::Fail;

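  // The exception-enable bits below map one-to-one onto .amdhsa_exception_*
  // directives and can be printed back directly.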
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
    return MCDisassembler::Fail;

  return MCDisassembler::Success;
}

#undef PRINT_DIRECTIVE

MCDisassembler::DecodeStatus
AMDGPUDisassembler::decodeKernelDescriptorDirective(
    DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
    raw_string_ostream &KdStream) const {
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " "                                        \
             << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
  } while (0)

  uint16_t TwoByteBuffer = 0;
  uint32_t FourByteBuffer = 0;

  StringRef ReservedBytes;
  StringRef Indent = "\t";

  assert(Bytes.size() == 64);
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);

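  // Dispatch on the cursor's byte offset within the 64-byte descriptor. The
  // field sizes handled below (4 + 4 + 4 + 4 + 8 + 20 + 4 + 4 + 4 + 2 + 6)
  // sum to exactly 64, so the cursor always lands on one of the case labels.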
  switch (Cursor.tell()) {
  case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
             << '\n';
    return MCDisassembler::Success;

  case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_private_segment_fixed_size "
             << FourByteBuffer << '\n';
    return MCDisassembler::Success;

  case amdhsa::KERNARG_SIZE_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    KdStream << Indent << ".amdhsa_kernarg_size "
             << FourByteBuffer << '\n';
    return MCDisassembler::Success;

  case amdhsa::RESERVED0_OFFSET:
    // 4 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 4);
    for (int I = 0; I < 4; ++I) {
      if (ReservedBytes[I] != 0) {
        return MCDisassembler::Fail;
      }
    }
    return MCDisassembler::Success;

  case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
    // KERNEL_CODE_ENTRY_BYTE_OFFSET
    // So far no directive controls this for Code Object V3, so simply skip it
    // during disassembly.
    DE.skip(Cursor, 8);
    return MCDisassembler::Success;

  case amdhsa::RESERVED1_OFFSET:
    // 20 reserved bytes, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 20);
    for (int I = 0; I < 20; ++I) {
      if (ReservedBytes[I] != 0) {
        return MCDisassembler::Fail;
      }
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
    // COMPUTE_PGM_RSRC3
    //  - Only set for GFX10; GFX6-9 require this field to be 0.
    //  - Currently no directives directly control this.
    FourByteBuffer = DE.getU32(Cursor);
    if (!isGFX10Plus() && FourByteBuffer) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
        MCDisassembler::Fail) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
    FourByteBuffer = DE.getU32(Cursor);
    if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
        MCDisassembler::Fail) {
      return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
    using namespace amdhsa;
    TwoByteBuffer = DE.getU16(Cursor);

    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
    if (!hasArchitectedFlatScratch())
      PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
                      KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
    PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
                    KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
      return MCDisassembler::Fail;

    // Reserved for GFX9.
    if (isGFX9() &&
        (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
      return MCDisassembler::Fail;
    } else if (isGFX10Plus()) {
      PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
                      KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
    }

    if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
      return MCDisassembler::Fail;

    return MCDisassembler::Success;

  case amdhsa::RESERVED2_OFFSET:
    // 6 bytes from here are reserved, must be 0.
    ReservedBytes = DE.getBytes(Cursor, 6);
    for (int I = 0; I < 6; ++I) {
      if (ReservedBytes[I] != 0)
        return MCDisassembler::Fail;
    }
    return MCDisassembler::Success;

  default:
    llvm_unreachable("Unhandled index. Case statements cover everything.");
    return MCDisassembler::Fail;
  }
#undef PRINT_DIRECTIVE
}

MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
    StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
  // CP microcode requires the kernel descriptor to be 64-byte aligned.
  if (Bytes.size() != 64 || KdAddress % 64 != 0)
    return MCDisassembler::Fail;

  std::string Kd;
  raw_string_ostream KdStream(Kd);
  KdStream << ".amdhsa_kernel " << KdName << '\n';

  DataExtractor::Cursor C(0);
  while (C && C.tell() < Bytes.size()) {
    MCDisassembler::DecodeStatus Status =
        decodeKernelDescriptorDirective(C, Bytes, KdStream);

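    // Every directive decoder reads a fixed-size field that lies entirely
    // within the size-checked 64-byte buffer, so the cursor should never
    // carry an error at this point.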
    cantFail(C.takeError());

    if (Status == MCDisassembler::Fail)
      return MCDisassembler::Fail;
  }
  KdStream << ".end_amdhsa_kernel\n";
  outs() << KdStream.str();
  return MCDisassembler::Success;
}

Optional<MCDisassembler::DecodeStatus>
AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
                                  ArrayRef<uint8_t> Bytes, uint64_t Address,
                                  raw_ostream &CStream) const {
  // Right now only the kernel descriptor needs target-specific handling;
  // all other symbols are ignored.
  // TODO:
  // Fix the spurious symbol issue for AMDGPU kernels. It exists for both Code
  // Object V2 and V3 when symbols are marked protected.

  // amd_kernel_code_t for Code Object V2.
  if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
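    // Decoding amd_kernel_code_t is not implemented; Size is still set so
    // that the caller can step over the 256-byte blob.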
    Size = 256;
    return MCDisassembler::Fail;
  }

  // Code Object V3 kernel descriptors.
  StringRef Name = Symbol.Name;
  if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) {
    Size = 64; // Size = 64 regardless of success or failure.
    return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
  }
  return None;
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find the symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
    return Val.Addr == static_cast<uint64_t>(Value) &&
           Val.Type == ELF::STT_NOTYPE;
  });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  // Add to the list of referenced addresses, so the caller can synthesize a
  // label.
  ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}
1919