xref: /llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (revision 33d806a517994f8242f3fb1d35ad32f53604df61)
1 //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 /// \file
12 ///
13 /// This file contains the definitions for the AMDGPU ISA disassembler.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18 
19 #include "Disassembler/AMDGPUDisassembler.h"
20 #include "AMDGPU.h"
21 #include "AMDGPURegisterInfo.h"
22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23 #include "SIDefines.h"
24 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm-c/Disassembler.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/Twine.h"
30 #include "llvm/BinaryFormat/ELF.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
33 #include "llvm/MC/MCExpr.h"
34 #include "llvm/MC/MCFixedLenDisassembler.h"
35 #include "llvm/MC/MCInst.h"
36 #include "llvm/MC/MCSubtargetInfo.h"
37 #include "llvm/Support/Endian.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetRegistry.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstddef>
45 #include <cstdint>
46 #include <iterator>
47 #include <tuple>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "amdgpu-disassembler"
53 
54 #define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
55                             : AMDGPU::EncValues::SGPR_MAX_SI)
56 
57 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
58 
59 inline static MCDisassembler::DecodeStatus
60 addOperand(MCInst &Inst, const MCOperand& Opnd) {
61   Inst.addOperand(Opnd);
62   return Opnd.isValid() ?
63     MCDisassembler::Success :
64     MCDisassembler::SoftFail;
65 }
66 
67 static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
68                                 uint16_t NameIdx) {
69   int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
70   if (OpIdx != -1) {
71     auto I = MI.begin();
72     std::advance(I, OpIdx);
73     MI.insert(I, Op);
74   }
75   return OpIdx;
76 }
77 
// Decode a SOPP branch target. The encoded value is a signed 16-bit dword
// offset relative to the instruction following the branch; prefer a symbolic
// operand when the symbolizer can resolve the target address.
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  // Absolute target = sign-extended byte offset + address of the next
  // instruction (Addr + 4).
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  // No symbol found: fall back to the raw immediate encoding.
  return addOperand(Inst, MCOperand::createImm(Imm));
}
91 
// Boilerplate generator for the decoder callbacks referenced by the
// TableGen'erated decoder tables: defines a static function named
// \p StaticDecoderName that forwards the raw encoding to the
// AMDGPUDisassembler member \p DecoderName and appends the result.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                       unsigned Imm, \
                                       uint64_t /*Addr*/, \
                                       const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

// Shorthand for register-class decoders that follow the
// Decode<RC>RegisterClass / decodeOperand_<RC> naming convention.
#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

// Vector-register and vector-or-scalar source operand decoders.
DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

// Scalar-register operand decoders for the supported widths.
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)
123 
124 static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
125                                          unsigned Imm,
126                                          uint64_t Addr,
127                                          const void *Decoder) {
128   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
129   return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
130 }
131 
132 static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
133                                          unsigned Imm,
134                                          uint64_t Addr,
135                                          const void *Decoder) {
136   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
137   return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
138 }
139 
// SDWA operand decoders use the same name for the static callback and the
// AMDGPUDisassembler member it forwards to.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
146 
147 #include "AMDGPUGenDisassemblerTables.inc"
148 
149 //===----------------------------------------------------------------------===//
150 //
151 //===----------------------------------------------------------------------===//
152 
153 template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
154   assert(Bytes.size() >= sizeof(T));
155   const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
156   Bytes = Bytes.slice(sizeof(T));
157   return Res;
158 }
159 
// Try to decode \p Inst against a single generated decoder table. Decoding
// may consume extra bytes (a trailing literal constant), so the byte stream
// position is restored on failure to let the caller try another table.
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false; // decodeLiteralConstant caches at most one literal.
  const auto SavedBytes = Bytes; // Save in case operand decoding eats bytes.
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes; // Roll back any literal consumed by a failed attempt.
  return MCDisassembler::Fail;
}
176 
// Top-level MCDisassembler entry point: decode one instruction starting at
// Bytes_[0]. Encodings are tried table-by-table (64-bit DPP/SDWA first to
// disambiguate from VOP1/VOP2, then 32-bit, then remaining 64-bit tables);
// the first successful match wins, after which opcode-specific operand
// fixups are applied. Sets \p Size to the number of bytes consumed.
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  // An instruction is at most 8 bytes (64-bit encoding, or 32-bit encoding
  // plus a 32-bit literal).
  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Fall back to the 64-bit tables, extending DW with the next dword.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  // These MAC/FMAC opcodes read src2 but encode no src2_modifiers; add a
  // dummy operand so the operand list matches the MCInstrDesc.
  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // if the opcode was not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left)
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
274 
// Post-decode fixup for SDWA instructions: insert operands that are implied
// by the encoding but required by the MCInstrDesc operand list.
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}
293 
294 // Note that MIMG format provides no information about VADDR size.
295 // Consequently, decoded instructions always show address
296 // as if it has 1 dword, which could be not really so.
// Post-decode fixup for MIMG instructions: the encoding does not state the
// vdata register count, so rewrite the opcode and widen vdata according to
// the dmask (and d16/gather4 rules).
// Note that MIMG format provides no information about VADDR size.
// Consequently, decoded instructions always show address
// as if it has 1 dword, which could be not really so.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  // Only atomics have a vdst operand (the returned pre-op value).
  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  // Gather4 always writes 4 channels; otherwise one per dmask bit.
  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    // Packed d16 stores two half-sized channels per dword.
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsGather4) {
    if (D16 && AMDGPU::hasPackedD16(STI))
      NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
    else
      return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
    if (NewOpcode == -1)
      return MCDisassembler::Success;
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get first subregister of VData
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // vaddr will always appear as a single VGPR. This will look different than
  // how it is usually emitted because the number of register components is not
  // in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data)
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}
379 
380 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
381   return getContext().getRegisterInfo()->
382     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
383 }
384 
// Report a decode problem on the comment stream and return an invalid
// MCOperand, which addOperand() then turns into a SoftFail.
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}
394 
395 inline
396 MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
397   return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
398 }
399 
400 inline
401 MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
402                                                unsigned Val) const {
403   const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
404   if (Val >= RegCl.getNumRegs())
405     return errOperand(Val, Twine(getRegClassName(RegClassID)) +
406                            ": unknown register " + Twine(Val));
407   return createRegOperand(RegCl.getRegister(Val));
408 }
409 
// Create a scalar register operand. \p Val is the raw SGPR/TTMP number from
// the encoding; wider classes are aligned, so the register-pair/quad index is
// Val shifted right by the class's alignment (misalignment only warns).
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
449 
// VS (VGPR-or-scalar) source operands of each width all go through the
// generic 9-bit source decoder.
MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

// 16-bit and packed-v2f16 sources also accept inline constants/literals.
MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}
469 
470 MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
471   // Some instructions have operand restrictions beyond what the encoding
472   // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
473   // high bit.
474   Val &= 255;
475 
476   return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
477 }
478 
// VGPR-or-LDS sources go through the generic source decoder.
MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

// Pure VGPR tuple operands: Val is the first register's number.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}
494 
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // table-gen generated disassembler doesn't care about operand types
  // leaving only registry class so SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI; the
  // restriction is not enforced here.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI; the restriction is not
  // enforced here.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // table-gen generated disassembler doesn't care about operand types
  // leaving only registry class so SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

// 256/512-bit scalar operands appear only as destinations, which use the
// narrower destination encoding.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}
540 
// Read (and cache) the 32-bit literal constant that follows the instruction
// encoding. Only one literal per instruction is read; repeated calls return
// the cached value set up via HasLiteral/Literal.
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}
555 
// Decode an inline integer constant. Values up to INLINE_INTEGER_C_POSITIVE_MAX
// count up from INLINE_INTEGER_C_MIN; the rest count down below zero from
// INLINE_INTEGER_C_POSITIVE_MAX.
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
      // Cast prevents negative overflow.
}
565 
// Map an inline floating-point constant encoding (240-248) to the IEEE
// single-precision bit pattern it represents.
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
590 
// Map an inline floating-point constant encoding (240-248) to the IEEE
// double-precision bit pattern it represents.
static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}
615 
616 static int64_t getInlineImmVal16(unsigned Imm) {
617   switch (Imm) {
618   case 240:
619     return 0x3800;
620   case 241:
621     return 0xB800;
622   case 242:
623     return 0x3C00;
624   case 243:
625     return 0xBC00;
626   case 244:
627     return 0x4000;
628   case 245:
629     return 0xC000;
630   case 246:
631     return 0x4400;
632   case 247:
633     return 0xC400;
634   case 248: // 1 / (2 * PI)
635     return 0x3118;
636   default:
637     llvm_unreachable("invalid fp inline imm");
638   }
639 }
640 
// Decode an inline floating-point constant for the given operand width. The
// immediate operand carries the raw bit pattern of the constant.
MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    // Packed v2f16 uses the same half-precision bit pattern.
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}
658 
// Select the VGPR register class matching an operand width. 16-bit and
// packed 16-bit operands live in full 32-bit VGPRs (the default case also
// falls through to the 32-bit class).
unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

// Select the SGPR register class matching an operand width; same width
// conventions as getVgprClassId, plus the 256/512-bit destination classes.
unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

// Select the trap-temporary (TTMP) register class matching an operand width.
unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}
707 
708 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
709   using namespace AMDGPU::EncValues;
710 
711   unsigned TTmpMin =
712       (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
713   unsigned TTmpMax =
714       (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;
715 
716   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
717 }
718 
// Decode a generic 9-bit source operand encoding, checking each encoding
// range in turn: VGPRs, SGPRs, TTMPs, inline integer/FP constants, the
// literal-constant marker, and finally the special registers.
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  // Anything left is a special register encoding (VCC, EXEC, M0, ...).
  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
757 
// Decode a 7-bit scalar destination encoding (used for wide 256/512-bit
// destinations): only SGPR and TTMP ranges are valid here.
MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}
776 
// Decode a 32-bit special-register operand encoding (the halves of the
// 64-bit special registers plus the standalone ones). Unhandled encodings
// produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 125: return createRegOperand(SGPR_NULL);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
    // ToDo: no support for vccz register
  case 251: break;
    // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
810 
// Decode a 64-bit special-register operand encoding. Unhandled encodings
// produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
830 
// Decode an SDWA source operand. On GFX9/GFX10 the SDWA src field has its
// own encoding (VGPR / SGPR / TTMP ranges, then inline constants and special
// registers rebased to the SGPR origin); on VI it is always a VGPR number.
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
      STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                          : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Remaining encodings reuse the plain source encoding offset by the
    // SGPR origin: inline constants or special registers.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}
871 
/// Decode a 16-bit-wide SDWA source operand encoding.
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}
875 
/// Decode a 32-bit-wide SDWA source operand encoding.
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}
879 
880 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
881   using namespace AMDGPU::SDWA;
882 
883   assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
884           STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
885          "SDWAVopcDst should be present only on GFX9+");
886 
887   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
888     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
889 
890     int TTmpIdx = getTTmpIdx(Val);
891     if (TTmpIdx >= 0) {
892       return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
893     } else if (Val > SGPR_MAX) {
894       return decodeSpecialReg64(Val);
895     } else {
896       return createSRegOperand(getSgprClassId(OPW64), Val);
897     }
898   } else {
899     return createRegOperand(AMDGPU::VCC);
900   }
901 }
902 
// True if the subtarget is Volcanic Islands (GFX8).
bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
906 
// True if the subtarget is GFX9.
bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
910 
// True if the subtarget is GFX10.
bool AMDGPUDisassembler::isGFX10() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}
914 
915 //===----------------------------------------------------------------------===//
916 // AMDGPUSymbolizer
917 //===----------------------------------------------------------------------===//
918 
919 // Try to find symbol name for specified label
920 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
921                                 raw_ostream &/*cStream*/, int64_t Value,
922                                 uint64_t /*Address*/, bool IsBranch,
923                                 uint64_t /*Offset*/, uint64_t /*InstSize*/) {
924   using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
925   using SectionSymbolsTy = std::vector<SymbolInfoTy>;
926 
927   if (!IsBranch) {
928     return false;
929   }
930 
931   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
932   if (!Symbols)
933     return false;
934 
935   auto Result = std::find_if(Symbols->begin(), Symbols->end(),
936                              [Value](const SymbolInfoTy& Val) {
937                                 return std::get<0>(Val) == static_cast<uint64_t>(Value)
938                                     && std::get<2>(Val) == ELF::STT_NOTYPE;
939                              });
940   if (Result != Symbols->end()) {
941     auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
942     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
943     Inst.addOperand(MCOperand::createExpr(Add));
944     return true;
945   }
946   return false;
947 }
948 
// PC-relative load reference comments are not supported by this symbolizer;
// reaching here indicates a caller bug.
void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}
954 
955 //===----------------------------------------------------------------------===//
956 // Initialization
957 //===----------------------------------------------------------------------===//
958 
// Factory for the target registry: creates an AMDGPUSymbolizer.
// DisInfo is an opaque client pointer; see tryAddingSymbolicOperand for the
// layout it is expected to reference.
static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}
967 
// Factory for the target registry: creates an AMDGPUDisassembler for the
// given subtarget.
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}
973 
974 extern "C" void LLVMInitializeAMDGPUDisassembler() {
975   TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
976                                          createAMDGPUDisassembler);
977   TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
978                                        createAMDGPUSymbolizer);
979 }
980