xref: /llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (revision 137976fae22df17daa16dd9002a2c74cd38b9c67)
1 //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 //
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//
16 
17 // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18 
19 #include "Disassembler/AMDGPUDisassembler.h"
20 #include "AMDGPU.h"
21 #include "AMDGPURegisterInfo.h"
22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23 #include "SIDefines.h"
24 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm-c/Disassembler.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/Twine.h"
30 #include "llvm/BinaryFormat/ELF.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
33 #include "llvm/MC/MCExpr.h"
34 #include "llvm/MC/MCFixedLenDisassembler.h"
35 #include "llvm/MC/MCInst.h"
36 #include "llvm/MC/MCSubtargetInfo.h"
37 #include "llvm/Support/Endian.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetRegistry.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstddef>
45 #include <cstdint>
46 #include <iterator>
47 #include <tuple>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "amdgpu-disassembler"
53 
54 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
55 
56 inline static MCDisassembler::DecodeStatus
57 addOperand(MCInst &Inst, const MCOperand& Opnd) {
58   Inst.addOperand(Opnd);
59   return Opnd.isValid() ?
60     MCDisassembler::Success :
61     MCDisassembler::SoftFail;
62 }
63 
64 static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
65                                 uint16_t NameIdx) {
66   int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
67   if (OpIdx != -1) {
68     auto I = MI.begin();
69     std::advance(I, OpIdx);
70     MI.insert(I, Op);
71   }
72   return OpIdx;
73 }
74 
// Decode a SOPP branch target.  \p Imm is a signed 16-bit offset in units of
// 4 bytes, relative to the instruction after the branch.  If a symbolizer
// resolves the computed address to a symbol the symbolic operand is used;
// otherwise the raw immediate is emitted.
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  // Target address = PC of the next instruction (Addr + 4) plus byte offset.
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
88 
// Boilerplate generator for the operand decoder callbacks referenced from
// the TableGen'erated decoder tables: each forwards the raw encoded value to
// the corresponding AMDGPUDisassembler method and appends the result.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                       unsigned Imm, \
                                       uint64_t /*Addr*/, \
                                       const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

// Convenience wrapper: Decode<RC>RegisterClass forwards to
// decodeOperand_<RC>.
#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

// Vector and vector-or-scalar register operands.
DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

// Scalar register operands.
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)
120 
121 static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
122                                          unsigned Imm,
123                                          uint64_t Addr,
124                                          const void *Decoder) {
125   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
126   return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
127 }
128 
129 static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
130                                          unsigned Imm,
131                                          uint64_t Addr,
132                                          const void *Decoder) {
133   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
134   return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
135 }
136 
// SDWA operand decoders share the same forwarding boilerplate.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
143 
144 #include "AMDGPUGenDisassemblerTables.inc"
145 
146 //===----------------------------------------------------------------------===//
147 //
148 //===----------------------------------------------------------------------===//
149 
// Consume sizeof(T) bytes from the front of \p Bytes, interpreting them as a
// little-endian value of type T, and advance the ArrayRef past them.
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}
156 
// Attempt to decode \p Inst against a single TableGen decoder table.  \p MI
// is written only on success.  The byte cursor is restored on failure
// because operand decoders (e.g. decodeLiteralConstant) may have consumed
// extra bytes before the table ultimately rejected the instruction.
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  // Save the cursor so a failed attempt leaves Bytes untouched.
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
173 
// Top-level decode entry point.  AMDGPU has several overlapping encodings
// (64-bit DPP/SDWA, 32-bit and 64-bit VALU/scalar forms), so decoding is
// attempted against a sequence of tables from most to least specific,
// resetting the byte cursor between attempts.
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  // An instruction never occupies more than 8 bytes here (opcode + literal).
  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true;  break; }

      // Subtargets with unpacked D16 memory instructions use a dedicated
      // opcode table.
      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Fall back to 64-bit encodings: the next dword forms the high half.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  // The 64-bit MAC/FMAC forms lack a src2_modifiers operand in the encoding;
  // add a zero one so the MCInst matches the expected operand list.
  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // if the opcode was not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left)
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
271 
// Fix up a decoded SDWA instruction: the TableGen'erated decoder cannot add
// operands that are absent from the encoding, so the implicit ones are
// inserted here depending on the subtarget.
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}
290 
// Note that MIMG format provides no information about VADDR size.
// Consequently, decoded instructions always show address
// as if it has 1 dword, which could be not really so.
//
// Rewrites vdata (and vdst for atomics) to the register tuple implied by
// dmask and d16 packing, since the element count is not part of the MIMG
// encoding.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  // Only atomics carry a separate vdst operand in addition to vdata.
  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  // Gather4 always produces 4 channels; otherwise one per set dmask bit.
  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  // Packed D16 halves the number of destination registers (rounding up).
  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsGather4) {
    if (D16 && AMDGPU::hasPackedD16(STI))
      NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
    else
      return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
    if (NewOpcode == -1)
      return MCDisassembler::Success;
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get first subregister of VData
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // vaddr will always appear as a single VGPR. This will look different than
  // how it is usually emitted because the number of register components is
  // not in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data)
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}
376 
377 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
378   return getContext().getRegisterInfo()->
379     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
380 }
381 
// Report a decoding error for raw value \p V on the comment stream and
// return an invalid MCOperand, which addOperand() turns into a SoftFail.
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  // NOTE(review): assumes CommentStream is non-null; getInstruction sets it
  // before any decoding runs — confirm no other entry point reaches here.
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}
391 
392 inline
393 MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
394   return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
395 }
396 
397 inline
398 MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
399                                                unsigned Val) const {
400   const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
401   if (Val >= RegCl.getNumRegs())
402     return errOperand(Val, Twine(getRegClassName(RegClassID)) +
403                            ": unknown register " + Twine(Val));
404   return createRegOperand(RegCl.getRegister(Val));
405 }
406 
// Build a scalar-register operand from an encoded register number.  Wide
// scalar classes encode the first 32-bit register of an aligned tuple, so
// the value is scaled down by the required alignment before indexing.
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  // 128/256/512-bit tuples all fall through to shift = 2 (4-dword aligned).
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // A misaligned encoding is accepted but flagged; the operand still refers
  // to the aligned tuple containing Val.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
446 
// VS_* source operands may hold a VGPR, SGPR, inline constant, literal or
// special register, so they all route through decodeSrcOp with the
// appropriate width.
MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

// 16-bit source operand.
MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

// Packed 2x16-bit source operand.
MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}
466 
467 MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
468   // Some instructions have operand restrictions beyond what the encoding
469   // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
470   // high bit.
471   Val &= 255;
472 
473   return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
474 }
475 
// VGPR-or-LDS operand: decodeSrcOp also handles the LDS_DIRECT encoding.
MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

// Pure VGPR tuple operands: Val is the register number within the class.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}
491 
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // table-gen generated disassembler doesn't care about operand types
  // leaving only registry class so SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // table-gen generated disassembler doesn't care about operand types
  // leaving only registry class so SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

// 256/512-bit scalar operands are destinations only, so they go through
// decodeDstOp (no immediates/literals possible).
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}
537 
// Read the 32-bit literal dword that trails the instruction.  It is fetched
// at most once per instruction (cached in HasLiteral/Literal) since several
// operands may reference the same literal.
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}
552 
553 MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
554   using namespace AMDGPU::EncValues;
555 
556   assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
557   return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
558     (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
559     (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
560       // Cast prevents negative overflow.
561 }
562 
563 static int64_t getInlineImmVal32(unsigned Imm) {
564   switch (Imm) {
565   case 240:
566     return FloatToBits(0.5f);
567   case 241:
568     return FloatToBits(-0.5f);
569   case 242:
570     return FloatToBits(1.0f);
571   case 243:
572     return FloatToBits(-1.0f);
573   case 244:
574     return FloatToBits(2.0f);
575   case 245:
576     return FloatToBits(-2.0f);
577   case 246:
578     return FloatToBits(4.0f);
579   case 247:
580     return FloatToBits(-4.0f);
581   case 248: // 1 / (2 * PI)
582     return 0x3e22f983;
583   default:
584     llvm_unreachable("invalid fp inline imm");
585   }
586 }
587 
588 static int64_t getInlineImmVal64(unsigned Imm) {
589   switch (Imm) {
590   case 240:
591     return DoubleToBits(0.5);
592   case 241:
593     return DoubleToBits(-0.5);
594   case 242:
595     return DoubleToBits(1.0);
596   case 243:
597     return DoubleToBits(-1.0);
598   case 244:
599     return DoubleToBits(2.0);
600   case 245:
601     return DoubleToBits(-2.0);
602   case 246:
603     return DoubleToBits(4.0);
604   case 247:
605     return DoubleToBits(-4.0);
606   case 248: // 1 / (2 * PI)
607     return 0x3fc45f306dc9c882;
608   default:
609     llvm_unreachable("invalid fp inline imm");
610   }
611 }
612 
613 static int64_t getInlineImmVal16(unsigned Imm) {
614   switch (Imm) {
615   case 240:
616     return 0x3800;
617   case 241:
618     return 0xB800;
619   case 242:
620     return 0x3C00;
621   case 243:
622     return 0xBC00;
623   case 244:
624     return 0x4000;
625   case 245:
626     return 0xC000;
627   case 246:
628     return 0x4400;
629   case 247:
630     return 0xC400;
631   case 248: // 1 / (2 * PI)
632     return 0x3118;
633   default:
634     llvm_unreachable("invalid fp inline imm");
635   }
636 }
637 
638 MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
639   assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
640       && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
641 
642   // ToDo: case 248: 1/(2*PI) - is allowed only on VI
643   switch (Width) {
644   case OPW32:
645     return MCOperand::createImm(getInlineImmVal32(Imm));
646   case OPW64:
647     return MCOperand::createImm(getInlineImmVal64(Imm));
648   case OPW16:
649   case OPWV216:
650     return MCOperand::createImm(getInlineImmVal16(Imm));
651   default:
652     llvm_unreachable("implement me");
653   }
654 }
655 
// Map an operand width to the VGPR register class used for decoding.
// Widths of 32 bits or less (including packed 2x16) fall into VGPR_32.
unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

// Map an operand width to the SGPR register class used for decoding.
unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

// Map an operand width to the trap-temp (TTMP) register class.
unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}
704 
705 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
706   using namespace AMDGPU::EncValues;
707 
708   unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
709   unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;
710 
711   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
712 }
713 
// Decode a 9-bit source-operand field.  The encoding space covers, in order:
// VGPRs, SGPRs, trap-temp registers, inline integer constants, inline float
// constants, the trailing literal dword, and special registers.
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  // Whatever remains must be a special register; those only exist in 32-bit
  // and 64-bit widths.
  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
752 
// Decode a wide (256/512-bit) scalar destination field: only SGPR and
// trap-temp tuples are legal here — never immediates or literals.
MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}
771 
// Map the remaining 32-bit operand encodings to special registers
// (FLAT_SCRATCH/XNACK/VCC/TBA/TMA halves, M0, EXEC halves, aperture
// registers, SCC, LDS direct).  Unhandled values produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
    // ToDo: no support for vccz register
  case 251: break;
    // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
804 
// 64-bit counterpart of decodeSpecialReg32: the same encodings select the
// full register pairs rather than their low/high halves.
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
824 
// Decode an SDWA src operand.  On GFX9 the field may hold a VGPR, SGPR,
// TTMP, inline constant or special register; on VI only a VGPR is encoded
// here.
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Non-register encodings are biased by SRC_SGPR_MIN; after rebasing they
    // follow the regular source-operand encoding.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}
863 
// Width-specific entry points referenced from the decoder tables.
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}
871 
872 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
873   using namespace AMDGPU::SDWA;
874 
875   assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
876          "SDWAVopcDst should be present only on GFX9");
877   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
878     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
879 
880     int TTmpIdx = getTTmpIdx(Val);
881     if (TTmpIdx >= 0) {
882       return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
883     } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
884       return decodeSpecialReg64(Val);
885     } else {
886       return createSRegOperand(getSgprClassId(OPW64), Val);
887     }
888   } else {
889     return createRegOperand(AMDGPU::VCC);
890   }
891 }
892 
// Subtarget predicates used to select between VI and GFX9 decodings.
bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
900 
901 //===----------------------------------------------------------------------===//
902 // AMDGPUSymbolizer
903 //===----------------------------------------------------------------------===//
904 
905 // Try to find symbol name for specified label
906 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
907                                 raw_ostream &/*cStream*/, int64_t Value,
908                                 uint64_t /*Address*/, bool IsBranch,
909                                 uint64_t /*Offset*/, uint64_t /*InstSize*/) {
910   using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
911   using SectionSymbolsTy = std::vector<SymbolInfoTy>;
912 
913   if (!IsBranch) {
914     return false;
915   }
916 
917   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
918   if (!Symbols)
919     return false;
920 
921   auto Result = std::find_if(Symbols->begin(), Symbols->end(),
922                              [Value](const SymbolInfoTy& Val) {
923                                 return std::get<0>(Val) == static_cast<uint64_t>(Value)
924                                     && std::get<2>(Val) == ELF::STT_NOTYPE;
925                              });
926   if (Result != Symbols->end()) {
927     auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
928     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
929     Inst.addOperand(MCOperand::createExpr(Add));
930     return true;
931   }
932   return false;
933 }
934 
// PC-relative load annotation is not implemented for AMDGPU.
void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}
940 
941 //===----------------------------------------------------------------------===//
942 // Initialization
943 //===----------------------------------------------------------------------===//
944 
// Symbolizer factory registered with the target registry below.
static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}
953 
// Disassembler factory registered with the target registry below.
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}
959 
// Hook the disassembler and symbolizer factories into the GCN target.
extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}
966