1 //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 /// \file
12 ///
13 /// This file contains the definition of the AMDGPU ISA disassembler.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18 
19 #include "Disassembler/AMDGPUDisassembler.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIDefines.h"
22 #include "SIRegisterInfo.h"
23 #include "TargetInfo/AMDGPUTargetInfo.h"
24 #include "Utils/AMDGPUBaseInfo.h"
25 #include "llvm-c/DisassemblerTypes.h"
26 #include "llvm/BinaryFormat/ELF.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCContext.h"
29 #include "llvm/MC/MCDecoderOps.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/MC/MCInstrDesc.h"
32 #include "llvm/MC/MCRegisterInfo.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDHSAKernelDescriptor.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "amdgpu-disassembler"
40 
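// Highest source-operand encoding that still decodes to an SGPR; the SGPR
// encoding range is wider on GFX10+ than on older (SI-era) subtargets.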
41 #define SGPR_MAX                                                               \
42   (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
43                  : AMDGPU::EncValues::SGPR_MAX_SI)
44 
45 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
46 
47 AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
48                                        MCContext &Ctx, MCInstrInfo const *MCII)
49     : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
50       MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
51   // ToDo: AMDGPUDisassembler supports only VI ISA.
52   if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
53     report_fatal_error("Disassembly not yet supported for subtarget");
54 }
55 
56 inline static MCDisassembler::DecodeStatus
57 addOperand(MCInst &Inst, const MCOperand& Opnd) {
58   Inst.addOperand(Opnd);
59   return Opnd.isValid() ?
60     MCDisassembler::Success :
61     MCDisassembler::Fail;
62 }
63 
64 static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
65                                 uint16_t NameIdx) {
66   int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
67   if (OpIdx != -1) {
68     auto I = MI.begin();
69     std::advance(I, OpIdx);
70     MI.insert(I, Op);
71   }
72   return OpIdx;
73 }
74 
75 static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
76                                        uint64_t Addr,
77                                        const MCDisassembler *Decoder) {
78   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
79 
80   // Our branches take a simm16, but we need two extra bits to account for the
81   // factor of 4.
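  // For example, Imm == 0xFFFF (simm16 -1) scales to -4, so the branch target
  // is (Addr + 4) - 4 == Addr.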
82   APInt SignedOffset(18, Imm * 4, true);
83   int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
84 
85   if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
86     return MCDisassembler::Success;
87   return addOperand(Inst, MCOperand::createImm(Imm));
88 }
89 
90 static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
91                                      const MCDisassembler *Decoder) {
92   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
93   int64_t Offset;
94   if (DAsm->isVI()) {         // VI supports 20-bit unsigned offsets.
95     Offset = Imm & 0xFFFFF;
96   } else {                    // GFX9+ supports 21-bit signed offsets.
97     Offset = SignExtend64<21>(Imm);
98   }
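  // E.g. on GFX9+ an encoded Imm of 0x1FFFFF sign-extends to an offset of -1.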
99   return addOperand(Inst, MCOperand::createImm(Offset));
100 }
101 
102 static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
103                                   const MCDisassembler *Decoder) {
104   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
105   return addOperand(Inst, DAsm->decodeBoolReg(Val));
106 }
107 
108 #define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
109   static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
110                                         uint64_t /*Addr*/,                     \
111                                         const MCDisassembler *Decoder) {       \
112     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
113     return addOperand(Inst, DAsm->DecoderName(Imm));                           \
114   }
115 
116 // Decoder for registers that decodes directly using the RegClassID. The 8-bit
117 // Imm is the register number. Used by VGPR-only and AGPR-only operands.
118 #define DECODE_OPERAND_REG_8(RegClass)                                         \
119   static DecodeStatus Decode##RegClass##RegisterClass(                         \
120       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
121       const MCDisassembler *Decoder) {                                         \
122     assert(Imm < (1 << 8) && "8-bit encoding");                                \
123     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
124     return addOperand(                                                         \
125         Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
126   }
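// E.g. DECODE_OPERAND_REG_8(VGPR_32) defines DecodeVGPR_32RegisterClass, the
// entry point referenced by the generated decoder tables.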
127 
128 #define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
129                      ImmWidth)                                                 \
130   static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
131                            const MCDisassembler *Decoder) {                    \
132     assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
133     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
134     return addOperand(Inst,                                                    \
135                       DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
136                                         MandatoryLiteral, ImmWidth));          \
137   }
138 
139 // Decoder for registers. The 7-bit Imm is the register number; decodeSrcOp is
140 // used to determine the register class. Used by SGPR-only operands.
141 #define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
142   DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
143 
144 // Decoder for registers. Imm is 10 bits: Imm{7-0} is the register number,
145 // Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
146 // Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
147 // Used by AV_ register classes (AGPR or VGPR only register operands).
148 #define DECODE_OPERAND_REG_AV10(RegClass, OpWidth)                             \
149   DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth,                   \
150                Imm | AMDGPU::EncValues::IS_VGPR, false, 0)
151 
152 // Decoder for Src(9-bit encoding) registers only.
153 #define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth)                            \
154   DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)
155 
156 // Decoder for Src(9-bit encoding) AGPR; the register number is encoded in 9
157 // bits. Set Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp;
158 // registers only.
159 #define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth)                           \
160   DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)
161 
162 // Decoder for 'enum10' from decodeSrcOp: Imm{8-0} is the 9-bit Src encoding
163 // and Imm{9} is acc; registers only.
164 #define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth)                         \
165   DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)
166 
167 // Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
168 // be a register from RegClass or an immediate. Registers that don't belong
169 // to RegClass are still decoded, and the InstPrinter will report a warning.
170 // An immediate is decoded into an ImmWidth-bit constant, which should match
171 // the immediate width of the OperandType (important for floating-point types).
172 #define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth)           \
173   DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm,      \
174                false, ImmWidth)
175 
176 // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
177 // and decode using 'enum10' from decodeSrcOp.
178 #define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth)          \
179   DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth,           \
180                Imm | 512, false, ImmWidth)
181 
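// Decoder for deferred source operands: MandatoryLiteral is set because a
// deferred operand takes its value from the 32-bit literal dword that trails
// the instruction.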
182 #define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth)  \
183   DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9,         \
184                OpWidth, Imm, true, ImmWidth)
185 
186 // Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
187 // when RegisterClass is used as an operand. Most often used for destination
188 // operands.
189 
190 DECODE_OPERAND_REG_8(VGPR_32)
191 DECODE_OPERAND_REG_8(VGPR_32_Lo128)
192 DECODE_OPERAND_REG_8(VReg_64)
193 DECODE_OPERAND_REG_8(VReg_96)
194 DECODE_OPERAND_REG_8(VReg_128)
195 DECODE_OPERAND_REG_8(VReg_256)
196 DECODE_OPERAND_REG_8(VReg_288)
197 DECODE_OPERAND_REG_8(VReg_352)
198 DECODE_OPERAND_REG_8(VReg_384)
199 DECODE_OPERAND_REG_8(VReg_512)
200 DECODE_OPERAND_REG_8(VReg_1024)
201 
202 DECODE_OPERAND_REG_7(SReg_32, OPW32)
203 DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
204 DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
205 DECODE_OPERAND_REG_7(SReg_64, OPW64)
206 DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
207 DECODE_OPERAND_REG_7(SReg_128, OPW128)
208 DECODE_OPERAND_REG_7(SReg_256, OPW256)
209 DECODE_OPERAND_REG_7(SReg_512, OPW512)
210 
211 DECODE_OPERAND_REG_8(AGPR_32)
212 DECODE_OPERAND_REG_8(AReg_64)
213 DECODE_OPERAND_REG_8(AReg_128)
214 DECODE_OPERAND_REG_8(AReg_256)
215 DECODE_OPERAND_REG_8(AReg_512)
216 DECODE_OPERAND_REG_8(AReg_1024)
217 
218 DECODE_OPERAND_REG_AV10(AVDst_128, OPW128)
219 DECODE_OPERAND_REG_AV10(AVDst_512, OPW512)
220 
221 // Decoders for register-only source RegisterOperands that use the 9-bit Src
222 // encoding: 'decodeOperand_<RegClass>'.
223 
224 DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32)
225 DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64)
226 DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128)
227 DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256)
228 DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32)
229 
230 DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32)
231 
232 DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32)
233 DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64)
234 DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
235 
236 // Decoders for register or immediate RegisterOperands that use 9-bit Src
237 // encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'.
238 
239 DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
240 DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
241 DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 16)
242 DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
243 DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
244 DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
245 DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
246 DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
247 DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
248 DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
249 DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
250 DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
251 DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
252 DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
253 
254 DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
255 DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
256 DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
257 DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32)
258 DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
259 
260 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
261 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
262 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
263 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(SReg_32, OPW32, 32)
264 
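// Decoders for 16-bit VGPR halves: the top Imm bit (Imm{9}, or Imm{7} for the
// Lo128 variant) selects the high half of the VGPR, and the remaining low bits
// give the register index.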
265 static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
266                                                uint64_t /*Addr*/,
267                                                const MCDisassembler *Decoder) {
268   assert(isUInt<10>(Imm) && "10-bit encoding expected");
269   assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
270 
271   bool IsHi = Imm & (1 << 9);
272   unsigned RegIdx = Imm & 0xff;
273   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
274   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
275 }
276 
277 static DecodeStatus
278 DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
279                                  const MCDisassembler *Decoder) {
280   assert(isUInt<8>(Imm) && "8-bit encoding expected");
281 
282   bool IsHi = Imm & (1 << 7);
283   unsigned RegIdx = Imm & 0x7f;
284   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
285   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
286 }
287 
288 static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
289                                                 uint64_t /*Addr*/,
290                                                 const MCDisassembler *Decoder) {
291   assert(isUInt<9>(Imm) && "9-bit encoding expected");
292 
293   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
294   bool IsVGPR = Imm & (1 << 8);
295   if (IsVGPR) {
296     bool IsHi = Imm & (1 << 7);
297     unsigned RegIdx = Imm & 0x7f;
298     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
299   }
300   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
301                                                    Imm & 0xFF, false, 16));
302 }
303 
304 static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
305                                           uint64_t /*Addr*/,
306                                           const MCDisassembler *Decoder) {
307   assert(isUInt<10>(Imm) && "10-bit encoding expected");
308 
309   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
310   bool IsVGPR = Imm & (1 << 8);
311   if (IsVGPR) {
312     bool IsHi = Imm & (1 << 9);
313     unsigned RegIdx = Imm & 0xff;
314     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
315   }
316   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
317                                                    Imm & 0xFF, false, 16));
318 }
319 
320 static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
321                                          uint64_t Addr,
322                                          const MCDisassembler *Decoder) {
323   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
324   return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
325 }
326 
327 static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
328                                           uint64_t Addr, const void *Decoder) {
329   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
330   return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
331 }
332 
333 static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
334                           const MCRegisterInfo *MRI) {
335   if (OpIdx < 0)
336     return false;
337 
338   const MCOperand &Op = Inst.getOperand(OpIdx);
339   if (!Op.isReg())
340     return false;
341 
342   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
343   auto Reg = Sub ? Sub : Op.getReg();
344   return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
345 }
346 
347 static DecodeStatus decodeOperand_AVLdSt_Any(MCInst &Inst, unsigned Imm,
348                                              AMDGPUDisassembler::OpWidthTy Opw,
349                                              const MCDisassembler *Decoder) {
350   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
351   if (!DAsm->isGFX90A()) {
352     Imm &= 511;
353   } else {
354     // If an atomic instruction has both vdata and vdst, their register
355     // classes are tied. The acc bit is decoded along with the vdst (the
356     // first operand), so we need to change the register class to AGPR if
357     // vdst was an AGPR. If a DS instruction has both data0 and data1, their
358     // register classes are also tied.
359     unsigned Opc = Inst.getOpcode();
360     uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
361     uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
362                                                         : AMDGPU::OpName::vdata;
363     const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
364     int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
365     if ((int)Inst.getNumOperands() == DataIdx) {
366       int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
367       if (IsAGPROperand(Inst, DstIdx, MRI))
368         Imm |= 512;
369     }
370 
371     if (TSFlags & SIInstrFlags::DS) {
372       int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
373       if ((int)Inst.getNumOperands() == Data2Idx &&
374           IsAGPROperand(Inst, DataIdx, MRI))
375         Imm |= 512;
376     }
377   }
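  // OR-ing in 256 (IS_VGPR) marks the operand as a vector register for
  // decodeSrcOp; bit 9 (512), set above for AGPR data, selects the AGPR file.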
378   return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
379 }
380 
381 static DecodeStatus
382 DecodeAVLdSt_32RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
383                              const MCDisassembler *Decoder) {
384   return decodeOperand_AVLdSt_Any(Inst, Imm,
385                                   AMDGPUDisassembler::OPW32, Decoder);
386 }
387 
388 static DecodeStatus
389 DecodeAVLdSt_64RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
390                              const MCDisassembler *Decoder) {
391   return decodeOperand_AVLdSt_Any(Inst, Imm,
392                                   AMDGPUDisassembler::OPW64, Decoder);
393 }
394 
395 static DecodeStatus
396 DecodeAVLdSt_96RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
397                              const MCDisassembler *Decoder) {
398   return decodeOperand_AVLdSt_Any(Inst, Imm,
399                                   AMDGPUDisassembler::OPW96, Decoder);
400 }
401 
402 static DecodeStatus
403 DecodeAVLdSt_128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
404                               const MCDisassembler *Decoder) {
405   return decodeOperand_AVLdSt_Any(Inst, Imm,
406                                   AMDGPUDisassembler::OPW128, Decoder);
407 }
408 
409 static DecodeStatus
410 DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
411                               const MCDisassembler *Decoder) {
412   return decodeOperand_AVLdSt_Any(Inst, Imm, AMDGPUDisassembler::OPW160,
413                                   Decoder);
414 }
415 
416 #define DECODE_SDWA(DecName) \
417 DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
418 
419 DECODE_SDWA(Src32)
420 DECODE_SDWA(Src16)
421 DECODE_SDWA(VopcDst)
422 
423 #include "AMDGPUGenDisassemblerTables.inc"
424 
425 //===----------------------------------------------------------------------===//
426 //
427 //===----------------------------------------------------------------------===//
428 
429 template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
430   assert(Bytes.size() >= sizeof(T));
431   const auto Res =
432       support::endian::read<T, llvm::endianness::little>(Bytes.data());
433   Bytes = Bytes.slice(sizeof(T));
434   return Res;
435 }
436 
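// Read a 96-bit instruction word (used by the GFX11+ tables tried first in
// getInstruction) as a 64-bit low part followed by a 32-bit high part.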
437 static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
438   assert(Bytes.size() >= 12);
439   uint64_t Lo =
440       support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
441   Bytes = Bytes.slice(8);
442   uint64_t Hi =
443       support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
444   Bytes = Bytes.slice(4);
445   return DecoderUInt128(Lo, Hi);
446 }
447 
448 // The disassembler is greedy, so we need to check the FI operand value to
449 // avoid parsing a dpp8 instruction when the correct literal is not set. For
450 // dpp16 the autogenerated decoder checks the dpp literal.
451 static bool isValidDPP8(const MCInst &MI) {
452   using namespace llvm::AMDGPU::DPP;
453   int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
454   assert(FiIdx != -1);
455   if ((unsigned)FiIdx >= MI.getNumOperands())
456     return false;
457   unsigned Fi = MI.getOperand(FiIdx).getImm();
458   return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
459 }
460 
461 DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
462                                                 ArrayRef<uint8_t> Bytes_,
463                                                 uint64_t Address,
464                                                 raw_ostream &CS) const {
465   bool IsSDWA = false;
466 
467   unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
468   Bytes = Bytes_.slice(0, MaxInstBytesNum);
469 
470   DecodeStatus Res = MCDisassembler::Fail;
471   do {
472     // ToDo: it would be better to switch the encoding length on some bit
473     // predicate, but none is known yet, so try everything we can.
474 
475     // Try to decode DPP and SDWA first to resolve conflicts with the VOP1 and
476     // VOP2 encodings.
477     if (isGFX11Plus() && Bytes.size() >= 12) {
478       DecoderUInt128 DecW = eat12Bytes(Bytes);
479       Res =
480           tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
481                         MI, DecW, Address, CS);
482       if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
483         break;
484       MI = MCInst(); // clear
485       Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
486                           MI, DecW, Address, CS);
487       if (Res) {
488         if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
489           convertVOP3PDPPInst(MI);
490         else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
491           convertVOPCDPPInst(MI); // Special VOP3 case
492         else {
493           assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3);
494           convertVOP3DPPInst(MI); // Regular VOP3 case
495         }
496         break;
497       }
498       Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
499       if (Res)
500         break;
501     }
502     // Reinitialize Bytes
503     Bytes = Bytes_.slice(0, MaxInstBytesNum);
504 
505     if (Bytes.size() >= 8) {
506       const uint64_t QW = eatBytes<uint64_t>(Bytes);
507 
508       if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
509         Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
510         if (Res) {
511           if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
512               == -1)
513             break;
514           if (convertDPP8Inst(MI) == MCDisassembler::Success)
515             break;
516           MI = MCInst(); // clear
517         }
518       }
519 
520       Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
521       if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
522         break;
523       MI = MCInst(); // clear
524 
525       Res = tryDecodeInst(DecoderTableDPP8GFX1164,
526                           DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
527       if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
528         break;
529       MI = MCInst(); // clear
530 
531       Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
532       if (Res) break;
533 
534       Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
535                           MI, QW, Address, CS);
536       if (Res) {
537         if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
538           convertVOPCDPPInst(MI);
539         break;
540       }
541 
542       Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
543       if (Res) { IsSDWA = true;  break; }
544 
545       Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
546       if (Res) { IsSDWA = true;  break; }
547 
548       Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
549       if (Res) { IsSDWA = true;  break; }
550 
551       if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
552         Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
553         if (Res)
554           break;
555       }
556 
557       // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
558       // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
559       // table first so we print the correct name.
560       if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
561         Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
562         if (Res)
563           break;
564       }
565     }
566 
567     // Reinitialize Bytes as DPP64 could have eaten too much
568     Bytes = Bytes_.slice(0, MaxInstBytesNum);
569 
570     // Try to decode a 32-bit instruction.
571     if (Bytes.size() < 4) break;
572     const uint32_t DW = eatBytes<uint32_t>(Bytes);
573     Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
574     if (Res) break;
575 
576     Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
577     if (Res) break;
578 
579     Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
580     if (Res) break;
581 
582     if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
583       Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
584       if (Res)
585         break;
586     }
587 
588     if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
589       Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
590       if (Res) break;
591     }
592 
593     Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
594     if (Res) break;
595 
596     Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
597                         Address, CS);
598     if (Res) break;
599 
600     if (Bytes.size() < 4) break;
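    // Combine the already-consumed low dword with the next dword and retry the
    // 64-bit decoder tables.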
601     const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
602 
603     if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
604       Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
605       if (Res)
606         break;
607     }
608 
609     if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
610       Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
611       if (Res)
612         break;
613     }
614 
615     Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
616     if (Res) break;
617 
618     Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
619     if (Res) break;
620 
621     Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
622     if (Res) break;
623 
624     Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
625     if (Res) break;
626 
627     Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
628                         Address, CS);
629     if (Res)
630       break;
631 
632     Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
633   } while (false);
634 
635   if (Res && AMDGPU::isMAC(MI.getOpcode())) {
636     // Insert dummy unused src2_modifiers.
637     insertNamedMCOperand(MI, MCOperand::createImm(0),
638                          AMDGPU::OpName::src2_modifiers);
639   }
640 
641   if (Res && (MCII->get(MI.getOpcode()).TSFlags &
642           (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
643     int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
644                                              AMDGPU::OpName::cpol);
645     if (CPolPos != -1) {
646       unsigned CPol =
647           (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
648               AMDGPU::CPol::GLC : 0;
649       if (MI.getNumOperands() <= (unsigned)CPolPos) {
650         insertNamedMCOperand(MI, MCOperand::createImm(CPol),
651                              AMDGPU::OpName::cpol);
652       } else if (CPol) {
653         MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
654       }
655     }
656   }
657 
658   if (Res && (MCII->get(MI.getOpcode()).TSFlags &
659               (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
660              (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
661     // GFX90A lost TFE; its place is occupied by ACC.
662     int TFEOpIdx =
663         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
664     if (TFEOpIdx != -1) {
665       auto TFEIter = MI.begin();
666       std::advance(TFEIter, TFEOpIdx);
667       MI.insert(TFEIter, MCOperand::createImm(0));
668     }
669   }
670 
671   if (Res && (MCII->get(MI.getOpcode()).TSFlags &
672               (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
673     int SWZOpIdx =
674         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
675     if (SWZOpIdx != -1) {
676       auto SWZIter = MI.begin();
677       std::advance(SWZIter, SWZOpIdx);
678       MI.insert(SWZIter, MCOperand::createImm(0));
679     }
680   }
681 
682   if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
683     int VAddr0Idx =
684         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
685     int RsrcIdx =
686         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
687     unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
688     if (VAddr0Idx >= 0 && NSAArgs > 0) {
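      // Each extra NSA dword encodes up to four additional 8-bit VGPR
      // addresses.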
689       unsigned NSAWords = (NSAArgs + 3) / 4;
690       if (Bytes.size() < 4 * NSAWords) {
691         Res = MCDisassembler::Fail;
692       } else {
693         for (unsigned i = 0; i < NSAArgs; ++i) {
694           const unsigned VAddrIdx = VAddr0Idx + 1 + i;
695           auto VAddrRCID =
696               MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
697           MI.insert(MI.begin() + VAddrIdx,
698                     createRegOperand(VAddrRCID, Bytes[i]));
699         }
700         Bytes = Bytes.slice(4 * NSAWords);
701       }
702     }
703 
704     if (Res)
705       Res = convertMIMGInst(MI);
706   }
707 
708   if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
709     Res = convertEXPInst(MI);
710 
711   if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP))
712     Res = convertVINTERPInst(MI);
713 
714   if (Res && IsSDWA)
715     Res = convertSDWAInst(MI);
716 
717   int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
718                                               AMDGPU::OpName::vdst_in);
719   if (VDstIn_Idx != -1) {
720     int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
721                            MCOI::OperandConstraint::TIED_TO);
722     if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
723          !MI.getOperand(VDstIn_Idx).isReg() ||
724          MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
725       if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
726         MI.erase(&MI.getOperand(VDstIn_Idx));
727       insertNamedMCOperand(MI,
728         MCOperand::createReg(MI.getOperand(Tied).getReg()),
729         AMDGPU::OpName::vdst_in);
730     }
731   }
732 
733   int ImmLitIdx =
734       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
735   bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
736   if (Res && ImmLitIdx != -1 && !IsSOPK)
737     Res = convertFMAanyK(MI, ImmLitIdx);
738 
739   // If the opcode was not recognized, we'll assume a Size of 4 bytes
740   // (unless there are fewer bytes left).
741   Size = Res ? (MaxInstBytesNum - Bytes.size())
742              : std::min((size_t)4, Bytes_.size());
743   return Res;
744 }
745 
746 DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
747   if (STI.hasFeature(AMDGPU::FeatureGFX11)) {
748     // The MCInst still has these fields even though they are no longer encoded
749     // in the GFX11 instruction.
750     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
751     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
752   }
753   return MCDisassembler::Success;
754 }
755 
756 DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
757   if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
758       MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
759       MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
760       MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11) {
761     // The MCInst has this field that is not directly encoded in the
762     // instruction.
763     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
764   }
765   return MCDisassembler::Success;
766 }
767 
768 DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
769   if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
770       STI.hasFeature(AMDGPU::FeatureGFX10)) {
771     if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
772       // VOPC - insert clamp
773       insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
774   } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
775     int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
776     if (SDst != -1) {
777       // VOPC - insert VCC register as sdst
778       insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
779                            AMDGPU::OpName::sdst);
780     } else {
781       // VOP1/2 - insert omod if present in instruction
782       insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
783     }
784   }
785   return MCDisassembler::Success;
786 }
787 
788 struct VOPModifiers {
789   unsigned OpSel = 0;
790   unsigned OpSelHi = 0;
791   unsigned NegLo = 0;
792   unsigned NegHi = 0;
793 };
794 
795 // Reconstruct values of VOP3/VOP3P operands such as op_sel.
796 // Note that these values do not affect disassembler output,
797 // so this is only necessary for consistency with src_modifiers.
798 static VOPModifiers collectVOPModifiers(const MCInst &MI,
799                                         bool IsVOP3P = false) {
800   VOPModifiers Modifiers;
801   unsigned Opc = MI.getOpcode();
802   const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
803                         AMDGPU::OpName::src1_modifiers,
804                         AMDGPU::OpName::src2_modifiers};
805   for (int J = 0; J < 3; ++J) {
806     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
807     if (OpIdx == -1)
808       continue;
809 
810     unsigned Val = MI.getOperand(OpIdx).getImm();
811 
812     Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
813     if (IsVOP3P) {
814       Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
815       Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
816       Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
817     } else if (J == 0) {
818       Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
819     }
820   }
821 
822   return Modifiers;
823 }
824 
825 // MAC opcodes have special old and src2 operands.
826 // src2 is tied to dst, while old is not tied (but assumed to be).
827 bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
828   constexpr int DST_IDX = 0;
829   auto Opcode = MI.getOpcode();
830   const auto &Desc = MCII->get(Opcode);
831   auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
832 
833   if (OldIdx != -1 && Desc.getOperandConstraint(
834                           OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
835     assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
836     assert(Desc.getOperandConstraint(
837                AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
838                MCOI::OperandConstraint::TIED_TO) == DST_IDX);
839     (void)DST_IDX;
840     return true;
841   }
842 
843   return false;
844 }
845 
846 // Create dummy old operand and insert dummy unused src2_modifiers
847 void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
848   assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
849   insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
850   insertNamedMCOperand(MI, MCOperand::createImm(0),
851                        AMDGPU::OpName::src2_modifiers);
852 }
853 
854 // We must check that FI == literal to reject non-genuine dpp8 instructions,
855 // and we must first add the optional MI operands in order to check FI.
856 DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
857   unsigned Opc = MI.getOpcode();
858   if (MCII->get(Opc).TSFlags & SIInstrFlags::VOP3P) {
859     convertVOP3PDPPInst(MI);
860   } else if ((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) ||
861              AMDGPU::isVOPC64DPP(Opc)) {
862     convertVOPCDPPInst(MI);
863   } else {
864     if (isMacDPP(MI))
865       convertMacDPPInst(MI);
866 
867     unsigned DescNumOps = MCII->get(Opc).getNumOperands();
868     if (MI.getNumOperands() < DescNumOps &&
869         AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
870       auto Mods = collectVOPModifiers(MI);
871       insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
872                            AMDGPU::OpName::op_sel);
873     } else {
874       // Insert dummy unused src modifiers.
875       if (MI.getNumOperands() < DescNumOps &&
876           AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
877         insertNamedMCOperand(MI, MCOperand::createImm(0),
878                              AMDGPU::OpName::src0_modifiers);
879 
880       if (MI.getNumOperands() < DescNumOps &&
881           AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
882         insertNamedMCOperand(MI, MCOperand::createImm(0),
883                              AMDGPU::OpName::src1_modifiers);
884     }
885   }
886   return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
887 }
888 
889 DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
890   if (isMacDPP(MI))
891     convertMacDPPInst(MI);
892 
893   unsigned Opc = MI.getOpcode();
894   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
895   if (MI.getNumOperands() < DescNumOps &&
896       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
897     auto Mods = collectVOPModifiers(MI);
898     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
899                          AMDGPU::OpName::op_sel);
900   }
901   return MCDisassembler::Success;
902 }
903 
904 // Note that before gfx10, the MIMG encoding provided no information about
905 // VADDR size. Consequently, decoded instructions always show the address as
906 // if it were a single dword, which may not actually be the case.
907 DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
908 
909   int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
910                                            AMDGPU::OpName::vdst);
911 
912   int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
913                                             AMDGPU::OpName::vdata);
914   int VAddr0Idx =
915       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
916   int RsrcIdx =
917       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
918   int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
919                                             AMDGPU::OpName::dmask);
920 
921   int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
922                                             AMDGPU::OpName::tfe);
923   int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
924                                             AMDGPU::OpName::d16);
925 
926   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
927   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
928       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
929 
930   assert(VDataIdx != -1);
931   if (BaseOpcode->BVH) {
932     // Add A16 operand for intersect_ray instructions
933     addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
934     return MCDisassembler::Success;
935   }
936 
937   bool IsAtomic = (VDstIdx != -1);
938   bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
939   bool IsNSA = false;
940   bool IsPartialNSA = false;
941   unsigned AddrSize = Info->VAddrDwords;
942 
943   if (isGFX10Plus()) {
944     unsigned DimIdx =
945         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
946     int A16Idx =
947         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
948     const AMDGPU::MIMGDimInfo *Dim =
949         AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
950     const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
951 
952     AddrSize =
953         AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
954 
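    // NSA (non-sequential address) encodings supply each address component in
    // its own VGPR rather than in one contiguous register tuple.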
955     IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
956             Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
957     if (!IsNSA) {
958       if (AddrSize > 12)
959         AddrSize = 16;
960     } else {
961       if (AddrSize > Info->VAddrDwords) {
962         if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
963           // The NSA encoding does not contain enough operands for the
964           // combination of base opcode / dimension. Should this be an error?
965           return MCDisassembler::Success;
966         }
967         IsPartialNSA = true;
968       }
969     }
970   }
971 
972   unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
973   unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
974 
975   bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
976   if (D16 && AMDGPU::hasPackedD16(STI)) {
977     DstSize = (DstSize + 1) / 2;
978   }
979 
980   if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
981     DstSize += 1;
982 
983   if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
984     return MCDisassembler::Success;
985 
986   int NewOpcode =
987       AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
988   if (NewOpcode == -1)
989     return MCDisassembler::Success;
990 
991   // Widen the register to the correct number of enabled channels.
992   unsigned NewVdata = AMDGPU::NoRegister;
993   if (DstSize != Info->VDataDwords) {
994     auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
995 
996     // Get first subregister of VData
997     unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
998     unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
999     Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;
1000 
1001     NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1002                                        &MRI.getRegClass(DataRCID));
1003     if (NewVdata == AMDGPU::NoRegister) {
1004       // It's possible to encode this such that the low register + enabled
1005       // components exceeds the register count.
1006       return MCDisassembler::Success;
1007     }
1008   }
1009 
1010   // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1011   // If using partial NSA on GFX11+ widen last address register.
1012   int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1013   unsigned NewVAddrSA = AMDGPU::NoRegister;
1014   if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1015       AddrSize != Info->VAddrDwords) {
1016     unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1017     unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1018     VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1019 
1020     auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
1021     NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1022                                         &MRI.getRegClass(AddrRCID));
1023     if (!NewVAddrSA)
1024       return MCDisassembler::Success;
1025   }
1026 
1027   MI.setOpcode(NewOpcode);
1028 
1029   if (NewVdata != AMDGPU::NoRegister) {
1030     MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1031 
1032     if (IsAtomic) {
1033       // Atomic operations have an additional operand (a copy of data)
1034       MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1035     }
1036   }
1037 
1038   if (NewVAddrSA) {
1039     MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1040   } else if (IsNSA) {
1041     assert(AddrSize <= Info->VAddrDwords);
1042     MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1043              MI.begin() + VAddr0Idx + Info->VAddrDwords);
1044   }
1045 
1046   return MCDisassembler::Success;
1047 }
1048 
1049 // Op_sel and neg bits are present both in src_modifiers and in standalone
1050 // operands. The autogenerated decoder only fills in src_modifiers, so manually
1051 // add the bits to the standalone operands.
1052 DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1053   unsigned Opc = MI.getOpcode();
1054   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1055   auto Mods = collectVOPModifiers(MI, true);
1056 
1057   if (MI.getNumOperands() < DescNumOps &&
1058       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1059     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1060 
1061   if (MI.getNumOperands() < DescNumOps &&
1062       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1063     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1064                          AMDGPU::OpName::op_sel);
1065   if (MI.getNumOperands() < DescNumOps &&
1066       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1067     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1068                          AMDGPU::OpName::op_sel_hi);
1069   if (MI.getNumOperands() < DescNumOps &&
1070       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1071     insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1072                          AMDGPU::OpName::neg_lo);
1073   if (MI.getNumOperands() < DescNumOps &&
1074       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1075     insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1076                          AMDGPU::OpName::neg_hi);
1077 
1078   return MCDisassembler::Success;
1079 }
1080 
1081 // Create dummy old operand and insert optional operands
1082 DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1083   unsigned Opc = MI.getOpcode();
1084   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1085 
1086   if (MI.getNumOperands() < DescNumOps &&
1087       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1088     insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1089 
1090   if (MI.getNumOperands() < DescNumOps &&
1091       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1092     insertNamedMCOperand(MI, MCOperand::createImm(0),
1093                          AMDGPU::OpName::src0_modifiers);
1094 
1095   if (MI.getNumOperands() < DescNumOps &&
1096       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1097     insertNamedMCOperand(MI, MCOperand::createImm(0),
1098                          AMDGPU::OpName::src1_modifiers);
1099   return MCDisassembler::Success;
1100 }
1101 
1102 DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
1103                                                 int ImmLitIdx) const {
1104   assert(HasLiteral && "Should have decoded a literal");
1105   const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1106   unsigned DescNumOps = Desc.getNumOperands();
1107   insertNamedMCOperand(MI, MCOperand::createImm(Literal),
1108                        AMDGPU::OpName::immDeferred);
1109   assert(DescNumOps == MI.getNumOperands());
1110   for (unsigned I = 0; I < DescNumOps; ++I) {
1111     auto &Op = MI.getOperand(I);
1112     auto OpType = Desc.operands()[I].OperandType;
1113     bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1114                          OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
1115     if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1116         IsDeferredOp)
1117       Op.setImm(Literal);
1118   }
1119   return MCDisassembler::Success;
1120 }
1121 
1122 const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1123   return getContext().getRegisterInfo()->
1124     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1125 }
1126 
1127 inline
1128 MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1129                                          const Twine& ErrMsg) const {
1130   *CommentStream << "Error: " + ErrMsg;
1131 
1132   // ToDo: add support for error operands to MCInst.h
1133   // return MCOperand::createError(V);
1134   return MCOperand();
1135 }
1136 
1137 inline
1138 MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1139   return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
1140 }
1141 
1142 inline
1143 MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1144                                                unsigned Val) const {
1145   const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1146   if (Val >= RegCl.getNumRegs())
1147     return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1148                            ": unknown register " + Twine(Val));
1149   return createRegOperand(RegCl.getRegister(Val));
1150 }
1151 
1152 inline
1153 MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1154                                                 unsigned Val) const {
1155   // ToDo: SI/CI have 104 SGPRs, VI has 102.
1156   // Valery: here we accept as much as we can and let the assembler sort it out.
1157   int shift = 0;
1158   switch (SRegClassID) {
1159   case AMDGPU::SGPR_32RegClassID:
1160   case AMDGPU::TTMP_32RegClassID:
1161     break;
1162   case AMDGPU::SGPR_64RegClassID:
1163   case AMDGPU::TTMP_64RegClassID:
1164     shift = 1;
1165     break;
1166   case AMDGPU::SGPR_128RegClassID:
1167   case AMDGPU::TTMP_128RegClassID:
1168   // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1169   // this bundle?
1170   case AMDGPU::SGPR_256RegClassID:
1171   case AMDGPU::TTMP_256RegClassID:
1172   // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1173   // this bundle?
1174   case AMDGPU::SGPR_288RegClassID:
1175   case AMDGPU::TTMP_288RegClassID:
1176   case AMDGPU::SGPR_320RegClassID:
1177   case AMDGPU::TTMP_320RegClassID:
1178   case AMDGPU::SGPR_352RegClassID:
1179   case AMDGPU::TTMP_352RegClassID:
1180   case AMDGPU::SGPR_384RegClassID:
1181   case AMDGPU::TTMP_384RegClassID:
1182   case AMDGPU::SGPR_512RegClassID:
1183   case AMDGPU::TTMP_512RegClassID:
1184     shift = 2;
1185     break;
1186   // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1187   // this bundle?
1188   default:
1189     llvm_unreachable("unhandled register class");
1190   }
1191 
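  // The encoded Val is the number of the tuple's first register, e.g. s[2:3]
  // is encoded as 2 and maps to index 1 of the 64-bit class after the shift.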
1192   if (Val % (1 << shift)) {
1193     *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1194                    << ": scalar reg isn't aligned " << Val;
1195   }
1196 
1197   return createRegOperand(SRegClassID, Val >> shift);
1198 }
1199 
1200 MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1201                                                   bool IsHi) const {
1202   unsigned RCID =
1203       IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;
1204   return createRegOperand(RCID, RegIdx);
1205 }
1206 
1207 // Decode literals for instructions which always have a literal in the encoding.
1208 MCOperand
1209 AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1210   if (HasLiteral) {
1211     assert(
1212         AMDGPU::hasVOPD(STI) &&
1213         "Should only decode multiple kimm with VOPD, check VSrc operand types");
1214     if (Literal != Val)
1215       return errOperand(Val, "More than one unique literal is illegal");
1216   }
1217   HasLiteral = true;
1218   Literal = Val;
1219   return MCOperand::createImm(Literal);
1220 }
1221 
1222 MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
1223   // For now all literal constants are assumed to be unsigned integers.
1224   // ToDo: deal with signed/unsigned 64-bit integer constants
1225   // ToDo: deal with float/double constants
1226   if (!HasLiteral) {
1227     if (Bytes.size() < 4) {
1228       return errOperand(0, "cannot read literal, inst bytes left " +
1229                         Twine(Bytes.size()));
1230     }
1231     HasLiteral = true;
1232     Literal = eatBytes<uint32_t>(Bytes);
1233   }
1234   return MCOperand::createImm(Literal);
1235 }
1236 
1237 MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1238   using namespace AMDGPU::EncValues;
1239 
1240   assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
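  // Encodings 128..192 map to 0..64 and 193..208 map to -1..-16; e.g. Imm 129
  // decodes to 1 and Imm 193 decodes to -1.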
1241   return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1242     (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1243     (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1244       // Cast prevents negative overflow.
1245 }
1246 
1247 static int64_t getInlineImmVal32(unsigned Imm) {
1248   switch (Imm) {
1249   case 240:
1250     return llvm::bit_cast<uint32_t>(0.5f);
1251   case 241:
1252     return llvm::bit_cast<uint32_t>(-0.5f);
1253   case 242:
1254     return llvm::bit_cast<uint32_t>(1.0f);
1255   case 243:
1256     return llvm::bit_cast<uint32_t>(-1.0f);
1257   case 244:
1258     return llvm::bit_cast<uint32_t>(2.0f);
1259   case 245:
1260     return llvm::bit_cast<uint32_t>(-2.0f);
1261   case 246:
1262     return llvm::bit_cast<uint32_t>(4.0f);
1263   case 247:
1264     return llvm::bit_cast<uint32_t>(-4.0f);
1265   case 248: // 1 / (2 * PI)
1266     return 0x3e22f983;
1267   default:
1268     llvm_unreachable("invalid fp inline imm");
1269   }
1270 }
1271 
1272 static int64_t getInlineImmVal64(unsigned Imm) {
1273   switch (Imm) {
1274   case 240:
1275     return llvm::bit_cast<uint64_t>(0.5);
1276   case 241:
1277     return llvm::bit_cast<uint64_t>(-0.5);
1278   case 242:
1279     return llvm::bit_cast<uint64_t>(1.0);
1280   case 243:
1281     return llvm::bit_cast<uint64_t>(-1.0);
1282   case 244:
1283     return llvm::bit_cast<uint64_t>(2.0);
1284   case 245:
1285     return llvm::bit_cast<uint64_t>(-2.0);
1286   case 246:
1287     return llvm::bit_cast<uint64_t>(4.0);
1288   case 247:
1289     return llvm::bit_cast<uint64_t>(-4.0);
1290   case 248: // 1 / (2 * PI)
1291     return 0x3fc45f306dc9c882;
1292   default:
1293     llvm_unreachable("invalid fp inline imm");
1294   }
1295 }
1296 
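// 16-bit inline immediates are returned as IEEE half-precision bit patterns,
// e.g. 0x3800 is 0.5 and 0x3118 approximates 1/(2*pi).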
1297 static int64_t getInlineImmVal16(unsigned Imm) {
1298   switch (Imm) {
1299   case 240:
1300     return 0x3800;
1301   case 241:
1302     return 0xB800;
1303   case 242:
1304     return 0x3C00;
1305   case 243:
1306     return 0xBC00;
1307   case 244:
1308     return 0x4000;
1309   case 245:
1310     return 0xC000;
1311   case 246:
1312     return 0x4400;
1313   case 247:
1314     return 0xC400;
1315   case 248: // 1 / (2 * PI)
1316     return 0x3118;
1317   default:
1318     llvm_unreachable("invalid fp inline imm");
1319   }
1320 }
1321 
1322 MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
1323   assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
1324       && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1325 
1326   // ToDo: case 248: 1/(2*PI) - is allowed only on VI
1327   // ImmWidth 0 is the default case where the operand should not allow
1328   // immediates. The Imm value is still decoded into a 32-bit immediate operand;
1329   // the inst printer will use it to print a verbose error message.
1330   switch (ImmWidth) {
1331   case 0:
1332   case 32:
1333     return MCOperand::createImm(getInlineImmVal32(Imm));
1334   case 64:
1335     return MCOperand::createImm(getInlineImmVal64(Imm));
1336   case 16:
1337     return MCOperand::createImm(getInlineImmVal16(Imm));
1338   default:
1339     llvm_unreachable("implement me");
1340   }
1341 }
1342 
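// Map an operand width to the register class used to hold it. OPW16 and
// OPWV216 operands share the 32-bit class because 16-bit operands are decoded
// into full 32-bit registers here.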
1343 unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
1344   using namespace AMDGPU;
1345 
1346   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1347   switch (Width) {
1348   default: // fallthrough
1349   case OPW32:
1350   case OPW16:
1351   case OPWV216:
1352     return VGPR_32RegClassID;
1353   case OPW64:
1354   case OPWV232: return VReg_64RegClassID;
1355   case OPW96: return VReg_96RegClassID;
1356   case OPW128: return VReg_128RegClassID;
1357   case OPW160: return VReg_160RegClassID;
1358   case OPW256: return VReg_256RegClassID;
1359   case OPW288: return VReg_288RegClassID;
1360   case OPW320: return VReg_320RegClassID;
1361   case OPW352: return VReg_352RegClassID;
1362   case OPW384: return VReg_384RegClassID;
1363   case OPW512: return VReg_512RegClassID;
1364   case OPW1024: return VReg_1024RegClassID;
1365   }
1366 }
1367 
1368 unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1369   using namespace AMDGPU;
1370 
1371   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1372   switch (Width) {
1373   default: // fallthrough
1374   case OPW32:
1375   case OPW16:
1376   case OPWV216:
1377     return AGPR_32RegClassID;
1378   case OPW64:
1379   case OPWV232: return AReg_64RegClassID;
1380   case OPW96: return AReg_96RegClassID;
1381   case OPW128: return AReg_128RegClassID;
1382   case OPW160: return AReg_160RegClassID;
1383   case OPW256: return AReg_256RegClassID;
1384   case OPW288: return AReg_288RegClassID;
1385   case OPW320: return AReg_320RegClassID;
1386   case OPW352: return AReg_352RegClassID;
1387   case OPW384: return AReg_384RegClassID;
1388   case OPW512: return AReg_512RegClassID;
1389   case OPW1024: return AReg_1024RegClassID;
1390   }
1391 }
1392 
1394 unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
1395   using namespace AMDGPU;
1396 
1397   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1398   switch (Width) {
1399   default: // fallthrough
1400   case OPW32:
1401   case OPW16:
1402   case OPWV216:
1403     return SGPR_32RegClassID;
1404   case OPW64:
1405   case OPWV232: return SGPR_64RegClassID;
1406   case OPW96: return SGPR_96RegClassID;
1407   case OPW128: return SGPR_128RegClassID;
1408   case OPW160: return SGPR_160RegClassID;
1409   case OPW256: return SGPR_256RegClassID;
1410   case OPW288: return SGPR_288RegClassID;
1411   case OPW320: return SGPR_320RegClassID;
1412   case OPW352: return SGPR_352RegClassID;
1413   case OPW384: return SGPR_384RegClassID;
1414   case OPW512: return SGPR_512RegClassID;
1415   }
1416 }
1417 
1418 unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
1419   using namespace AMDGPU;
1420 
1421   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1422   switch (Width) {
1423   default: // fallthrough
1424   case OPW32:
1425   case OPW16:
1426   case OPWV216:
1427     return TTMP_32RegClassID;
1428   case OPW64:
1429   case OPWV232: return TTMP_64RegClassID;
1430   case OPW128: return TTMP_128RegClassID;
1431   case OPW256: return TTMP_256RegClassID;
1432   case OPW288: return TTMP_288RegClassID;
1433   case OPW320: return TTMP_320RegClassID;
1434   case OPW352: return TTMP_352RegClassID;
1435   case OPW384: return TTMP_384RegClassID;
1436   case OPW512: return TTMP_512RegClassID;
1437   }
1438 }
1439 
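// Map Val to a trap temporary (ttmp) index if it lies within the subtarget's
// ttmp encoding range, otherwise return -1.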
1440 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1441   using namespace AMDGPU::EncValues;
1442 
1443   unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1444   unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1445 
1446   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1447 }
1448 
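// Decode a 10-bit source operand encoding ("enum10"). Bit 9 selects AGPRs on
// targets that have them; the low 9 bits hold the ordinary source encoding,
// where [VGPR_MIN, VGPR_MAX] indexes VGPRs and smaller values cover SGPRs,
// ttmps, inline constants and special registers.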
1449 MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
1450                                           bool MandatoryLiteral,
1451                                           unsigned ImmWidth) const {
1452   using namespace AMDGPU::EncValues;
1453 
1454   assert(Val < 1024); // enum10
1455 
1456   bool IsAGPR = Val & 512;
1457   Val &= 511;
1458 
1459   if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1460     return createRegOperand(IsAGPR ? getAgprClassId(Width)
1461                                    : getVgprClassId(Width), Val - VGPR_MIN);
1462   }
1463   return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth);
1464 }
1465 
1466 MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
1467                                                  unsigned Val,
1468                                                  bool MandatoryLiteral,
1469                                                  unsigned ImmWidth) const {
1470   // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
1471   // decoded earlier.
1472   assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1473   using namespace AMDGPU::EncValues;
1474 
1475   if (Val <= SGPR_MAX) {
1476     // "SGPR_MIN <= Val" is always true and causes compilation warning.
1477     static_assert(SGPR_MIN == 0);
1478     return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1479   }
1480 
1481   int TTmpIdx = getTTmpIdx(Val);
1482   if (TTmpIdx >= 0) {
1483     return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1484   }
1485 
1486   if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1487     return decodeIntImmed(Val);
1488 
1489   if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1490     return decodeFPImmed(ImmWidth, Val);
1491 
1492   if (Val == LITERAL_CONST) {
1493     if (MandatoryLiteral)
1494       // Keep a sentinel value for deferred setting.
1495       return MCOperand::createImm(LITERAL_CONST);
1496     return decodeLiteralConstant();
1497   }
1499 
1500   switch (Width) {
1501   case OPW32:
1502   case OPW16:
1503   case OPWV216:
1504     return decodeSpecialReg32(Val);
1505   case OPW64:
1506   case OPWV232:
1507     return decodeSpecialReg64(Val);
1508   default:
1509     llvm_unreachable("unexpected immediate type");
1510   }
1511 }
1512 
1513 // Bit 0 of DstY isn't stored in the instruction, because it's always the
1514 // opposite of bit 0 of DstX.
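// For example, if DstX encodes an even-numbered VGPR (bit 0 clear), bit 0 of
// DstY is reconstructed as 1.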
1515 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1516                                                unsigned Val) const {
1517   int VDstXInd =
1518       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1519   assert(VDstXInd != -1);
1520   assert(Inst.getOperand(VDstXInd).isReg());
1521   unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1522   Val |= ~XDstReg & 1;
1523   auto Width = llvm::AMDGPUDisassembler::OPW32;
1524   return createRegOperand(getVgprClassId(Width), Val);
1525 }
1526 
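// Scalar source encodings not claimed by the SGPR, ttmp or inline-constant
// ranges name fixed special registers. Note that the encodings of M0 and
// SGPR_NULL (124/125) swapped places on GFX11.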
1527 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1528   using namespace AMDGPU;
1529 
1530   switch (Val) {
1531   // clang-format off
1532   case 102: return createRegOperand(FLAT_SCR_LO);
1533   case 103: return createRegOperand(FLAT_SCR_HI);
1534   case 104: return createRegOperand(XNACK_MASK_LO);
1535   case 105: return createRegOperand(XNACK_MASK_HI);
1536   case 106: return createRegOperand(VCC_LO);
1537   case 107: return createRegOperand(VCC_HI);
1538   case 108: return createRegOperand(TBA_LO);
1539   case 109: return createRegOperand(TBA_HI);
1540   case 110: return createRegOperand(TMA_LO);
1541   case 111: return createRegOperand(TMA_HI);
1542   case 124:
1543     return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1544   case 125:
1545     return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1546   case 126: return createRegOperand(EXEC_LO);
1547   case 127: return createRegOperand(EXEC_HI);
1548   case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1549   case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1550   case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1551   case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1552   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1553   case 251: return createRegOperand(SRC_VCCZ);
1554   case 252: return createRegOperand(SRC_EXECZ);
1555   case 253: return createRegOperand(SRC_SCC);
1556   case 254: return createRegOperand(LDS_DIRECT);
1557   default: break;
1558     // clang-format on
1559   }
1560   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1561 }
1562 
1563 MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1564   using namespace AMDGPU;
1565 
1566   switch (Val) {
1567   case 102: return createRegOperand(FLAT_SCR);
1568   case 104: return createRegOperand(XNACK_MASK);
1569   case 106: return createRegOperand(VCC);
1570   case 108: return createRegOperand(TBA);
1571   case 110: return createRegOperand(TMA);
1572   case 124:
1573     if (isGFX11Plus())
1574       return createRegOperand(SGPR_NULL);
1575     break;
1576   case 125:
1577     if (!isGFX11Plus())
1578       return createRegOperand(SGPR_NULL);
1579     break;
1580   case 126: return createRegOperand(EXEC);
1581   case 235: return createRegOperand(SRC_SHARED_BASE);
1582   case 236: return createRegOperand(SRC_SHARED_LIMIT);
1583   case 237: return createRegOperand(SRC_PRIVATE_BASE);
1584   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1585   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1586   case 251: return createRegOperand(SRC_VCCZ);
1587   case 252: return createRegOperand(SRC_EXECZ);
1588   case 253: return createRegOperand(SRC_SCC);
1589   default: break;
1590   }
1591   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1592 }
1593 
1594 MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
1595                                             const unsigned Val,
1596                                             unsigned ImmWidth) const {
1597   using namespace AMDGPU::SDWA;
1598   using namespace AMDGPU::EncValues;
1599 
1600   if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1601       STI.hasFeature(AMDGPU::FeatureGFX10)) {
1602     // XXX: The cast to int avoids a tautological "comparison with unsigned
1603     // is always true" warning, since SRC_VGPR_MIN is 0.
1604     if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1605         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1606       return createRegOperand(getVgprClassId(Width),
1607                               Val - SDWA9EncValues::SRC_VGPR_MIN);
1608     }
1609     if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1610         Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1611                               : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1612       return createSRegOperand(getSgprClassId(Width),
1613                                Val - SDWA9EncValues::SRC_SGPR_MIN);
1614     }
1615     if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1616         Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1617       return createSRegOperand(getTtmpClassId(Width),
1618                                Val - SDWA9EncValues::SRC_TTMP_MIN);
1619     }
1620 
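    // Inline constants and special registers reuse the ordinary scalar source
    // encodings, biased by SRC_SGPR_MIN.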
1621     const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1622 
1623     if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1624       return decodeIntImmed(SVal);
1625 
1626     if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1627       return decodeFPImmed(ImmWidth, SVal);
1628 
1629     return decodeSpecialReg32(SVal);
1630   } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1631     return createRegOperand(getVgprClassId(Width), Val);
1632   }
1633   llvm_unreachable("unsupported target");
1634 }
1635 
1636 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1637   return decodeSDWASrc(OPW16, Val, 16);
1638 }
1639 
1640 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1641   return decodeSDWASrc(OPW32, Val, 32);
1642 }
1643 
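// Decode the SDWA VOPC destination. With VOPC_DST_VCC_MASK clear the result
// goes to VCC (VCC_LO in wave32); otherwise the remaining bits select an SGPR,
// ttmp or special register sized for the wavefront's lane mask.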
1644 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1645   using namespace AMDGPU::SDWA;
1646 
1647   assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1648           STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1649          "SDWAVopcDst should be present only on GFX9+");
1650 
1651   bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1652 
1653   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1654     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1655 
1656     int TTmpIdx = getTTmpIdx(Val);
1657     if (TTmpIdx >= 0) {
1658       auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1659       return createSRegOperand(TTmpClsId, TTmpIdx);
1660     } else if (Val > SGPR_MAX) {
1661       return IsWave64 ? decodeSpecialReg64(Val)
1662                       : decodeSpecialReg32(Val);
1663     } else {
1664       return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1665     }
1666   } else {
1667     return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1668   }
1669 }
1670 
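// Boolean (lane-mask) operands are wave-size dependent: an SGPR pair in
// wave64, a single SGPR in wave32.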
1671 MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1672   return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
1673              ? decodeSrcOp(OPW64, Val)
1674              : decodeSrcOp(OPW32, Val);
1675 }
1676 
1677 bool AMDGPUDisassembler::isVI() const {
1678   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1679 }
1680 
1681 bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1682 
1683 bool AMDGPUDisassembler::isGFX90A() const {
1684   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1685 }
1686 
1687 bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1688 
1689 bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1690 
1691 bool AMDGPUDisassembler::isGFX10Plus() const {
1692   return AMDGPU::isGFX10Plus(STI);
1693 }
1694 
1695 bool AMDGPUDisassembler::isGFX11() const {
1696   return STI.hasFeature(AMDGPU::FeatureGFX11);
1697 }
1698 
1699 bool AMDGPUDisassembler::isGFX11Plus() const {
1700   return AMDGPU::isGFX11Plus(STI);
1701 }
1702 
1704 bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1705   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1706 }
1707 
1708 bool AMDGPUDisassembler::hasKernargPreload() const {
1709   return AMDGPU::hasKernargPreload(STI);
1710 }
1711 
1712 //===----------------------------------------------------------------------===//
1713 // AMDGPU specific symbol handling
1714 //===----------------------------------------------------------------------===//
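// Helpers for the decodeCOMPUTE_PGM_RSRC* routines below: extract MASK from
// the 32-bit word being decoded and print it either as an .amdhsa_* assembler
// directive or, for fields with no corresponding directive, as a comment.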
1715 #define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1716 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
1717   do {                                                                         \
1718     KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
1719   } while (0)
1720 #define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
1721   do {                                                                         \
1722     KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
1723              << GET_FIELD(MASK) << '\n';                                       \
1724   } while (0)
1725 
1726 // NOLINTNEXTLINE(readability-identifier-naming)
1727 MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1728     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1729   using namespace amdhsa;
1730   StringRef Indent = "\t";
1731 
1732   // We cannot accurately backward compute #VGPRs used from
1733   // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1734   // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1735   // simply calculate the inverse of what the assembler does.
1736 
1737   uint32_t GranulatedWorkitemVGPRCount =
1738       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1739 
1740   uint32_t NextFreeVGPR =
1741       (GranulatedWorkitemVGPRCount + 1) *
1742       AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1743 
1744   KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1745 
1746   // We cannot backward compute the values used to calculate
1747   // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values of the
1748   // following directives cannot be recovered:
1749   // .amdhsa_reserve_vcc
1750   // .amdhsa_reserve_flat_scratch
1751   // .amdhsa_reserve_xnack_mask
1752   // They take their respective default values if not specified in the assembly.
1753   //
1754   // GRANULATED_WAVEFRONT_SGPR_COUNT
1755   //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1756   //
1757   // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1758   // are set to 0. So while disassembling we consider that:
1759   //
1760   // GRANULATED_WAVEFRONT_SGPR_COUNT
1761   //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1762   //
1763   // The disassembler cannot recover the original values of those 3 directives.
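  //
  // For example, with an SGPR encoding granule of 8, a stored
  // GRANULATED_WAVEFRONT_SGPR_COUNT of 1 is printed back as
  // .amdhsa_next_free_sgpr 16, with zeros for the three reserve directives.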
1764 
1765   uint32_t GranulatedWavefrontSGPRCount =
1766       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1767 
1768   if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
1769     return MCDisassembler::Fail;
1770 
1771   uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1772                           AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1773 
1774   KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1775   if (!hasArchitectedFlatScratch())
1776     KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1777   KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1778   KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << '\n';
1779 
1780   if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY)
1781     return MCDisassembler::Fail;
1782 
1783   PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1784                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1785   PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1786                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1787   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1788                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1789   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1790                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1791 
1792   if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV)
1793     return MCDisassembler::Fail;
1794 
1795   PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
1796 
1797   if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE)
1798     return MCDisassembler::Fail;
1799 
1800   PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
1801 
1802   if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY)
1803     return MCDisassembler::Fail;
1804 
1805   if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
1806     return MCDisassembler::Fail;
1807 
1808   PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
1809 
1810   if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
1811     return MCDisassembler::Fail;
1812 
1813   if (isGFX10Plus()) {
1814     PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1815                     COMPUTE_PGM_RSRC1_WGP_MODE);
1816     PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
1817     PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
1818   }
1819   return MCDisassembler::Success;
1820 }
1821 
1822 // NOLINTNEXTLINE(readability-identifier-naming)
1823 MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1824     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1825   using namespace amdhsa;
1826   StringRef Indent = "\t";
1827   if (hasArchitectedFlatScratch())
1828     PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1829                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1830   else
1831     PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1832                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1833   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1834                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1835   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1836                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1837   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1838                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1839   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1840                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1841   PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1842                   COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1843 
1844   if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH)
1845     return MCDisassembler::Fail;
1846 
1847   if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY)
1848     return MCDisassembler::Fail;
1849 
1850   if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE)
1851     return MCDisassembler::Fail;
1852 
1853   PRINT_DIRECTIVE(
1854       ".amdhsa_exception_fp_ieee_invalid_op",
1855       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
1856   PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
1857                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
1858   PRINT_DIRECTIVE(
1859       ".amdhsa_exception_fp_ieee_div_zero",
1860       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
1861   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
1862                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
1863   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
1864                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
1865   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
1866                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
1867   PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
1868                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
1869 
1870   if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0)
1871     return MCDisassembler::Fail;
1872 
1873   return MCDisassembler::Success;
1874 }
1875 
1876 // NOLINTNEXTLINE(readability-identifier-naming)
1877 MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
1878     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1879   using namespace amdhsa;
1880   StringRef Indent = "\t";
1881   if (isGFX90A()) {
1882     KdStream << Indent << ".amdhsa_accum_offset "
1883              << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
1884              << '\n';
1885     if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
1886       return MCDisassembler::Fail;
1887     PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
1888     if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
1889       return MCDisassembler::Fail;
1890   } else if (isGFX10Plus()) {
1891     if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
1892       PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
1893                       COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
1894     } else {
1895       PRINT_PSEUDO_DIRECTIVE_COMMENT(
1896           "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
1897     }
1898     PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
1899                                    COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
1900     PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
1901                                    COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
1902     PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
1903                                    COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
1904     if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
1905       return MCDisassembler::Fail;
1906     PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
1907                                    COMPUTE_PGM_RSRC3_GFX10_PLUS_IMAGE_OP);
1908   } else if (FourByteBuffer) {
1909     return MCDisassembler::Fail;
1910   }
1911   return MCDisassembler::Success;
1912 }
1913 #undef PRINT_PSEUDO_DIRECTIVE_COMMENT
1914 #undef PRINT_DIRECTIVE
1915 #undef GET_FIELD
1916 
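// Decode a single field of the 64-byte kernel descriptor at the cursor's
// current offset, printing the corresponding .amdhsa_* directives to KdStream
// and advancing the cursor past the bytes consumed.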
1917 MCDisassembler::DecodeStatus
1918 AMDGPUDisassembler::decodeKernelDescriptorDirective(
1919     DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
1920     raw_string_ostream &KdStream) const {
1921 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
1922   do {                                                                         \
1923     KdStream << Indent << DIRECTIVE " "                                        \
1924              << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
1925   } while (0)
1926 
1927   uint16_t TwoByteBuffer = 0;
1928   uint32_t FourByteBuffer = 0;
1929 
1930   StringRef ReservedBytes;
1931   StringRef Indent = "\t";
1932 
1933   assert(Bytes.size() == 64);
1934   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
1935 
1936   switch (Cursor.tell()) {
1937   case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
1938     FourByteBuffer = DE.getU32(Cursor);
1939     KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
1940              << '\n';
1941     return MCDisassembler::Success;
1942 
1943   case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
1944     FourByteBuffer = DE.getU32(Cursor);
1945     KdStream << Indent << ".amdhsa_private_segment_fixed_size "
1946              << FourByteBuffer << '\n';
1947     return MCDisassembler::Success;
1948 
1949   case amdhsa::KERNARG_SIZE_OFFSET:
1950     FourByteBuffer = DE.getU32(Cursor);
1951     KdStream << Indent << ".amdhsa_kernarg_size "
1952              << FourByteBuffer << '\n';
1953     return MCDisassembler::Success;
1954 
1955   case amdhsa::RESERVED0_OFFSET:
1956     // 4 reserved bytes, must be 0.
1957     ReservedBytes = DE.getBytes(Cursor, 4);
1958     for (int I = 0; I < 4; ++I) {
1959       if (ReservedBytes[I] != 0) {
1960         return MCDisassembler::Fail;
1961       }
1962     }
1963     return MCDisassembler::Success;
1964 
1965   case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
1966     // KERNEL_CODE_ENTRY_BYTE_OFFSET
1967     // So far no directive controls this for Code Object V3, so simply skip it
1968     // when disassembling.
1969     DE.skip(Cursor, 8);
1970     return MCDisassembler::Success;
1971 
1972   case amdhsa::RESERVED1_OFFSET:
1973     // 20 reserved bytes, must be 0.
1974     ReservedBytes = DE.getBytes(Cursor, 20);
1975     for (int I = 0; I < 20; ++I) {
1976       if (ReservedBytes[I] != 0) {
1977         return MCDisassembler::Fail;
1978       }
1979     }
1980     return MCDisassembler::Success;
1981 
1982   case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
1983     FourByteBuffer = DE.getU32(Cursor);
1984     return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
1985 
1986   case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
1987     FourByteBuffer = DE.getU32(Cursor);
1988     return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
1989 
1990   case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
1991     FourByteBuffer = DE.getU32(Cursor);
1992     return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
1993 
1994   case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
1995     using namespace amdhsa;
1996     TwoByteBuffer = DE.getU16(Cursor);
1997 
1998     if (!hasArchitectedFlatScratch())
1999       PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2000                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2001     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2002                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2003     PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2004                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2005     PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2006                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2007     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2008                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2009     if (!hasArchitectedFlatScratch())
2010       PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2011                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2012     PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2013                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2014 
2015     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2016       return MCDisassembler::Fail;
2017 
2018     // Reserved for GFX9
2019     if (isGFX9() &&
2020         (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2021       return MCDisassembler::Fail;
2022     } else if (isGFX10Plus()) {
2023       PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2024                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2025     }
2026 
2027     if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
2028       PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2029                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2030 
2031     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1)
2032       return MCDisassembler::Fail;
2033 
2034     return MCDisassembler::Success;
2035 
2036   case amdhsa::KERNARG_PRELOAD_OFFSET:
2037     using namespace amdhsa;
2038     TwoByteBuffer = DE.getU16(Cursor);
2039     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2040       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2041                       KERNARG_PRELOAD_SPEC_LENGTH);
2042     }
2043 
2044     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2045       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2046                       KERNARG_PRELOAD_SPEC_OFFSET);
2047     }
2048     return MCDisassembler::Success;
2049 
2050   case amdhsa::RESERVED3_OFFSET:
2051     // 4 bytes from here are reserved, must be 0.
2052     ReservedBytes = DE.getBytes(Cursor, 4);
2053     for (int I = 0; I < 4; ++I) {
2054       if (ReservedBytes[I] != 0)
2055         return MCDisassembler::Fail;
2056     }
2057     return MCDisassembler::Success;
2058 
2059   default:
2060     llvm_unreachable("Unhandled index. Case statements cover everything.");
2061     return MCDisassembler::Fail;
2062   }
2063 #undef PRINT_DIRECTIVE
2064 }
2065 
2066 MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
2067     StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2068   // CP microcode requires the kernel descriptor to be 64-byte aligned.
2069   if (Bytes.size() != 64 || KdAddress % 64 != 0)
2070     return MCDisassembler::Fail;
2071 
2072   // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2073   // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2074   // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2075   // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2076   // when required.
2077   if (isGFX10Plus()) {
2078     uint16_t KernelCodeProperties =
2079         support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2080                                 llvm::endianness::little);
2081     EnableWavefrontSize32 =
2082         AMDHSA_BITS_GET(KernelCodeProperties,
2083                         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2084   }
2085 
2086   std::string Kd;
2087   raw_string_ostream KdStream(Kd);
2088   KdStream << ".amdhsa_kernel " << KdName << '\n';
2089 
2090   DataExtractor::Cursor C(0);
2091   while (C && C.tell() < Bytes.size()) {
2092     MCDisassembler::DecodeStatus Status =
2093         decodeKernelDescriptorDirective(C, Bytes, KdStream);
2094 
2095     cantFail(C.takeError());
2096 
2097     if (Status == MCDisassembler::Fail)
2098       return MCDisassembler::Fail;
2099   }
2100   KdStream << ".end_amdhsa_kernel\n";
2101   outs() << KdStream.str();
2102   return MCDisassembler::Success;
2103 }
2104 
2105 std::optional<MCDisassembler::DecodeStatus>
2106 AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
2107                                   ArrayRef<uint8_t> Bytes, uint64_t Address,
2108                                   raw_ostream &CStream) const {
2109   // Right now only the kernel descriptor needs to be handled.
2110   // We ignore all other symbols for target-specific handling.
2111   // TODO:
2112   // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2113   // Object V2 and V3 when symbols are marked protected.
2114 
2115   // amd_kernel_code_t for Code Object V2.
2116   if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2117     Size = 256;
2118     return MCDisassembler::Fail;
2119   }
2120 
2121   // Code Object V3 kernel descriptors.
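  // A kernel foo is accompanied by a 64-byte object symbol foo.kd that holds
  // its descriptor.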
2122   StringRef Name = Symbol.Name;
2123   if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(".kd")) {
2124     Size = 64; // Size = 64 regardless of success or failure.
2125     return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2126   }
2127   return std::nullopt;
2128 }
2129 
2130 //===----------------------------------------------------------------------===//
2131 // AMDGPUSymbolizer
2132 //===----------------------------------------------------------------------===//
2133 
2134 // Try to find a symbol name for the specified label.
2135 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2136     MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2137     uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2138     uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2139 
2140   if (!IsBranch) {
2141     return false;
2142   }
2143 
2144   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2145   if (!Symbols)
2146     return false;
2147 
2148   auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2149     return Val.Addr == static_cast<uint64_t>(Value) &&
2150            Val.Type == ELF::STT_NOTYPE;
2151   });
2152   if (Result != Symbols->end()) {
2153     auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2154     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2155     Inst.addOperand(MCOperand::createExpr(Add));
2156     return true;
2157   }
2158   // Add to list of referenced addresses, so caller can synthesize a label.
2159   ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2160   return false;
2161 }
2162 
2163 void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2164                                                        int64_t Value,
2165                                                        uint64_t Address) {
2166   llvm_unreachable("unimplemented");
2167 }
2168 
2169 //===----------------------------------------------------------------------===//
2170 // Initialization
2171 //===----------------------------------------------------------------------===//
2172 
2173 static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2174                               LLVMOpInfoCallback /*GetOpInfo*/,
2175                               LLVMSymbolLookupCallback /*SymbolLookUp*/,
2176                               void *DisInfo,
2177                               MCContext *Ctx,
2178                               std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2179   return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2180 }
2181 
2182 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2183                                                 const MCSubtargetInfo &STI,
2184                                                 MCContext &Ctx) {
2185   return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2186 }
2187 
2188 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2189   TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2190                                          createAMDGPUDisassembler);
2191   TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2192                                        createAMDGPUSymbolizer);
2193 }
2194