//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
  if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
      !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
    MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
    // Generations before gfx10 have FeatureWavefrontSize64 in their
    // definitions already, so if there is no default wave size this must be
    // gfx10 or later; set wave32 as the default.
    STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    return STICopy;
  }

  return STI;
}

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
      MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
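  // E.g. Imm = 0xFFFC encodes simm16 = -4, so at Addr = 0x100 the branch
  // target resolves to 0x100 + 4 + (-4 * 4) = 0xF4.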
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
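  // E.g. Imm = 0x1FFFFF decodes to -1 on GFX9+ (21-bit signed), to 0xFFFFF on
  // VI (20-bit unsigned), and to 0x1FFFFF on GFX12 (24-bit signed, sign bit
  // clear).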
  return addOperand(Inst, MCOperand::createImm(Offset));
}

static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
                                  const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
}

static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
}

static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }

// Decoder for registers that are decoded directly by RegClassID; the 8-bit
// Imm is the register number. Used by VGPR-only and AGPR-only operands.
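// E.g. DECODE_OPERAND_REG_8(VGPR_32) defines DecodeVGPR_32RegisterClass,
// which maps Imm = 3 directly to v3.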
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }

#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }

static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                AMDGPUDisassembler::OpWidthTy OpWidth,
                                unsigned Imm, unsigned EncImm,
                                bool MandatoryLiteral, unsigned ImmWidth,
                                AMDGPU::OperandSemantics Sema,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
}

// Decoder for registers. The 7-bit Imm is the register number; decodeSrcOp
// resolves the register class. Used by SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)

// Decoder for registers. Imm (10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-or-VGPR register operands).
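// E.g. Imm{9} = 1 with Imm{7-0} = 5 decodes to a5, while Imm{9} = 0 with the
// same register number decodes to v5.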
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src (9-bit encoding) registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for Src (9-bit encoding) AGPRs only: the register number is encoded
// in 9 bits; set Imm{9} to 1 (the acc bit) and decode using 'enum10' from
// decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
                                const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for 'enum10' from decodeSrcOp: Imm{8-0} is the 9-bit Src encoding
// and Imm{9} is acc; registers only.
template <AMDGPUDisassembler::OpWidthTy OpWidth>
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
                                  uint64_t /* Addr */,
                                  const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
}

// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and the InstPrinter will report a warning.
// An immediate is decoded into a constant of size ImmWidth, which should
// match the width of the immediate used by the OperandType (important for
// floating point types).
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
                                       uint64_t /* Addr */,
                                       const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Decoder for Src (9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set
// acc) and decode using 'enum10' from decodeSrcOp.
template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
                                        uint64_t /* Addr */,
                                        const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
                                               uint64_t /* Addr */,
                                               const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
}

// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

DECODE_OPERAND_REG_7(SReg_32, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)

DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)

static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  bool IsHi = Imm & (1 << 9);
  unsigned RegIdx = Imm & 0xff;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  bool IsHi = Imm & (1 << 7);
  unsigned RegIdx = Imm & 0x7f;
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 7);
    unsigned RegIdx = Imm & 0x7f;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
}

template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
          unsigned OperandSemantics>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    bool IsHi = Imm & (1 << 9);
    unsigned RegIdx = Imm & 0xff;
    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(
                              OpWidth, Imm & 0xFF, false, ImmWidth,
                              (AMDGPU::OperandSemantics)OperandSemantics));
}

static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                         uint64_t Addr,
                                         const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}

static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
                                          uint64_t Addr, const void *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
}

static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
                          const MCRegisterInfo *MRI) {
  if (OpIdx < 0)
    return false;

  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
  auto Reg = Sub ? Sub : Op.getReg();
  return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
}

static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 AMDGPUDisassembler::OpWidthTy Opw,
                                 const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  if (!DAsm->isGFX90A()) {
    Imm &= 511;
  } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The AGPR bit is decoded along with vdst, the first operand, so we need
    // to change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
  }
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
}

template <AMDGPUDisassembler::OpWidthTy Opw>
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
                                 uint64_t /* Addr */,
                                 const MCDisassembler *Decoder) {
  return decodeAVLdSt(Inst, Imm, Opw, Decoder);
}

static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
                                     uint64_t /* Addr */,
                                     const MCDisassembler *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeVersionImm(Imm));
}

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
  assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
  return DecoderUInt128(Lo, Hi);
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());

  do {
    // ToDo: it would be better to switch over the encoding length using some
    // bit predicate, but that is not known yet, so try everything we can.

    // Try to decode DPP and SDWA first to resolve the conflict with the VOP1
    // and VOP2 encodings.
    if (isGFX11Plus() && Bytes.size() >= 12) {
      DecoderUInt128 DecW = eat12Bytes(Bytes);

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
                        DecW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
          tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
        break;

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
          tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
          tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
        break;

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
                        Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
        break;

      // Reinitialize Bytes
      Bytes = Bytes_.slice(0, MaxInstBytesNum);
    }

    // Try to decode a 32-bit instruction.
    if (Bytes.size() >= 4) {
      const uint32_t DW = eatBytes<uint32_t>(Bytes);

      if ((isVI() || isGFX9()) &&
          tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
        break;

      if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
        break;

      if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
          tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
        break;

      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
          tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
        break;

      if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
        break;

      if (isGFX11() &&
          tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
                        Address, CS))
        break;

      if (isGFX12() &&
          tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
                        Address, CS))
        break;
    }

    return MCDisassembler::Fail;
  } while (false);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
    if (isMacDPP(MI))
      convertMacDPPInst(MI);

    if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
      convertVOP3PDPPInst(MI);
    else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
             AMDGPU::isVOPC64DPP(MI.getOpcode()))
      convertVOPCDPPInst(MI); // Special VOP3 case
    else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
             -1)
      convertDPP8Inst(MI);
    else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
      convertVOP3DPPInst(MI); // Regular VOP3 case
  }

  convertTrue16OpSel(MI);

  if (AMDGPU::isMAC(MI.getOpcode())) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
      MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
      !AMDGPU::hasGDS(STI)) {
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
    int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::cpol);
    if (CPolPos != -1) {
      unsigned CPol =
          (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
              AMDGPU::CPol::GLC : 0;
      if (MI.getNumOperands() <= (unsigned)CPolPos) {
        insertNamedMCOperand(MI, MCOperand::createImm(CPol),
                             AMDGPU::OpName::cpol);
      } else if (CPol) {
        MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
      }
    }
  }

  if ((MCII->get(MI.getOpcode()).TSFlags &
       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
      (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE; its place is occupied by ACC.
    int TFEOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
    if (TFEOpIdx != -1) {
      auto TFEIter = MI.begin();
      std::advance(TFEIter, TFEOpIdx);
      MI.insert(TFEIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
    int SWZOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
    if (SWZOpIdx != -1) {
      auto SWZIter = MI.begin();
      std::advance(SWZIter, SWZOpIdx);
      MI.insert(SWZIter, MCOperand::createImm(0));
    }
  }

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
    int RsrcIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
    unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
    if (VAddr0Idx >= 0 && NSAArgs > 0) {
      unsigned NSAWords = (NSAArgs + 3) / 4;
      if (Bytes.size() < 4 * NSAWords)
        return MCDisassembler::Fail;
      for (unsigned i = 0; i < NSAArgs; ++i) {
        const unsigned VAddrIdx = VAddr0Idx + 1 + i;
        auto VAddrRCID =
            MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
        MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
      }
      Bytes = Bytes.slice(4 * NSAWords);
    }

    convertMIMGInst(MI);
  }

  if (MCII->get(MI.getOpcode()).TSFlags &
      (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
    convertMIMGInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
    convertEXPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
    convertVINTERPInst(MI);

  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
    convertSDWAInst(MI);

  int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                              AMDGPU::OpName::vdst_in);
  if (VDstIn_Idx != -1) {
    int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
                           MCOI::OperandConstraint::TIED_TO);
    if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
         !MI.getOperand(VDstIn_Idx).isReg() ||
         MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
      if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
        MI.erase(&MI.getOperand(VDstIn_Idx));
      insertNamedMCOperand(MI,
        MCOperand::createReg(MI.getOperand(Tied).getReg()),
        AMDGPU::OpName::vdst_in);
    }
  }

  int ImmLitIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
  bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
  if (ImmLitIdx != -1 && !IsSOPK)
    convertFMAanyK(MI, ImmLitIdx);

  Size = MaxInstBytesNum - Bytes.size();
  return MCDisassembler::Success;
}

void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
    // The MCInst still has these fields even though they are no longer encoded
    // in the GFX11 instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
  }
}

void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
  if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
      MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
    // The MCInst has this field that is not directly encoded in the
    // instruction.
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
  }
}

void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
      STI.hasFeature(AMDGPU::FeatureGFX10)) {
    if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
}

struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};

// Reconstruct values of VOP3/VOP3P operands such as op_sel.
// Note that these values do not affect disassembler output,
// so this is only necessary for consistency with src_modifiers.
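// E.g. with OP_SEL_0 set in both src0_modifiers and src2_modifiers, OpSel
// below ends up as 0b101.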
static VOPModifiers collectVOPModifiers(const MCInst &MI,
                                        bool IsVOP3P = false) {
  VOPModifiers Modifiers;
  unsigned Opc = MI.getOpcode();
  const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
                        AMDGPU::OpName::src1_modifiers,
                        AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    if (OpIdx == -1)
      continue;

    unsigned Val = MI.getOperand(OpIdx).getImm();

    Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
    if (IsVOP3P) {
      Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
      Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
      Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
    } else if (J == 0) {
      Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
    }
  }

  return Modifiers;
}

// Instructions decode the op_sel/suffix bits into the src_modifiers
// operands. Copy those bits into the src operands for true16 VGPRs.
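// E.g. if src0 was decoded as v5.l and OP_SEL_0 is set in src0_modifiers,
// src0 is rewritten to v5.h.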
void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MCRegisterClass &ConversionRC =
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
  constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
      {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
        SISrcMods::OP_SEL_0},
       {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
        SISrcMods::DST_OP_SEL}}};
  for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
    int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
    if (OpIdx == -1 || OpModsIdx == -1)
      continue;
    MCOperand &Op = MI.getOperand(OpIdx);
    if (!Op.isReg())
      continue;
    if (!ConversionRC.contains(Op.getReg()))
      continue;
    unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
    const MCOperand &OpMods = MI.getOperand(OpModsIdx);
    unsigned ModVal = OpMods.getImm();
    if (ModVal & OpSelMask) { // isHi
      unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
      Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
    }
  }
}

// MAC opcodes have special old and src2 operands.
// src2 is tied to dst, while old is not tied (but assumed to be).
bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
  constexpr int DST_IDX = 0;
  auto Opcode = MI.getOpcode();
  const auto &Desc = MCII->get(Opcode);
  auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);

  if (OldIdx != -1 && Desc.getOperandConstraint(
                          OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
    assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
    assert(Desc.getOperandConstraint(
               AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
               MCOI::OperandConstraint::TIED_TO) == DST_IDX);
    (void)DST_IDX;
    return true;
  }

  return false;
}

// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
  insertNamedMCOperand(MI, MCOperand::createImm(0),
                       AMDGPU::OpName::src2_modifiers);
}

void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    convertTrue16OpSel(MI);
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  } else {
    // Insert dummy unused src modifiers.
    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src0_modifiers);

    if (MI.getNumOperands() < DescNumOps &&
        AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
      insertNamedMCOperand(MI, MCOperand::createImm(0),
                           AMDGPU::OpName::src1_modifiers);
  }
}

void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
  convertTrue16OpSel(MI);

  int VDstInIdx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
  if (VDstInIdx != -1)
    insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);

  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    auto Mods = collectVOPModifiers(MI);
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  }
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it were one dword, which may not actually be the case.
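// E.g. an image load with dmask = 0b0111 enables three channels, so below the
// opcode is rewritten to its VDataDwords = 3 variant and vdata is widened to
// a 96-bit register tuple.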
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
  int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
                                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
    DstSize += 1;

  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding,
                                        DstSize, AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  unsigned NewVdata = AMDGPU::NoRegister;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;

    // Get first subregister of VData
    unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
    unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

    NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                       &MRI.getRegClass(DataRCID));
    if (NewVdata == AMDGPU::NoRegister) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  unsigned NewVAddrSA = AMDGPU::NoRegister;
  if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
    unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
    NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
                                        &MRI.getRegClass(AddrRCID));
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
  } else if (IsNSA) {
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(MI.begin() + VAddr0Idx + AddrSize,
             MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}

// Opsel and neg bits are used both in src_modifiers and in standalone
// operands. The autogenerated decoder only adds them to src_modifiers, so
// manually add the bits to the other operands.
void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();
  auto Mods = collectVOPModifiers(MI, true);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
    insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
                         AMDGPU::OpName::op_sel);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
                         AMDGPU::OpName::op_sel_hi);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
                         AMDGPU::OpName::neg_lo);
  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
    insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
                         AMDGPU::OpName::neg_hi);
}

// Create dummy old operand and insert optional operands
void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
  unsigned Opc = MI.getOpcode();
  unsigned DescNumOps = MCII->get(Opc).getNumOperands();

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
    insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src0_modifiers);

  if (MI.getNumOperands() < DescNumOps &&
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src1_modifiers);
}

void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
  assert(HasLiteral && "Should have decoded a literal");
  const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
  unsigned DescNumOps = Desc.getNumOperands();
  insertNamedMCOperand(MI, MCOperand::createImm(Literal),
                       AMDGPU::OpName::immDeferred);
  assert(DescNumOps == MI.getNumOperands());
  for (unsigned I = 0; I < DescNumOps; ++I) {
    auto &Op = MI.getOperand(I);
    auto OpType = Desc.operands()[I].OperandType;
    bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
                         OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
    if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
        IsDeferredOp)
      Op.setImm(Literal);
  }
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI has 102.
  // Valery: here we accept as much as we can and let the assembler sort it
  // out.
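  // E.g. for SGPR_64 (shift = 1) a raw value of 2 selects s[2:3]; an odd
  // value triggers the alignment warning below.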
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
                                                  bool IsHi) const {
  unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
  return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
}

// Decode literals for instructions which always have a literal in the
// encoding.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(Val, "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are treated as unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
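  // For 64-bit FP operands (ExtendFP64) the 32-bit literal fills the high
  // half: e.g. 0x3FF00000 becomes 0x3FF0000000000000, i.e. 1.0.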
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = Literal64 = eatBytes<uint32_t>(Bytes);
    if (ExtendFP64)
      Literal64 <<= 32;
  }
  return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

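  // Inline integers encode 0..64 as 128..192 and -1..-16 as 193..208, so
  // e.g. Imm = 129 decodes to 1 and Imm = 193 decodes to -1.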
1286   assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1287   // The cast to int64_t prevents unsigned wraparound for the negative range.
1288   return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1289     (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1290     (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1291 }
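
// Worked examples, assuming the usual EncValues (INLINE_INTEGER_C_MIN == 128,
// INLINE_INTEGER_C_POSITIVE_MAX == 192, INLINE_INTEGER_C_MAX == 208):
//   decodeIntImmed(128) == 0
//   decodeIntImmed(192) == 64
//   decodeIntImmed(193) == 192 - 193 == -1
//   decodeIntImmed(208) == -16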
1292 
1293 static int64_t getInlineImmVal32(unsigned Imm) {
1294   switch (Imm) {
1295   case 240:
1296     return llvm::bit_cast<uint32_t>(0.5f);
1297   case 241:
1298     return llvm::bit_cast<uint32_t>(-0.5f);
1299   case 242:
1300     return llvm::bit_cast<uint32_t>(1.0f);
1301   case 243:
1302     return llvm::bit_cast<uint32_t>(-1.0f);
1303   case 244:
1304     return llvm::bit_cast<uint32_t>(2.0f);
1305   case 245:
1306     return llvm::bit_cast<uint32_t>(-2.0f);
1307   case 246:
1308     return llvm::bit_cast<uint32_t>(4.0f);
1309   case 247:
1310     return llvm::bit_cast<uint32_t>(-4.0f);
1311   case 248: // 1 / (2 * PI)
1312     return 0x3e22f983;
1313   default:
1314     llvm_unreachable("invalid fp inline imm");
1315   }
1316 }
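
// The values above are simply the IEEE-754 single-precision bit patterns,
// e.g. case 240 is bit_cast<uint32_t>(0.5f) == 0x3F000000 and case 242 is
// 0x3F800000 (1.0f); only 1/(2*PI) lacks an exact float spelling and is
// written out as the rounded pattern 0x3E22F983.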
1317 
1318 static int64_t getInlineImmVal64(unsigned Imm) {
1319   switch (Imm) {
1320   case 240:
1321     return llvm::bit_cast<uint64_t>(0.5);
1322   case 241:
1323     return llvm::bit_cast<uint64_t>(-0.5);
1324   case 242:
1325     return llvm::bit_cast<uint64_t>(1.0);
1326   case 243:
1327     return llvm::bit_cast<uint64_t>(-1.0);
1328   case 244:
1329     return llvm::bit_cast<uint64_t>(2.0);
1330   case 245:
1331     return llvm::bit_cast<uint64_t>(-2.0);
1332   case 246:
1333     return llvm::bit_cast<uint64_t>(4.0);
1334   case 247:
1335     return llvm::bit_cast<uint64_t>(-4.0);
1336   case 248: // 1 / (2 * PI)
1337     return 0x3fc45f306dc9c882;
1338   default:
1339     llvm_unreachable("invalid fp inline imm");
1340   }
1341 }
1342 
1343 static int64_t getInlineImmValF16(unsigned Imm) {
1344   switch (Imm) {
1345   case 240:
1346     return 0x3800;
1347   case 241:
1348     return 0xB800;
1349   case 242:
1350     return 0x3C00;
1351   case 243:
1352     return 0xBC00;
1353   case 244:
1354     return 0x4000;
1355   case 245:
1356     return 0xC000;
1357   case 246:
1358     return 0x4400;
1359   case 247:
1360     return 0xC400;
1361   case 248: // 1 / (2 * PI)
1362     return 0x3118;
1363   default:
1364     llvm_unreachable("invalid fp inline imm");
1365   }
1366 }
1367 
1368 static int64_t getInlineImmValBF16(unsigned Imm) {
1369   switch (Imm) {
1370   case 240:
1371     return 0x3F00;
1372   case 241:
1373     return 0xBF00;
1374   case 242:
1375     return 0x3F80;
1376   case 243:
1377     return 0xBF80;
1378   case 244:
1379     return 0x4000;
1380   case 245:
1381     return 0xC000;
1382   case 246:
1383     return 0x4080;
1384   case 247:
1385     return 0xC080;
1386   case 248: // 1 / (2 * PI)
1387     return 0x3E22;
1388   default:
1389     llvm_unreachable("invalid fp inline imm");
1390   }
1391 }
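
// Note the relationship to the f32 table: each bfloat16 pattern is the high
// half of the corresponding single-precision pattern, e.g. 0.5f ==
// 0x3F000000 truncates to 0x3F00 here, and 1/(2*PI) == 0x3E22F983 truncates
// to 0x3E22.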
1392 
1393 static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1394   return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
1395                                                   : getInlineImmValF16(Imm);
1396 }
1397 
1398 MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1399                                             AMDGPU::OperandSemantics Sema) {
1400   assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
1401          Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1402 
1403   // ToDo: case 248: 1/(2*PI) is allowed only on VI.
1404   // ImmWidth 0 is the default case where the operand should not allow
1405   // immediates. The value is still decoded into a 32-bit immediate operand;
1406   // the instruction printer will use it to print a verbose error message.
1407   switch (ImmWidth) {
1408   case 0:
1409   case 32:
1410     return MCOperand::createImm(getInlineImmVal32(Imm));
1411   case 64:
1412     return MCOperand::createImm(getInlineImmVal64(Imm));
1413   case 16:
1414     return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
1415   default:
1416     llvm_unreachable("implement me");
1417   }
1418 }
1419 
1420 unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
1421   using namespace AMDGPU;
1422 
1423   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1424   switch (Width) {
1425   default: // Fall through to the 32-bit class.
1426   case OPW32:
1427   case OPW16:
1428   case OPWV216:
1429     return VGPR_32RegClassID;
1430   case OPW64:
1431   case OPWV232: return VReg_64RegClassID;
1432   case OPW96: return VReg_96RegClassID;
1433   case OPW128: return VReg_128RegClassID;
1434   case OPW160: return VReg_160RegClassID;
1435   case OPW256: return VReg_256RegClassID;
1436   case OPW288: return VReg_288RegClassID;
1437   case OPW320: return VReg_320RegClassID;
1438   case OPW352: return VReg_352RegClassID;
1439   case OPW384: return VReg_384RegClassID;
1440   case OPW512: return VReg_512RegClassID;
1441   case OPW1024: return VReg_1024RegClassID;
1442   }
1443 }
1444 
1445 unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1446   using namespace AMDGPU;
1447 
1448   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1449   switch (Width) {
1450   default: // Fall through to the 32-bit class.
1451   case OPW32:
1452   case OPW16:
1453   case OPWV216:
1454     return AGPR_32RegClassID;
1455   case OPW64:
1456   case OPWV232: return AReg_64RegClassID;
1457   case OPW96: return AReg_96RegClassID;
1458   case OPW128: return AReg_128RegClassID;
1459   case OPW160: return AReg_160RegClassID;
1460   case OPW256: return AReg_256RegClassID;
1461   case OPW288: return AReg_288RegClassID;
1462   case OPW320: return AReg_320RegClassID;
1463   case OPW352: return AReg_352RegClassID;
1464   case OPW384: return AReg_384RegClassID;
1465   case OPW512: return AReg_512RegClassID;
1466   case OPW1024: return AReg_1024RegClassID;
1467   }
1468 }
1469 
1471 unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
1472   using namespace AMDGPU;
1473 
1474   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1475   switch (Width) {
1476   default: // Fall through to the 32-bit class.
1477   case OPW32:
1478   case OPW16:
1479   case OPWV216:
1480     return SGPR_32RegClassID;
1481   case OPW64:
1482   case OPWV232: return SGPR_64RegClassID;
1483   case OPW96: return SGPR_96RegClassID;
1484   case OPW128: return SGPR_128RegClassID;
1485   case OPW160: return SGPR_160RegClassID;
1486   case OPW256: return SGPR_256RegClassID;
1487   case OPW288: return SGPR_288RegClassID;
1488   case OPW320: return SGPR_320RegClassID;
1489   case OPW352: return SGPR_352RegClassID;
1490   case OPW384: return SGPR_384RegClassID;
1491   case OPW512: return SGPR_512RegClassID;
1492   }
1493 }
1494 
1495 unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
1496   using namespace AMDGPU;
1497 
1498   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1499   switch (Width) {
1500   default: // Fall through to the 32-bit class.
1501   case OPW32:
1502   case OPW16:
1503   case OPWV216:
1504     return TTMP_32RegClassID;
1505   case OPW64:
1506   case OPWV232: return TTMP_64RegClassID;
1507   case OPW128: return TTMP_128RegClassID;
1508   case OPW256: return TTMP_256RegClassID;
1509   case OPW288: return TTMP_288RegClassID;
1510   case OPW320: return TTMP_320RegClassID;
1511   case OPW352: return TTMP_352RegClassID;
1512   case OPW384: return TTMP_384RegClassID;
1513   case OPW512: return TTMP_512RegClassID;
1514   }
1515 }
1516 
1517 int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1518   using namespace AMDGPU::EncValues;
1519 
1520   unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1521   unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1522 
1523   return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
1524 }
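
// Worked example (assuming ttmp0 is encoded at 108 on gfx9+ and at 112 on
// VI): getTTmpIdx(110) is 2 (ttmp2) on gfx9+, but -1 on VI because 110 is
// below TTMP_VI_MIN there.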
1525 
1526 MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
1527                                           bool MandatoryLiteral,
1528                                           unsigned ImmWidth,
1529                                           AMDGPU::OperandSemantics Sema) const {
1530   using namespace AMDGPU::EncValues;
1531 
1532   assert(Val < 1024); // enum10
1533 
1534   bool IsAGPR = Val & 512;
1535   Val &= 511;
1536 
1537   if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1538     return createRegOperand(IsAGPR ? getAgprClassId(Width)
1539                                    : getVgprClassId(Width), Val - VGPR_MIN);
1540   }
1541   return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1542                             Sema);
1543 }
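
// Worked examples, assuming VGPR_MIN == 256: Val == 259 decodes to v3, while
// Val == 512 + 259 == 771 has the AGPR bit set and decodes to a3. Anything
// below 256 falls through to decodeNonVGPRSrcOp with the low 8 bits.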
1544 
1545 MCOperand
1546 AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
1547                                        bool MandatoryLiteral, unsigned ImmWidth,
1548                                        AMDGPU::OperandSemantics Sema) const {
1549   // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
1550   // decoded earlier.
1551   assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1552   using namespace AMDGPU::EncValues;
1553 
1554   if (Val <= SGPR_MAX) {
1555     // "SGPR_MIN <= Val" is always true and causes compilation warning.
1556     static_assert(SGPR_MIN == 0);
1557     return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1558   }
1559 
1560   int TTmpIdx = getTTmpIdx(Val);
1561   if (TTmpIdx >= 0) {
1562     return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1563   }
1564 
1565   if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1566     return decodeIntImmed(Val);
1567 
1568   if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1569     return decodeFPImmed(ImmWidth, Val, Sema);
1570 
1571   if (Val == LITERAL_CONST) {
1572     if (MandatoryLiteral)
1573       // Keep a sentinel value for deferred setting
1574       return MCOperand::createImm(LITERAL_CONST);
1575     return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
1576   }
1577 
1578   switch (Width) {
1579   case OPW32:
1580   case OPW16:
1581   case OPWV216:
1582     return decodeSpecialReg32(Val);
1583   case OPW64:
1584   case OPWV232:
1585     return decodeSpecialReg64(Val);
1586   default:
1587     llvm_unreachable("unexpected immediate type");
1588   }
1589 }
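
// decodeNonVGPRSrcOp thus walks the scalar source encoding in layout order:
// SGPRs, trap temporaries, inline integers (128..208, assuming the usual
// EncValues), inline floats (240..248), the literal marker (255), and
// finally the width-dependent special registers.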
1590 
1591 // Bit 0 of DstY isn't stored in the instruction, because it's always the
1592 // opposite of bit 0 of DstX.
1593 MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1594                                                unsigned Val) const {
1595   int VDstXInd =
1596       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1597   assert(VDstXInd != -1);
1598   assert(Inst.getOperand(VDstXInd).isReg());
1599   unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1600   Val |= ~XDstReg & 1;
1601   auto Width = llvm::AMDGPUDisassembler::OPW32;
1602   return createRegOperand(getVgprClassId(Width), Val);
1603 }
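
// Worked example: if vdstX decoded to v4 (encoding value 4, low bit 0), then
// ~4 & 1 == 1 is ORed into Val, forcing DstY onto an odd VGPR; with stored
// bits Val == 6 the operand becomes v7.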
1604 
1605 MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1606   using namespace AMDGPU;
1607 
1608   switch (Val) {
1609   // clang-format off
1610   case 102: return createRegOperand(FLAT_SCR_LO);
1611   case 103: return createRegOperand(FLAT_SCR_HI);
1612   case 104: return createRegOperand(XNACK_MASK_LO);
1613   case 105: return createRegOperand(XNACK_MASK_HI);
1614   case 106: return createRegOperand(VCC_LO);
1615   case 107: return createRegOperand(VCC_HI);
1616   case 108: return createRegOperand(TBA_LO);
1617   case 109: return createRegOperand(TBA_HI);
1618   case 110: return createRegOperand(TMA_LO);
1619   case 111: return createRegOperand(TMA_HI);
1620   case 124:
1621     return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1622   case 125:
1623     return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1624   case 126: return createRegOperand(EXEC_LO);
1625   case 127: return createRegOperand(EXEC_HI);
1626   case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1627   case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1628   case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1629   case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1630   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1631   case 251: return createRegOperand(SRC_VCCZ);
1632   case 252: return createRegOperand(SRC_EXECZ);
1633   case 253: return createRegOperand(SRC_SCC);
1634   case 254: return createRegOperand(LDS_DIRECT);
1635   default: break;
1636     // clang-format on
1637   }
1638   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1639 }
1640 
1641 MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1642   using namespace AMDGPU;
1643 
1644   switch (Val) {
1645   case 102: return createRegOperand(FLAT_SCR);
1646   case 104: return createRegOperand(XNACK_MASK);
1647   case 106: return createRegOperand(VCC);
1648   case 108: return createRegOperand(TBA);
1649   case 110: return createRegOperand(TMA);
1650   case 124:
1651     if (isGFX11Plus())
1652       return createRegOperand(SGPR_NULL);
1653     break;
1654   case 125:
1655     if (!isGFX11Plus())
1656       return createRegOperand(SGPR_NULL);
1657     break;
1658   case 126: return createRegOperand(EXEC);
1659   case 235: return createRegOperand(SRC_SHARED_BASE);
1660   case 236: return createRegOperand(SRC_SHARED_LIMIT);
1661   case 237: return createRegOperand(SRC_PRIVATE_BASE);
1662   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1663   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1664   case 251: return createRegOperand(SRC_VCCZ);
1665   case 252: return createRegOperand(SRC_EXECZ);
1666   case 253: return createRegOperand(SRC_SCC);
1667   default: break;
1668   }
1669   return errOperand(Val, "unknown operand encoding " + Twine(Val));
1670 }
1671 
1672 MCOperand
1673 AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1674                                   unsigned ImmWidth,
1675                                   AMDGPU::OperandSemantics Sema) const {
1676   using namespace AMDGPU::SDWA;
1677   using namespace AMDGPU::EncValues;
1678 
1679   if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1680       STI.hasFeature(AMDGPU::FeatureGFX10)) {
1681     // XXX: The cast to int is needed to avoid a spurious warning that the
1682     // comparison with an unsigned value is always true.
1683     if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1684         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1685       return createRegOperand(getVgprClassId(Width),
1686                               Val - SDWA9EncValues::SRC_VGPR_MIN);
1687     }
1688     if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1689         Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1690                               : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1691       return createSRegOperand(getSgprClassId(Width),
1692                                Val - SDWA9EncValues::SRC_SGPR_MIN);
1693     }
1694     if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1695         Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1696       return createSRegOperand(getTtmpClassId(Width),
1697                                Val - SDWA9EncValues::SRC_TTMP_MIN);
1698     }
1699 
1700     const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1701 
1702     if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1703       return decodeIntImmed(SVal);
1704 
1705     if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1706       return decodeFPImmed(ImmWidth, SVal, Sema);
1707 
1708     return decodeSpecialReg32(SVal);
1709   }
1710   if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
1711     return createRegOperand(getVgprClassId(Width), Val);
1712   llvm_unreachable("unsupported target");
1713 }
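
// Worked example (assuming SDWA9 sources put VGPRs at 0..255 and SGPRs from
// SRC_SGPR_MIN == 256): Val == 3 decodes to v3, Val == 256 to s0, and values
// past the SGPR range re-bias by SRC_SGPR_MIN and fall into the inline
// constant / special register handling above.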
1714 
1715 MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1716   return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
1717 }
1718 
1719 MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1720   return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
1721 }
1722 
1723 MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1724   using namespace AMDGPU::SDWA;
1725 
1726   assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1727           STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1728          "SDWAVopcDst should be present only on GFX9+");
1729 
1730   bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1731 
1732   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1733     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1734 
1735     int TTmpIdx = getTTmpIdx(Val);
1736     if (TTmpIdx >= 0) {
1737       auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1738       return createSRegOperand(TTmpClsId, TTmpIdx);
1739     }
1740     if (Val > SGPR_MAX) {
1741       return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
1742     }
1743     return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1744   }
1745   return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1746 }
1747 
1748 MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1749   return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
1750              ? decodeSrcOp(OPW64, Val)
1751              : decodeSrcOp(OPW32, Val);
1752 }
1753 
1754 MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
1755   return decodeSrcOp(OPW32, Val);
1756 }
1757 
1758 MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
1759   if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
1760     return MCOperand();
1761   return MCOperand::createImm(Val);
1762 }
1763 
1764 MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
1765   using VersionField = AMDGPU::EncodingField<7, 0>;
1766   using W64Bit = AMDGPU::EncodingBit<13>;
1767   using W32Bit = AMDGPU::EncodingBit<14>;
1768   using MDPBit = AMDGPU::EncodingBit<15>;
1769   using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
1770 
1771   auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1772 
1773   // Decode into a plain immediate if any unused bits are set.
1774   if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1775     return MCOperand::createImm(Imm);
1776 
1777   const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1778   auto I = find_if(Versions,
1779                    [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1780                      return V.Code == Version;
1781                    });
1782   MCContext &Ctx = getContext();
1783   const MCExpr *E;
1784   if (I == Versions.end())
1785     E = MCConstantExpr::create(Version, Ctx);
1786   else
1787     E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1788 
1789   if (W64)
1790     E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1791   if (W32)
1792     E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1793   if (MDP)
1794     E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1795 
1796   return MCOperand::createExpr(E);
1797 }
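
// Worked example of the field layout (version in bits 7:0; W64/W32/MDP in
// bits 13/14/15, matching the UC_VERSION_*_BIT constants): Imm == 0x2004
// decodes to an expression of the form "version_4_symbol | UC_VERSION_W64_BIT"
// (or a constant 4 when no symbol is registered for that code), whereas an
// Imm with a stray bit such as bit 9 set fails the encode round-trip check
// and is emitted as a plain immediate.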
1798 
1799 bool AMDGPUDisassembler::isVI() const {
1800   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1801 }
1802 
1803 bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1804 
1805 bool AMDGPUDisassembler::isGFX90A() const {
1806   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1807 }
1808 
1809 bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1810 
1811 bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1812 
1813 bool AMDGPUDisassembler::isGFX10Plus() const {
1814   return AMDGPU::isGFX10Plus(STI);
1815 }
1816 
1817 bool AMDGPUDisassembler::isGFX11() const {
1818   return STI.hasFeature(AMDGPU::FeatureGFX11);
1819 }
1820 
1821 bool AMDGPUDisassembler::isGFX11Plus() const {
1822   return AMDGPU::isGFX11Plus(STI);
1823 }
1824 
1825 bool AMDGPUDisassembler::isGFX12() const {
1826   return STI.hasFeature(AMDGPU::FeatureGFX12);
1827 }
1828 
1829 bool AMDGPUDisassembler::isGFX12Plus() const {
1830   return AMDGPU::isGFX12Plus(STI);
1831 }
1832 
1833 bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1834   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1835 }
1836 
1837 bool AMDGPUDisassembler::hasKernargPreload() const {
1838   return AMDGPU::hasKernargPreload(STI);
1839 }
1840 
1841 //===----------------------------------------------------------------------===//
1842 // AMDGPU specific symbol handling
1843 //===----------------------------------------------------------------------===//
1844 
1845 /// Build a string describing the reserved bit range specified by Mask with
1846 /// offset BaseBytes, for use in error comments. Mask must be a single
1847 /// contiguous run of 1s surrounded by zeros. The format is meant to align with
1848 /// the tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1849 static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1850   SmallString<32> Result;
1851   raw_svector_ostream S(Result);
1852 
1853   int TrailingZeros = llvm::countr_zero(Mask);
1854   int PopCount = llvm::popcount(Mask);
1855 
1856   if (PopCount == 1) {
1857     S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1858   } else {
1859     S << "bits in range ("
1860       << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1861       << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1862   }
1863 
1864   return Result;
1865 }
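
// Worked examples: Mask == 0x00000040 with BaseBytes == 0 yields "bit (6)";
// Mask == 0x3F000000 with BaseBytes == 4 yields "bits in range (61:56)"
// (countr_zero == 24, popcount == 6, plus the 32-bit byte offset).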
1866 
1867 #define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1868 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
1869   do {                                                                         \
1870     KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
1871   } while (0)
1872 #define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
1873   do {                                                                         \
1874     KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
1875              << GET_FIELD(MASK) << '\n';                                       \
1876   } while (0)
1877 
1878 #define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
1879   do {                                                                         \
1880     if (FourByteBuffer & (MASK)) {                                             \
1881       return createStringError(std::errc::invalid_argument,                    \
1882                                "kernel descriptor " DESC                       \
1883                                " reserved %s set" MSG,                         \
1884                                getBitRangeFromMask((MASK), 0).c_str());        \
1885     }                                                                          \
1886   } while (0)
1887 
1888 #define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1889 #define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
1890   CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1891 #define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
1892   CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1893 #define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
1894   CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1895 
1896 // NOLINTNEXTLINE(readability-identifier-naming)
1897 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1898     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1899   using namespace amdhsa;
1900   StringRef Indent = "\t";
1901 
1902   // We cannot accurately compute the number of VGPRs used backwards from
1903   // GRANULATED_WORKITEM_VGPR_COUNT. All we need is for the reassembled binary
1904   // to carry the same GRANULATED_WORKITEM_VGPR_COUNT value, so we simply
1905   // calculate the inverse of what the assembler does.
1906 
1907   uint32_t GranulatedWorkitemVGPRCount =
1908       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1909 
1910   uint32_t NextFreeVGPR =
1911       (GranulatedWorkitemVGPRCount + 1) *
1912       AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1913 
1914   KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1915 
1916   // We cannot compute backwards the values used to calculate
1917   // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for the
1918   // following directives can't be recovered:
1919   // .amdhsa_reserve_vcc
1920   // .amdhsa_reserve_flat_scratch
1921   // .amdhsa_reserve_xnack_mask
1922   // They take their respective default values if not specified in the assembly.
1923   //
1924   // GRANULATED_WAVEFRONT_SGPR_COUNT
1925   //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1926   //
1927   // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1928   // are set to 0. So while disassembling we consider that:
1929   //
1930   // GRANULATED_WAVEFRONT_SGPR_COUNT
1931   //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1932   //
1933   // The disassembler cannot recover the original values of those 3 directives.
1934 
1935   uint32_t GranulatedWavefrontSGPRCount =
1936       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1937 
1938   if (isGFX10Plus())
1939     CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1940                             "must be zero on gfx10+");
1941 
1942   uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1943                           AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1944 
1945   KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1946   if (!hasArchitectedFlatScratch())
1947     KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1948   KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1949   KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << '\n';
1950 
1951   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1952 
1953   PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1954                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1955   PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1956                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1957   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1958                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1959   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1960                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1961 
1962   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1963 
1964   if (!isGFX12Plus())
1965     PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1966                     COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1967 
1968   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1969 
1970   if (!isGFX12Plus())
1971     PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1972                     COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1973 
1974   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1975   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1976 
1977   if (isGFX9Plus())
1978     PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1979 
1980   if (!isGFX9Plus())
1981     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1982                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1983 
1984   CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1985 
1986   if (!isGFX10Plus())
1987     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1988                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1989 
1990   if (isGFX10Plus()) {
1991     PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1992                     COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1993     PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1994     PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1995   }
1996 
1997   if (isGFX12Plus())
1998     PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1999                     COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2000 
2001   return true;
2002 }
2003 
2004 // NOLINTNEXTLINE(readability-identifier-naming)
2005 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2006     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2007   using namespace amdhsa;
2008   StringRef Indent = "\t";
2009   if (hasArchitectedFlatScratch())
2010     PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2011                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2012   else
2013     PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2014                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2015   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2016                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2017   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2018                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2019   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2020                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2021   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2022                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2023   PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2024                   COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2025 
2026   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2027   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2028   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2029 
2030   PRINT_DIRECTIVE(
2031       ".amdhsa_exception_fp_ieee_invalid_op",
2032       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2033   PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2034                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2035   PRINT_DIRECTIVE(
2036       ".amdhsa_exception_fp_ieee_div_zero",
2037       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2038   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2039                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2040   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2041                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2042   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2043                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2044   PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2045                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2046 
2047   CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2048 
2049   return true;
2050 }
2051 
2052 // NOLINTNEXTLINE(readability-identifier-naming)
2053 Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2054     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2055   using namespace amdhsa;
2056   StringRef Indent = "\t";
2057   if (isGFX90A()) {
2058     KdStream << Indent << ".amdhsa_accum_offset "
2059              << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2060              << '\n';
2061 
2062     PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2063 
2064     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2065                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2066     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2067                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2068   } else if (isGFX10Plus()) {
2069     // Bits [0-3].
2070     if (!isGFX12Plus()) {
2071       if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2072         PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2073                         COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2074       } else {
2075         PRINT_PSEUDO_DIRECTIVE_COMMENT(
2076             "SHARED_VGPR_COUNT",
2077             COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2078       }
2079     } else {
2080       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2081                                    "COMPUTE_PGM_RSRC3",
2082                                    "must be zero on gfx12+");
2083     }
2084 
2085     // Bits [4-11].
2086     if (isGFX11()) {
2087       PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2088                                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2089       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2090                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2091       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2092                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2093     } else if (isGFX12Plus()) {
2094       PRINT_PSEUDO_DIRECTIVE_COMMENT(
2095           "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2096     } else {
2097       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2098                                    "COMPUTE_PGM_RSRC3",
2099                                    "must be zero on gfx10");
2100     }
2101 
2102     // Bit [12].
2103     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2104                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2105 
2106     // Bits [13].
2107     if (isGFX12Plus()) {
2108       PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2109                                      COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2110     } else {
2111       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2112                                    "COMPUTE_PGM_RSRC3",
2113                                    "must be zero on gfx10 or gfx11");
2114     }
2115 
2116     // Bits [14-30].
2117     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2118                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2119 
2120     // Bit [31].
2121     if (isGFX11Plus()) {
2122       PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2123                                      COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2124     } else {
2125       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2126                                    "COMPUTE_PGM_RSRC3",
2127                                    "must be zero on gfx10");
2128     }
2129   } else if (FourByteBuffer) {
2130     return createStringError(
2131         std::errc::invalid_argument,
2132         "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2133   }
2134   return true;
2135 }
2136 #undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2137 #undef PRINT_DIRECTIVE
2138 #undef GET_FIELD
2139 #undef CHECK_RESERVED_BITS_IMPL
2140 #undef CHECK_RESERVED_BITS
2141 #undef CHECK_RESERVED_BITS_MSG
2142 #undef CHECK_RESERVED_BITS_DESC
2143 #undef CHECK_RESERVED_BITS_DESC_MSG
2144 
2145 /// Create an error object to return from onSymbolStart for reserved kernel
2146 /// descriptor bits being set.
2147 static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2148                                        const char *Msg = "") {
2149   return createStringError(
2150       std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2151       getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2152 }
2153 
2154 /// Create an error object to return from onSymbolStart for reserved kernel
2155 /// descriptor bytes being set.
2156 static Error createReservedKDBytesError(unsigned BaseInBytes,
2157                                         unsigned WidthInBytes) {
2158   // Create an error comment in the same format as the "Kernel Descriptor"
2159   // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2160   return createStringError(
2161       std::errc::invalid_argument,
2162       "kernel descriptor reserved bits in range (%u:%u) set",
2163       (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2164 }
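
// Worked example (assuming 4 reserved bytes at descriptor offset 12):
// createReservedKDBytesError(12, 4) reports
// "kernel descriptor reserved bits in range (127:96) set", since
// (12 + 4) * 8 - 1 == 127 and 12 * 8 == 96.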
2165 
2166 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2167     DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2168     raw_string_ostream &KdStream) const {
2169 #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
2170   do {                                                                         \
2171     KdStream << Indent << DIRECTIVE " "                                        \
2172              << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
2173   } while (0)
2174 
2175   uint16_t TwoByteBuffer = 0;
2176   uint32_t FourByteBuffer = 0;
2177 
2178   StringRef ReservedBytes;
2179   StringRef Indent = "\t";
2180 
2181   assert(Bytes.size() == 64);
2182   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2183 
2184   switch (Cursor.tell()) {
2185   case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2186     FourByteBuffer = DE.getU32(Cursor);
2187     KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2188              << '\n';
2189     return true;
2190 
2191   case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2192     FourByteBuffer = DE.getU32(Cursor);
2193     KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2194              << FourByteBuffer << '\n';
2195     return true;
2196 
2197   case amdhsa::KERNARG_SIZE_OFFSET:
2198     FourByteBuffer = DE.getU32(Cursor);
2199     KdStream << Indent << ".amdhsa_kernarg_size " << FourByteBuffer << '\n';
2201     return true;
2202 
2203   case amdhsa::RESERVED0_OFFSET:
2204     // 4 reserved bytes, must be 0.
2205     ReservedBytes = DE.getBytes(Cursor, 4);
2206     for (int I = 0; I < 4; ++I) {
2207       if (ReservedBytes[I] != 0)
2208         return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2209     }
2210     return true;
2211 
2212   case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2213     // KERNEL_CODE_ENTRY_BYTE_OFFSET
2214     // So far no directive controls this for Code Object V3, so simply skip it
2215     // during disassembly.
2216     DE.skip(Cursor, 8);
2217     return true;
2218 
2219   case amdhsa::RESERVED1_OFFSET:
2220     // 20 reserved bytes, must be 0.
2221     ReservedBytes = DE.getBytes(Cursor, 20);
2222     for (int I = 0; I < 20; ++I) {
2223       if (ReservedBytes[I] != 0)
2224         return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2225     }
2226     return true;
2227 
2228   case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2229     FourByteBuffer = DE.getU32(Cursor);
2230     return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2231 
2232   case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2233     FourByteBuffer = DE.getU32(Cursor);
2234     return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2235 
2236   case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2237     FourByteBuffer = DE.getU32(Cursor);
2238     return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2239 
2240   case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2241     using namespace amdhsa;
2242     TwoByteBuffer = DE.getU16(Cursor);
2243 
2244     if (!hasArchitectedFlatScratch())
2245       PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2246                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2247     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2248                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2249     PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2250                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2251     PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2252                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2253     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2254                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2255     if (!hasArchitectedFlatScratch())
2256       PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2257                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2258     PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2259                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2260 
2261     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2262       return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2263                                        amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2264 
2265     // Reserved for GFX9
2266     if (isGFX9() &&
2267         (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2268       return createReservedKDBitsError(
2269           KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2270           amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2271     }
2272     if (isGFX10Plus()) {
2273       PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2274                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2275     }
2276 
2277     if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2278       PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2279                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2280 
2281     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2282       return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2283                                        amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2284     }
2285 
2286     return true;
2287 
2288   case amdhsa::KERNARG_PRELOAD_OFFSET:
2289     using namespace amdhsa;
2290     TwoByteBuffer = DE.getU16(Cursor);
2291     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2292       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2293                       KERNARG_PRELOAD_SPEC_LENGTH);
2294     }
2295 
2296     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2297       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2298                       KERNARG_PRELOAD_SPEC_OFFSET);
2299     }
2300     return true;
2301 
2302   case amdhsa::RESERVED3_OFFSET:
2303     // 4 bytes from here are reserved, must be 0.
2304     ReservedBytes = DE.getBytes(Cursor, 4);
2305     for (int I = 0; I < 4; ++I) {
2306       if (ReservedBytes[I] != 0)
2307         return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2308     }
2309     return true;
2310 
2311   default:
2312     llvm_unreachable("Unhandled index. Case statements cover everything.");
2313     return true;
2314   }
2315 #undef PRINT_DIRECTIVE
2316 }
2317 
2318 Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2319     StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2320 
2321   // CP microcode requires the kernel descriptor to be 64-byte aligned.
2322   if (Bytes.size() != 64 || KdAddress % 64 != 0)
2323     return createStringError(std::errc::invalid_argument,
2324                              "kernel descriptor must be 64-byte aligned");
2325 
2326   // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2327   // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2328   // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2329   // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2330   // when required.
2331   if (isGFX10Plus()) {
2332     uint16_t KernelCodeProperties =
2333         support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2334                                 llvm::endianness::little);
2335     EnableWavefrontSize32 =
2336         AMDHSA_BITS_GET(KernelCodeProperties,
2337                         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2338   }
2339 
2340   std::string Kd;
2341   raw_string_ostream KdStream(Kd);
2342   KdStream << ".amdhsa_kernel " << KdName << '\n';
2343 
2344   DataExtractor::Cursor C(0);
2345   while (C && C.tell() < Bytes.size()) {
2346     Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2347 
2348     cantFail(C.takeError());
2349 
2350     if (!Res)
2351       return Res;
2352   }
2353   KdStream << ".end_amdhsa_kernel\n";
2354   outs() << KdStream.str();
2355   return true;
2356 }
2357 
2358 Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2359                                                  uint64_t &Size,
2360                                                  ArrayRef<uint8_t> Bytes,
2361                                                  uint64_t Address) const {
2362   // Right now only the kernel descriptor needs to be handled; we ignore all
2363   // other symbols for target-specific handling.
2364   // TODO:
2365   // Fix the spurious symbol issue for AMDGPU kernels. It exists for both Code
2366   // Object V2 and V3 when symbols are marked protected.
2367 
2368   // amd_kernel_code_t for Code Object V2.
2369   if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2370     Size = 256;
2371     return createStringError(std::errc::invalid_argument,
2372                              "code object v2 is not supported");
2373   }
2374 
2375   // Code Object V3 kernel descriptors.
2376   StringRef Name = Symbol.Name;
2377   if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2378     Size = 64; // Size = 64 regardless of success or failure.
2379     return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2380   }
2381 
2382   return false;
2383 }
2384 
2385 const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2386                                                            int64_t Val) {
2387   MCContext &Ctx = getContext();
2388   MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2389   // Note: only set the value to Val on a new symbol, in case a disassembler
2390   // has already been initialized in this context.
2391   if (!Sym->isVariable()) {
2392     Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2393   } else {
2394     int64_t Res = ~Val;
2395     bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2396     if (!Valid || Res != Val)
2397       Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2398   }
2399   return MCSymbolRefExpr::create(Sym, Ctx);
2400 }
2401 
2402 //===----------------------------------------------------------------------===//
2403 // AMDGPUSymbolizer
2404 //===----------------------------------------------------------------------===//
2405 
2406 // Try to find a symbol name for the specified label.
2407 bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2408     MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2409     uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2410     uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2411 
2412   if (!IsBranch) {
2413     return false;
2414   }
2415 
2416   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2417   if (!Symbols)
2418     return false;
2419 
2420   auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2421     return Val.Addr == static_cast<uint64_t>(Value) &&
2422            Val.Type == ELF::STT_NOTYPE;
2423   });
2424   if (Result != Symbols->end()) {
2425     auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2426     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2427     Inst.addOperand(MCOperand::createExpr(Add));
2428     return true;
2429   }
2430   // Add to the list of referenced addresses so the caller can synthesize a label.
2431   ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2432   return false;
2433 }
2434 
2435 void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2436                                                        int64_t Value,
2437                                                        uint64_t Address) {
2438   llvm_unreachable("unimplemented");
2439 }
2440 
2441 //===----------------------------------------------------------------------===//
2442 // Initialization
2443 //===----------------------------------------------------------------------===//
2444 
2445 static MCSymbolizer *
2446 createAMDGPUSymbolizer(const Triple & /*TT*/,
2447                        LLVMOpInfoCallback /*GetOpInfo*/,
2448                        LLVMSymbolLookupCallback /*SymbolLookUp*/,
2449                        void *DisInfo, MCContext *Ctx,
2450                        std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2451   return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2452 }
2453 
2454 static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2455                                                 const MCSubtargetInfo &STI,
2456                                                 MCContext &Ctx) {
2457   return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2458 }
2459 
2460 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2461   TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2462                                          createAMDGPUDisassembler);
2463   TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2464                                        createAMDGPUSymbolizer);
2465 }
2466