1 //===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // \file
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUInstPrinter.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "SIDefines.h"
13 #include "Utils/AMDGPUAsmUtils.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/MC/MCExpr.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MC/MCInstrDesc.h"
18 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCSubtargetInfo.h"
21 #include "llvm/TargetParser/TargetParser.h"
22 
23 using namespace llvm;
24 using namespace llvm::AMDGPU;
25 
26 void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
27   // FIXME: The current implementation of
28   // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
29   // as an integer or we provide a name which represents a physical register.
30   // For CFI instructions we really want to emit a name for the DWARF register
31   // instead, because there may be multiple DWARF registers corresponding to a
32   // single physical register. One case where this problem manifests is with
33   // wave32/wave64 where using the physical register name is ambiguous: if we
34   // write e.g. `.cfi_undefined v0` we lose information about the wavefront
35   // size which we need to encode the register in the final DWARF. Ideally we
36   // would extend MC to support parsing DWARF register names so we could do
37   // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
38   // non-pretty DWARF register names in assembly text.
39   OS << Reg.id();
40 }
41 
42 void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
43                                   StringRef Annot, const MCSubtargetInfo &STI,
44                                   raw_ostream &OS) {
45   printInstruction(MI, Address, STI, OS);
46   printAnnotation(OS, Annot);
47 }
48 
49 void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
50                                            const MCSubtargetInfo &STI,
51                                            raw_ostream &O) {
52   const MCOperand &Op = MI->getOperand(OpNo);
53   if (Op.isExpr()) {
54     Op.getExpr()->print(O, &MAI);
55     return;
56   }
57 
58   // It's possible to end up with a 32-bit literal used with a 16-bit operand
59   // whose high bits are ignored. Print it as 32-bit anyway in that case.
60   int64_t Imm = Op.getImm();
61   if (isInt<16>(Imm) || isUInt<16>(Imm))
62     O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
63   else
64     printU32ImmOperand(MI, OpNo, STI, O);
65 }
66 
67 void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
68                                               raw_ostream &O) {
69   O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
70 }
71 
72 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
73                                            const MCSubtargetInfo &STI,
74                                            raw_ostream &O) {
75   O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
76 }
77 
78 void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
79                                       raw_ostream &O, StringRef BitName) {
80   if (MI->getOperand(OpNo).getImm()) {
81     O << ' ' << BitName;
82   }
83 }
84 
85 void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
86                                     const MCSubtargetInfo &STI,
87                                     raw_ostream &O) {
88   uint32_t Imm = MI->getOperand(OpNo).getImm();
89   if (Imm != 0) {
90     O << " offset:";
91 
92     // GFX12 uses a 24-bit signed offset for VBUFFER.
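    // For example, on a GFX12 VBUFFER instruction an encoded offset of 0xFFFFF8
    // is sign-extended and printed as "offset:-8".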
93     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
94     bool IsVBuffer = Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF);
95     if (AMDGPU::isGFX12(STI) && IsVBuffer)
96       O << formatDec(SignExtend32<24>(Imm));
97     else
98       printU16ImmDecOperand(MI, OpNo, O);
99   }
100 }
101 
102 void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
103                                         const MCSubtargetInfo &STI,
104                                         raw_ostream &O) {
105   uint32_t Imm = MI->getOperand(OpNo).getImm();
106   if (Imm != 0) {
107     O << " offset:";
108 
109     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
110     bool AllowNegative = (Desc.TSFlags & (SIInstrFlags::FlatGlobal |
111                                           SIInstrFlags::FlatScratch)) ||
112                          AMDGPU::isGFX12(STI);
113 
114     if (AllowNegative) // Signed offset
115       O << formatDec(SignExtend32(Imm, AMDGPU::getNumFlatOffsetBits(STI)));
116     else // Unsigned offset
117       printU16ImmDecOperand(MI, OpNo, O);
118   }
119 }
120 
121 void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
122                                         const MCSubtargetInfo &STI,
123                                         raw_ostream &O) {
124   printU32ImmOperand(MI, OpNo, STI, O);
125 }
126 
127 void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
128                                         const MCSubtargetInfo &STI,
129                                         raw_ostream &O) {
130   O << formatHex(MI->getOperand(OpNo).getImm());
131 }
132 
133 void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
134                                                const MCSubtargetInfo &STI,
135                                                raw_ostream &O) {
136   printU32ImmOperand(MI, OpNo, STI, O);
137 }
138 
139 void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
140                                   const MCSubtargetInfo &STI, raw_ostream &O) {
141   auto Imm = MI->getOperand(OpNo).getImm();
142 
143   if (AMDGPU::isGFX12Plus(STI)) {
144     const int64_t TH = Imm & CPol::TH;
145     const int64_t Scope = Imm & CPol::SCOPE;
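    // On GFX12+ the cache-policy immediate is split into a temporal-hint (TH)
    // field and a scope field; each is printed with its symbolic name below.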
146 
147     printTH(MI, TH, Scope, O);
148     printScope(Scope, O);
149 
150     return;
151   }
152 
153   if (Imm & CPol::GLC)
154     O << ((AMDGPU::isGFX940(STI) &&
155            !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
156                                                                      : " glc");
157   if (Imm & CPol::SLC)
158     O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
159   if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
160     O << " dlc";
161   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
162     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
163   if (Imm & ~CPol::ALL_pregfx12)
164     O << " /* unexpected cache policy bit */";
165 }
166 
167 void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
168                                 raw_ostream &O) {
169   // For th = 0 do not print this field
170   if (TH == 0)
171     return;
172 
173   const unsigned Opcode = MI->getOpcode();
174   const MCInstrDesc &TID = MII.get(Opcode);
175   bool IsStore = TID.mayStore();
176   bool IsAtomic =
177       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
178 
179   O << " th:";
180 
181   if (IsAtomic) {
182     O << "TH_ATOMIC_";
183     if (TH & AMDGPU::CPol::TH_ATOMIC_CASCADE) {
184       if (Scope >= AMDGPU::CPol::SCOPE_DEV)
185         O << "CASCADE" << (TH & AMDGPU::CPol::TH_ATOMIC_NT ? "_NT" : "_RT");
186       else
187         O << formatHex(TH);
188     } else if (TH & AMDGPU::CPol::TH_ATOMIC_NT)
189       O << "NT" << (TH & AMDGPU::CPol::TH_ATOMIC_RETURN ? "_RETURN" : "");
190     else if (TH & AMDGPU::CPol::TH_ATOMIC_RETURN)
191       O << "RETURN";
192     else
193       O << formatHex(TH);
194   } else {
195     if (!IsStore && TH == AMDGPU::CPol::TH_RESERVED)
196       O << formatHex(TH);
197     else {
198       // Default to printing the load variant when neither the MayStore nor the
199       // MayLoad flag is present, which is the case for instructions like
200       // image_get_resinfo.
201       O << (IsStore ? "TH_STORE_" : "TH_LOAD_");
202       switch (TH) {
203       case AMDGPU::CPol::TH_NT:
204         O << "NT";
205         break;
206       case AMDGPU::CPol::TH_HT:
207         O << "HT";
208         break;
209       case AMDGPU::CPol::TH_BYPASS: // or LU or RT_WB
210         O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
211                                                : (IsStore ? "RT_WB" : "LU"));
212         break;
213       case AMDGPU::CPol::TH_NT_RT:
214         O << "NT_RT";
215         break;
216       case AMDGPU::CPol::TH_RT_NT:
217         O << "RT_NT";
218         break;
219       case AMDGPU::CPol::TH_NT_HT:
220         O << "NT_HT";
221         break;
222       case AMDGPU::CPol::TH_NT_WB:
223         O << "NT_WB";
224         break;
225       default:
226         llvm_unreachable("unexpected th value");
227       }
228     }
229   }
230 }
231 
232 void AMDGPUInstPrinter::printScope(int64_t Scope, raw_ostream &O) {
233   if (Scope == CPol::SCOPE_CU)
234     return;
235 
236   O << " scope:";
237 
238   if (Scope == CPol::SCOPE_SE)
239     O << "SCOPE_SE";
240   else if (Scope == CPol::SCOPE_DEV)
241     O << "SCOPE_DEV";
242   else if (Scope == CPol::SCOPE_SYS)
243     O << "SCOPE_SYS";
244   else
245     llvm_unreachable("unexpected scope policy value");
246 }
247 
248 void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
249                                  const MCSubtargetInfo &STI, raw_ostream &O) {
250   unsigned Dim = MI->getOperand(OpNo).getImm();
251   O << " dim:SQ_RSRC_IMG_";
252 
253   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
254   if (DimInfo)
255     O << DimInfo->AsmSuffix;
256   else
257     O << Dim;
258 }
259 
260 void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
261                                   const MCSubtargetInfo &STI, raw_ostream &O) {
262   if (STI.hasFeature(AMDGPU::FeatureR128A16))
263     printNamedBit(MI, OpNo, O, "a16");
264   else
265     printNamedBit(MI, OpNo, O, "r128");
266 }
267 
268 void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
269                                     const MCSubtargetInfo &STI,
270                                     raw_ostream &O) {
271 }
272 
273 void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
274                                             const MCSubtargetInfo &STI,
275                                             raw_ostream &O) {
276   using namespace llvm::AMDGPU::MTBUFFormat;
277 
278   int OpNo =
279     AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::format);
280   assert(OpNo != -1);
281 
282   unsigned Val = MI->getOperand(OpNo).getImm();
283   if (AMDGPU::isGFX10Plus(STI)) {
284     if (Val == UFMT_DEFAULT)
285       return;
286     if (isValidUnifiedFormat(Val, STI)) {
287       O << " format:[" << getUnifiedFormatName(Val, STI) << ']';
288     } else {
289       O << " format:" << Val;
290     }
291   } else {
292     if (Val == DFMT_NFMT_DEFAULT)
293       return;
294     if (isValidDfmtNfmt(Val, STI)) {
295       unsigned Dfmt;
296       unsigned Nfmt;
297       decodeDfmtNfmt(Val, Dfmt, Nfmt);
298       O << " format:[";
299       if (Dfmt != DFMT_DEFAULT) {
300         O << getDfmtName(Dfmt);
301         if (Nfmt != NFMT_DEFAULT) {
302           O << ',';
303         }
304       }
305       if (Nfmt != NFMT_DEFAULT) {
306         O << getNfmtName(Nfmt, STI);
307       }
308       O << ']';
309     } else {
310       O << " format:" << Val;
311     }
312   }
313 }
314 
315 void AMDGPUInstPrinter::printRegOperand(MCRegister Reg, raw_ostream &O,
316                                         const MCRegisterInfo &MRI) {
317 #if !defined(NDEBUG)
318   switch (Reg.id()) {
319   case AMDGPU::FP_REG:
320   case AMDGPU::SP_REG:
321   case AMDGPU::PRIVATE_RSRC_REG:
322     llvm_unreachable("pseudo-register should not ever be emitted");
323   default:
324     break;
325   }
326 #endif
327 
328   O << getRegisterName(Reg);
329 }
330 
331 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
332                                     const MCSubtargetInfo &STI, raw_ostream &O) {
333   auto Opcode = MI->getOpcode();
334   auto Flags = MII.get(Opcode).TSFlags;
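  // For the destination (operand 0), first append the encoding suffix
  // (_e64, _e32, _dpp, _sdwa, ...) that completes the printed mnemonic.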
335   if (OpNo == 0) {
336     if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP)
337       O << "_e64_dpp";
338     else if (Flags & SIInstrFlags::VOP3) {
339       if (!getVOP3IsSingle(Opcode))
340         O << "_e64";
341     } else if (Flags & SIInstrFlags::DPP)
342       O << "_dpp";
343     else if (Flags & SIInstrFlags::SDWA)
344       O << "_sdwa";
345     else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
346              ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode)))
347       O << "_e32";
348     O << " ";
349   }
350 
351   printRegularOperand(MI, OpNo, STI, O);
352 
353   // Print default vcc/vcc_lo operand.
354   switch (Opcode) {
355   default: break;
356 
357   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
358   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
359   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
360   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
361   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
362   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
363   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
364   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
365   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
366   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
367   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
368   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
369   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
370   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
371   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
372   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
373   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
374   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
375   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
376   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
377   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
378   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
379   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
380   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
381   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
382   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
383   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
384   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
385   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
386   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
387     printDefaultVccOperand(false, STI, O);
388     break;
389   }
390 }
391 
392 void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
393                                        const MCSubtargetInfo &STI, raw_ostream &O) {
394   if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI))
395     O << " ";
396   else
397     O << "_e32 ";
398 
399   printRegularOperand(MI, OpNo, STI, O);
400 }
401 
402 void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
403                                             const MCSubtargetInfo &STI,
404                                             raw_ostream &O) {
405   int32_t SImm = static_cast<int32_t>(Imm);
406   if (isInlinableIntLiteral(SImm)) {
407     O << SImm;
408     return;
409   }
410 
411   if (printImmediateFloat32(Imm, STI, O))
412     return;
413 
414   O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
415 }
416 
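// The constants below are IEEE binary16 bit patterns for the inline constants
// supported by the hardware, e.g. 0x3C00 is 1.0 and 0xB800 is -0.5.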
417 static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
418                                raw_ostream &O) {
419   if (Imm == 0x3C00)
420     O << "1.0";
421   else if (Imm == 0xBC00)
422     O << "-1.0";
423   else if (Imm == 0x3800)
424     O << "0.5";
425   else if (Imm == 0xB800)
426     O << "-0.5";
427   else if (Imm == 0x4000)
428     O << "2.0";
429   else if (Imm == 0xC000)
430     O << "-2.0";
431   else if (Imm == 0x4400)
432     O << "4.0";
433   else if (Imm == 0xC400)
434     O << "-4.0";
435   else if (Imm == 0x3118 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
436     O << "0.15915494";
437   else
438     return false;
439 
440   return true;
441 }
442 
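// Same idea for bfloat16 bit patterns: these are the upper 16 bits of the
// corresponding binary32 values, e.g. 0x3F80 is 1.0 and 0x3E22 is ~1/(2*pi).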
443 static bool printImmediateBFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
444                                    raw_ostream &O) {
445   if (Imm == 0x3F80)
446     O << "1.0";
447   else if (Imm == 0xBF80)
448     O << "-1.0";
449   else if (Imm == 0x3F00)
450     O << "0.5";
451   else if (Imm == 0xBF00)
452     O << "-0.5";
453   else if (Imm == 0x4000)
454     O << "2.0";
455   else if (Imm == 0xC000)
456     O << "-2.0";
457   else if (Imm == 0x4080)
458     O << "4.0";
459   else if (Imm == 0xC080)
460     O << "-4.0";
461   else if (Imm == 0x3E22 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
462     O << "0.15915494";
463   else
464     return false;
465 
466   return true;
467 }
468 
469 void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
470                                            const MCSubtargetInfo &STI,
471                                            raw_ostream &O) {
472   int16_t SImm = static_cast<int16_t>(Imm);
473   if (isInlinableIntLiteral(SImm)) {
474     O << SImm;
475     return;
476   }
477 
478   if (printImmediateBFloat16(static_cast<uint16_t>(Imm), STI, O))
479     return;
480 
481   O << formatHex(static_cast<uint64_t>(Imm));
482 }
483 
484 void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
485                                           const MCSubtargetInfo &STI,
486                                           raw_ostream &O) {
487   int16_t SImm = static_cast<int16_t>(Imm);
488   if (isInlinableIntLiteral(SImm)) {
489     O << SImm;
490     return;
491   }
492 
493   uint16_t HImm = static_cast<uint16_t>(Imm);
494   if (printImmediateFP16(HImm, STI, O))
495     return;
496 
497   uint64_t Imm16 = static_cast<uint16_t>(Imm);
498   O << formatHex(Imm16);
499 }
500 
501 void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
502                                            const MCSubtargetInfo &STI,
503                                            raw_ostream &O) {
504   int32_t SImm = static_cast<int32_t>(Imm);
505   if (isInlinableIntLiteral(SImm)) {
506     O << SImm;
507     return;
508   }
509 
510   switch (OpType) {
511   case AMDGPU::OPERAND_REG_IMM_V2INT16:
512   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
513   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
514     if (printImmediateFloat32(Imm, STI, O))
515       return;
516     break;
517   case AMDGPU::OPERAND_REG_IMM_V2FP16:
518   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
519   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
520     if (isUInt<16>(Imm) &&
521         printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
522       return;
523     break;
524   case AMDGPU::OPERAND_REG_IMM_V2BF16:
525   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
526   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
527     if (isUInt<16>(Imm) &&
528         printImmediateBFloat16(static_cast<uint16_t>(Imm), STI, O))
529       return;
530     break;
531   default:
532     llvm_unreachable("bad operand type");
533   }
534 
535   O << formatHex(static_cast<uint64_t>(Imm));
536 }
537 
538 bool AMDGPUInstPrinter::printImmediateFloat32(uint32_t Imm,
539                                               const MCSubtargetInfo &STI,
540                                               raw_ostream &O) {
541   if (Imm == llvm::bit_cast<uint32_t>(0.0f))
542     O << "0.0";
543   else if (Imm == llvm::bit_cast<uint32_t>(1.0f))
544     O << "1.0";
545   else if (Imm == llvm::bit_cast<uint32_t>(-1.0f))
546     O << "-1.0";
547   else if (Imm == llvm::bit_cast<uint32_t>(0.5f))
548     O << "0.5";
549   else if (Imm == llvm::bit_cast<uint32_t>(-0.5f))
550     O << "-0.5";
551   else if (Imm == llvm::bit_cast<uint32_t>(2.0f))
552     O << "2.0";
553   else if (Imm == llvm::bit_cast<uint32_t>(-2.0f))
554     O << "-2.0";
555   else if (Imm == llvm::bit_cast<uint32_t>(4.0f))
556     O << "4.0";
557   else if (Imm == llvm::bit_cast<uint32_t>(-4.0f))
558     O << "-4.0";
559   else if (Imm == 0x3e22f983 &&
560            STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
561     O << "0.15915494";
562   else
563     return false;
564 
565   return true;
566 }
567 
568 void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
569                                          const MCSubtargetInfo &STI,
570                                          raw_ostream &O) {
571   int32_t SImm = static_cast<int32_t>(Imm);
572   if (isInlinableIntLiteral(SImm)) {
573     O << SImm;
574     return;
575   }
576 
577   if (printImmediateFloat32(Imm, STI, O))
578     return;
579 
580   O << formatHex(static_cast<uint64_t>(Imm));
581 }
582 
583 void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
584                                          const MCSubtargetInfo &STI,
585                                          raw_ostream &O, bool IsFP) {
586   int64_t SImm = static_cast<int64_t>(Imm);
587   if (SImm >= -16 && SImm <= 64) {
588     O << SImm;
589     return;
590   }
591 
592   if (Imm == llvm::bit_cast<uint64_t>(0.0))
593     O << "0.0";
594   else if (Imm == llvm::bit_cast<uint64_t>(1.0))
595     O << "1.0";
596   else if (Imm == llvm::bit_cast<uint64_t>(-1.0))
597     O << "-1.0";
598   else if (Imm == llvm::bit_cast<uint64_t>(0.5))
599     O << "0.5";
600   else if (Imm == llvm::bit_cast<uint64_t>(-0.5))
601     O << "-0.5";
602   else if (Imm == llvm::bit_cast<uint64_t>(2.0))
603     O << "2.0";
604   else if (Imm == llvm::bit_cast<uint64_t>(-2.0))
605     O << "-2.0";
606   else if (Imm == llvm::bit_cast<uint64_t>(4.0))
607     O << "4.0";
608   else if (Imm == llvm::bit_cast<uint64_t>(-4.0))
609     O << "-4.0";
610   else if (Imm == 0x3fc45f306dc9c882 &&
611            STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
612     O << "0.15915494309189532";
613   else if (IsFP) {
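    // A 64-bit FP operand can only take a 32-bit literal that supplies the high
    // half (the low 32 bits are implicitly zero), so print just the high word.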
614     assert(AMDGPU::isValid32BitLiteral(Imm, true));
615     O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
616   } else {
617     assert(isUInt<32>(Imm) || isInt<32>(Imm));
618 
619     // In rare situations, we will have a 32-bit literal in a 64-bit
620     // operand. This is technically allowed for the encoding of s_mov_b64.
621     O << formatHex(static_cast<uint64_t>(Imm));
622   }
623 }
624 
625 void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
626                                   const MCSubtargetInfo &STI,
627                                   raw_ostream &O) {
628   unsigned Imm = MI->getOperand(OpNo).getImm();
629   if (!Imm)
630     return;
631 
632   if (AMDGPU::isGFX940(STI)) {
633     switch (MI->getOpcode()) {
634     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
635     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
636     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
637     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
638       O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
639         << ((Imm >> 2) & 1) << ']';
640       return;
641     }
642   }
643 
644   O << " blgp:" << Imm;
645 }
646 
647 void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
648                                                const MCSubtargetInfo &STI,
649                                                raw_ostream &O) {
650   if (!FirstOperand)
651     O << ", ";
652   printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
653                       ? AMDGPU::VCC_LO
654                       : AMDGPU::VCC,
655                   O, MRI);
656   if (FirstOperand)
657     O << ", ";
658 }
659 
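// A VOPC DPP instruction that implicitly defines vcc/vcc_lo needs that implied
// destination printed before its first source operand.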
660 bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
661                                         unsigned OpNo) const {
662   return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) &&
663          (Desc.TSFlags & SIInstrFlags::VOPC) &&
664          !isVOPCAsmOnly(Desc.getOpcode()) &&
665          (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
666           Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO));
667 }
668 
669 // Print default vcc/vcc_lo operand of VOPC.
670 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
671                                      const MCSubtargetInfo &STI,
672                                      raw_ostream &O) {
673   unsigned Opc = MI->getOpcode();
674   const MCInstrDesc &Desc = MII.get(Opc);
675   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
676   // 0, 1, and 2 are the first printed operands in different cases.
677   // If there are printed modifiers, printOperandAndFPInputMods or
678   // printOperandAndIntInputMods will be called instead.
679   if ((OpNo == 0 ||
680        (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
681       (Desc.TSFlags & SIInstrFlags::VOPC) && !isVOPCAsmOnly(Desc.getOpcode()) &&
682       (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
683        Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
684     printDefaultVccOperand(true, STI, O);
685 
686   printRegularOperand(MI, OpNo, STI, O);
687 }
688 
689 // Print operands after vcc or modifier handling.
690 void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
691                                             const MCSubtargetInfo &STI,
692                                             raw_ostream &O) {
693   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
694 
695   if (OpNo >= MI->getNumOperands()) {
696     O << "/*Missing OP" << OpNo << "*/";
697     return;
698   }
699 
700   const MCOperand &Op = MI->getOperand(OpNo);
701   if (Op.isReg()) {
702     printRegOperand(Op.getReg(), O, MRI);
703 
704     // Check whether the operand's register class contains the register used.
705     // Intention: print a disassembler message when invalid code is decoded,
706     // for example an SGPR used in a VReg or VISrc (VReg or imm) operand.
707     int RCID = Desc.operands()[OpNo].RegClass;
708     if (RCID != -1) {
709       const MCRegisterClass RC = MRI.getRegClass(RCID);
710       auto Reg = mc2PseudoReg(Op.getReg());
711       if (!RC.contains(Reg) && !isInlineValue(Reg)) {
712         O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
713           << "\' register class*/";
714       }
715     }
716   } else if (Op.isImm()) {
717     const uint8_t OpTy = Desc.operands()[OpNo].OperandType;
718     switch (OpTy) {
719     case AMDGPU::OPERAND_REG_IMM_INT32:
720     case AMDGPU::OPERAND_REG_IMM_FP32:
721     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
722     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
723     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
724     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
725     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
726     case AMDGPU::OPERAND_REG_IMM_V2INT32:
727     case AMDGPU::OPERAND_REG_IMM_V2FP32:
728     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
729     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
730     case MCOI::OPERAND_IMMEDIATE:
731     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
732       printImmediate32(Op.getImm(), STI, O);
733       break;
734     case AMDGPU::OPERAND_REG_IMM_INT64:
735     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
736       printImmediate64(Op.getImm(), STI, O, false);
737       break;
738     case AMDGPU::OPERAND_REG_IMM_FP64:
739     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
740     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
741       printImmediate64(Op.getImm(), STI, O, true);
742       break;
743     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
744     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
745     case AMDGPU::OPERAND_REG_IMM_INT16:
746       printImmediateInt16(Op.getImm(), STI, O);
747       break;
748     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
749     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
750     case AMDGPU::OPERAND_REG_IMM_FP16:
751     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
752       printImmediateF16(Op.getImm(), STI, O);
753       break;
754     case AMDGPU::OPERAND_REG_INLINE_C_BF16:
755     case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
756     case AMDGPU::OPERAND_REG_IMM_BF16:
757     case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
758       printImmediateBF16(Op.getImm(), STI, O);
759       break;
760     case AMDGPU::OPERAND_REG_IMM_V2INT16:
761     case AMDGPU::OPERAND_REG_IMM_V2BF16:
762     case AMDGPU::OPERAND_REG_IMM_V2FP16:
763     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
764     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
765     case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
766     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
767     case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
768     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
769       printImmediateV216(Op.getImm(), OpTy, STI, O);
770       break;
771     case MCOI::OPERAND_UNKNOWN:
772     case MCOI::OPERAND_PCREL:
773       O << formatDec(Op.getImm());
774       break;
775     case MCOI::OPERAND_REGISTER:
776       // The disassembler does not fail on operands that should not allow
777       // immediates; it decodes them into a 32-bit immediate operand anyway.
778       printImmediate32(Op.getImm(), STI, O);
779       O << "/*Invalid immediate*/";
780       break;
781     default:
782       // We hit this for the immediate instruction bits that don't yet have a
783       // custom printer.
784       llvm_unreachable("unexpected immediate operand type");
785     }
786   } else if (Op.isDFPImm()) {
787     double Value = bit_cast<double>(Op.getDFPImm());
788     // We special case 0.0 because otherwise it will be printed as an integer.
789     if (Value == 0.0)
790       O << "0.0";
791     else {
792       const MCInstrDesc &Desc = MII.get(MI->getOpcode());
793       int RCID = Desc.operands()[OpNo].RegClass;
794       unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
795       if (RCBits == 32)
796         printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
797       else if (RCBits == 64)
798         printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
799       else
800         llvm_unreachable("Invalid register class size");
801     }
802   } else if (Op.isExpr()) {
803     const MCExpr *Exp = Op.getExpr();
804     Exp->print(O, &MAI);
805   } else {
806     O << "/*INV_OP*/";
807   }
808 
809   // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
810   switch (MI->getOpcode()) {
811   default: break;
812 
813   case AMDGPU::V_CNDMASK_B32_e32_gfx10:
814   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
815   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
816   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
817   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
818   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
819   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
820   case AMDGPU::V_CNDMASK_B32_dpp8_gfx10:
821   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
822   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
823   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
824   case AMDGPU::V_CNDMASK_B32_e32_gfx11:
825   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
826   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
827   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
828   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
829   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
830   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
831   case AMDGPU::V_CNDMASK_B32_dpp8_gfx11:
832   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
833   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
834   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
835   case AMDGPU::V_CNDMASK_B32_e32_gfx12:
836   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
837   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
838   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
839   case AMDGPU::V_CNDMASK_B32_dpp_gfx12:
840   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
841   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
842   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
843   case AMDGPU::V_CNDMASK_B32_dpp8_gfx12:
844   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
845   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
846   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
847 
848   case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
849   case AMDGPU::V_CNDMASK_B32_e32_vi:
850     if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
851                                                 AMDGPU::OpName::src1))
852       printDefaultVccOperand(OpNo == 0, STI, O);
853     break;
854   }
855 
856   if (Desc.TSFlags & SIInstrFlags::MTBUF) {
857     int SOffsetIdx =
858       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::soffset);
859     assert(SOffsetIdx != -1);
860     if ((int)OpNo == SOffsetIdx)
861       printSymbolicFormat(MI, STI, O);
862   }
863 }
864 
865 void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
866                                                    unsigned OpNo,
867                                                    const MCSubtargetInfo &STI,
868                                                    raw_ostream &O) {
869   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
870   if (needsImpliedVcc(Desc, OpNo))
871     printDefaultVccOperand(true, STI, O);
872 
873   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
874 
875   // Use 'neg(...)' instead of '-' to avoid ambiguity.
876   // This is important for integer literals because
877   // -1 is not the same value as neg(1).
878   bool NegMnemo = false;
879 
880   if (InputModifiers & SISrcMods::NEG) {
881     if (OpNo + 1 < MI->getNumOperands() &&
882         (InputModifiers & SISrcMods::ABS) == 0) {
883       const MCOperand &Op = MI->getOperand(OpNo + 1);
884       NegMnemo = Op.isImm() || Op.isDFPImm();
885     }
886     if (NegMnemo) {
887       O << "neg(";
888     } else {
889       O << '-';
890     }
891   }
892 
893   if (InputModifiers & SISrcMods::ABS)
894     O << '|';
895   printRegularOperand(MI, OpNo + 1, STI, O);
896   if (InputModifiers & SISrcMods::ABS)
897     O << '|';
898 
899   if (NegMnemo) {
900     O << ')';
901   }
902 
903   // Print default vcc/vcc_lo operand of VOP2b.
904   switch (MI->getOpcode()) {
905   default:
906     break;
907 
908   case AMDGPU::V_CNDMASK_B32_sdwa_gfx10:
909   case AMDGPU::V_CNDMASK_B32_dpp_gfx10:
910   case AMDGPU::V_CNDMASK_B32_dpp_gfx11:
911     if ((int)OpNo + 1 ==
912         AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1))
913       printDefaultVccOperand(OpNo == 0, STI, O);
914     break;
915   }
916 }
917 
918 void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
919                                                     unsigned OpNo,
920                                                     const MCSubtargetInfo &STI,
921                                                     raw_ostream &O) {
922   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
923   if (needsImpliedVcc(Desc, OpNo))
924     printDefaultVccOperand(true, STI, O);
925 
926   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
927   if (InputModifiers & SISrcMods::SEXT)
928     O << "sext(";
929   printRegularOperand(MI, OpNo + 1, STI, O);
930   if (InputModifiers & SISrcMods::SEXT)
931     O << ')';
932 
933   // Print default vcc/vcc_lo operand of VOP2b.
934   switch (MI->getOpcode()) {
935   default: break;
936 
937   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
938   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
939   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
940     if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
941                                                     AMDGPU::OpName::src1))
942       printDefaultVccOperand(OpNo == 0, STI, O);
943     break;
944   }
945 }
946 
947 void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
948                                   const MCSubtargetInfo &STI,
949                                   raw_ostream &O) {
950   if (!AMDGPU::isGFX10Plus(STI))
951     llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
952 
953   unsigned Imm = MI->getOperand(OpNo).getImm();
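  // The immediate packs eight 3-bit lane selectors; e.g. the identity mapping
  // 0xFAC688 prints as dpp8:[0,1,2,3,4,5,6,7].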
954   O << "dpp8:[" << formatDec(Imm & 0x7);
955   for (size_t i = 1; i < 8; ++i) {
956     O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
957   }
958   O << ']';
959 }
960 
961 void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
962                                      const MCSubtargetInfo &STI,
963                                      raw_ostream &O) {
964   using namespace AMDGPU::DPP;
965 
966   unsigned Imm = MI->getOperand(OpNo).getImm();
967   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
968 
969   if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
970     O << " /* DP ALU dpp only supports row_newbcast */";
971     return;
972   }
973   if (Imm <= DppCtrl::QUAD_PERM_LAST) {
974     O << "quad_perm:[";
975     O << formatDec(Imm & 0x3)         << ',';
976     O << formatDec((Imm & 0xc)  >> 2) << ',';
977     O << formatDec((Imm & 0x30) >> 4) << ',';
978     O << formatDec((Imm & 0xc0) >> 6) << ']';
979   } else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
980              (Imm <= DppCtrl::ROW_SHL_LAST)) {
981     O << "row_shl:" << formatDec(Imm - DppCtrl::ROW_SHL0);
982   } else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
983              (Imm <= DppCtrl::ROW_SHR_LAST)) {
984     O << "row_shr:" << formatDec(Imm - DppCtrl::ROW_SHR0);
985   } else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
986              (Imm <= DppCtrl::ROW_ROR_LAST)) {
987     O << "row_ror:" << formatDec(Imm - DppCtrl::ROW_ROR0);
988   } else if (Imm == DppCtrl::WAVE_SHL1) {
989     if (AMDGPU::isGFX10Plus(STI)) {
990       O << "/* wave_shl is not supported starting from GFX10 */";
991       return;
992     }
993     O << "wave_shl:1";
994   } else if (Imm == DppCtrl::WAVE_ROL1) {
995     if (AMDGPU::isGFX10Plus(STI)) {
996       O << "/* wave_rol is not supported starting from GFX10 */";
997       return;
998     }
999     O << "wave_rol:1";
1000   } else if (Imm == DppCtrl::WAVE_SHR1) {
1001     if (AMDGPU::isGFX10Plus(STI)) {
1002       O << "/* wave_shr is not supported starting from GFX10 */";
1003       return;
1004     }
1005     O << "wave_shr:1";
1006   } else if (Imm == DppCtrl::WAVE_ROR1) {
1007     if (AMDGPU::isGFX10Plus(STI)) {
1008       O << "/* wave_ror is not supported starting from GFX10 */";
1009       return;
1010     }
1011     O << "wave_ror:1";
1012   } else if (Imm == DppCtrl::ROW_MIRROR) {
1013     O << "row_mirror";
1014   } else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
1015     O << "row_half_mirror";
1016   } else if (Imm == DppCtrl::BCAST15) {
1017     if (AMDGPU::isGFX10Plus(STI)) {
1018       O << "/* row_bcast is not supported starting from GFX10 */";
1019       return;
1020     }
1021     O << "row_bcast:15";
1022   } else if (Imm == DppCtrl::BCAST31) {
1023     if (AMDGPU::isGFX10Plus(STI)) {
1024       O << "/* row_bcast is not supported starting from GFX10 */";
1025       return;
1026     }
1027     O << "row_bcast:31";
1028   } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
1029              (Imm <= DppCtrl::ROW_SHARE_LAST)) {
1030     if (AMDGPU::isGFX90A(STI)) {
1031       O << "row_newbcast:";
1032     } else if (AMDGPU::isGFX10Plus(STI)) {
1033       O << "row_share:";
1034     } else {
1035       O << " /* row_newbcast/row_share is not supported on ASICs earlier "
1036            "than GFX90A/GFX10 */";
1037       return;
1038     }
1039     O << formatDec(Imm - DppCtrl::ROW_SHARE_FIRST);
1040   } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
1041              (Imm <= DppCtrl::ROW_XMASK_LAST)) {
1042     if (!AMDGPU::isGFX10Plus(STI)) {
1043       O << "/* row_xmask is not supported on ASICs earlier than GFX10 */";
1044       return;
1045     }
1046     O << "row_xmask:" << formatDec(Imm - DppCtrl::ROW_XMASK_FIRST);
1047   } else {
1048     O << "/* Invalid dpp_ctrl value */";
1049   }
1050 }
1051 
1052 void AMDGPUInstPrinter::printDppBoundCtrl(const MCInst *MI, unsigned OpNo,
1053                                           const MCSubtargetInfo &STI,
1054                                           raw_ostream &O) {
1055   unsigned Imm = MI->getOperand(OpNo).getImm();
1056   if (Imm) {
1057     O << " bound_ctrl:1";
1058   }
1059 }
1060 
1061 void AMDGPUInstPrinter::printDppFI(const MCInst *MI, unsigned OpNo,
1062                                    const MCSubtargetInfo &STI, raw_ostream &O) {
1063   using namespace llvm::AMDGPU::DPP;
1064   unsigned Imm = MI->getOperand(OpNo).getImm();
1065   if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
1066     O << " fi:1";
1067   }
1068 }
1069 
1070 void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
1071                                      raw_ostream &O) {
1072   using namespace llvm::AMDGPU::SDWA;
1073 
1074   unsigned Imm = MI->getOperand(OpNo).getImm();
1075   switch (Imm) {
1076   case SdwaSel::BYTE_0: O << "BYTE_0"; break;
1077   case SdwaSel::BYTE_1: O << "BYTE_1"; break;
1078   case SdwaSel::BYTE_2: O << "BYTE_2"; break;
1079   case SdwaSel::BYTE_3: O << "BYTE_3"; break;
1080   case SdwaSel::WORD_0: O << "WORD_0"; break;
1081   case SdwaSel::WORD_1: O << "WORD_1"; break;
1082   case SdwaSel::DWORD: O << "DWORD"; break;
1083   default: llvm_unreachable("Invalid SDWA data select operand");
1084   }
1085 }
1086 
1087 void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
1088                                         const MCSubtargetInfo &STI,
1089                                         raw_ostream &O) {
1090   O << "dst_sel:";
1091   printSDWASel(MI, OpNo, O);
1092 }
1093 
1094 void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
1095                                          const MCSubtargetInfo &STI,
1096                                          raw_ostream &O) {
1097   O << "src0_sel:";
1098   printSDWASel(MI, OpNo, O);
1099 }
1100 
1101 void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
1102                                          const MCSubtargetInfo &STI,
1103                                          raw_ostream &O) {
1104   O << "src1_sel:";
1105   printSDWASel(MI, OpNo, O);
1106 }
1107 
1108 void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
1109                                            const MCSubtargetInfo &STI,
1110                                            raw_ostream &O) {
1111   using namespace llvm::AMDGPU::SDWA;
1112 
1113   O << "dst_unused:";
1114   unsigned Imm = MI->getOperand(OpNo).getImm();
1115   switch (Imm) {
1116   case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
1117   case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
1118   case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
1119   default: llvm_unreachable("Invalid SDWA dest_unused operand");
1120   }
1121 }
1122 
1123 void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
1124                                      const MCSubtargetInfo &STI, raw_ostream &O,
1125                                      unsigned N) {
1126   unsigned Opc = MI->getOpcode();
1127   int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
1128   unsigned En = MI->getOperand(EnIdx).getImm();
1129 
1130   int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
1131 
1132   // If compr is set, print as src0, src0, src1, src1
1133   if (MI->getOperand(ComprIdx).getImm())
1134     OpNo = OpNo - N + N / 2;
1135 
1136   if (En & (1 << N))
1137     printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
1138   else
1139     O << "off";
1140 }
1141 
1142 void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
1143                                      const MCSubtargetInfo &STI,
1144                                      raw_ostream &O) {
1145   printExpSrcN(MI, OpNo, STI, O, 0);
1146 }
1147 
1148 void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
1149                                      const MCSubtargetInfo &STI,
1150                                      raw_ostream &O) {
1151   printExpSrcN(MI, OpNo, STI, O, 1);
1152 }
1153 
1154 void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
1155                                      const MCSubtargetInfo &STI,
1156                                      raw_ostream &O) {
1157   printExpSrcN(MI, OpNo, STI, O, 2);
1158 }
1159 
1160 void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
1161                                      const MCSubtargetInfo &STI,
1162                                      raw_ostream &O) {
1163   printExpSrcN(MI, OpNo, STI, O, 3);
1164 }
1165 
1166 void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
1167                                     const MCSubtargetInfo &STI,
1168                                     raw_ostream &O) {
1169   using namespace llvm::AMDGPU::Exp;
1170 
1171   // This is really a 6-bit field.
1172   unsigned Id = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
1173 
1174   int Index;
1175   StringRef TgtName;
1176   if (getTgtName(Id, TgtName, Index) && isSupportedTgtId(Id, STI)) {
1177     O << ' ' << TgtName;
1178     if (Index >= 0)
1179       O << Index;
1180   } else {
1181     O << " invalid_target_" << Id;
1182   }
1183 }
1184 
1185 static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
1186                                bool IsPacked, bool HasDstSel) {
1187   int DefaultValue = IsPacked && (Mod == SISrcMods::OP_SEL_1);
1188 
1189   for (int I = 0; I < NumOps; ++I) {
1190     if (!!(Ops[I] & Mod) != DefaultValue)
1191       return false;
1192   }
1193 
1194   if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
1195     return false;
1196 
1197   return true;
1198 }
1199 
1200 void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
1201                                             StringRef Name,
1202                                             unsigned Mod,
1203                                             raw_ostream &O) {
1204   unsigned Opc = MI->getOpcode();
1205   int NumOps = 0;
1206   int Ops[3];
1207 
1208   std::pair<int, int> MOps[] = {
1209       {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src0},
1210       {AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src1},
1211       {AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::src2}};
1212   int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
1213 
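  // Collect the modifier bits for each present source operand; a missing
  // modifiers operand takes the default (1 for op_sel_hi, 0 otherwise).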
1214   for (auto [SrcMod, Src] : MOps) {
1215     if (!AMDGPU::hasNamedOperand(Opc, Src))
1216       break;
1217 
1218     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, SrcMod);
1219     Ops[NumOps++] =
1220         (ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue;
1221   }
1222 
1223   // For WMMA and SWMMAC instructions, always print three neg/opsel values
1224   // (print 0 when a src_modifiers operand is absent instead of omitting it).
1225   if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsSWMMAC ||
1226       MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsWMMA) {
1227     NumOps = 0;
1228     int DefaultValue = Mod == SISrcMods::OP_SEL_1;
1229     for (int OpName :
1230          {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
1231           AMDGPU::OpName::src2_modifiers}) {
1232       int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1233       if (Idx != -1)
1234         Ops[NumOps++] = MI->getOperand(Idx).getImm();
1235       else
1236         Ops[NumOps++] = DefaultValue;
1237     }
1238   }
1239 
1240   const bool HasDstSel =
1241     NumOps > 0 &&
1242     Mod == SISrcMods::OP_SEL_0 &&
1243     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
1244 
1245   const bool IsPacked =
1246     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsPacked;
1247 
1248   if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
1249     return;
1250 
1251   O << Name;
1252   for (int I = 0; I < NumOps; ++I) {
1253     if (I != 0)
1254       O << ',';
1255 
1256     O << !!(Ops[I] & Mod);
1257   }
1258 
1259   if (HasDstSel) {
1260     O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
1261   }
1262 
1263   O << ']';
1264 }
1265 
1266 void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
1267                                    const MCSubtargetInfo &STI,
1268                                    raw_ostream &O) {
1269   unsigned Opc = MI->getOpcode();
1270   if (isCvt_F32_Fp8_Bf8_e64(Opc)) {
1271     auto SrcMod =
1272         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1273     unsigned Mod = MI->getOperand(SrcMod).getImm();
1274     unsigned Index0 = !!(Mod & SISrcMods::OP_SEL_0);
1275     unsigned Index1 = !!(Mod & SISrcMods::OP_SEL_1);
1276     if (Index0 || Index1)
1277       O << " op_sel:[" << Index0 << ',' << Index1 << ']';
1278     return;
1279   }
1280   if (isPermlane16(Opc)) {
1281     auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1282     auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1283     unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
1284     unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
1285     if (FI || BC)
1286       O << " op_sel:[" << FI << ',' << BC << ']';
1287     return;
1288   }
1289 
1290   printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
1291 }
1292 
1293 void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
1294                                      const MCSubtargetInfo &STI,
1295                                      raw_ostream &O) {
1296   printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
1297 }
1298 
1299 void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
1300                                    const MCSubtargetInfo &STI,
1301                                    raw_ostream &O) {
1302   printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
1303 }
1304 
1305 void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
1306                                    const MCSubtargetInfo &STI,
1307                                    raw_ostream &O) {
1308   printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
1309 }
1310 
1311 void AMDGPUInstPrinter::printIndexKey8bit(const MCInst *MI, unsigned OpNo,
1312                                           const MCSubtargetInfo &STI,
1313                                           raw_ostream &O) {
1314   auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
1315   if (Imm == 0)
1316     return;
1317 
1318   O << " index_key:" << Imm;
1319 }
1320 
1321 void AMDGPUInstPrinter::printIndexKey16bit(const MCInst *MI, unsigned OpNo,
1322                                            const MCSubtargetInfo &STI,
1323                                            raw_ostream &O) {
1324   auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
1325   if (Imm == 0)
1326     return;
1327 
1328   O << " index_key:" << Imm;
1329 }
1330 
1331 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
1332                                         const MCSubtargetInfo &STI,
1333                                         raw_ostream &O) {
1334   unsigned Imm = MI->getOperand(OpNum).getImm();
1335   switch (Imm) {
1336   case 0:
1337     O << "p10";
1338     break;
1339   case 1:
1340     O << "p20";
1341     break;
1342   case 2:
1343     O << "p0";
1344     break;
1345   default:
1346     O << "invalid_param_" << Imm;
1347   }
1348 }
1349 
1350 void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
1351                                         const MCSubtargetInfo &STI,
1352                                         raw_ostream &O) {
1353   unsigned Attr = MI->getOperand(OpNum).getImm();
1354   O << "attr" << Attr;
1355 }
1356 
1357 void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
1358                                         const MCSubtargetInfo &STI,
1359                                         raw_ostream &O) {
1360   unsigned Chan = MI->getOperand(OpNum).getImm();
1361   O << '.' << "xyzw"[Chan & 0x3];
1362 }
1363 
1364 void AMDGPUInstPrinter::printGPRIdxMode(const MCInst *MI, unsigned OpNo,
1365                                         const MCSubtargetInfo &STI,
1366                                         raw_ostream &O) {
1367   using namespace llvm::AMDGPU::VGPRIndexMode;
1368   unsigned Val = MI->getOperand(OpNo).getImm();
1369 
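  // Print the enabled index modes symbolically inside gpr_idx(...); fall back
  // to a raw hex value if bits outside the enable mask are set.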
1370   if ((Val & ~ENABLE_MASK) != 0) {
1371     O << formatHex(static_cast<uint64_t>(Val));
1372   } else {
1373     O << "gpr_idx(";
1374     bool NeedComma = false;
1375     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
1376       if (Val & (1 << ModeId)) {
1377         if (NeedComma)
1378           O << ',';
1379         O << IdSymbolic[ModeId];
1380         NeedComma = true;
1381       }
1382     }
1383     O << ')';
1384   }
1385 }
1386 
1387 void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
1388                                         const MCSubtargetInfo &STI,
1389                                         raw_ostream &O) {
1390   printRegularOperand(MI, OpNo, STI, O);
1391   O  << ", ";
1392   printRegularOperand(MI, OpNo + 1, STI, O);
1393 }
1394 
1395 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1396                                    raw_ostream &O, StringRef Asm,
1397                                    StringRef Default) {
1398   const MCOperand &Op = MI->getOperand(OpNo);
1399   assert(Op.isImm());
1400   if (Op.getImm() == 1) {
1401     O << Asm;
1402   } else {
1403     O << Default;
1404   }
1405 }
1406 
1407 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1408                                    raw_ostream &O, char Asm) {
1409   const MCOperand &Op = MI->getOperand(OpNo);
1410   assert(Op.isImm());
1411   if (Op.getImm() == 1)
1412     O << Asm;
1413 }
1414 
1415 void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
1416                                     const MCSubtargetInfo &STI,
1417                                     raw_ostream &O) {
1418   int Imm = MI->getOperand(OpNo).getImm();
1419   if (Imm == SIOutMods::MUL2)
1420     O << " mul:2";
1421   else if (Imm == SIOutMods::MUL4)
1422     O << " mul:4";
1423   else if (Imm == SIOutMods::DIV2)
1424     O << " div:2";
1425 }
1426 
1427 void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
1428                                      const MCSubtargetInfo &STI,
1429                                      raw_ostream &O) {
1430   using namespace llvm::AMDGPU::SendMsg;
1431 
1432   const unsigned Imm16 = MI->getOperand(OpNo).getImm();
1433 
1434   uint16_t MsgId;
1435   uint16_t OpId;
1436   uint16_t StreamId;
1437   decodeMsg(Imm16, MsgId, OpId, StreamId, STI);
1438 
1439   StringRef MsgName = getMsgName(MsgId, STI);
1440 
1441   if (!MsgName.empty() && isValidMsgOp(MsgId, OpId, STI) &&
1442       isValidMsgStream(MsgId, OpId, StreamId, STI)) {
1443     O << "sendmsg(" << MsgName;
1444     if (msgRequiresOp(MsgId, STI)) {
1445       O << ", " << getMsgOpName(MsgId, OpId, STI);
1446       if (msgSupportsStream(MsgId, OpId, STI)) {
1447         O << ", " << StreamId;
1448       }
1449     }
1450     O << ')';
1451   } else if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
1452     O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
1453   } else {
1454     O << Imm16; // Unknown imm16 code.
1455   }
1456 }
1457 
1458 static void printSwizzleBitmask(const uint16_t AndMask,
1459                                 const uint16_t OrMask,
1460                                 const uint16_t XorMask,
1461                                 raw_ostream &O) {
1462   using namespace llvm::AMDGPU::Swizzle;
1463 
1464   uint16_t Probe0 = ((0            & AndMask) | OrMask) ^ XorMask;
1465   uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
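  // Probing with all-zero and all-one inputs classifies each bit of the
  // pattern: '0'/'1' means forced, 'p' passes through, 'i' is inverted.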
1466 
1467   O << "\"";
1468 
1469   for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
1470     uint16_t p0 = Probe0 & Mask;
1471     uint16_t p1 = Probe1 & Mask;
1472 
1473     if (p0 == p1) {
1474       if (p0 == 0) {
1475         O << "0";
1476       } else {
1477         O << "1";
1478       }
1479     } else {
1480       if (p0 == 0) {
1481         O << "p";
1482       } else {
1483         O << "i";
1484       }
1485     }
1486   }
1487 
1488   O << "\"";
1489 }
1490 
1491 void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
1492                                      const MCSubtargetInfo &STI,
1493                                      raw_ostream &O) {
1494   using namespace llvm::AMDGPU::Swizzle;
1495 
1496   uint16_t Imm = MI->getOperand(OpNo).getImm();
1497   if (Imm == 0) {
1498     return;
1499   }
1500 
1501   O << " offset:";
1502 
1503   // Rotate and FFT modes
1504   if (Imm >= ROTATE_MODE_LO && AMDGPU::isGFX9Plus(STI)) {
1505     if (Imm >= FFT_MODE_LO) {
1506       O << "swizzle(" << IdSymbolic[ID_FFT] << ',' << (Imm & FFT_SWIZZLE_MASK)
1507         << ')';
1508     } else if (Imm >= ROTATE_MODE_LO) {
1509       O << "swizzle(" << IdSymbolic[ID_ROTATE] << ','
1510         << ((Imm >> ROTATE_DIR_SHIFT) & ROTATE_DIR_MASK) << ','
1511         << ((Imm >> ROTATE_SIZE_SHIFT) & ROTATE_SIZE_MASK) << ')';
1512     }
1513     return;
1514   }
1515 
1516   // Basic mode
1517   if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
1518     O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
1519     for (unsigned I = 0; I < LANE_NUM; ++I) {
1520       O << ",";
1521       O << formatDec(Imm & LANE_MASK);
1522       Imm >>= LANE_SHIFT;
1523     }
1524     O << ")";
1525 
1526   } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
1527 
1528     uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
1529     uint16_t OrMask  = (Imm >> BITMASK_OR_SHIFT)  & BITMASK_MASK;
1530     uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
1531 
1532     if (AndMask == BITMASK_MAX && OrMask == 0 && llvm::popcount(XorMask) == 1) {
1533 
1534       O << "swizzle(" << IdSymbolic[ID_SWAP];
1535       O << ",";
1536       O << formatDec(XorMask);
1537       O << ")";
1538 
1539     } else if (AndMask == BITMASK_MAX && OrMask == 0 && XorMask > 0 &&
1540                isPowerOf2_64(XorMask + 1)) {
1541 
1542       O << "swizzle(" << IdSymbolic[ID_REVERSE];
1543       O << ",";
1544       O << formatDec(XorMask + 1);
1545       O << ")";
1546 
1547     } else {
1548 
1549       uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
1550       if (GroupSize > 1 &&
1551           isPowerOf2_64(GroupSize) &&
1552           OrMask < GroupSize &&
1553           XorMask == 0) {
1554 
1555         O << "swizzle(" << IdSymbolic[ID_BROADCAST];
1556         O << ",";
1557         O << formatDec(GroupSize);
1558         O << ",";
1559         O << formatDec(OrMask);
1560         O << ")";
1561 
1562       } else {
1563         O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
1564         O << ",";
1565         printSwizzleBitmask(AndMask, OrMask, XorMask, O);
1566         O << ")";
1567       }
1568     }
1569   } else {
1570     printU16ImmDecOperand(MI, OpNo, O);
1571   }
1572 }
1573 
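// s_waitcnt: split the simm16 into vmcnt/expcnt/lgkmcnt using the bit widths
// of the current ISA. Counters left at their maximum (default) value are
// omitted, so a typical operand prints as e.g. "vmcnt(0) lgkmcnt(0)"; if
// every counter is at its default, all three are printed.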
1574 void AMDGPUInstPrinter::printSWaitCnt(const MCInst *MI, unsigned OpNo,
1575                                       const MCSubtargetInfo &STI,
1576                                       raw_ostream &O) {
1577   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
1578 
1579   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1580   unsigned Vmcnt, Expcnt, Lgkmcnt;
1581   decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
1582 
1583   bool IsDefaultVmcnt = Vmcnt == getVmcntBitMask(ISA);
1584   bool IsDefaultExpcnt = Expcnt == getExpcntBitMask(ISA);
1585   bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA);
1586   bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt;
1587 
1588   bool NeedSpace = false;
1589 
1590   if (!IsDefaultVmcnt || PrintAll) {
1591     O << "vmcnt(" << Vmcnt << ')';
1592     NeedSpace = true;
1593   }
1594 
1595   if (!IsDefaultExpcnt || PrintAll) {
1596     if (NeedSpace)
1597       O << ' ';
1598     O << "expcnt(" << Expcnt << ')';
1599     NeedSpace = true;
1600   }
1601 
1602   if (!IsDefaultLgkmcnt || PrintAll) {
1603     if (NeedSpace)
1604       O << ' ';
1605     O << "lgkmcnt(" << Lgkmcnt << ')';
1606   }
1607 }
1608 
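// s_waitcnt_depctr: if the imm16 is a valid symbolic encoding, print each
// non-default counter as "name(value)" separated by spaces (or every counter
// when none differs from its default); otherwise print the raw value in hex.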
1609 void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
1610                                     const MCSubtargetInfo &STI,
1611                                     raw_ostream &O) {
1612   using namespace llvm::AMDGPU::DepCtr;
1613 
1614   uint64_t Imm16 = MI->getOperand(OpNo).getImm() & 0xffff;
1615 
1616   bool HasNonDefaultVal = false;
1617   if (isSymbolicDepCtrEncoding(Imm16, HasNonDefaultVal, STI)) {
1618     int Id = 0;
1619     StringRef Name;
1620     unsigned Val;
1621     bool IsDefault;
1622     bool NeedSpace = false;
1623     while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) {
1624       if (!IsDefault || !HasNonDefaultVal) {
1625         if (NeedSpace)
1626           O << ' ';
1627         O << Name << '(' << Val << ')';
1628         NeedSpace = true;
1629       }
1630     }
1631   } else {
1632     O << formatHex(Imm16);
1633   }
1634 }
1635 
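// s_delay_alu: the immediate packs instid0 in bits [3:0], instskip in bits
// [6:4], and instid1 in bits [10:7]. Non-zero fields are printed symbolically
// and joined with " | "; a fully-zero value prints as "0", and out-of-range
// field values are flagged with an /* invalid ... */ comment.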
1636 void AMDGPUInstPrinter::printSDelayALU(const MCInst *MI, unsigned OpNo,
1637                                        const MCSubtargetInfo &STI,
1638                                        raw_ostream &O) {
1639   const char *BadInstId = "/* invalid instid value */";
1640   static const std::array<const char *, 12> InstIds = {
1641       "NO_DEP",        "VALU_DEP_1",    "VALU_DEP_2",
1642       "VALU_DEP_3",    "VALU_DEP_4",    "TRANS32_DEP_1",
1643       "TRANS32_DEP_2", "TRANS32_DEP_3", "FMA_ACCUM_CYCLE_1",
1644       "SALU_CYCLE_1",  "SALU_CYCLE_2",  "SALU_CYCLE_3"};
1645 
1646   const char *BadInstSkip = "/* invalid instskip value */";
1647   static const std::array<const char *, 6> InstSkips = {
1648       "SAME", "NEXT", "SKIP_1", "SKIP_2", "SKIP_3", "SKIP_4"};
1649 
1650   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1651   const char *Prefix = "";
1652 
1653   unsigned Value = SImm16 & 0xF;
1654   if (Value) {
1655     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1656     O << Prefix << "instid0(" << Name << ')';
1657     Prefix = " | ";
1658   }
1659 
1660   Value = (SImm16 >> 4) & 7;
1661   if (Value) {
1662     const char *Name =
1663         Value < InstSkips.size() ? InstSkips[Value] : BadInstSkip;
1664     O << Prefix << "instskip(" << Name << ')';
1665     Prefix = " | ";
1666   }
1667 
1668   Value = (SImm16 >> 7) & 0xF;
1669   if (Value) {
1670     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1671     O << Prefix << "instid1(" << Name << ')';
1672     Prefix = " | ";
1673   }
1674 
1675   if (!*Prefix)
1676     O << "0";
1677 }
1678 
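// s_getreg/s_setreg immediate: decode the packed hwreg encoding into id,
// offset, and width. The register is printed by name when known (raw id
// otherwise), and offset/width are appended only when either differs from
// its default.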
1679 void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
1680                                    const MCSubtargetInfo &STI, raw_ostream &O) {
1681   using namespace llvm::AMDGPU::Hwreg;
1682   unsigned Val = MI->getOperand(OpNo).getImm();
1683   auto [Id, Offset, Width] = HwregEncoding::decode(Val);
1684   StringRef HwRegName = getHwreg(Id, STI);
1685 
1686   O << "hwreg(";
1687   if (!HwRegName.empty()) {
1688     O << HwRegName;
1689   } else {
1690     O << Id;
1691   }
1692   if (Width != HwregSize::Default || Offset != HwregOffset::Default)
1693     O << ", " << Offset << ", " << Width;
1694   O << ')';
1695 }
1696 
1697 void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
1698                                     const MCSubtargetInfo &STI,
1699                                     raw_ostream &O) {
1700   uint16_t Imm = MI->getOperand(OpNo).getImm();
1701   if (Imm == 0) {
1702     return;
1703   }
1704 
1705   O << ' ' << formatDec(Imm);
1706 }
1707 
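// Generic " prefix:value" printer for named integer modifiers; the value is
// emitted in hex or decimal as requested and is omitted when zero unless
// AlwaysPrint is set.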
1708 void AMDGPUInstPrinter::printNamedInt(const MCInst *MI, unsigned OpNo,
1709                                       const MCSubtargetInfo &STI,
1710                                       raw_ostream &O, StringRef Prefix,
1711                                       bool PrintInHex, bool AlwaysPrint) {
1712   int64_t V = MI->getOperand(OpNo).getImm();
1713   if (AlwaysPrint || V != 0)
1714     O << ' ' << Prefix << ':' << (PrintInHex ? formatHex(V) : formatDec(V));
1715 }
1716 
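// bitop3 selector: a non-zero value prints as " bitop3:N", using decimal for
// small values (<= 10) and hex otherwise; zero is the default and prints
// nothing.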
1717 void AMDGPUInstPrinter::printBitOp3(const MCInst *MI, unsigned OpNo,
1718                                     const MCSubtargetInfo &STI,
1719                                     raw_ostream &O) {
1720   uint8_t Imm = MI->getOperand(OpNo).getImm();
1721   if (!Imm)
1722     return;
1723 
1724   O << " bitop3:";
1725   if (Imm <= 10)
1726     O << formatDec(Imm);
1727   else
1728     O << formatHex(static_cast<uint64_t>(Imm));
1729 }
1730 
1731 #include "AMDGPUGenAsmWriter.inc"
1732