//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
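  // Illustrative note (editorial, not from the original source): for a source
  // operand written as "-|v0|" the parser sets both Abs and Neg, so
  // getModifiersOperand() returns SISrcMods::ABS | SISrcMods::NEG; that value
  // becomes the src*_modifiers immediate preceding the register in the MCInst.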

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyByteSel,
    ImmTyBitOp3,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
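  // For example (illustrative, editorial addition): if an instruction is given
  // two plain 32-bit literals where the encoding allows only one, the
  // diagnostic should point at a regular literal (ImmKindTyLiteral) rather
  // than at a mandatory KImm operand, since the latter cannot be rewritten by
  // the user.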
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
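
  // Illustrative note (editorial, not from the original source): the helpers
  // above append the modifier immediate first and the value second. For a
  // VOP3 source written as "-|v1|", addRegOrImmWithFPInputModsOperands() emits
  // an immediate holding SISrcMods::ABS | SISrcMods::NEG followed by the
  // register, matching the src*_modifiers/src* operand pair in the MCInst.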

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
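
// Usage sketch (editorial, illustrative only): parsed operands are built
// through the Create* factory helpers above, e.g. from inside an
// AMDGPUAsmParser parse routine:
//   AMDGPUOperand::Ptr Op =
//       AMDGPUOperand::CreateImm(this, 16, Loc, AMDGPUOperand::ImmTyOffset);
// and are later written into an MCInst via addImmOperands()/addRegOperands().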

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

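  // Worked example (editorial, illustrative only): a reference to s[4:7]
  // reaches usesRegister() with DwordRegIndex = 4 and RegWidth = 128, so it
  // calls usesSgprAt(4 + ceil(128 / 32) - 1) == usesSgprAt(7), which records
  // 8 as the new value of the .kernel.sgpr_count symbol.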
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
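  /// (For example, the HSA metadata block delimited by
  /// .amd_amdgpu_hsa_metadata / .end_amd_amdgpu_hsa_metadata is collected this
  /// way; the directive names are given here only as an illustration.)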
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size, this must be a generation before
      // gfx10, since those targets already have FeatureWavefrontSize64 in
      // their definition. For gfx10+ set wave32 as a default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

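    // Illustrative example (editorial, values not taken from this file): for a
    // gfx90a HSA target this would define .amdgcn.gfx_generation_number = 9,
    // ..._minor = 0 and ..._stepping = 10; non-HSA targets get the
    // .option.machine_version_* symbols instead.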
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

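  // Note (editorial, illustrative): the "forced" flags below are presumably
  // set when the mnemonic carries an explicit encoding suffix handled by
  // parseMnemonicSuffix(), e.g. an "_e64" suffix would leave
  // ForcedEncodingSize == 64 so that only the VOP3 variant is matched.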
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         int64_t &IntVal);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         AMDGPUOperand::ImmTy Type);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
1715     unsigned Width;
1716     bool IsDefined = false;
1717 
1718     StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1719                       int64_t Default)
1720         : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1721     virtual ~StructuredOpField() = default;
1722 
1723     bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1724       Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1725       return false;
1726     }
1727 
1728     virtual bool validate(AMDGPUAsmParser &Parser) const {
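         // For example, a field declared with Width == 3 accepts only the values
         // 0..7; a symbolic value that resolves to OPR_ID_UNSUPPORTED is rejected
         // as not supported on the current GPU.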
1729       if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1730         return Error(Parser, "not supported on this GPU");
1731       if (!isUIntN(Width, Val))
1732         return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1733       return true;
1734     }
1735   };
1736 
1737   ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1738   bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1739 
1740   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1741   bool validateSendMsg(const OperandInfoTy &Msg,
1742                        const OperandInfoTy &Op,
1743                        const OperandInfoTy &Stream);
1744 
1745   ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1746                              OperandInfoTy &Width);
1747 
1748   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1749   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1750   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1751 
1752   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1753                       const OperandVector &Operands) const;
1754   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1755   SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
1756   SMLoc getLitLoc(const OperandVector &Operands,
1757                   bool SearchMandatoryLiterals = false) const;
1758   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1759   SMLoc getConstLoc(const OperandVector &Operands) const;
1760   SMLoc getInstLoc(const OperandVector &Operands) const;
1761 
1762   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1763   bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1764   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1765   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1766   bool validateSOPLiteral(const MCInst &Inst) const;
1767   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1768   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1769                                       const OperandVector &Operands);
1770   bool validateIntClampSupported(const MCInst &Inst);
1771   bool validateMIMGAtomicDMask(const MCInst &Inst);
1772   bool validateMIMGGatherDMask(const MCInst &Inst);
1773   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1774   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1775   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1776   bool validateMIMGD16(const MCInst &Inst);
1777   bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1778   bool validateMIMGMSAA(const MCInst &Inst);
1779   bool validateOpSel(const MCInst &Inst);
1780   bool validateNeg(const MCInst &Inst, int OpName);
1781   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1782   bool validateVccOperand(MCRegister Reg) const;
1783   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1784   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1785   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1786   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1787   bool validateAGPRLdSt(const MCInst &Inst) const;
1788   bool validateVGPRAlign(const MCInst &Inst) const;
1789   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1790   bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1791   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1792   bool validateDivScale(const MCInst &Inst);
1793   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1794   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1795                              const SMLoc &IDLoc);
1796   bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1797                               const unsigned CPol);
1798   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1799   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1800   unsigned getConstantBusLimit(unsigned Opcode) const;
1801   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1802   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1803   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1804 
1805   bool isSupportedMnemo(StringRef Mnemo,
1806                         const FeatureBitset &FBS);
1807   bool isSupportedMnemo(StringRef Mnemo,
1808                         const FeatureBitset &FBS,
1809                         ArrayRef<unsigned> Variants);
1810   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1811 
1812   bool isId(const StringRef Id) const;
1813   bool isId(const AsmToken &Token, const StringRef Id) const;
1814   bool isToken(const AsmToken::TokenKind Kind) const;
1815   StringRef getId() const;
1816   bool trySkipId(const StringRef Id);
1817   bool trySkipId(const StringRef Pref, const StringRef Id);
1818   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1819   bool trySkipToken(const AsmToken::TokenKind Kind);
1820   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1821   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1822   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1823 
1824   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1825   AsmToken::TokenKind getTokenKind() const;
1826   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1827   bool parseExpr(OperandVector &Operands);
1828   StringRef getTokenStr() const;
1829   AsmToken peekToken(bool ShouldSkipSpace = true);
1830   AsmToken getToken() const;
1831   SMLoc getLoc() const;
1832   void lex();
1833 
1834 public:
1835   void onBeginOfFile() override;
1836   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1837 
1838   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1839 
1840   ParseStatus parseExpTgt(OperandVector &Operands);
1841   ParseStatus parseSendMsg(OperandVector &Operands);
1842   ParseStatus parseInterpSlot(OperandVector &Operands);
1843   ParseStatus parseInterpAttr(OperandVector &Operands);
1844   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1845   ParseStatus parseBoolReg(OperandVector &Operands);
1846 
1847   bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1848                            const unsigned MaxVal, const Twine &ErrMsg,
1849                            SMLoc &Loc);
1850   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1851                             const unsigned MinVal,
1852                             const unsigned MaxVal,
1853                             const StringRef ErrMsg);
1854   ParseStatus parseSwizzle(OperandVector &Operands);
1855   bool parseSwizzleOffset(int64_t &Imm);
1856   bool parseSwizzleMacro(int64_t &Imm);
1857   bool parseSwizzleQuadPerm(int64_t &Imm);
1858   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1859   bool parseSwizzleBroadcast(int64_t &Imm);
1860   bool parseSwizzleSwap(int64_t &Imm);
1861   bool parseSwizzleReverse(int64_t &Imm);
1862   bool parseSwizzleFFT(int64_t &Imm);
1863   bool parseSwizzleRotate(int64_t &Imm);
1864 
1865   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1866   int64_t parseGPRIdxMacro();
1867 
1868   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1869   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1870 
1871   ParseStatus parseOModSI(OperandVector &Operands);
1872 
1873   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1874                OptionalImmIndexMap &OptionalIdx);
1875   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1876   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1877   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1878   void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1879 
1880   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1881   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1882                     OptionalImmIndexMap &OptionalIdx);
1883   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1884                 OptionalImmIndexMap &OptionalIdx);
1885 
1886   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1887   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1888 
1889   bool parseDimId(unsigned &Encoding);
1890   ParseStatus parseDim(OperandVector &Operands);
1891   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1892   ParseStatus parseDPP8(OperandVector &Operands);
1893   ParseStatus parseDPPCtrl(OperandVector &Operands);
1894   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1895   int64_t parseDPPCtrlSel(StringRef Ctrl);
1896   int64_t parseDPPCtrlPerm();
1897   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1898   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1899     cvtDPP(Inst, Operands, true);
1900   }
1901   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1902                   bool IsDPP8 = false);
1903   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1904     cvtVOP3DPP(Inst, Operands, true);
1905   }
1906 
1907   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1908                            AMDGPUOperand::ImmTy Type);
1909   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1910   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1911   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1912   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1913   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1914   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1915   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1916                uint64_t BasicInstType,
1917                bool SkipDstVcc = false,
1918                bool SkipSrcVcc = false);
1919 
1920   ParseStatus parseEndpgm(OperandVector &Operands);
1921 
1922   ParseStatus parseVOPD(OperandVector &Operands);
1923 };
1924 
1925 } // end anonymous namespace
1926 
1927 // May be called with an integer type of equivalent bitwidth.
1928 static const fltSemantics *getFltSemantics(unsigned Size) {
1929   switch (Size) {
1930   case 4:
1931     return &APFloat::IEEEsingle();
1932   case 8:
1933     return &APFloat::IEEEdouble();
1934   case 2:
1935     return &APFloat::IEEEhalf();
1936   default:
1937     llvm_unreachable("unsupported fp type");
1938   }
1939 }
1940 
1941 static const fltSemantics *getFltSemantics(MVT VT) {
1942   return getFltSemantics(VT.getSizeInBits() / 8);
1943 }
1944 
1945 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1946   switch (OperandType) {
1947   // When a floating-point immediate is used as an operand of type i16, the
1948   // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1949   case AMDGPU::OPERAND_REG_IMM_INT16:
1950   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1951   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1952   case AMDGPU::OPERAND_REG_IMM_INT32:
1953   case AMDGPU::OPERAND_REG_IMM_FP32:
1954   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1955   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1956   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1957   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1958   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1959   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1960   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1961   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1962   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1963   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1964   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1965   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1966   case AMDGPU::OPERAND_KIMM32:
1967   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1968     return &APFloat::IEEEsingle();
1969   case AMDGPU::OPERAND_REG_IMM_INT64:
1970   case AMDGPU::OPERAND_REG_IMM_FP64:
1971   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1972   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1973   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1974     return &APFloat::IEEEdouble();
1975   case AMDGPU::OPERAND_REG_IMM_FP16:
1976   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1977   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1978   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1979   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1980   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1981   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1982   case AMDGPU::OPERAND_KIMM16:
1983     return &APFloat::IEEEhalf();
1984   case AMDGPU::OPERAND_REG_IMM_BF16:
1985   case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1986   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1987   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1988   case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1989   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1990   case AMDGPU::OPERAND_REG_IMM_V2BF16:
1991     return &APFloat::BFloat();
1992   default:
1993     llvm_unreachable("unsupported fp type");
1994   }
1995 }
1996 
1997 //===----------------------------------------------------------------------===//
1998 // Operand
1999 //===----------------------------------------------------------------------===//
2000 
2001 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2002   bool Lost;
2003 
2004   // Convert the literal to the semantics of the requested type.
2005   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
2006                                                APFloat::rmNearestTiesToEven,
2007                                                &Lost);
2008   // We allow precision lost but not overflow or underflow
2009   if (Status != APFloat::opOK &&
2010       Lost &&
2011       ((Status & APFloat::opOverflow)  != 0 ||
2012        (Status & APFloat::opUnderflow) != 0)) {
2013     return false;
2014   }
2015 
2016   return true;
2017 }
2018 
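     // Returns true if Val fits in Size bits either as an unsigned or as a signed
     // value. For example, isSafeTruncation(-1, 16) and isSafeTruncation(0xFFFF, 16)
     // both hold, while isSafeTruncation(0x10000, 16) does not.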
2019 static bool isSafeTruncation(int64_t Val, unsigned Size) {
2020   return isUIntN(Size, Val) || isIntN(Size, Val);
2021 }
2022 
2023 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2024   if (VT.getScalarType() == MVT::i16)
2025     return isInlinableLiteral32(Val, HasInv2Pi);
2026 
2027   if (VT.getScalarType() == MVT::f16)
2028     return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2029 
2030   assert(VT.getScalarType() == MVT::bf16);
2031 
2032   return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2033 }
2034 
2035 bool AMDGPUOperand::isInlinableImm(MVT type) const {
2036 
2037   // This is a hack to enable named inline values like
2038   // shared_base with both 32-bit and 64-bit operands.
2039   // Note that these values are defined as
2040   // 32-bit operands only.
2041   if (isInlineValue()) {
2042     return true;
2043   }
2044 
2045   if (!isImmTy(ImmTyNone)) {
2046     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2047     return false;
2048   }
2049   // TODO: We should avoid using host float here. It would be better to
2050   // check the float bit values, which is what a few other places do.
2051   // We've had bot failures before due to weird NaN support on MIPS hosts.
2052 
2053   APInt Literal(64, Imm.Val);
2054 
2055   if (Imm.IsFPImm) { // We got fp literal token
2056     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2057       return AMDGPU::isInlinableLiteral64(Imm.Val,
2058                                           AsmParser->hasInv2PiInlineImm());
2059     }
2060 
2061     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2062     if (!canLosslesslyConvertToFPType(FPLiteral, type))
2063       return false;
2064 
2065     if (type.getScalarSizeInBits() == 16) {
2066       bool Lost = false;
2067       switch (type.getScalarType().SimpleTy) {
2068       default:
2069         llvm_unreachable("unknown 16-bit type");
2070       case MVT::bf16:
2071         FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
2072                           &Lost);
2073         break;
2074       case MVT::f16:
2075         FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
2076                           &Lost);
2077         break;
2078       case MVT::i16:
2079         FPLiteral.convert(APFloatBase::IEEEsingle(),
2080                           APFloat::rmNearestTiesToEven, &Lost);
2081         break;
2082       }
2083       // We need to use the 32-bit representation here because when a
2084       // floating-point inline constant is used as an i16 operand, its 32-bit
2085       // representation will be used. We need the 32-bit value to check whether
2086       // it is an FP inline constant.
2087       uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2088       return isInlineableLiteralOp16(ImmVal, type,
2089                                      AsmParser->hasInv2PiInlineImm());
2090     }
2091 
2092     // Check if single precision literal is inlinable
2093     return AMDGPU::isInlinableLiteral32(
2094       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2095       AsmParser->hasInv2PiInlineImm());
2096   }
2097 
2098   // We got int literal token.
2099   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2100     return AMDGPU::isInlinableLiteral64(Imm.Val,
2101                                         AsmParser->hasInv2PiInlineImm());
2102   }
2103 
2104   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2105     return false;
2106   }
2107 
2108   if (type.getScalarSizeInBits() == 16) {
2109     return isInlineableLiteralOp16(
2110       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2111       type, AsmParser->hasInv2PiInlineImm());
2112   }
2113 
2114   return AMDGPU::isInlinableLiteral32(
2115     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2116     AsmParser->hasInv2PiInlineImm());
2117 }
2118 
2119 bool AMDGPUOperand::isLiteralImm(MVT type) const {
2120   // Check that this immediate can be added as literal
2121   if (!isImmTy(ImmTyNone)) {
2122     return false;
2123   }
2124 
2125   if (!Imm.IsFPImm) {
2126     // We got int literal token.
2127 
2128     if (type == MVT::f64 && hasFPModifiers()) {
2129       // Cannot apply fp modifiers to int literals while preserving the same
2130       // semantics for VOP1/2/C and VOP3, because of integer truncation.
2131       // To avoid ambiguity, disable these cases.
2132       return false;
2133     }
2134 
2135     unsigned Size = type.getSizeInBits();
2136     if (Size == 64)
2137       Size = 32;
2138 
2139     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2140     // types.
2141     return isSafeTruncation(Imm.Val, Size);
2142   }
2143 
2144   // We got fp literal token
2145   if (type == MVT::f64) { // Expected 64-bit fp operand
2146     // The low 32 bits of the literal would be set to zero, but we accept such literals.
2147     return true;
2148   }
2149 
2150   if (type == MVT::i64) { // Expected 64-bit int operand
2151     // We don't allow fp literals in 64-bit integer instructions. It is
2152     // unclear how we should encode them.
2153     return false;
2154   }
2155 
2156   // We allow fp literals with f16x2 operands assuming that the specified
2157   // literal goes into the lower half and the upper half is zero. We also
2158   // require that the literal may be losslessly converted to f16.
2159   //
2160   // For i16x2 operands, we assume that the specified literal is encoded as a
2161   // single-precision float. This is pretty odd, but it matches SP3 and what
2162   // happens in hardware.
2163   MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
2164                      : (type == MVT::v2i16) ? MVT::f32
2165                      : (type == MVT::v2f32) ? MVT::f32
2166                                             : type;
2167 
2168   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2169   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2170 }
2171 
2172 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2173   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2174 }
2175 
2176 bool AMDGPUOperand::isVRegWithInputMods() const {
2177   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2178          // GFX90A allows DPP on 64-bit operands.
2179          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2180           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2181 }
2182 
2183 template <bool IsFake16>
2184 bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2185   return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2186                              : AMDGPU::VGPR_16_Lo128RegClassID);
2187 }
2188 
2189 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2190   return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2191                              : AMDGPU::VGPR_16RegClassID);
2192 }
2193 
2194 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2195   if (AsmParser->isVI())
2196     return isVReg32();
2197   if (AsmParser->isGFX9Plus())
2198     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2199   return false;
2200 }
2201 
2202 bool AMDGPUOperand::isSDWAFP16Operand() const {
2203   return isSDWAOperand(MVT::f16);
2204 }
2205 
2206 bool AMDGPUOperand::isSDWAFP32Operand() const {
2207   return isSDWAOperand(MVT::f32);
2208 }
2209 
2210 bool AMDGPUOperand::isSDWAInt16Operand() const {
2211   return isSDWAOperand(MVT::i16);
2212 }
2213 
2214 bool AMDGPUOperand::isSDWAInt32Operand() const {
2215   return isSDWAOperand(MVT::i32);
2216 }
2217 
2218 bool AMDGPUOperand::isBoolReg() const {
2219   auto FB = AsmParser->getFeatureBits();
2220   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2221                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2222 }
2223 
2224 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2225 {
2226   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2227   assert(Size == 2 || Size == 4 || Size == 8);
2228 
2229   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2230 
2231   if (Imm.Mods.Abs) {
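       // The abs modifier clears the sign bit and neg flips it. For example, with
       // Size == 4, applying abs to 0xBF800000 (-1.0f) yields 0x3F800000 (+1.0f),
       // and applying neg to that result flips it back to 0xBF800000.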
2232     Val &= ~FpSignMask;
2233   }
2234   if (Imm.Mods.Neg) {
2235     Val ^= FpSignMask;
2236   }
2237 
2238   return Val;
2239 }
2240 
2241 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2242   if (isExpr()) {
2243     Inst.addOperand(MCOperand::createExpr(Expr));
2244     return;
2245   }
2246 
2247   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2248                              Inst.getNumOperands())) {
2249     addLiteralImmOperand(Inst, Imm.Val,
2250                          ApplyModifiers &&
2251                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2252   } else {
2253     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2254     Inst.addOperand(MCOperand::createImm(Imm.Val));
2255     setImmKindNone();
2256   }
2257 }
2258 
2259 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2260   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2261   auto OpNum = Inst.getNumOperands();
2262   // Check that this operand accepts literals
2263   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2264 
2265   if (ApplyModifiers) {
2266     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2267     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2268     Val = applyInputFPModifiers(Val, Size);
2269   }
2270 
2271   APInt Literal(64, Val);
2272   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2273 
2274   if (Imm.IsFPImm) { // We got fp literal token
2275     switch (OpTy) {
2276     case AMDGPU::OPERAND_REG_IMM_INT64:
2277     case AMDGPU::OPERAND_REG_IMM_FP64:
2278     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2279     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2280     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2281       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2282                                        AsmParser->hasInv2PiInlineImm())) {
2283         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2284         setImmKindConst();
2285         return;
2286       }
2287 
2288       // Non-inlineable
2289       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2290         // For fp operands we check if low 32 bits are zeros
2291         if (Literal.getLoBits(32) != 0) {
2292           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2293           "Can't encode literal as exact 64-bit floating-point operand. "
2294           "Low 32-bits will be set to zero");
2295           Val &= 0xffffffff00000000u;
2296         }
2297 
2298         Inst.addOperand(MCOperand::createImm(Val));
2299         setImmKindLiteral();
2300         return;
2301       }
2302 
2303       // We don't allow fp literals in 64-bit integer instructions. It is
2304       // unclear how we should encode them. This case should be checked earlier
2305       // in predicate methods (isLiteralImm())
2306       llvm_unreachable("fp literal in 64-bit integer instruction.");
2307 
2308     case AMDGPU::OPERAND_REG_IMM_BF16:
2309     case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
2310     case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2311     case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2312     case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
2313     case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2314     case AMDGPU::OPERAND_REG_IMM_V2BF16:
2315       if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2316         // This is 1/(2*pi), which is going to be truncated to bf16 with a
2317         // loss of precision. The constant represents the idiomatic fp32 value
2318         // of 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
2319         // bits cleared. Prevent the rounding below.
2320         Inst.addOperand(MCOperand::createImm(0x3e22));
2321         setImmKindLiteral();
2322         return;
2323       }
2324       [[fallthrough]];
2325 
2326     case AMDGPU::OPERAND_REG_IMM_INT32:
2327     case AMDGPU::OPERAND_REG_IMM_FP32:
2328     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2329     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2330     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2331     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2332     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2333     case AMDGPU::OPERAND_REG_IMM_INT16:
2334     case AMDGPU::OPERAND_REG_IMM_FP16:
2335     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2336     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2337     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2338     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2339     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2340     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2341     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2342     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2343     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2344     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2345     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2346     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2347     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2348     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2349     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2350     case AMDGPU::OPERAND_KIMM32:
2351     case AMDGPU::OPERAND_KIMM16:
2352     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2353       bool lost;
2354       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2355       // Convert literal to single precision
2356       FPLiteral.convert(*getOpFltSemantics(OpTy),
2357                         APFloat::rmNearestTiesToEven, &lost);
2358       // We allow precision loss but not overflow or underflow. This should be
2359       // checked earlier in isLiteralImm().
2360 
2361       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2362       Inst.addOperand(MCOperand::createImm(ImmVal));
2363       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2364         setImmKindMandatoryLiteral();
2365       } else {
2366         setImmKindLiteral();
2367       }
2368       return;
2369     }
2370     default:
2371       llvm_unreachable("invalid operand size");
2372     }
2373 
2374     return;
2375   }
2376 
2377   // We got int literal token.
2378   // Only sign extend inline immediates.
2379   switch (OpTy) {
2380   case AMDGPU::OPERAND_REG_IMM_INT32:
2381   case AMDGPU::OPERAND_REG_IMM_FP32:
2382   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2383   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2384   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2385   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2386   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2387   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2388   case AMDGPU::OPERAND_REG_IMM_V2BF16:
2389   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2390   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2391   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2392   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2393   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2394   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2395     if (isSafeTruncation(Val, 32) &&
2396         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2397                                      AsmParser->hasInv2PiInlineImm())) {
2398       Inst.addOperand(MCOperand::createImm(Val));
2399       setImmKindConst();
2400       return;
2401     }
2402 
2403     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2404     setImmKindLiteral();
2405     return;
2406 
2407   case AMDGPU::OPERAND_REG_IMM_INT64:
2408   case AMDGPU::OPERAND_REG_IMM_FP64:
2409   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2410   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2411   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2412     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2413       Inst.addOperand(MCOperand::createImm(Val));
2414       setImmKindConst();
2415       return;
2416     }
2417 
2418     Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2419                                                     : Lo_32(Val);
2420 
2421     Inst.addOperand(MCOperand::createImm(Val));
2422     setImmKindLiteral();
2423     return;
2424 
2425   case AMDGPU::OPERAND_REG_IMM_INT16:
2426   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2427   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2428     if (isSafeTruncation(Val, 16) &&
2429         AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
2430       Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2431       setImmKindConst();
2432       return;
2433     }
2434 
2435     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2436     setImmKindLiteral();
2437     return;
2438 
2439   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2440   case AMDGPU::OPERAND_REG_IMM_FP16:
2441   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2442   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2443     if (isSafeTruncation(Val, 16) &&
2444         AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2445                                        AsmParser->hasInv2PiInlineImm())) {
2446       Inst.addOperand(MCOperand::createImm(Val));
2447       setImmKindConst();
2448       return;
2449     }
2450 
2451     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2452     setImmKindLiteral();
2453     return;
2454 
2455   case AMDGPU::OPERAND_REG_IMM_BF16:
2456   case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
2457   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2458   case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
2459     if (isSafeTruncation(Val, 16) &&
2460         AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2461                                      AsmParser->hasInv2PiInlineImm())) {
2462       Inst.addOperand(MCOperand::createImm(Val));
2463       setImmKindConst();
2464       return;
2465     }
2466 
2467     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2468     setImmKindLiteral();
2469     return;
2470 
2471   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2472   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
2473     assert(isSafeTruncation(Val, 16));
2474     assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2475     Inst.addOperand(MCOperand::createImm(Val));
2476     return;
2477   }
2478   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2479   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2480     assert(isSafeTruncation(Val, 16));
2481     assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2482                                           AsmParser->hasInv2PiInlineImm()));
2483 
2484     Inst.addOperand(MCOperand::createImm(Val));
2485     return;
2486   }
2487 
2488   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2489   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: {
2490     assert(isSafeTruncation(Val, 16));
2491     assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2492                                           AsmParser->hasInv2PiInlineImm()));
2493 
2494     Inst.addOperand(MCOperand::createImm(Val));
2495     return;
2496   }
2497 
2498   case AMDGPU::OPERAND_KIMM32:
2499     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2500     setImmKindMandatoryLiteral();
2501     return;
2502   case AMDGPU::OPERAND_KIMM16:
2503     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2504     setImmKindMandatoryLiteral();
2505     return;
2506   default:
2507     llvm_unreachable("invalid operand size");
2508   }
2509 }
2510 
2511 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2512   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2513 }
2514 
2515 bool AMDGPUOperand::isInlineValue() const {
2516   return isRegKind() && ::isInlineValue(getReg());
2517 }
2518 
2519 //===----------------------------------------------------------------------===//
2520 // AsmParser
2521 //===----------------------------------------------------------------------===//
2522 
2523 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2524   // TODO: make these pre-defined variables read-only.
2525   // Currently there is no suitable machinery in core llvm-mc for this.
2526   // MCSymbol::isRedefinable is intended for another purpose, and
2527   // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2528   MCContext &Ctx = getContext();
2529   MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2530   Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2531 }
2532 
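     // Maps a register kind and a width in bits to the corresponding register
     // class ID, or -1 if no class of that width exists. For example,
     // getRegClass(IS_VGPR, 64) yields AMDGPU::VReg_64RegClassID, while
     // getRegClass(IS_TTMP, 96) yields -1 because there is no 96-bit TTMP class.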
2533 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2534   if (Is == IS_VGPR) {
2535     switch (RegWidth) {
2536       default: return -1;
2537       case 32:
2538         return AMDGPU::VGPR_32RegClassID;
2539       case 64:
2540         return AMDGPU::VReg_64RegClassID;
2541       case 96:
2542         return AMDGPU::VReg_96RegClassID;
2543       case 128:
2544         return AMDGPU::VReg_128RegClassID;
2545       case 160:
2546         return AMDGPU::VReg_160RegClassID;
2547       case 192:
2548         return AMDGPU::VReg_192RegClassID;
2549       case 224:
2550         return AMDGPU::VReg_224RegClassID;
2551       case 256:
2552         return AMDGPU::VReg_256RegClassID;
2553       case 288:
2554         return AMDGPU::VReg_288RegClassID;
2555       case 320:
2556         return AMDGPU::VReg_320RegClassID;
2557       case 352:
2558         return AMDGPU::VReg_352RegClassID;
2559       case 384:
2560         return AMDGPU::VReg_384RegClassID;
2561       case 512:
2562         return AMDGPU::VReg_512RegClassID;
2563       case 1024:
2564         return AMDGPU::VReg_1024RegClassID;
2565     }
2566   } else if (Is == IS_TTMP) {
2567     switch (RegWidth) {
2568       default: return -1;
2569       case 32:
2570         return AMDGPU::TTMP_32RegClassID;
2571       case 64:
2572         return AMDGPU::TTMP_64RegClassID;
2573       case 128:
2574         return AMDGPU::TTMP_128RegClassID;
2575       case 256:
2576         return AMDGPU::TTMP_256RegClassID;
2577       case 512:
2578         return AMDGPU::TTMP_512RegClassID;
2579     }
2580   } else if (Is == IS_SGPR) {
2581     switch (RegWidth) {
2582       default: return -1;
2583       case 32:
2584         return AMDGPU::SGPR_32RegClassID;
2585       case 64:
2586         return AMDGPU::SGPR_64RegClassID;
2587       case 96:
2588         return AMDGPU::SGPR_96RegClassID;
2589       case 128:
2590         return AMDGPU::SGPR_128RegClassID;
2591       case 160:
2592         return AMDGPU::SGPR_160RegClassID;
2593       case 192:
2594         return AMDGPU::SGPR_192RegClassID;
2595       case 224:
2596         return AMDGPU::SGPR_224RegClassID;
2597       case 256:
2598         return AMDGPU::SGPR_256RegClassID;
2599       case 288:
2600         return AMDGPU::SGPR_288RegClassID;
2601       case 320:
2602         return AMDGPU::SGPR_320RegClassID;
2603       case 352:
2604         return AMDGPU::SGPR_352RegClassID;
2605       case 384:
2606         return AMDGPU::SGPR_384RegClassID;
2607       case 512:
2608         return AMDGPU::SGPR_512RegClassID;
2609     }
2610   } else if (Is == IS_AGPR) {
2611     switch (RegWidth) {
2612       default: return -1;
2613       case 32:
2614         return AMDGPU::AGPR_32RegClassID;
2615       case 64:
2616         return AMDGPU::AReg_64RegClassID;
2617       case 96:
2618         return AMDGPU::AReg_96RegClassID;
2619       case 128:
2620         return AMDGPU::AReg_128RegClassID;
2621       case 160:
2622         return AMDGPU::AReg_160RegClassID;
2623       case 192:
2624         return AMDGPU::AReg_192RegClassID;
2625       case 224:
2626         return AMDGPU::AReg_224RegClassID;
2627       case 256:
2628         return AMDGPU::AReg_256RegClassID;
2629       case 288:
2630         return AMDGPU::AReg_288RegClassID;
2631       case 320:
2632         return AMDGPU::AReg_320RegClassID;
2633       case 352:
2634         return AMDGPU::AReg_352RegClassID;
2635       case 384:
2636         return AMDGPU::AReg_384RegClassID;
2637       case 512:
2638         return AMDGPU::AReg_512RegClassID;
2639       case 1024:
2640         return AMDGPU::AReg_1024RegClassID;
2641     }
2642   }
2643   return -1;
2644 }
2645 
2646 static MCRegister getSpecialRegForName(StringRef RegName) {
2647   return StringSwitch<unsigned>(RegName)
2648     .Case("exec", AMDGPU::EXEC)
2649     .Case("vcc", AMDGPU::VCC)
2650     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2651     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2652     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2653     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2654     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2655     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2656     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2657     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2658     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2659     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2660     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2661     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2662     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2663     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2664     .Case("m0", AMDGPU::M0)
2665     .Case("vccz", AMDGPU::SRC_VCCZ)
2666     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2667     .Case("execz", AMDGPU::SRC_EXECZ)
2668     .Case("src_execz", AMDGPU::SRC_EXECZ)
2669     .Case("scc", AMDGPU::SRC_SCC)
2670     .Case("src_scc", AMDGPU::SRC_SCC)
2671     .Case("tba", AMDGPU::TBA)
2672     .Case("tma", AMDGPU::TMA)
2673     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2674     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2675     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2676     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2677     .Case("vcc_lo", AMDGPU::VCC_LO)
2678     .Case("vcc_hi", AMDGPU::VCC_HI)
2679     .Case("exec_lo", AMDGPU::EXEC_LO)
2680     .Case("exec_hi", AMDGPU::EXEC_HI)
2681     .Case("tma_lo", AMDGPU::TMA_LO)
2682     .Case("tma_hi", AMDGPU::TMA_HI)
2683     .Case("tba_lo", AMDGPU::TBA_LO)
2684     .Case("tba_hi", AMDGPU::TBA_HI)
2685     .Case("pc", AMDGPU::PC_REG)
2686     .Case("null", AMDGPU::SGPR_NULL)
2687     .Default(AMDGPU::NoRegister);
2688 }
2689 
2690 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2691                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2692   auto R = parseRegister();
2693   if (!R) return true;
2694   assert(R->isReg());
2695   RegNo = R->getReg();
2696   StartLoc = R->getStartLoc();
2697   EndLoc = R->getEndLoc();
2698   return false;
2699 }
2700 
2701 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2702                                     SMLoc &EndLoc) {
2703   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2704 }
2705 
2706 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2707                                               SMLoc &EndLoc) {
2708   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2709   bool PendingErrors = getParser().hasPendingError();
2710   getParser().clearPendingErrors();
2711   if (PendingErrors)
2712     return ParseStatus::Failure;
2713   if (Result)
2714     return ParseStatus::NoMatch;
2715   return ParseStatus::Success;
2716 }
2717 
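     // Extends the register list currently being parsed by one register. Special
     // registers are merged from known lo/hi pairs (e.g. vcc_lo followed by vcc_hi
     // becomes vcc with RegWidth == 64); regular registers must have consecutive
     // indices, and each one grows RegWidth by 32, so [s0,s1,s2,s3] ends up
     // equivalent to s[0:3].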
2718 bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2719                                             RegisterKind RegKind,
2720                                             MCRegister Reg1, SMLoc Loc) {
2721   switch (RegKind) {
2722   case IS_SPECIAL:
2723     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2724       Reg = AMDGPU::EXEC;
2725       RegWidth = 64;
2726       return true;
2727     }
2728     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2729       Reg = AMDGPU::FLAT_SCR;
2730       RegWidth = 64;
2731       return true;
2732     }
2733     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2734       Reg = AMDGPU::XNACK_MASK;
2735       RegWidth = 64;
2736       return true;
2737     }
2738     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2739       Reg = AMDGPU::VCC;
2740       RegWidth = 64;
2741       return true;
2742     }
2743     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2744       Reg = AMDGPU::TBA;
2745       RegWidth = 64;
2746       return true;
2747     }
2748     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2749       Reg = AMDGPU::TMA;
2750       RegWidth = 64;
2751       return true;
2752     }
2753     Error(Loc, "register does not fit in the list");
2754     return false;
2755   case IS_VGPR:
2756   case IS_SGPR:
2757   case IS_AGPR:
2758   case IS_TTMP:
2759     if (Reg1 != Reg + RegWidth / 32) {
2760       Error(Loc, "registers in a list must have consecutive indices");
2761       return false;
2762     }
2763     RegWidth += 32;
2764     return true;
2765   default:
2766     llvm_unreachable("unexpected register kind");
2767   }
2768 }
2769 
2770 struct RegInfo {
2771   StringLiteral Name;
2772   RegisterKind Kind;
2773 };
2774 
2775 static constexpr RegInfo RegularRegisters[] = {
2776   {{"v"},    IS_VGPR},
2777   {{"s"},    IS_SGPR},
2778   {{"ttmp"}, IS_TTMP},
2779   {{"acc"},  IS_AGPR},
2780   {{"a"},    IS_AGPR},
2781 };
2782 
2783 static bool isRegularReg(RegisterKind Kind) {
2784   return Kind == IS_VGPR ||
2785          Kind == IS_SGPR ||
2786          Kind == IS_TTMP ||
2787          Kind == IS_AGPR;
2788 }
2789 
2790 static const RegInfo* getRegularRegInfo(StringRef Str) {
2791   for (const RegInfo &Reg : RegularRegisters)
2792     if (Str.starts_with(Reg.Name))
2793       return &Reg;
2794   return nullptr;
2795 }
2796 
2797 static bool getRegNum(StringRef Str, unsigned& Num) {
2798   return !Str.getAsInteger(10, Num);
2799 }
2800 
2801 bool
2802 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2803                             const AsmToken &NextToken) const {
2804 
2805   // A list of consecutive registers: [s0,s1,s2,s3]
2806   if (Token.is(AsmToken::LBrac))
2807     return true;
2808 
2809   if (!Token.is(AsmToken::Identifier))
2810     return false;
2811 
2812   // A single register like s0 or a range of registers like s[0:1]
2813 
2814   StringRef Str = Token.getString();
2815   const RegInfo *Reg = getRegularRegInfo(Str);
2816   if (Reg) {
2817     StringRef RegName = Reg->Name;
2818     StringRef RegSuffix = Str.substr(RegName.size());
2819     if (!RegSuffix.empty()) {
2820       RegSuffix.consume_back(".l");
2821       RegSuffix.consume_back(".h");
2822       unsigned Num;
2823       // A single register with an index: rXX
2824       if (getRegNum(RegSuffix, Num))
2825         return true;
2826     } else {
2827       // A range of registers: r[XX:YY].
2828       if (NextToken.is(AsmToken::LBrac))
2829         return true;
2830     }
2831   }
2832 
2833   return getSpecialRegForName(Str).isValid();
2834 }
2835 
2836 bool
2837 AMDGPUAsmParser::isRegister()
2838 {
2839   return isRegister(getToken(), peekToken());
2840 }
2841 
2842 MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2843                                           unsigned SubReg, unsigned RegWidth,
2844                                           SMLoc Loc) {
2845   assert(isRegularReg(RegKind));
2846 
2847   unsigned AlignSize = 1;
2848   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2849     // SGPR and TTMP registers must be aligned.
2850     // Max required alignment is 4 dwords.
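         // For example, a 128-bit SGPR tuple must start at an index that is a
         // multiple of 4, so s[4:7] is accepted while s[2:5] is rejected below.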
2851     AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2852   }
2853 
2854   if (RegNum % AlignSize != 0) {
2855     Error(Loc, "invalid register alignment");
2856     return MCRegister();
2857   }
2858 
2859   unsigned RegIdx = RegNum / AlignSize;
2860   int RCID = getRegClass(RegKind, RegWidth);
2861   if (RCID == -1) {
2862     Error(Loc, "invalid or unsupported register size");
2863     return MCRegister();
2864   }
2865 
2866   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2867   const MCRegisterClass RC = TRI->getRegClass(RCID);
2868   if (RegIdx >= RC.getNumRegs()) {
2869     Error(Loc, "register index is out of range");
2870     return MCRegister();
2871   }
2872 
2873   MCRegister Reg = RC.getRegister(RegIdx);
2874 
2875   if (SubReg) {
2876     Reg = TRI->getSubReg(Reg, SubReg);
2877 
2878     // Currently all regular registers have their .l and .h subregisters, so
2879     // we should never need to generate an error here.
2880     assert(Reg && "Invalid subregister!");
2881   }
2882 
2883   return Reg;
2884 }
2885 
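     // Parses the bracketed index part of a register range, e.g. "[8:11]" in
     // v[8:11], yielding Num == 8 and RegWidth == 128 (4 dwords). A single index
     // such as "[5]" is also accepted and gives a 32-bit width.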
2886 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2887   int64_t RegLo, RegHi;
2888   if (!skipToken(AsmToken::LBrac, "missing register index"))
2889     return false;
2890 
2891   SMLoc FirstIdxLoc = getLoc();
2892   SMLoc SecondIdxLoc;
2893 
2894   if (!parseExpr(RegLo))
2895     return false;
2896 
2897   if (trySkipToken(AsmToken::Colon)) {
2898     SecondIdxLoc = getLoc();
2899     if (!parseExpr(RegHi))
2900       return false;
2901   } else {
2902     RegHi = RegLo;
2903   }
2904 
2905   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2906     return false;
2907 
2908   if (!isUInt<32>(RegLo)) {
2909     Error(FirstIdxLoc, "invalid register index");
2910     return false;
2911   }
2912 
2913   if (!isUInt<32>(RegHi)) {
2914     Error(SecondIdxLoc, "invalid register index");
2915     return false;
2916   }
2917 
2918   if (RegLo > RegHi) {
2919     Error(FirstIdxLoc, "first register index should not exceed second index");
2920     return false;
2921   }
2922 
2923   Num = static_cast<unsigned>(RegLo);
2924   RegWidth = 32 * ((RegHi - RegLo) + 1);
2925   return true;
2926 }
2927 
2928 MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2929                                             unsigned &RegNum,
2930                                             unsigned &RegWidth,
2931                                             SmallVectorImpl<AsmToken> &Tokens) {
2932   assert(isToken(AsmToken::Identifier));
2933   MCRegister Reg = getSpecialRegForName(getTokenStr());
2934   if (Reg) {
2935     RegNum = 0;
2936     RegWidth = 32;
2937     RegKind = IS_SPECIAL;
2938     Tokens.push_back(getToken());
2939     lex(); // skip register name
2940   }
2941   return Reg;
2942 }
2943 
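     // Parses a regular register reference such as v5, s[0:1] or v1.l. A ".l" or
     // ".h" suffix on a single 32-bit register selects its low or high 16-bit
     // subregister (AMDGPU::lo16 or AMDGPU::hi16).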
2944 MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2945                                             unsigned &RegNum,
2946                                             unsigned &RegWidth,
2947                                             SmallVectorImpl<AsmToken> &Tokens) {
2948   assert(isToken(AsmToken::Identifier));
2949   StringRef RegName = getTokenStr();
2950   auto Loc = getLoc();
2951 
2952   const RegInfo *RI = getRegularRegInfo(RegName);
2953   if (!RI) {
2954     Error(Loc, "invalid register name");
2955     return MCRegister();
2956   }
2957 
2958   Tokens.push_back(getToken());
2959   lex(); // skip register name
2960 
2961   RegKind = RI->Kind;
2962   StringRef RegSuffix = RegName.substr(RI->Name.size());
2963   unsigned SubReg = NoSubRegister;
2964   if (!RegSuffix.empty()) {
2965     if (RegSuffix.consume_back(".l"))
2966       SubReg = AMDGPU::lo16;
2967     else if (RegSuffix.consume_back(".h"))
2968       SubReg = AMDGPU::hi16;
2969 
2970     // Single 32-bit register: vXX.
2971     if (!getRegNum(RegSuffix, RegNum)) {
2972       Error(Loc, "invalid register index");
2973       return MCRegister();
2974     }
2975     RegWidth = 32;
2976   } else {
2977     // Range of registers: v[XX:YY]. ":YY" is optional.
2978     if (!ParseRegRange(RegNum, RegWidth))
2979       return MCRegister();
2980   }
2981 
2982   return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2983 }
2984 
2985 MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2986                                          unsigned &RegNum, unsigned &RegWidth,
2987                                          SmallVectorImpl<AsmToken> &Tokens) {
2988   MCRegister Reg;
2989   auto ListLoc = getLoc();
2990 
2991   if (!skipToken(AsmToken::LBrac,
2992                  "expected a register or a list of registers")) {
2993     return MCRegister();
2994   }
2995 
2996   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2997 
2998   auto Loc = getLoc();
2999   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3000     return MCRegister();
3001   if (RegWidth != 32) {
3002     Error(Loc, "expected a single 32-bit register");
3003     return MCRegister();
3004   }
3005 
3006   for (; trySkipToken(AsmToken::Comma); ) {
3007     RegisterKind NextRegKind;
3008     MCRegister NextReg;
3009     unsigned NextRegNum, NextRegWidth;
3010     Loc = getLoc();
3011 
3012     if (!ParseAMDGPURegister(NextRegKind, NextReg,
3013                              NextRegNum, NextRegWidth,
3014                              Tokens)) {
3015       return MCRegister();
3016     }
3017     if (NextRegWidth != 32) {
3018       Error(Loc, "expected a single 32-bit register");
3019       return MCRegister();
3020     }
3021     if (NextRegKind != RegKind) {
3022       Error(Loc, "registers in a list must be of the same kind");
3023       return MCRegister();
3024     }
3025     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3026       return MCRegister();
3027   }
3028 
3029   if (!skipToken(AsmToken::RBrac,
3030                  "expected a comma or a closing square bracket")) {
3031     return MCRegister();
3032   }
3033 
3034   if (isRegularReg(RegKind))
3035     Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3036 
3037   return Reg;
3038 }
3039 
3040 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3041                                           MCRegister &Reg, unsigned &RegNum,
3042                                           unsigned &RegWidth,
3043                                           SmallVectorImpl<AsmToken> &Tokens) {
3044   auto Loc = getLoc();
3045   Reg = MCRegister();
3046 
3047   if (isToken(AsmToken::Identifier)) {
3048     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3049     if (!Reg)
3050       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3051   } else {
3052     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3053   }
3054 
3055   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3056   if (!Reg) {
3057     assert(Parser.hasPendingError());
3058     return false;
3059   }
3060 
3061   if (!subtargetHasRegister(*TRI, Reg)) {
3062     if (Reg == AMDGPU::SGPR_NULL) {
3063       Error(Loc, "'null' operand is not supported on this GPU");
3064     } else {
3065       Error(Loc, Twine(AMDGPUInstPrinter::getRegisterName(Reg)) +
3066                      " register not available on this GPU");
3067     }
3068     return false;
3069   }
3070 
3071   return true;
3072 }
3073 
3074 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3075                                           MCRegister &Reg, unsigned &RegNum,
3076                                           unsigned &RegWidth,
3077                                           bool RestoreOnFailure /*=false*/) {
3078   Reg = MCRegister();
3079 
3080   SmallVector<AsmToken, 1> Tokens;
3081   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3082     if (RestoreOnFailure) {
3083       while (!Tokens.empty()) {
3084         getLexer().UnLex(Tokens.pop_back_val());
3085       }
3086     }
3087     return true;
3088   }
3089   return false;
3090 }
3091 
3092 std::optional<StringRef>
3093 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3094   switch (RegKind) {
3095   case IS_VGPR:
3096     return StringRef(".amdgcn.next_free_vgpr");
3097   case IS_SGPR:
3098     return StringRef(".amdgcn.next_free_sgpr");
3099   default:
3100     return std::nullopt;
3101   }
3102 }
3103 
3104 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3105   auto SymbolName = getGprCountSymbolName(RegKind);
3106   assert(SymbolName && "initializing invalid register kind");
3107   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3108   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
3109 }
3110 
3111 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3112                                             unsigned DwordRegIndex,
3113                                             unsigned RegWidth) {
3114   // Symbols are only defined for GCN targets
3115   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
3116     return true;
3117 
3118   auto SymbolName = getGprCountSymbolName(RegKind);
3119   if (!SymbolName)
3120     return true;
3121   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
3122 
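       // Example (illustrative): parsing v[6:7] gives DwordRegIndex = 6 and
       // RegWidth = 64, so NewMax is 7 and .amdgcn.next_free_vgpr is raised to
       // at least 8.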
3123   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
3124   int64_t OldCount;
3125 
3126   if (!Sym->isVariable())
3127     return !Error(getLoc(),
3128                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3129   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
3130     return !Error(
3131         getLoc(),
3132         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3133 
3134   if (OldCount <= NewMax)
3135     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
3136 
3137   return true;
3138 }
3139 
3140 std::unique_ptr<AMDGPUOperand>
3141 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3142   const auto &Tok = getToken();
3143   SMLoc StartLoc = Tok.getLoc();
3144   SMLoc EndLoc = Tok.getEndLoc();
3145   RegisterKind RegKind;
3146   MCRegister Reg;
3147   unsigned RegNum, RegWidth;
3148 
3149   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3150     return nullptr;
3151   }
3152   if (isHsaAbi(getSTI())) {
3153     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3154       return nullptr;
3155   } else
3156     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3157   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3158 }
3159 
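     // Parses an immediate operand. A value wrapped in lit(...) is parsed
     // recursively and marked so that it is encoded as a literal; e.g.
     // (an illustrative operand) lit(1.0) instead of a plain 1.0.
     // Floating-point literals are stored as the bit pattern of an IEEE double.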
3160 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3161                                       bool HasSP3AbsModifier, bool HasLit) {
3162   // TODO: add syntactic sugar for 1/(2*PI)
3163 
3164   if (isRegister())
3165     return ParseStatus::NoMatch;
3166   assert(!isModifier());
3167 
3168   if (!HasLit) {
3169     HasLit = trySkipId("lit");
3170     if (HasLit) {
3171       if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
3172         return ParseStatus::Failure;
3173       ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3174       if (S.isSuccess() &&
3175           !skipToken(AsmToken::RParen, "expected closing parentheses"))
3176         return ParseStatus::Failure;
3177       return S;
3178     }
3179   }
3180 
3181   const auto& Tok = getToken();
3182   const auto& NextTok = peekToken();
3183   bool IsReal = Tok.is(AsmToken::Real);
3184   SMLoc S = getLoc();
3185   bool Negate = false;
3186 
3187   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3188     lex();
3189     IsReal = true;
3190     Negate = true;
3191   }
3192 
3193   AMDGPUOperand::Modifiers Mods;
3194   Mods.Lit = HasLit;
3195 
3196   if (IsReal) {
3197     // Floating-point expressions are not supported.
3198     // Can only allow floating-point literals with an
3199     // optional sign.
3200 
3201     StringRef Num = getTokenStr();
3202     lex();
3203 
3204     APFloat RealVal(APFloat::IEEEdouble());
3205     auto roundMode = APFloat::rmNearestTiesToEven;
3206     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3207       return ParseStatus::Failure;
3208     if (Negate)
3209       RealVal.changeSign();
3210 
3211     Operands.push_back(
3212       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3213                                AMDGPUOperand::ImmTyNone, true));
3214     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3215     Op.setModifiers(Mods);
3216 
3217     return ParseStatus::Success;
3218 
3219   } else {
3220     int64_t IntVal;
3221     const MCExpr *Expr;
3222     SMLoc S = getLoc();
3223 
3224     if (HasSP3AbsModifier) {
3225       // This is a workaround for handling expressions
3226       // used as arguments of the SP3 'abs' modifier, for example:
3227       //     |1.0|
3228       //     |-1|
3229       //     |1+x|
3230       // This syntax is not compatible with the syntax of standard
3231       // MC expressions (due to the trailing '|').
3232       SMLoc EndLoc;
3233       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3234         return ParseStatus::Failure;
3235     } else {
3236       if (Parser.parseExpression(Expr))
3237         return ParseStatus::Failure;
3238     }
3239 
3240     if (Expr->evaluateAsAbsolute(IntVal)) {
3241       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3242       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3243       Op.setModifiers(Mods);
3244     } else {
3245       if (HasLit)
3246         return ParseStatus::NoMatch;
3247       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3248     }
3249 
3250     return ParseStatus::Success;
3251   }
3252 
3253   return ParseStatus::NoMatch;
3254 }
3255 
3256 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3257   if (!isRegister())
3258     return ParseStatus::NoMatch;
3259 
3260   if (auto R = parseRegister()) {
3261     assert(R->isReg());
3262     Operands.push_back(std::move(R));
3263     return ParseStatus::Success;
3264   }
3265   return ParseStatus::Failure;
3266 }
3267 
3268 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3269                                            bool HasSP3AbsMod, bool HasLit) {
3270   ParseStatus Res = parseReg(Operands);
3271   if (!Res.isNoMatch())
3272     return Res;
3273   if (isModifier())
3274     return ParseStatus::NoMatch;
3275   return parseImm(Operands, HasSP3AbsMod, HasLit);
3276 }
3277 
3278 bool
3279 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3280   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3281     const auto &str = Token.getString();
3282     return str == "abs" || str == "neg" || str == "sext";
3283   }
3284   return false;
3285 }
3286 
3287 bool
3288 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3289   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3290 }
3291 
3292 bool
3293 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3294   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3295 }
3296 
3297 bool
3298 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3299   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3300 }
3301 
3302 // Check if this is an operand modifier or an opcode modifier
3303 // which may look like an expression but is not. We should
3304 // avoid parsing these modifiers as expressions. Currently
3305 // recognized sequences are:
3306 //   |...|
3307 //   abs(...)
3308 //   neg(...)
3309 //   sext(...)
3310 //   -reg
3311 //   -|...|
3312 //   -abs(...)
3313 //   name:...
3314 //
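     // For example (illustrative): in "v_add_f32 v0, -v1, |v2|" the leading '-'
     // and the bars are modifiers, while in "v_add_f32 v0, -1, v1" the '-'
     // begins an expression; "row_shl:1" matches the name:... form above.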
3315 bool
3316 AMDGPUAsmParser::isModifier() {
3317 
3318   AsmToken Tok = getToken();
3319   AsmToken NextToken[2];
3320   peekTokens(NextToken);
3321 
3322   return isOperandModifier(Tok, NextToken[0]) ||
3323          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3324          isOpcodeModifierWithVal(Tok, NextToken[0]);
3325 }
3326 
3327 // Check if the current token is an SP3 'neg' modifier.
3328 // Currently this modifier is allowed in the following context:
3329 //
3330 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3331 // 2. Before an 'abs' modifier: -abs(...)
3332 // 3. Before an SP3 'abs' modifier: -|...|
3333 //
3334 // In all other cases "-" is handled as a part
3335 // of an expression that follows the sign.
3336 //
3337 // Note: When "-" is followed by an integer literal N,
3338 // this is interpreted as integer negation rather
3339 // than a floating-point NEG modifier applied to N.
3340 // Besides being counter-intuitive, such use of the floating-point
3341 // NEG modifier would have resulted in different meanings
3342 // of integer literals used with VOP1/2/C and VOP3,
3343 // for example:
3344 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3345 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3346 // Negative fp literals with a preceding "-" are
3347 // handled likewise for uniformity.
3348 //
3349 bool
3350 AMDGPUAsmParser::parseSP3NegModifier() {
3351 
3352   AsmToken NextToken[2];
3353   peekTokens(NextToken);
3354 
3355   if (isToken(AsmToken::Minus) &&
3356       (isRegister(NextToken[0], NextToken[1]) ||
3357        NextToken[0].is(AsmToken::Pipe) ||
3358        isId(NextToken[0], "abs"))) {
3359     lex();
3360     return true;
3361   }
3362 
3363   return false;
3364 }
3365 
3366 ParseStatus
3367 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3368                                               bool AllowImm) {
3369   bool Neg, SP3Neg;
3370   bool Abs, SP3Abs;
3371   bool Lit;
3372   SMLoc Loc;
3373 
3374   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3375   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3376     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3377 
3378   SP3Neg = parseSP3NegModifier();
3379 
3380   Loc = getLoc();
3381   Neg = trySkipId("neg");
3382   if (Neg && SP3Neg)
3383     return Error(Loc, "expected register or immediate");
3384   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3385     return ParseStatus::Failure;
3386 
3387   Abs = trySkipId("abs");
3388   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3389     return ParseStatus::Failure;
3390 
3391   Lit = trySkipId("lit");
3392   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3393     return ParseStatus::Failure;
3394 
3395   Loc = getLoc();
3396   SP3Abs = trySkipToken(AsmToken::Pipe);
3397   if (Abs && SP3Abs)
3398     return Error(Loc, "expected register or immediate");
3399 
3400   ParseStatus Res;
3401   if (AllowImm) {
3402     Res = parseRegOrImm(Operands, SP3Abs, Lit);
3403   } else {
3404     Res = parseReg(Operands);
3405   }
3406   if (!Res.isSuccess())
3407     return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3408 
3409   if (Lit && !Operands.back()->isImm())
3410     Error(Loc, "expected immediate with lit modifier");
3411 
3412   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3413     return ParseStatus::Failure;
3414   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3415     return ParseStatus::Failure;
3416   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3417     return ParseStatus::Failure;
3418   if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3419     return ParseStatus::Failure;
3420 
3421   AMDGPUOperand::Modifiers Mods;
3422   Mods.Abs = Abs || SP3Abs;
3423   Mods.Neg = Neg || SP3Neg;
3424   Mods.Lit = Lit;
3425 
3426   if (Mods.hasFPModifiers() || Lit) {
3427     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3428     if (Op.isExpr())
3429       return Error(Op.getStartLoc(), "expected an absolute expression");
3430     Op.setModifiers(Mods);
3431   }
3432   return ParseStatus::Success;
3433 }
3434 
3435 ParseStatus
3436 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3437                                                bool AllowImm) {
3438   bool Sext = trySkipId("sext");
3439   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3440     return ParseStatus::Failure;
3441 
3442   ParseStatus Res;
3443   if (AllowImm) {
3444     Res = parseRegOrImm(Operands);
3445   } else {
3446     Res = parseReg(Operands);
3447   }
3448   if (!Res.isSuccess())
3449     return Sext ? ParseStatus::Failure : Res;
3450 
3451   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3452     return ParseStatus::Failure;
3453 
3454   AMDGPUOperand::Modifiers Mods;
3455   Mods.Sext = Sext;
3456 
3457   if (Mods.hasIntModifiers()) {
3458     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3459     if (Op.isExpr())
3460       return Error(Op.getStartLoc(), "expected an absolute expression");
3461     Op.setModifiers(Mods);
3462   }
3463 
3464   return ParseStatus::Success;
3465 }
3466 
3467 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3468   return parseRegOrImmWithFPInputMods(Operands, false);
3469 }
3470 
3471 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3472   return parseRegOrImmWithIntInputMods(Operands, false);
3473 }
3474 
3475 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3476   auto Loc = getLoc();
3477   if (trySkipId("off")) {
3478     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3479                                                 AMDGPUOperand::ImmTyOff, false));
3480     return ParseStatus::Success;
3481   }
3482 
3483   if (!isRegister())
3484     return ParseStatus::NoMatch;
3485 
3486   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3487   if (Reg) {
3488     Operands.push_back(std::move(Reg));
3489     return ParseStatus::Success;
3490   }
3491 
3492   return ParseStatus::Failure;
3493 }
3494 
3495 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3496   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3497 
3498   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3499       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3500       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3501       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3502     return Match_InvalidOperand;
3503 
3504   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3505       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3506     // v_mac_f32/16 allow only dst_sel == DWORD.
3507     auto OpNum =
3508         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3509     const auto &Op = Inst.getOperand(OpNum);
3510     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3511       return Match_InvalidOperand;
3512     }
3513   }
3514 
3515   return Match_Success;
3516 }
3517 
3518 static ArrayRef<unsigned> getAllVariants() {
3519   static const unsigned Variants[] = {
3520     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3521     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3522     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3523   };
3524 
3525   return ArrayRef(Variants);
3526 }
3527 
3528 // Which asm variants we should check.
3529 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3530   if (isForcedDPP() && isForcedVOP3()) {
3531     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3532     return ArrayRef(Variants);
3533   }
3534   if (getForcedEncodingSize() == 32) {
3535     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3536     return ArrayRef(Variants);
3537   }
3538 
3539   if (isForcedVOP3()) {
3540     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3541     return ArrayRef(Variants);
3542   }
3543 
3544   if (isForcedSDWA()) {
3545     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3546                                         AMDGPUAsmVariants::SDWA9};
3547     return ArrayRef(Variants);
3548   }
3549 
3550   if (isForcedDPP()) {
3551     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3552     return ArrayRef(Variants);
3553   }
3554 
3555   return getAllVariants();
3556 }
3557 
3558 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3559   if (isForcedDPP() && isForcedVOP3())
3560     return "e64_dpp";
3561 
3562   if (getForcedEncodingSize() == 32)
3563     return "e32";
3564 
3565   if (isForcedVOP3())
3566     return "e64";
3567 
3568   if (isForcedSDWA())
3569     return "sdwa";
3570 
3571   if (isForcedDPP())
3572     return "dpp";
3573 
3574   return "";
3575 }
3576 
3577 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3578   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3579   for (MCPhysReg Reg : Desc.implicit_uses()) {
3580     switch (Reg) {
3581     case AMDGPU::FLAT_SCR:
3582     case AMDGPU::VCC:
3583     case AMDGPU::VCC_LO:
3584     case AMDGPU::VCC_HI:
3585     case AMDGPU::M0:
3586       return Reg;
3587     default:
3588       break;
3589     }
3590   }
3591   return AMDGPU::NoRegister;
3592 }
3593 
3594 // NB: This code is correct only when used to check constant
3595 // bus limitations because GFX7 supports no f16 inline constants.
3596 // Note that there are no cases when a GFX7 opcode violates
3597 // constant bus limitations due to the use of an f16 constant.
3598 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3599                                        unsigned OpIdx) const {
3600   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3601 
3602   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3603       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3604     return false;
3605   }
3606 
3607   const MCOperand &MO = Inst.getOperand(OpIdx);
3608 
3609   int64_t Val = MO.getImm();
3610   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3611 
3612   switch (OpSize) { // expected operand size
3613   case 8:
3614     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3615   case 4:
3616     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3617   case 2: {
3618     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3619     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3620         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3621         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3622       return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());
3623 
3624     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3625         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3626         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3627       return AMDGPU::isInlinableLiteralV2I16(Val);
3628 
3629     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3630         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3631         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3632       return AMDGPU::isInlinableLiteralV2F16(Val);
3633 
3634     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3635         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 ||
3636         OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3637       return AMDGPU::isInlinableLiteralV2BF16(Val);
3638 
3639     if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3640         OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
3641         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
3642         OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
3643       return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());
3644 
3645     if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3646         OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
3647         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
3648         OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
3649       return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());
3650 
3651     llvm_unreachable("invalid operand type");
3652   }
3653   default:
3654     llvm_unreachable("invalid operand size");
3655   }
3656 }
3657 
3658 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3659   if (!isGFX10Plus())
3660     return 1;
3661 
3662   switch (Opcode) {
3663   // 64-bit shift instructions can use only one scalar value input
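       // (For example, and only as an illustration, "v_lshlrev_b64 v[0:1], s0, s[2:3]"
       //  would need two scalar operands and exceed this limit.)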
3664   case AMDGPU::V_LSHLREV_B64_e64:
3665   case AMDGPU::V_LSHLREV_B64_gfx10:
3666   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3667   case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3668   case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3669   case AMDGPU::V_LSHRREV_B64_e64:
3670   case AMDGPU::V_LSHRREV_B64_gfx10:
3671   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3672   case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3673   case AMDGPU::V_ASHRREV_I64_e64:
3674   case AMDGPU::V_ASHRREV_I64_gfx10:
3675   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3676   case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3677   case AMDGPU::V_LSHL_B64_e64:
3678   case AMDGPU::V_LSHR_B64_e64:
3679   case AMDGPU::V_ASHR_I64_e64:
3680     return 1;
3681   default:
3682     return 2;
3683   }
3684 }
3685 
3686 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3687 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3688 
3689 // Get regular operand indices in the same order as specified
3690 // in the instruction (but append mandatory literals to the end).
3691 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3692                                            bool AddMandatoryLiterals = false) {
3693 
3694   int16_t ImmIdx =
3695       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3696 
3697   if (isVOPD(Opcode)) {
3698     int16_t ImmDeferredIdx =
3699         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3700                              : -1;
3701 
3702     return {getNamedOperandIdx(Opcode, OpName::src0X),
3703             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3704             getNamedOperandIdx(Opcode, OpName::src0Y),
3705             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3706             ImmDeferredIdx,
3707             ImmIdx};
3708   }
3709 
3710   return {getNamedOperandIdx(Opcode, OpName::src0),
3711           getNamedOperandIdx(Opcode, OpName::src1),
3712           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3713 }
3714 
3715 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3716   const MCOperand &MO = Inst.getOperand(OpIdx);
3717   if (MO.isImm())
3718     return !isInlineConstant(Inst, OpIdx);
3719   if (MO.isReg()) {
3720     auto Reg = MO.getReg();
3721     if (!Reg)
3722       return false;
3723     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3724     auto PReg = mc2PseudoReg(Reg);
3725     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3726   }
3727   return true;
3728 }
3729 
3730 // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3731 // Writelane is special in that it can use SGPR and M0 (which would normally
3732 // count as using the constant bus twice - but in this case it is allowed since
3733 // the lane selector doesn't count as a use of the constant bus). However, it is
3734 // still required to abide by the 1 SGPR rule.
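     // For example (illustrative): "v_writelane_b32 v0, s1, m0" is accepted even
     // though both s1 and m0 would otherwise count against the constant bus.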
3735 static bool checkWriteLane(const MCInst &Inst) {
3736   const unsigned Opcode = Inst.getOpcode();
3737   if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3738     return false;
3739   const MCOperand &LaneSelOp = Inst.getOperand(2);
3740   if (!LaneSelOp.isReg())
3741     return false;
3742   auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3743   return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3744 }
3745 
3746 bool AMDGPUAsmParser::validateConstantBusLimitations(
3747     const MCInst &Inst, const OperandVector &Operands) {
3748   const unsigned Opcode = Inst.getOpcode();
3749   const MCInstrDesc &Desc = MII.get(Opcode);
3750   MCRegister LastSGPR;
3751   unsigned ConstantBusUseCount = 0;
3752   unsigned NumLiterals = 0;
3753   unsigned LiteralSize;
3754 
3755   if (!(Desc.TSFlags &
3756         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3757          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3758       !isVOPD(Opcode))
3759     return true;
3760 
3761   if (checkWriteLane(Inst))
3762     return true;
3763 
3764   // Check special imm operands (used by madmk, etc)
3765   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3766     ++NumLiterals;
3767     LiteralSize = 4;
3768   }
3769 
3770   SmallDenseSet<unsigned> SGPRsUsed;
3771   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3772   if (SGPRUsed != AMDGPU::NoRegister) {
3773     SGPRsUsed.insert(SGPRUsed);
3774     ++ConstantBusUseCount;
3775   }
3776 
3777   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3778 
3779   for (int OpIdx : OpIndices) {
3780     if (OpIdx == -1)
3781       continue;
3782 
3783     const MCOperand &MO = Inst.getOperand(OpIdx);
3784     if (usesConstantBus(Inst, OpIdx)) {
3785       if (MO.isReg()) {
3786         LastSGPR = mc2PseudoReg(MO.getReg());
3787         // Pairs of registers with a partial intersection like these
3788         //   s0, s[0:1]
3789         //   flat_scratch_lo, flat_scratch
3790         //   flat_scratch_lo, flat_scratch_hi
3791         // are theoretically valid but they are disabled anyway.
3792         // Note that this code mimics SIInstrInfo::verifyInstruction
3793         if (SGPRsUsed.insert(LastSGPR).second) {
3794           ++ConstantBusUseCount;
3795         }
3796       } else { // Expression or a literal
3797 
3798         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3799           continue; // special operand like VINTERP attr_chan
3800 
3801         // An instruction may use only one literal.
3802         // This has been validated in a previous step.
3803         // See validateVOPLiteral.
3804         // This literal may be used by more than one operand.
3805         // If all these operands are of the same size,
3806         // this literal counts as one scalar value.
3807         // Otherwise it counts as 2 scalar values.
3808         // See "GFX10 Shader Programming", section 3.6.2.3.
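             // For example (illustrative): the same literal used for both a
             // 32-bit and a 64-bit operand has mismatched sizes and counts as 2.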
3809 
3810         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3811         if (Size < 4)
3812           Size = 4;
3813 
3814         if (NumLiterals == 0) {
3815           NumLiterals = 1;
3816           LiteralSize = Size;
3817         } else if (LiteralSize != Size) {
3818           NumLiterals = 2;
3819         }
3820       }
3821     }
3822   }
3823   ConstantBusUseCount += NumLiterals;
3824 
3825   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3826     return true;
3827 
3828   SMLoc LitLoc = getLitLoc(Operands);
3829   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3830   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3831   Error(Loc, "invalid operand (violates constant bus restrictions)");
3832   return false;
3833 }
3834 
3835 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3836     const MCInst &Inst, const OperandVector &Operands) {
3837 
3838   const unsigned Opcode = Inst.getOpcode();
3839   if (!isVOPD(Opcode))
3840     return true;
3841 
3842   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3843 
3844   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3845     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3846     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3847                ? Opr.getReg()
3848                : MCRegister();
3849   };
3850 
3851   // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source-cache.
3852   bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3853 
3854   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3855   auto InvalidCompOprIdx =
3856       InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3857   if (!InvalidCompOprIdx)
3858     return true;
3859 
3860   auto CompOprIdx = *InvalidCompOprIdx;
3861   auto ParsedIdx =
3862       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3863                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3864   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3865 
3866   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3867   if (CompOprIdx == VOPD::Component::DST) {
3868     Error(Loc, "one dst register must be even and the other odd");
3869   } else {
3870     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3871     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3872                    " operands must use different VGPR banks");
3873   }
3874 
3875   return false;
3876 }
3877 
3878 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3879 
3880   const unsigned Opc = Inst.getOpcode();
3881   const MCInstrDesc &Desc = MII.get(Opc);
3882 
3883   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3884     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3885     assert(ClampIdx != -1);
3886     return Inst.getOperand(ClampIdx).getImm() == 0;
3887   }
3888 
3889   return true;
3890 }
3891 
3892 constexpr uint64_t MIMGFlags =
3893     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3894 
3895 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3896                                            const SMLoc &IDLoc) {
3897 
3898   const unsigned Opc = Inst.getOpcode();
3899   const MCInstrDesc &Desc = MII.get(Opc);
3900 
3901   if ((Desc.TSFlags & MIMGFlags) == 0)
3902     return true;
3903 
3904   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3905   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3906   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3907 
3908   if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3909     return true;
3910 
3911   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3912     return true;
3913 
3914   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3915   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3916   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3917   if (DMask == 0)
3918     DMask = 1;
3919 
3920   bool IsPackedD16 = false;
3921   unsigned DataSize =
3922       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3923   if (hasPackedD16()) {
3924     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3925     IsPackedD16 = D16Idx >= 0;
3926     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3927       DataSize = (DataSize + 1) / 2;
3928   }
3929 
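       // For example (illustrative): dmask = 0b0111 needs three data registers,
       // or two with packed d16 (ceil(3/2)), plus one more when tfe is set.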
3930   if ((VDataSize / 4) == DataSize + TFESize)
3931     return true;
3932 
3933   StringRef Modifiers;
3934   if (isGFX90A())
3935     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3936   else
3937     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3938 
3939   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3940   return false;
3941 }
3942 
3943 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3944                                            const SMLoc &IDLoc) {
3945   const unsigned Opc = Inst.getOpcode();
3946   const MCInstrDesc &Desc = MII.get(Opc);
3947 
3948   if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3949     return true;
3950 
3951   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3952 
3953   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3954       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3955   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3956   int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
3957                                                        : AMDGPU::OpName::rsrc;
3958   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3959   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3960   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3961 
3962   assert(VAddr0Idx != -1);
3963   assert(SrsrcIdx != -1);
3964   assert(SrsrcIdx > VAddr0Idx);
3965 
3966   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3967   if (BaseOpcode->BVH) {
3968     if (IsA16 == BaseOpcode->A16)
3969       return true;
3970     Error(IDLoc, "image address size does not match a16");
3971     return false;
3972   }
3973 
3974   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3975   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3976   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3977   unsigned ActualAddrSize =
3978       IsNSA ? SrsrcIdx - VAddr0Idx
3979             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3980 
3981   unsigned ExpectedAddrSize =
3982       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3983 
3984   if (IsNSA) {
3985     if (hasPartialNSAEncoding() &&
3986         ExpectedAddrSize >
3987             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3988       int VAddrLastIdx = SrsrcIdx - 1;
3989       unsigned VAddrLastSize =
3990           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3991 
3992       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3993     }
3994   } else {
3995     if (ExpectedAddrSize > 12)
3996       ExpectedAddrSize = 16;
3997 
3998     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3999     // This provides backward compatibility for assembly created
4000     // before 160b/192b/224b types were directly supported.
4001     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
4002       return true;
4003   }
4004 
4005   if (ActualAddrSize == ExpectedAddrSize)
4006     return true;
4007 
4008   Error(IDLoc, "image address size does not match dim and a16");
4009   return false;
4010 }
4011 
4012 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4013 
4014   const unsigned Opc = Inst.getOpcode();
4015   const MCInstrDesc &Desc = MII.get(Opc);
4016 
4017   if ((Desc.TSFlags & MIMGFlags) == 0)
4018     return true;
4019   if (!Desc.mayLoad() || !Desc.mayStore())
4020     return true; // Not atomic
4021 
4022   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4023   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4024 
4025   // This is an incomplete check because image_atomic_cmpswap
4026   // may only use 0x3 and 0xf while other atomic operations
4027   // may use 0x1 and 0x3. However, these limitations are
4028   // verified when we check that dmask matches dst size.
4029   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4030 }
4031 
4032 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4033 
4034   const unsigned Opc = Inst.getOpcode();
4035   const MCInstrDesc &Desc = MII.get(Opc);
4036 
4037   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4038     return true;
4039 
4040   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4041   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4042 
4043   // GATHER4 instructions use dmask in a different fashion compared to
4044   // other MIMG instructions. The only useful DMASK values are
4045   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4046   // (red,red,red,red) etc.) The ISA document doesn't mention
4047   // this.
4048   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4049 }
4050 
4051 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4052                                       const OperandVector &Operands) {
4053   if (!isGFX10Plus())
4054     return true;
4055 
4056   const unsigned Opc = Inst.getOpcode();
4057   const MCInstrDesc &Desc = MII.get(Opc);
4058 
4059   if ((Desc.TSFlags & MIMGFlags) == 0)
4060     return true;
4061 
4062   // image_bvh_intersect_ray instructions do not have dim
4063   if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4064     return true;
4065 
4066   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4067     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4068     if (Op.isDim())
4069       return true;
4070   }
4071   return false;
4072 }
4073 
4074 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4075   const unsigned Opc = Inst.getOpcode();
4076   const MCInstrDesc &Desc = MII.get(Opc);
4077 
4078   if ((Desc.TSFlags & MIMGFlags) == 0)
4079     return true;
4080 
4081   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4082   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4083       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
4084 
4085   if (!BaseOpcode->MSAA)
4086     return true;
4087 
4088   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
4089   assert(DimIdx != -1);
4090 
4091   unsigned Dim = Inst.getOperand(DimIdx).getImm();
4092   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
4093 
4094   return DimInfo->MSAA;
4095 }
4096 
4097 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4098 {
4099   switch (Opcode) {
4100   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4101   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4102   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4103     return true;
4104   default:
4105     return false;
4106   }
4107 }
4108 
4109 // movrels* opcodes should only allow VGPRs as src0.
4110 // This is specified in .td description for vop1/vop3,
4111 // but sdwa is handled differently. See isSDWAOperand.
4112 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4113                                       const OperandVector &Operands) {
4114 
4115   const unsigned Opc = Inst.getOpcode();
4116   const MCInstrDesc &Desc = MII.get(Opc);
4117 
4118   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4119     return true;
4120 
4121   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4122   assert(Src0Idx != -1);
4123 
4124   SMLoc ErrLoc;
4125   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4126   if (Src0.isReg()) {
4127     auto Reg = mc2PseudoReg(Src0.getReg());
4128     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4129     if (!isSGPR(Reg, TRI))
4130       return true;
4131     ErrLoc = getRegLoc(Reg, Operands);
4132   } else {
4133     ErrLoc = getConstLoc(Operands);
4134   }
4135 
4136   Error(ErrLoc, "source operand must be a VGPR");
4137   return false;
4138 }
4139 
4140 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4141                                           const OperandVector &Operands) {
4142 
4143   const unsigned Opc = Inst.getOpcode();
4144 
4145   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4146     return true;
4147 
4148   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4149   assert(Src0Idx != -1);
4150 
4151   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4152   if (!Src0.isReg())
4153     return true;
4154 
4155   auto Reg = mc2PseudoReg(Src0.getReg());
4156   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4157   if (!isGFX90A() && isSGPR(Reg, TRI)) {
4158     Error(getRegLoc(Reg, Operands),
4159           "source operand must be either a VGPR or an inline constant");
4160     return false;
4161   }
4162 
4163   return true;
4164 }
4165 
4166 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4167                                       const OperandVector &Operands) {
4168   unsigned Opcode = Inst.getOpcode();
4169   const MCInstrDesc &Desc = MII.get(Opcode);
4170 
4171   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4172       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4173     return true;
4174 
4175   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4176   if (Src2Idx == -1)
4177     return true;
4178 
4179   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4180     Error(getConstLoc(Operands),
4181           "inline constants are not allowed for this operand");
4182     return false;
4183   }
4184 
4185   return true;
4186 }
4187 
4188 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4189                                    const OperandVector &Operands) {
4190   const unsigned Opc = Inst.getOpcode();
4191   const MCInstrDesc &Desc = MII.get(Opc);
4192 
4193   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4194     return true;
4195 
4196   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4197   if (BlgpIdx != -1) {
4198     if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
4199       int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
4200 
4201       unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
4202       unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();
4203 
4204       // Validate that the correct register size was used for the
4205       // floating-point format operands.
4206 
4207       bool Success = true;
4208       if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
4209         int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4210         Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()),
4211                         Operands),
4212               "wrong register tuple size for cbsz value " + Twine(CBSZ));
4213         Success = false;
4214       }
4215 
4216       if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
4217         int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4218         Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()),
4219                         Operands),
4220               "wrong register tuple size for blgp value " + Twine(BLGP));
4221         Success = false;
4222       }
4223 
4224       return Success;
4225     }
4226   }
4227 
4228   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
4229   if (Src2Idx == -1)
4230     return true;
4231 
4232   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
4233   if (!Src2.isReg())
4234     return true;
4235 
4236   MCRegister Src2Reg = Src2.getReg();
4237   MCRegister DstReg = Inst.getOperand(0).getReg();
4238   if (Src2Reg == DstReg)
4239     return true;
4240 
4241   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4242   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4243     return true;
4244 
4245   if (TRI->regsOverlap(Src2Reg, DstReg)) {
4246     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
4247           "source 2 operand must not partially overlap with dst");
4248     return false;
4249   }
4250 
4251   return true;
4252 }
4253 
4254 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4255   switch (Inst.getOpcode()) {
4256   default:
4257     return true;
4258   case V_DIV_SCALE_F32_gfx6_gfx7:
4259   case V_DIV_SCALE_F32_vi:
4260   case V_DIV_SCALE_F32_gfx10:
4261   case V_DIV_SCALE_F64_gfx6_gfx7:
4262   case V_DIV_SCALE_F64_vi:
4263   case V_DIV_SCALE_F64_gfx10:
4264     break;
4265   }
4266 
4267   // TODO: Check that src0 = src1 or src2.
4268 
4269   for (auto Name : {AMDGPU::OpName::src0_modifiers,
4270                     AMDGPU::OpName::src1_modifiers,
4271                     AMDGPU::OpName::src2_modifiers}) {
4272     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4273             .getImm() &
4274         SISrcMods::ABS) {
4275       return false;
4276     }
4277   }
4278 
4279   return true;
4280 }
4281 
4282 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4283 
4284   const unsigned Opc = Inst.getOpcode();
4285   const MCInstrDesc &Desc = MII.get(Opc);
4286 
4287   if ((Desc.TSFlags & MIMGFlags) == 0)
4288     return true;
4289 
4290   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4291   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4292     if (isCI() || isSI())
4293       return false;
4294   }
4295 
4296   return true;
4297 }
4298 
4299 static bool IsRevOpcode(const unsigned Opcode)
4300 {
4301   switch (Opcode) {
4302   case AMDGPU::V_SUBREV_F32_e32:
4303   case AMDGPU::V_SUBREV_F32_e64:
4304   case AMDGPU::V_SUBREV_F32_e32_gfx10:
4305   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4306   case AMDGPU::V_SUBREV_F32_e32_vi:
4307   case AMDGPU::V_SUBREV_F32_e64_gfx10:
4308   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4309   case AMDGPU::V_SUBREV_F32_e64_vi:
4310 
4311   case AMDGPU::V_SUBREV_CO_U32_e32:
4312   case AMDGPU::V_SUBREV_CO_U32_e64:
4313   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4314   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4315 
4316   case AMDGPU::V_SUBBREV_U32_e32:
4317   case AMDGPU::V_SUBBREV_U32_e64:
4318   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4319   case AMDGPU::V_SUBBREV_U32_e32_vi:
4320   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4321   case AMDGPU::V_SUBBREV_U32_e64_vi:
4322 
4323   case AMDGPU::V_SUBREV_U32_e32:
4324   case AMDGPU::V_SUBREV_U32_e64:
4325   case AMDGPU::V_SUBREV_U32_e32_gfx9:
4326   case AMDGPU::V_SUBREV_U32_e32_vi:
4327   case AMDGPU::V_SUBREV_U32_e64_gfx9:
4328   case AMDGPU::V_SUBREV_U32_e64_vi:
4329 
4330   case AMDGPU::V_SUBREV_F16_e32:
4331   case AMDGPU::V_SUBREV_F16_e64:
4332   case AMDGPU::V_SUBREV_F16_e32_gfx10:
4333   case AMDGPU::V_SUBREV_F16_e32_vi:
4334   case AMDGPU::V_SUBREV_F16_e64_gfx10:
4335   case AMDGPU::V_SUBREV_F16_e64_vi:
4336 
4337   case AMDGPU::V_SUBREV_U16_e32:
4338   case AMDGPU::V_SUBREV_U16_e64:
4339   case AMDGPU::V_SUBREV_U16_e32_vi:
4340   case AMDGPU::V_SUBREV_U16_e64_vi:
4341 
4342   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4343   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4344   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4345 
4346   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4347   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4348 
4349   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4350   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4351 
4352   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4353   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4354 
4355   case AMDGPU::V_LSHRREV_B32_e32:
4356   case AMDGPU::V_LSHRREV_B32_e64:
4357   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4358   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4359   case AMDGPU::V_LSHRREV_B32_e32_vi:
4360   case AMDGPU::V_LSHRREV_B32_e64_vi:
4361   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4362   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4363 
4364   case AMDGPU::V_ASHRREV_I32_e32:
4365   case AMDGPU::V_ASHRREV_I32_e64:
4366   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4367   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4368   case AMDGPU::V_ASHRREV_I32_e32_vi:
4369   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4370   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4371   case AMDGPU::V_ASHRREV_I32_e64_vi:
4372 
4373   case AMDGPU::V_LSHLREV_B32_e32:
4374   case AMDGPU::V_LSHLREV_B32_e64:
4375   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4376   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4377   case AMDGPU::V_LSHLREV_B32_e32_vi:
4378   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4379   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4380   case AMDGPU::V_LSHLREV_B32_e64_vi:
4381 
4382   case AMDGPU::V_LSHLREV_B16_e32:
4383   case AMDGPU::V_LSHLREV_B16_e64:
4384   case AMDGPU::V_LSHLREV_B16_e32_vi:
4385   case AMDGPU::V_LSHLREV_B16_e64_vi:
4386   case AMDGPU::V_LSHLREV_B16_gfx10:
4387 
4388   case AMDGPU::V_LSHRREV_B16_e32:
4389   case AMDGPU::V_LSHRREV_B16_e64:
4390   case AMDGPU::V_LSHRREV_B16_e32_vi:
4391   case AMDGPU::V_LSHRREV_B16_e64_vi:
4392   case AMDGPU::V_LSHRREV_B16_gfx10:
4393 
4394   case AMDGPU::V_ASHRREV_I16_e32:
4395   case AMDGPU::V_ASHRREV_I16_e64:
4396   case AMDGPU::V_ASHRREV_I16_e32_vi:
4397   case AMDGPU::V_ASHRREV_I16_e64_vi:
4398   case AMDGPU::V_ASHRREV_I16_gfx10:
4399 
4400   case AMDGPU::V_LSHLREV_B64_e64:
4401   case AMDGPU::V_LSHLREV_B64_gfx10:
4402   case AMDGPU::V_LSHLREV_B64_vi:
4403 
4404   case AMDGPU::V_LSHRREV_B64_e64:
4405   case AMDGPU::V_LSHRREV_B64_gfx10:
4406   case AMDGPU::V_LSHRREV_B64_vi:
4407 
4408   case AMDGPU::V_ASHRREV_I64_e64:
4409   case AMDGPU::V_ASHRREV_I64_gfx10:
4410   case AMDGPU::V_ASHRREV_I64_vi:
4411 
4412   case AMDGPU::V_PK_LSHLREV_B16:
4413   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4414   case AMDGPU::V_PK_LSHLREV_B16_vi:
4415 
4416   case AMDGPU::V_PK_LSHRREV_B16:
4417   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4418   case AMDGPU::V_PK_LSHRREV_B16_vi:
4419   case AMDGPU::V_PK_ASHRREV_I16:
4420   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4421   case AMDGPU::V_PK_ASHRREV_I16_vi:
4422     return true;
4423   default:
4424     return false;
4425   }
4426 }
4427 
4428 std::optional<StringRef>
4429 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4430 
4431   using namespace SIInstrFlags;
4432   const unsigned Opcode = Inst.getOpcode();
4433   const MCInstrDesc &Desc = MII.get(Opcode);
4434 
4435   // The lds_direct register is defined so that it can be used
4436   // with 9-bit src operands only. Ignore encodings that do not accept these.
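       // For example (illustrative): "v_mov_b32 v0, lds_direct" uses lds_direct
       // as src0 and is accepted on targets that support it, while lds_direct in
       // any other source position is rejected below.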
4437   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4438   if ((Desc.TSFlags & Enc) == 0)
4439     return std::nullopt;
4440 
4441   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4442     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4443     if (SrcIdx == -1)
4444       break;
4445     const auto &Src = Inst.getOperand(SrcIdx);
4446     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4447 
4448       if (isGFX90A() || isGFX11Plus())
4449         return StringRef("lds_direct is not supported on this GPU");
4450 
4451       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4452         return StringRef("lds_direct cannot be used with this instruction");
4453 
4454       if (SrcName != OpName::src0)
4455         return StringRef("lds_direct may be used as src0 only");
4456     }
4457   }
4458 
4459   return std::nullopt;
4460 }
4461 
4462 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4463   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4464     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4465     if (Op.isFlatOffset())
4466       return Op.getStartLoc();
4467   }
4468   return getLoc();
4469 }
4470 
4471 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4472                                      const OperandVector &Operands) {
4473   auto Opcode = Inst.getOpcode();
4474   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4475   if (OpNum == -1)
4476     return true;
4477 
4478   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4479   if ((TSFlags & SIInstrFlags::FLAT))
4480     return validateFlatOffset(Inst, Operands);
4481 
4482   if ((TSFlags & SIInstrFlags::SMRD))
4483     return validateSMEMOffset(Inst, Operands);
4484 
4485   const auto &Op = Inst.getOperand(OpNum);
4486   if (isGFX12Plus() &&
4487       (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4488     const unsigned OffsetSize = 24;
4489     if (!isIntN(OffsetSize, Op.getImm())) {
4490       Error(getFlatOffsetLoc(Operands),
4491             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4492       return false;
4493     }
4494   } else {
4495     const unsigned OffsetSize = 16;
4496     if (!isUIntN(OffsetSize, Op.getImm())) {
4497       Error(getFlatOffsetLoc(Operands),
4498             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4499       return false;
4500     }
4501   }
4502   return true;
4503 }
4504 
4505 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4506                                          const OperandVector &Operands) {
4507   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4508   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4509     return true;
4510 
4511   auto Opcode = Inst.getOpcode();
4512   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4513   assert(OpNum != -1);
4514 
4515   const auto &Op = Inst.getOperand(OpNum);
4516   if (!hasFlatOffsets() && Op.getImm() != 0) {
4517     Error(getFlatOffsetLoc(Operands),
4518           "flat offset modifier is not supported on this GPU");
4519     return false;
4520   }
4521 
4522   // For pre-GFX12 FLAT instructions the offset must be positive;
4523   // MSB is ignored and forced to zero.
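       // For example (illustrative): if getNumFlatOffsetBits returns 12, a
       // global/scratch offset may be in [-2048, 2047], while a plain flat
       // offset must be in [0, 2047].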
4524   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4525   bool AllowNegative =
4526       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4527       isGFX12Plus();
4528   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4529     Error(getFlatOffsetLoc(Operands),
4530           Twine("expected a ") +
4531               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4532                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4533     return false;
4534   }
4535 
4536   return true;
4537 }
4538 
4539 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4540   // Start with second operand because SMEM Offset cannot be dst or src0.
4541   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4542     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4543     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4544       return Op.getStartLoc();
4545   }
4546   return getLoc();
4547 }
4548 
4549 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4550                                          const OperandVector &Operands) {
4551   if (isCI() || isSI())
4552     return true;
4553 
4554   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4555   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4556     return true;
4557 
4558   auto Opcode = Inst.getOpcode();
4559   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4560   if (OpNum == -1)
4561     return true;
4562 
4563   const auto &Op = Inst.getOperand(OpNum);
4564   if (!Op.isImm())
4565     return true;
4566 
4567   uint64_t Offset = Op.getImm();
4568   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4569   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4570       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4571     return true;
4572 
4573   Error(getSMEMOffsetLoc(Operands),
4574         isGFX12Plus()          ? "expected a 24-bit signed offset"
4575         : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4576                                : "expected a 21-bit signed offset");
4577 
4578   return false;
4579 }
4580 
4581 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4582   unsigned Opcode = Inst.getOpcode();
4583   const MCInstrDesc &Desc = MII.get(Opcode);
4584   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4585     return true;
4586 
4587   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4588   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4589 
4590   const int OpIndices[] = { Src0Idx, Src1Idx };
4591 
4592   unsigned NumExprs = 0;
4593   unsigned NumLiterals = 0;
4594   uint32_t LiteralValue;
4595 
4596   for (int OpIdx : OpIndices) {
4597     if (OpIdx == -1) break;
4598 
4599     const MCOperand &MO = Inst.getOperand(OpIdx);
4600     // Exclude special imm operands (like those used by s_set_gpr_idx_on)
4601     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4602       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4603         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4604         if (NumLiterals == 0 || LiteralValue != Value) {
4605           LiteralValue = Value;
4606           ++NumLiterals;
4607         }
4608       } else if (MO.isExpr()) {
4609         ++NumExprs;
4610       }
4611     }
4612   }
4613 
4614   return NumLiterals + NumExprs <= 1;
4615 }
4616 
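// Validate op_sel usage: permlane16 may only use op_sel[0:1], GFX940 DOT
// instructions require op_sel == 0 and op_sel_hi == -1, and GFX11+ VOP3
// (non-VOP3P) DOT instructions require op_sel[0:1] == 0.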
4617 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4618   const unsigned Opc = Inst.getOpcode();
4619   if (isPermlane16(Opc)) {
4620     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4621     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4622 
4623     if (OpSel & ~3)
4624       return false;
4625   }
4626 
4627   uint64_t TSFlags = MII.get(Opc).TSFlags;
4628 
4629   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4630     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4631     if (OpSelIdx != -1) {
4632       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4633         return false;
4634     }
4635     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4636     if (OpSelHiIdx != -1) {
4637       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4638         return false;
4639     }
4640   }
4641 
4642   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4643   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4644       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4645     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4646     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4647     if (OpSel & 3)
4648       return false;
4649   }
4650 
4651   return true;
4652 }
4653 
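// Validate a neg_lo/neg_hi modifier: a neg bit may only be set for src
// operands that also carry a src_modifiers operand.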
4654 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4655   assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4656 
4657   const unsigned Opc = Inst.getOpcode();
4658   uint64_t TSFlags = MII.get(Opc).TSFlags;
4659 
4660   // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4661   // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4662   // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4663   // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4664   if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4665       !(TSFlags & SIInstrFlags::IsSWMMAC))
4666     return true;
4667 
4668   int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4669   if (NegIdx == -1)
4670     return true;
4671 
4672   unsigned Neg = Inst.getOperand(NegIdx).getImm();
4673 
4674   // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
4675   // allowed on only a subset of their src operands. Conveniently, such
4676   // instructions omit the src_modifiers operand for the src operands that do
4677   // not allow neg, because those operands do not allow opsel either.
4678 
4679   int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4680                     AMDGPU::OpName::src1_modifiers,
4681                     AMDGPU::OpName::src2_modifiers};
4682 
4683   for (unsigned i = 0; i < 3; ++i) {
4684     if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4685       if (Neg & (1 << i))
4686         return false;
4687     }
4688   }
4689 
4690   return true;
4691 }
4692 
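// Validate DPP restrictions: DP ALU DPP supports only row_newbcast controls,
// and on subtargets without SGPR src1 support for DPP, src1 must be a VGPR.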
4693 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4694                                   const OperandVector &Operands) {
4695   const unsigned Opc = Inst.getOpcode();
4696   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4697   if (DppCtrlIdx >= 0) {
4698     unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4699 
4700     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4701         AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4702       // DP ALU DPP is supported for row_newbcast only on GFX9*
4703       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4704       Error(S, "DP ALU dpp only supports row_newbcast");
4705       return false;
4706     }
4707   }
4708 
4709   int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4710   bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4711 
4712   if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4713     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4714     if (Src1Idx >= 0) {
4715       const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4716       const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4717       if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
4718         auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg());
4719         SMLoc S = getRegLoc(Reg, Operands);
4720         Error(S, "invalid operand for instruction");
4721         return false;
4722       }
4723       if (Src1.isImm()) {
4724         Error(getInstLoc(Operands),
4725               "src1 immediate operand invalid for instruction");
4726         return false;
4727       }
4728     }
4729   }
4730 
4731   return true;
4732 }
4733 
4734 // Check if VCC register matches wavefront size
4735 bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
4736   auto FB = getFeatureBits();
4737   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4738     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4739 }
4740 
4741 // Only one unique literal may be used. A VOP3 literal is only allowed on GFX10+.
4742 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4743                                          const OperandVector &Operands) {
4744   unsigned Opcode = Inst.getOpcode();
4745   const MCInstrDesc &Desc = MII.get(Opcode);
4746   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4747   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4748       !HasMandatoryLiteral && !isVOPD(Opcode))
4749     return true;
4750 
4751   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4752 
4753   unsigned NumExprs = 0;
4754   unsigned NumLiterals = 0;
4755   uint32_t LiteralValue;
4756 
4757   for (int OpIdx : OpIndices) {
4758     if (OpIdx == -1)
4759       continue;
4760 
4761     const MCOperand &MO = Inst.getOperand(OpIdx);
4762     if (!MO.isImm() && !MO.isExpr())
4763       continue;
4764     if (!isSISrcOperand(Desc, OpIdx))
4765       continue;
4766 
4767     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4768       uint64_t Value = static_cast<uint64_t>(MO.getImm());
4769       bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4770                     AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4771       bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4772 
4773       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4774         Error(getLitLoc(Operands), "invalid operand for instruction");
4775         return false;
4776       }
4777 
4778       if (IsFP64 && IsValid32Op)
4779         Value = Hi_32(Value);
4780 
4781       if (NumLiterals == 0 || LiteralValue != Value) {
4782         LiteralValue = Value;
4783         ++NumLiterals;
4784       }
4785     } else if (MO.isExpr()) {
4786       ++NumExprs;
4787     }
4788   }
4789   NumLiterals += NumExprs;
4790 
4791   if (!NumLiterals)
4792     return true;
4793 
4794   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4795     Error(getLitLoc(Operands), "literal operands are not supported");
4796     return false;
4797   }
4798 
4799   if (NumLiterals > 1) {
4800     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4801     return false;
4802   }
4803 
4804   return true;
4805 }
4806 
4807 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4808 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4809                          const MCRegisterInfo *MRI) {
4810   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4811   if (OpIdx < 0)
4812     return -1;
4813 
4814   const MCOperand &Op = Inst.getOperand(OpIdx);
4815   if (!Op.isReg())
4816     return -1;
4817 
4818   MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4819   auto Reg = Sub ? Sub : Op.getReg();
4820   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4821   return AGPR32.contains(Reg) ? 1 : 0;
4822 }
4823 
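// For FLAT, MUBUF, MTBUF, MIMG and DS memory instructions, check that the
// data and destination operands agree on the register bank: on gfx90a they
// must be uniformly VGPR or uniformly AGPR; on other targets AGPRs are not
// allowed at all.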
4824 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4825   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4826   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4827                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4828                   SIInstrFlags::DS)) == 0)
4829     return true;
4830 
4831   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4832                                                       : AMDGPU::OpName::vdata;
4833 
4834   const MCRegisterInfo *MRI = getMRI();
4835   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4836   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4837 
4838   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4839     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4840     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4841       return false;
4842   }
4843 
4844   auto FB = getFeatureBits();
4845   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4846     if (DataAreg < 0 || DstAreg < 0)
4847       return true;
4848     return DstAreg == DataAreg;
4849   }
4850 
4851   return DstAreg < 1 && DataAreg < 1;
4852 }
4853 
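// On gfx90a, VGPR and AGPR tuples must start at an even register index.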
4854 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4855   auto FB = getFeatureBits();
4856   if (!FB[AMDGPU::FeatureGFX90AInsts])
4857     return true;
4858 
4859   const MCRegisterInfo *MRI = getMRI();
4860   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4861   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4862   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4863     const MCOperand &Op = Inst.getOperand(I);
4864     if (!Op.isReg())
4865       continue;
4866 
4867     MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4868     if (!Sub)
4869       continue;
4870 
4871     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4872       return false;
4873     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4874       return false;
4875   }
4876 
4877   return true;
4878 }
4879 
4880 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4881   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4882     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4883     if (Op.isBLGP())
4884       return Op.getStartLoc();
4885   }
4886   return SMLoc();
4887 }
4888 
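// On the gfx940 f64 MFMA opcodes the blgp field is repurposed as a neg
// modifier; check that the spelling used in the source matches the opcode.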
4889 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4890                                    const OperandVector &Operands) {
4891   unsigned Opc = Inst.getOpcode();
4892   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4893   if (BlgpIdx == -1)
4894     return true;
4895   SMLoc BLGPLoc = getBLGPLoc(Operands);
4896   if (!BLGPLoc.isValid())
4897     return true;
4898   bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4899   auto FB = getFeatureBits();
4900   bool UsesNeg = false;
4901   if (FB[AMDGPU::FeatureGFX940Insts]) {
4902     switch (Opc) {
4903     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4904     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4905     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4906     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4907       UsesNeg = true;
4908     }
4909   }
4910 
4911   if (IsNeg == UsesNeg)
4912     return true;
4913 
4914   Error(BLGPLoc,
4915         UsesNeg ? "invalid modifier: blgp is not supported"
4916                 : "invalid modifier: neg is not supported");
4917 
4918   return false;
4919 }
4920 
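// On GFX11+, the dedicated s_waitcnt_*cnt instructions only accept null
// as their register operand.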
4921 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4922                                       const OperandVector &Operands) {
4923   if (!isGFX11Plus())
4924     return true;
4925 
4926   unsigned Opc = Inst.getOpcode();
4927   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4928       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4929       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4930       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4931     return true;
4932 
4933   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4934   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4935   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4936   if (Reg == AMDGPU::SGPR_NULL)
4937     return true;
4938 
4939   SMLoc RegLoc = getRegLoc(Reg, Operands);
4940   Error(RegLoc, "src0 must be null");
4941   return false;
4942 }
4943 
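// Validate DS instructions: GWS opcodes get a dedicated alignment check,
// and the gds modifier is rejected on subtargets without GDS.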
4944 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4945                                  const OperandVector &Operands) {
4946   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4947   if ((TSFlags & SIInstrFlags::DS) == 0)
4948     return true;
4949   if (TSFlags & SIInstrFlags::GWS)
4950     return validateGWS(Inst, Operands);
4951   // Only validate GDS for non-GWS instructions.
4952   if (hasGDS())
4953     return true;
4954   int GDSIdx =
4955       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4956   if (GDSIdx < 0)
4957     return true;
4958   unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4959   if (GDS) {
4960     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4961     Error(S, "gds modifier is not supported on this GPU");
4962     return false;
4963   }
4964   return true;
4965 }
4966 
4967 // gfx90a has an undocumented limitation:
4968 // DS_GWS opcodes must use even-aligned registers.
4969 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4970                                   const OperandVector &Operands) {
4971   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4972     return true;
4973 
4974   int Opc = Inst.getOpcode();
4975   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4976       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4977     return true;
4978 
4979   const MCRegisterInfo *MRI = getMRI();
4980   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4981   int Data0Pos =
4982       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4983   assert(Data0Pos != -1);
4984   auto Reg = Inst.getOperand(Data0Pos).getReg();
4985   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4986   if (RegIdx & 1) {
4987     SMLoc RegLoc = getRegLoc(Reg, Operands);
4988     Error(RegLoc, "vgpr must be even aligned");
4989     return false;
4990   }
4991 
4992   return true;
4993 }
4994 
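// Validate cache policy (cpol) modifiers. GFX12+ delegates to the th/scope
// checks; earlier targets restrict glc/dlc on SMEM, scc on gfx90a, and
// glc/sc0 usage on atomics.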
4995 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4996                                             const OperandVector &Operands,
4997                                             const SMLoc &IDLoc) {
4998   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4999                                            AMDGPU::OpName::cpol);
5000   if (CPolPos == -1)
5001     return true;
5002 
5003   unsigned CPol = Inst.getOperand(CPolPos).getImm();
5004 
5005   if (isGFX12Plus())
5006     return validateTHAndScopeBits(Inst, Operands, CPol);
5007 
5008   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5009   if (TSFlags & SIInstrFlags::SMRD) {
5010     if (CPol && (isSI() || isCI())) {
5011       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5012       Error(S, "cache policy is not supported for SMRD instructions");
5013       return false;
5014     }
5015     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5016       Error(IDLoc, "invalid cache policy for SMEM instruction");
5017       return false;
5018     }
5019   }
5020 
5021   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5022     const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5023                                       SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5024                                       SIInstrFlags::FLAT;
5025     if (!(TSFlags & AllowSCCModifier)) {
5026       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5027       StringRef CStr(S.getPointer());
5028       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
5029       Error(S,
5030             "scc modifier is not supported for this instruction on this GPU");
5031       return false;
5032     }
5033   }
5034 
5035   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
5036     return true;
5037 
5038   if (TSFlags & SIInstrFlags::IsAtomicRet) {
5039     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5040       Error(IDLoc, isGFX940() ? "instruction must use sc0"
5041                               : "instruction must use glc");
5042       return false;
5043     }
5044   } else {
5045     if (CPol & CPol::GLC) {
5046       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5047       StringRef CStr(S.getPointer());
5048       S = SMLoc::getFromPointer(
5049           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
5050       Error(S, isGFX940() ? "instruction must not use sc0"
5051                           : "instruction must not use glc");
5052       return false;
5053     }
5054   }
5055 
5056   return true;
5057 }
5058 
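// GFX12+ cpol validation: check th/scope combinations and that the th type
// (load, store or atomic) matches the instruction.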
5059 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5060                                              const OperandVector &Operands,
5061                                              const unsigned CPol) {
5062   const unsigned TH = CPol & AMDGPU::CPol::TH;
5063   const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5064 
5065   const unsigned Opcode = Inst.getOpcode();
5066   const MCInstrDesc &TID = MII.get(Opcode);
5067 
5068   auto PrintError = [&](StringRef Msg) {
5069     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5070     Error(S, Msg);
5071     return false;
5072   };
5073 
5074   if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5075       (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5076       (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
5077     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5078 
5079   if (TH == 0)
5080     return true;
5081 
5082   if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5083       ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5084        (TH == AMDGPU::CPol::TH_NT_HT)))
5085     return PrintError("invalid th value for SMEM instruction");
5086 
5087   if (TH == AMDGPU::CPol::TH_BYPASS) {
5088     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5089          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5090         (Scope == AMDGPU::CPol::SCOPE_SYS &&
5091          !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5092       return PrintError("scope and th combination is not valid");
5093   }
5094 
5095   bool IsStore = TID.mayStore();
5096   bool IsAtomic =
5097       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
5098 
5099   if (IsAtomic) {
5100     if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5101       return PrintError("invalid th value for atomic instructions");
5102   } else if (IsStore) {
5103     if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5104       return PrintError("invalid th value for store instructions");
5105   } else {
5106     if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5107       return PrintError("invalid th value for load instructions");
5108   }
5109 
5110   return true;
5111 }
5112 
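// Reject an explicit tfe modifier on buffer stores, where it has no effect.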
5113 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5114                                   const OperandVector &Operands) {
5115   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5116   if (Desc.mayStore() &&
5117       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5118     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5119     if (Loc != getInstLoc(Operands)) {
5120       Error(Loc, "TFE modifier has no meaning for store instructions");
5121       return false;
5122     }
5123   }
5124 
5125   return true;
5126 }
5127 
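// Top-level semantic validation of a successfully matched instruction.
// Each check either passes or reports a diagnostic at the most relevant
// operand location.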
5128 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5129                                           const SMLoc &IDLoc,
5130                                           const OperandVector &Operands) {
5131   if (auto ErrMsg = validateLdsDirect(Inst)) {
5132     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
5133     return false;
5134   }
5135   if (!validateSOPLiteral(Inst)) {
5136     Error(getLitLoc(Operands),
5137       "only one unique literal operand is allowed");
5138     return false;
5139   }
5140   if (!validateVOPLiteral(Inst, Operands)) {
5141     return false;
5142   }
5143   if (!validateConstantBusLimitations(Inst, Operands)) {
5144     return false;
5145   }
5146   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5147     return false;
5148   }
5149   if (!validateIntClampSupported(Inst)) {
5150     Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
5151           "integer clamping is not supported on this GPU");
5152     return false;
5153   }
5154   if (!validateOpSel(Inst)) {
5155     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
5156       "invalid op_sel operand");
5157     return false;
5158   }
5159   if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
5160     Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
5161           "invalid neg_lo operand");
5162     return false;
5163   }
5164   if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
5165     Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
5166           "invalid neg_hi operand");
5167     return false;
5168   }
5169   if (!validateDPP(Inst, Operands)) {
5170     return false;
5171   }
5172   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5173   if (!validateMIMGD16(Inst)) {
5174     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
5175       "d16 modifier is not supported on this GPU");
5176     return false;
5177   }
5178   if (!validateMIMGDim(Inst, Operands)) {
5179     Error(IDLoc, "missing dim operand");
5180     return false;
5181   }
5182   if (!validateMIMGMSAA(Inst)) {
5183     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
5184           "invalid dim; must be MSAA type");
5185     return false;
5186   }
5187   if (!validateMIMGDataSize(Inst, IDLoc)) {
5188     return false;
5189   }
5190   if (!validateMIMGAddrSize(Inst, IDLoc))
5191     return false;
5192   if (!validateMIMGAtomicDMask(Inst)) {
5193     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5194       "invalid atomic image dmask");
5195     return false;
5196   }
5197   if (!validateMIMGGatherDMask(Inst)) {
5198     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
5199       "invalid image_gather dmask: only one bit must be set");
5200     return false;
5201   }
5202   if (!validateMovrels(Inst, Operands)) {
5203     return false;
5204   }
5205   if (!validateOffset(Inst, Operands)) {
5206     return false;
5207   }
5208   if (!validateMAIAccWrite(Inst, Operands)) {
5209     return false;
5210   }
5211   if (!validateMAISrc2(Inst, Operands)) {
5212     return false;
5213   }
5214   if (!validateMFMA(Inst, Operands)) {
5215     return false;
5216   }
5217   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5218     return false;
5219   }
5220 
5221   if (!validateAGPRLdSt(Inst)) {
5222     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5223     ? "invalid register class: data and dst should be all VGPR or AGPR"
5224     : "invalid register class: agpr loads and stores not supported on this GPU"
5225     );
5226     return false;
5227   }
5228   if (!validateVGPRAlign(Inst)) {
5229     Error(IDLoc,
5230       "invalid register class: vgpr tuples must be 64 bit aligned");
5231     return false;
5232   }
5233   if (!validateDS(Inst, Operands)) {
5234     return false;
5235   }
5236 
5237   if (!validateBLGP(Inst, Operands)) {
5238     return false;
5239   }
5240 
5241   if (!validateDivScale(Inst)) {
5242     Error(IDLoc, "ABS not allowed in VOP3B instructions");
5243     return false;
5244   }
5245   if (!validateWaitCnt(Inst, Operands)) {
5246     return false;
5247   }
5248   if (!validateTFE(Inst, Operands)) {
5249     return false;
5250   }
5251 
5252   return true;
5253 }
5254 
5255 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5256                                             const FeatureBitset &FBS,
5257                                             unsigned VariantID = 0);
5258 
5259 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5260                                 const FeatureBitset &AvailableFeatures,
5261                                 unsigned VariantID);
5262 
5263 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5264                                        const FeatureBitset &FBS) {
5265   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5266 }
5267 
5268 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5269                                        const FeatureBitset &FBS,
5270                                        ArrayRef<unsigned> Variants) {
5271   for (auto Variant : Variants) {
5272     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5273       return true;
5274   }
5275 
5276   return false;
5277 }
5278 
5279 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5280                                                   const SMLoc &IDLoc) {
5281   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5282 
5283   // Check if requested instruction variant is supported.
5284   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5285     return false;
5286 
5287   // This instruction is not supported.
5288   // Clear any other pending errors because they are no longer relevant.
5289   getParser().clearPendingErrors();
5290 
5291   // Requested instruction variant is not supported.
5292   // Check if any other variants are supported.
5293   StringRef VariantName = getMatchedVariantName();
5294   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5295     return Error(IDLoc,
5296                  Twine(VariantName,
5297                        " variant of this instruction is not supported"));
5298   }
5299 
5300   // Check if this instruction may be used with a different wavesize.
5301   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5302       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5303 
5304     FeatureBitset FeaturesWS32 = getFeatureBits();
5305     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5306         .flip(AMDGPU::FeatureWavefrontSize32);
5307     FeatureBitset AvailableFeaturesWS32 =
5308         ComputeAvailableFeatures(FeaturesWS32);
5309 
5310     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5311       return Error(IDLoc, "instruction requires wavesize=32");
5312   }
5313 
5314   // Finally check if this instruction is supported on any other GPU.
5315   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5316     return Error(IDLoc, "instruction not supported on this GPU");
5317   }
5318 
5319   // Instruction not supported on any GPU. Probably a typo.
5320   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5321   return Error(IDLoc, "invalid instruction" + Suggestion);
5322 }
5323 
5324 static bool isInvalidVOPDY(const OperandVector &Operands,
5325                            uint64_t InvalidOprIdx) {
5326   assert(InvalidOprIdx < Operands.size());
5327   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5328   if (Op.isToken() && InvalidOprIdx > 1) {
5329     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5330     return PrevOp.isToken() && PrevOp.getToken() == "::";
5331   }
5332   return false;
5333 }
5334 
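// Try every matcher variant and keep the most specific result; on success,
// run semantic validation and emit the instruction.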
5335 bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5336                                               OperandVector &Operands,
5337                                               MCStreamer &Out,
5338                                               uint64_t &ErrorInfo,
5339                                               bool MatchingInlineAsm) {
5340   MCInst Inst;
5341   unsigned Result = Match_Success;
5342   for (auto Variant : getMatchedVariants()) {
5343     uint64_t EI;
5344     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5345                                   Variant);
5346     // We order match statuses from least to most specific and keep the most
5347     // specific status as the result:
5348     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5349     if (R == Match_Success || R == Match_MissingFeature ||
5350         (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5351         (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5352          Result != Match_MissingFeature)) {
5353       Result = R;
5354       ErrorInfo = EI;
5355     }
5356     if (R == Match_Success)
5357       break;
5358   }
5359 
5360   if (Result == Match_Success) {
5361     if (!validateInstruction(Inst, IDLoc, Operands)) {
5362       return true;
5363     }
5364     Inst.setLoc(IDLoc);
5365     Out.emitInstruction(Inst, getSTI());
5366     return false;
5367   }
5368 
5369   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5370   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5371     return true;
5372   }
5373 
5374   switch (Result) {
5375   default: break;
5376   case Match_MissingFeature:
5377     // It has been verified that the specified instruction
5378     // mnemonic is valid. A match was found but it requires
5379     // features which are not supported on this GPU.
5380     return Error(IDLoc, "operands are not valid for this GPU or mode");
5381 
5382   case Match_InvalidOperand: {
5383     SMLoc ErrorLoc = IDLoc;
5384     if (ErrorInfo != ~0ULL) {
5385       if (ErrorInfo >= Operands.size()) {
5386         return Error(IDLoc, "too few operands for instruction");
5387       }
5388       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5389       if (ErrorLoc == SMLoc())
5390         ErrorLoc = IDLoc;
5391 
5392       if (isInvalidVOPDY(Operands, ErrorInfo))
5393         return Error(ErrorLoc, "invalid VOPDY instruction");
5394     }
5395     return Error(ErrorLoc, "invalid operand for instruction");
5396   }
5397 
5398   case Match_MnemonicFail:
5399     llvm_unreachable("Invalid instructions should have been handled already");
5400   }
5401   llvm_unreachable("Implement any new match types added!");
5402 }
5403 
5404 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5405   int64_t Tmp = -1;
5406   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5407     return true;
5408   }
5409   if (getParser().parseAbsoluteExpression(Tmp)) {
5410     return true;
5411   }
5412   Ret = static_cast<uint32_t>(Tmp);
5413   return false;
5414 }
5415 
5416 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5417   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5418     return TokError("directive only supported for amdgcn architecture");
5419 
5420   std::string TargetIDDirective;
5421   SMLoc TargetStart = getTok().getLoc();
5422   if (getParser().parseEscapedString(TargetIDDirective))
5423     return true;
5424 
5425   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5426   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5427     return getParser().Error(TargetRange.Start,
5428         (Twine(".amdgcn_target directive's target id ") +
5429          Twine(TargetIDDirective) +
5430          Twine(" does not match the specified target id ") +
5431          Twine(getTargetStreamer().getTargetID()->toString())).str());
5432 
5433   return false;
5434 }
5435 
5436 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5437   return Error(Range.Start, "value out of range", Range);
5438 }
5439 
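// Compute the granulated VGPR/SGPR block counts (as MCExprs) for
// compute_pgm_rsrc1, accounting for extra SGPRs used by VCC, flat scratch
// and XNACK. On GFX10+ the SGPR block count is always zero.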
5440 bool AMDGPUAsmParser::calculateGPRBlocks(
5441     const FeatureBitset &Features, const MCExpr *VCCUsed,
5442     const MCExpr *FlatScrUsed, bool XNACKUsed,
5443     std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5444     SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5445     const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5446   // TODO(scott.linder): These calculations are duplicated from
5447   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5448   IsaVersion Version = getIsaVersion(getSTI().getCPU());
5449   MCContext &Ctx = getContext();
5450 
5451   const MCExpr *NumSGPRs = NextFreeSGPR;
5452   int64_t EvaluatedSGPRs;
5453 
5454   if (Version.Major >= 10)
5455     NumSGPRs = MCConstantExpr::create(0, Ctx);
5456   else {
5457     unsigned MaxAddressableNumSGPRs =
5458         IsaInfo::getAddressableNumSGPRs(&getSTI());
5459 
5460     if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
5461         !Features.test(FeatureSGPRInitBug) &&
5462         static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5463       return OutOfRangeError(SGPRRange);
5464 
5465     const MCExpr *ExtraSGPRs =
5466         AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5467     NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
5468 
5469     if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
5470         (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5471         static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5472       return OutOfRangeError(SGPRRange);
5473 
5474     if (Features.test(FeatureSGPRInitBug))
5475       NumSGPRs =
5476           MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5477   }
5478 
5479   // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5480   // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
5481   auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5482                                 unsigned Granule) -> const MCExpr * {
5483     const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
5484     const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
5485     const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
5486     const MCExpr *AlignToGPR =
5487         AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
5488     const MCExpr *DivGPR =
5489         MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
5490     const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
5491     return SubGPR;
5492   };
5493 
5494   VGPRBlocks = GetNumGPRBlocks(
5495       NextFreeVGPR,
5496       IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
5497   SGPRBlocks =
5498       GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
5499 
5500   return false;
5501 }
5502 
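// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block and emit the resulting
// kernel descriptor.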
5503 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5504   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5505     return TokError("directive only supported for amdgcn architecture");
5506 
5507   if (!isHsaAbi(getSTI()))
5508     return TokError("directive only supported for amdhsa OS");
5509 
5510   StringRef KernelName;
5511   if (getParser().parseIdentifier(KernelName))
5512     return true;
5513 
5514   AMDGPU::MCKernelDescriptor KD =
5515       AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5516           &getSTI(), getContext());
5517 
5518   StringSet<> Seen;
5519 
5520   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5521 
5522   const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5523   const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5524 
5525   SMRange VGPRRange;
5526   const MCExpr *NextFreeVGPR = ZeroExpr;
5527   const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5528   uint64_t SharedVGPRCount = 0;
5529   uint64_t PreloadLength = 0;
5530   uint64_t PreloadOffset = 0;
5531   SMRange SGPRRange;
5532   const MCExpr *NextFreeSGPR = ZeroExpr;
5533 
5534   // Count the number of user SGPRs implied by the enabled feature bits.
5535   unsigned ImpliedUserSGPRCount = 0;
5536 
5537   // Track if the asm explicitly contains the directive for the user SGPR
5538   // count.
5539   std::optional<unsigned> ExplicitUserSGPRCount;
5540   const MCExpr *ReserveVCC = OneExpr;
5541   const MCExpr *ReserveFlatScr = OneExpr;
5542   std::optional<bool> EnableWavefrontSize32;
5543 
5544   while (true) {
5545     while (trySkipToken(AsmToken::EndOfStatement));
5546 
5547     StringRef ID;
5548     SMRange IDRange = getTok().getLocRange();
5549     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5550       return true;
5551 
5552     if (ID == ".end_amdhsa_kernel")
5553       break;
5554 
5555     if (!Seen.insert(ID).second)
5556       return TokError(".amdhsa_ directives cannot be repeated");
5557 
5558     SMLoc ValStart = getLoc();
5559     const MCExpr *ExprVal;
5560     if (getParser().parseExpression(ExprVal))
5561       return true;
5562     SMLoc ValEnd = getLoc();
5563     SMRange ValRange = SMRange(ValStart, ValEnd);
5564 
5565     int64_t IVal = 0;
5566     uint64_t Val = IVal;
5567     bool EvaluatableExpr;
5568     if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
5569       if (IVal < 0)
5570         return OutOfRangeError(ValRange);
5571       Val = IVal;
5572     }
5573 
5574 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5575   if (!isUInt<ENTRY##_WIDTH>(Val))                                             \
5576     return OutOfRangeError(RANGE);                                             \
5577   AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY,     \
5578                                        getContext());
5579 
5580 // Some fields use the parsed value immediately, which requires the expression
5581 // to be resolvable.
5582 #define EXPR_RESOLVE_OR_ERROR(RESOLVED)                                        \
5583   if (!(RESOLVED))                                                             \
5584     return Error(IDRange.Start, "directive should have resolvable expression", \
5585                  IDRange);
5586 
5587     if (ID == ".amdhsa_group_segment_fixed_size") {
5588       if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5589                   CHAR_BIT>(Val))
5590         return OutOfRangeError(ValRange);
5591       KD.group_segment_fixed_size = ExprVal;
5592     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5593       if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5594                   CHAR_BIT>(Val))
5595         return OutOfRangeError(ValRange);
5596       KD.private_segment_fixed_size = ExprVal;
5597     } else if (ID == ".amdhsa_kernarg_size") {
5598       if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
5599         return OutOfRangeError(ValRange);
5600       KD.kernarg_size = ExprVal;
5601     } else if (ID == ".amdhsa_user_sgpr_count") {
5602       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5603       ExplicitUserSGPRCount = Val;
5604     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5605       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5606       if (hasArchitectedFlatScratch())
5607         return Error(IDRange.Start,
5608                      "directive is not supported with architected flat scratch",
5609                      IDRange);
5610       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5611                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5612                        ExprVal, ValRange);
5613       if (Val)
5614         ImpliedUserSGPRCount += 4;
5615     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5616       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5617       if (!hasKernargPreload())
5618         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5619 
5620       if (Val > getMaxNumUserSGPRs())
5621         return OutOfRangeError(ValRange);
5622       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5623                        ValRange);
5624       if (Val) {
5625         ImpliedUserSGPRCount += Val;
5626         PreloadLength = Val;
5627       }
5628     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5629       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5630       if (!hasKernargPreload())
5631         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5632 
5633       if (Val >= 1024)
5634         return OutOfRangeError(ValRange);
5635       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5636                        ValRange);
5637       if (Val)
5638         PreloadOffset = Val;
5639     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5640       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5641       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5642                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5643                        ValRange);
5644       if (Val)
5645         ImpliedUserSGPRCount += 2;
5646     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5647       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5648       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5649                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5650                        ValRange);
5651       if (Val)
5652         ImpliedUserSGPRCount += 2;
5653     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5654       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5655       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5656                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5657                        ExprVal, ValRange);
5658       if (Val)
5659         ImpliedUserSGPRCount += 2;
5660     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5661       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5662       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5663                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5664                        ValRange);
5665       if (Val)
5666         ImpliedUserSGPRCount += 2;
5667     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5668       if (hasArchitectedFlatScratch())
5669         return Error(IDRange.Start,
5670                      "directive is not supported with architected flat scratch",
5671                      IDRange);
5672       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5673       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5674                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5675                        ExprVal, ValRange);
5676       if (Val)
5677         ImpliedUserSGPRCount += 2;
5678     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5679       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5680       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5681                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5682                        ExprVal, ValRange);
5683       if (Val)
5684         ImpliedUserSGPRCount += 1;
5685     } else if (ID == ".amdhsa_wavefront_size32") {
5686       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5687       if (IVersion.Major < 10)
5688         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5689       EnableWavefrontSize32 = Val;
5690       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5691                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5692                        ValRange);
5693     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5694       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5695                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5696                        ValRange);
5697     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5698       if (hasArchitectedFlatScratch())
5699         return Error(IDRange.Start,
5700                      "directive is not supported with architected flat scratch",
5701                      IDRange);
5702       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5703                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5704                        ValRange);
5705     } else if (ID == ".amdhsa_enable_private_segment") {
5706       if (!hasArchitectedFlatScratch())
5707         return Error(
5708             IDRange.Start,
5709             "directive is not supported without architected flat scratch",
5710             IDRange);
5711       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5712                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5713                        ValRange);
5714     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5715       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5716                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5717                        ValRange);
5718     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5719       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5720                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5721                        ValRange);
5722     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5723       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5724                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5725                        ValRange);
5726     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5727       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5728                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5729                        ValRange);
5730     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5731       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5732                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5733                        ValRange);
5734     } else if (ID == ".amdhsa_next_free_vgpr") {
5735       VGPRRange = ValRange;
5736       NextFreeVGPR = ExprVal;
5737     } else if (ID == ".amdhsa_next_free_sgpr") {
5738       SGPRRange = ValRange;
5739       NextFreeSGPR = ExprVal;
5740     } else if (ID == ".amdhsa_accum_offset") {
5741       if (!isGFX90A())
5742         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5743       AccumOffset = ExprVal;
5744     } else if (ID == ".amdhsa_reserve_vcc") {
5745       if (EvaluatableExpr && !isUInt<1>(Val))
5746         return OutOfRangeError(ValRange);
5747       ReserveVCC = ExprVal;
5748     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5749       if (IVersion.Major < 7)
5750         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5751       if (hasArchitectedFlatScratch())
5752         return Error(IDRange.Start,
5753                      "directive is not supported with architected flat scratch",
5754                      IDRange);
5755       if (EvaluatableExpr && !isUInt<1>(Val))
5756         return OutOfRangeError(ValRange);
5757       ReserveFlatScr = ExprVal;
5758     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5759       if (IVersion.Major < 8)
5760         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5761       if (!isUInt<1>(Val))
5762         return OutOfRangeError(ValRange);
5763       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5764         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5765                                  IDRange);
5766     } else if (ID == ".amdhsa_float_round_mode_32") {
5767       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5768                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5769                        ValRange);
5770     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5771       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5772                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5773                        ValRange);
5774     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5775       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5776                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5777                        ValRange);
5778     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5779       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5780                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5781                        ValRange);
5782     } else if (ID == ".amdhsa_dx10_clamp") {
5783       if (IVersion.Major >= 12)
5784         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5785       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5786                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5787                        ValRange);
5788     } else if (ID == ".amdhsa_ieee_mode") {
5789       if (IVersion.Major >= 12)
5790         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5791       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5792                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5793                        ValRange);
5794     } else if (ID == ".amdhsa_fp16_overflow") {
5795       if (IVersion.Major < 9)
5796         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5797       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5798                        COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5799                        ValRange);
5800     } else if (ID == ".amdhsa_tg_split") {
5801       if (!isGFX90A())
5802         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5803       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5804                        ExprVal, ValRange);
5805     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5806       if (IVersion.Major < 10)
5807         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5808       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5809                        COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5810                        ValRange);
5811     } else if (ID == ".amdhsa_memory_ordered") {
5812       if (IVersion.Major < 10)
5813         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5814       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5815                        COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5816                        ValRange);
5817     } else if (ID == ".amdhsa_forward_progress") {
5818       if (IVersion.Major < 10)
5819         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5820       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5821                        COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5822                        ValRange);
5823     } else if (ID == ".amdhsa_shared_vgpr_count") {
5824       EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5825       if (IVersion.Major < 10 || IVersion.Major >= 12)
5826         return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5827                      IDRange);
5828       SharedVGPRCount = Val;
5829       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5830                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5831                        ValRange);
5832     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5833       PARSE_BITS_ENTRY(
5834           KD.compute_pgm_rsrc2,
5835           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5836           ExprVal, ValRange);
5837     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5838       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5839                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5840                        ExprVal, ValRange);
5841     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5842       PARSE_BITS_ENTRY(
5843           KD.compute_pgm_rsrc2,
5844           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5845           ExprVal, ValRange);
5846     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5847       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5848                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5849                        ExprVal, ValRange);
5850     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5851       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5852                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5853                        ExprVal, ValRange);
5854     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5855       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5856                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5857                        ExprVal, ValRange);
5858     } else if (ID == ".amdhsa_exception_int_div_zero") {
5859       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5860                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5861                        ExprVal, ValRange);
5862     } else if (ID == ".amdhsa_round_robin_scheduling") {
5863       if (IVersion.Major < 12)
5864         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5865       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5866                        COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5867                        ValRange);
5868     } else {
5869       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5870     }
5871 
5872 #undef PARSE_BITS_ENTRY
5873   }
5874 
5875   if (!Seen.contains(".amdhsa_next_free_vgpr"))
5876     return TokError(".amdhsa_next_free_vgpr directive is required");
5877 
5878   if (!Seen.contains(".amdhsa_next_free_sgpr"))
5879     return TokError(".amdhsa_next_free_sgpr directive is required");
5880 
5881   unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
5882 
5883   // Consider the case where the total number of user SGPRs, including trailing
5884   // allocated preload SGPRs, is greater than the number of explicitly
5885   // referenced SGPRs.
5886   if (PreloadLength) {
5887     MCContext &Ctx = getContext();
5888     NextFreeSGPR = AMDGPUMCExpr::createMax(
5889         {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
5890   }
5891 
5892   const MCExpr *VGPRBlocks;
5893   const MCExpr *SGPRBlocks;
5894   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5895                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5896                          EnableWavefrontSize32, NextFreeVGPR,
5897                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5898                          SGPRBlocks))
5899     return true;
5900 
5901   int64_t EvaluatedVGPRBlocks;
5902   bool VGPRBlocksEvaluatable =
5903       VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
5904   if (VGPRBlocksEvaluatable &&
5905       !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5906           static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5907     return OutOfRangeError(VGPRRange);
5908   }
5909   AMDGPU::MCKernelDescriptor::bits_set(
5910       KD.compute_pgm_rsrc1, VGPRBlocks,
5911       COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5912       COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
5913 
5914   int64_t EvaluatedSGPRBlocks;
5915   if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
5916       !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5917           static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5918     return OutOfRangeError(SGPRRange);
5919   AMDGPU::MCKernelDescriptor::bits_set(
5920       KD.compute_pgm_rsrc1, SGPRBlocks,
5921       COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5922       COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
5923 
5924   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5925     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
5926                     "enabled user SGPRs");
5927 
5928   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5929     return TokError("too many user SGPRs enabled");
5930   AMDGPU::MCKernelDescriptor::bits_set(
5931       KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()),
5932       COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5933       COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext());
5934 
5935   int64_t IVal = 0;
5936   if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
5937     return TokError("Kernarg size should be resolvable");
5938   uint64_t kernarg_size = IVal;
5939   if (PreloadLength && kernarg_size &&
5940       (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5941     return TokError("kernarg preload length + offset is larger than the "
5942                     "kernarg segment size");
5943 
5944   if (isGFX90A()) {
5945     if (!Seen.contains(".amdhsa_accum_offset"))
5946       return TokError(".amdhsa_accum_offset directive is required");
5947     int64_t EvaluatedAccum;
5948     bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
5949     uint64_t UEvaluatedAccum = EvaluatedAccum;
5950     if (AccumEvaluatable &&
5951         (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
5952       return TokError("accum_offset should be in range [4..256] in "
5953                       "increments of 4");
5954 
5955     int64_t EvaluatedNumVGPR;
5956     if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
5957         AccumEvaluatable &&
5958         UEvaluatedAccum >
5959             alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
5960       return TokError("accum_offset exceeds total VGPR allocation");
5961     const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
5962         MCBinaryExpr::createDiv(
5963             AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
5964         MCConstantExpr::create(1, getContext()), getContext());
5965     MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
5966                                  COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
5967                                  COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5968                                  getContext());
5969   }
5970 
5971   if (IVersion.Major >= 10 && IVersion.Major < 12) {
5972     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5973     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5974       return TokError("shared_vgpr_count directive not valid on "
5975                       "wavefront size 32");
5976     }
5977 
5978     if (VGPRBlocksEvaluatable &&
5979         (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
5980          63)) {
5981       return TokError("shared_vgpr_count*2 + "
5982                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5983                       "exceed 63");
5984     }
5985   }
5986 
5987   getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5988                                                  NextFreeVGPR, NextFreeSGPR,
5989                                                  ReserveVCC, ReserveFlatScr);
5990   return false;
5991 }
5992 
5993 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5994   uint32_t Version;
5995   if (ParseAsAbsoluteExpression(Version))
5996     return true;
5997 
5998   getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5999   return false;
6000 }
6001 
6002 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6003                                                AMDGPUMCKernelCodeT &C) {
6004   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6005   // assembly for backwards compatibility.
6006   if (ID == "max_scratch_backing_memory_byte_size") {
6007     Parser.eatToEndOfStatement();
6008     return false;
6009   }
6010 
6011   SmallString<40> ErrStr;
6012   raw_svector_ostream Err(ErrStr);
6013   if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6014     return TokError(Err.str());
6015   }
6016   Lex();
6017 
6018   if (ID == "enable_wavefront_size32") {
6019     if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6020       if (!isGFX10Plus())
6021         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6022       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6023         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6024     } else {
6025       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6026         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6027     }
6028   }
6029 
6030   if (ID == "wavefront_size") {
6031     if (C.wavefront_size == 5) {
6032       if (!isGFX10Plus())
6033         return TokError("wavefront_size=5 is only allowed on GFX10+");
6034       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6035         return TokError("wavefront_size=5 requires +WavefrontSize32");
6036     } else if (C.wavefront_size == 6) {
6037       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6038         return TokError("wavefront_size=6 requires +WavefrontSize64");
6039     }
6040   }
6041 
6042   return false;
6043 }
6044 
6045 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6046   AMDGPUMCKernelCodeT KernelCode;
6047   KernelCode.initDefault(&getSTI(), getContext());
6048 
6049   while (true) {
6050     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
6051     // will set the current token to EndOfStatement.
6052     while (trySkipToken(AsmToken::EndOfStatement));
6053 
6054     StringRef ID;
6055     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6056       return true;
6057 
6058     if (ID == ".end_amd_kernel_code_t")
6059       break;
6060 
6061     if (ParseAMDKernelCodeTValue(ID, KernelCode))
6062       return true;
6063   }
6064 
6065   KernelCode.validate(&getSTI(), getContext());
6066   getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6067 
6068   return false;
6069 }
6070 
6071 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6072   StringRef KernelName;
6073   if (!parseId(KernelName, "expected symbol name"))
6074     return true;
6075 
6076   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6077                                            ELF::STT_AMDGPU_HSA_KERNEL);
6078 
6079   KernelScope.initialize(getContext());
6080   return false;
6081 }
6082 
6083 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6084   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
6085     return Error(getLoc(),
6086                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
6087                  "architectures");
6088   }
6089 
6090   auto TargetIDDirective = getLexer().getTok().getStringContents();
6091   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6092     return Error(getParser().getTok().getLoc(), "target id must match options");
6093 
6094   getTargetStreamer().EmitISAVersion();
6095   Lex();
6096 
6097   return false;
6098 }
6099 
6100 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6101   assert(isHsaAbi(getSTI()));
6102 
6103   std::string HSAMetadataString;
6104   if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6105                           HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6106     return true;
6107 
6108   if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6109     return Error(getLoc(), "invalid HSA metadata");
6110 
6111   return false;
6112 }
6113 
6114 /// Common code to parse out a block of text (typically YAML) between start and
6115 /// end directives.
6116 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6117                                           const char *AssemblerDirectiveEnd,
6118                                           std::string &CollectString) {
6119 
6120   raw_string_ostream CollectStream(CollectString);
6121 
6122   getLexer().setSkipSpace(false);
6123 
6124   bool FoundEnd = false;
6125   while (!isToken(AsmToken::Eof)) {
6126     while (isToken(AsmToken::Space)) {
6127       CollectStream << getTokenStr();
6128       Lex();
6129     }
6130 
6131     if (trySkipId(AssemblerDirectiveEnd)) {
6132       FoundEnd = true;
6133       break;
6134     }
6135 
6136     CollectStream << Parser.parseStringToEndOfStatement()
6137                   << getContext().getAsmInfo()->getSeparatorString();
6138 
6139     Parser.eatToEndOfStatement();
6140   }
6141 
6142   getLexer().setSkipSpace(true);
6143 
6144   if (isToken(AsmToken::Eof) && !FoundEnd) {
6145     return TokError(Twine("expected directive ") +
6146                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6147   }
6148 
6149   return false;
6150 }
6151 
6152 /// Parse the assembler directive for new MsgPack-format PAL metadata.
6153 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6154   std::string String;
6155   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6156                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
6157     return true;
6158 
6159   auto *PALMetadata = getTargetStreamer().getPALMetadata();
6160   if (!PALMetadata->setFromString(String))
6161     return Error(getLoc(), "invalid PAL metadata");
6162   return false;
6163 }
6164 
6165 /// Parse the assembler directive for old linear-format PAL metadata.
6166 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6167   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6168     return Error(getLoc(),
6169                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6170                  "not available on non-amdpal OSes")).str());
6171   }
6172 
6173   auto *PALMetadata = getTargetStreamer().getPALMetadata();
6174   PALMetadata->setLegacy();
6175   for (;;) {
6176     uint32_t Key, Value;
6177     if (ParseAsAbsoluteExpression(Key)) {
6178       return TokError(Twine("invalid value in ") +
6179                       Twine(PALMD::AssemblerDirective));
6180     }
6181     if (!trySkipToken(AsmToken::Comma)) {
6182       return TokError(Twine("expected an even number of values in ") +
6183                       Twine(PALMD::AssemblerDirective));
6184     }
6185     if (ParseAsAbsoluteExpression(Value)) {
6186       return TokError(Twine("invalid value in ") +
6187                       Twine(PALMD::AssemblerDirective));
6188     }
6189     PALMetadata->setRegister(Key, Value);
6190     if (!trySkipToken(AsmToken::Comma))
6191       break;
6192   }
6193   return false;
6194 }
6195 
6196 /// ParseDirectiveAMDGPULDS
6197 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
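///
/// A hypothetical use, assuming a symbol named lds_buf, would be:
///   .amdgpu_lds lds_buf, 512, 16
/// which reserves 512 bytes of LDS for lds_buf with 16-byte alignment
/// (alignment defaults to 4 when omitted).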
6198 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6199   if (getParser().checkForValidSection())
6200     return true;
6201 
6202   StringRef Name;
6203   SMLoc NameLoc = getLoc();
6204   if (getParser().parseIdentifier(Name))
6205     return TokError("expected identifier in directive");
6206 
6207   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6208   if (getParser().parseComma())
6209     return true;
6210 
6211   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6212 
6213   int64_t Size;
6214   SMLoc SizeLoc = getLoc();
6215   if (getParser().parseAbsoluteExpression(Size))
6216     return true;
6217   if (Size < 0)
6218     return Error(SizeLoc, "size must be non-negative");
6219   if (Size > LocalMemorySize)
6220     return Error(SizeLoc, "size is too large");
6221 
6222   int64_t Alignment = 4;
6223   if (trySkipToken(AsmToken::Comma)) {
6224     SMLoc AlignLoc = getLoc();
6225     if (getParser().parseAbsoluteExpression(Alignment))
6226       return true;
6227     if (Alignment < 0 || !isPowerOf2_64(Alignment))
6228       return Error(AlignLoc, "alignment must be a power of two");
6229 
6230     // Alignment larger than the size of LDS is possible in theory, as long
6231     // as the linker manages to place the symbol at address 0, but we do want
6232     // to make sure the alignment fits nicely into a 32-bit integer.
6233     if (Alignment >= 1u << 31)
6234       return Error(AlignLoc, "alignment is too large");
6235   }
6236 
6237   if (parseEOL())
6238     return true;
6239 
6240   Symbol->redefineIfPossible();
6241   if (!Symbol->isUndefined())
6242     return Error(NameLoc, "invalid symbol redefinition");
6243 
6244   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6245   return false;
6246 }
6247 
6248 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6249   StringRef IDVal = DirectiveID.getString();
6250 
6251   if (isHsaAbi(getSTI())) {
6252     if (IDVal == ".amdhsa_kernel")
6253      return ParseDirectiveAMDHSAKernel();
6254 
6255     if (IDVal == ".amdhsa_code_object_version")
6256       return ParseDirectiveAMDHSACodeObjectVersion();
6257 
6258     // TODO: Restructure/combine with PAL metadata directive.
6259     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
6260       return ParseDirectiveHSAMetadata();
6261   } else {
6262     if (IDVal == ".amd_kernel_code_t")
6263       return ParseDirectiveAMDKernelCodeT();
6264 
6265     if (IDVal == ".amdgpu_hsa_kernel")
6266       return ParseDirectiveAMDGPUHsaKernel();
6267 
6268     if (IDVal == ".amd_amdgpu_isa")
6269       return ParseDirectiveISAVersion();
6270 
6271     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
6272       return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6273                               Twine(" directive is "
6274                                     "not available on non-amdhsa OSes"))
6275                                  .str());
6276     }
6277   }
6278 
6279   if (IDVal == ".amdgcn_target")
6280     return ParseDirectiveAMDGCNTarget();
6281 
6282   if (IDVal == ".amdgpu_lds")
6283     return ParseDirectiveAMDGPULDS();
6284 
6285   if (IDVal == PALMD::AssemblerDirectiveBegin)
6286     return ParseDirectivePALMetadataBegin();
6287 
6288   if (IDVal == PALMD::AssemblerDirective)
6289     return ParseDirectivePALMetadata();
6290 
6291   return true;
6292 }
6293 
6294 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6295                                            MCRegister Reg) {
6296   if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
6297     return isGFX9Plus();
6298 
6299   // GFX10+ has 2 more SGPRs, 104 and 105.
6300   if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
6301     return hasSGPR104_SGPR105();
6302 
6303   switch (Reg.id()) {
6304   case SRC_SHARED_BASE_LO:
6305   case SRC_SHARED_BASE:
6306   case SRC_SHARED_LIMIT_LO:
6307   case SRC_SHARED_LIMIT:
6308   case SRC_PRIVATE_BASE_LO:
6309   case SRC_PRIVATE_BASE:
6310   case SRC_PRIVATE_LIMIT_LO:
6311   case SRC_PRIVATE_LIMIT:
6312     return isGFX9Plus();
6313   case SRC_POPS_EXITING_WAVE_ID:
6314     return isGFX9Plus() && !isGFX11Plus();
6315   case TBA:
6316   case TBA_LO:
6317   case TBA_HI:
6318   case TMA:
6319   case TMA_LO:
6320   case TMA_HI:
6321     return !isGFX9Plus();
6322   case XNACK_MASK:
6323   case XNACK_MASK_LO:
6324   case XNACK_MASK_HI:
6325     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6326   case SGPR_NULL:
6327     return isGFX10Plus();
6328   case SRC_EXECZ:
6329   case SRC_VCCZ:
6330     return !isGFX11Plus();
6331   default:
6332     break;
6333   }
6334 
6335   if (isCI())
6336     return true;
6337 
6338   if (isSI() || isGFX10Plus()) {
6339     // No flat_scr on SI.
6340     // On GFX10Plus flat scratch is not a valid register operand and can only be
6341     // accessed with s_setreg/s_getreg.
6342     switch (Reg.id()) {
6343     case FLAT_SCR:
6344     case FLAT_SCR_LO:
6345     case FLAT_SCR_HI:
6346       return false;
6347     default:
6348       return true;
6349     }
6350   }
6351 
6352   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6353   // SI/CI have.
6354   if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
6355     return hasSGPR102_SGPR103();
6356 
6357   return true;
6358 }
6359 
6360 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6361                                           StringRef Mnemonic,
6362                                           OperandMode Mode) {
6363   ParseStatus Res = parseVOPD(Operands);
6364   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6365     return Res;
6366 
6367   // Try to parse with a custom parser
6368   Res = MatchOperandParserImpl(Operands, Mnemonic);
6369 
6370   // If we successfully parsed the operand or if there was an error parsing,
6371   // we are done.
6372   //
6373   // If we are parsing after we reach EndOfStatement then this means we
6374   // are appending default values to the Operands list.  This is only done
6375   // by custom parser, so we shouldn't continue on to the generic parsing.
6376   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6377     return Res;
6378 
6379   SMLoc RBraceLoc;
6380   SMLoc LBraceLoc = getLoc();
6381   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6382     unsigned Prefix = Operands.size();
6383 
6384     for (;;) {
6385       auto Loc = getLoc();
6386       Res = parseReg(Operands);
6387       if (Res.isNoMatch())
6388         Error(Loc, "expected a register");
6389       if (!Res.isSuccess())
6390         return ParseStatus::Failure;
6391 
6392       RBraceLoc = getLoc();
6393       if (trySkipToken(AsmToken::RBrac))
6394         break;
6395 
6396       if (!skipToken(AsmToken::Comma,
6397                      "expected a comma or a closing square bracket"))
6398         return ParseStatus::Failure;
6399     }
6400 
6401     if (Operands.size() - Prefix > 1) {
6402       Operands.insert(Operands.begin() + Prefix,
6403                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6404       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6405     }
6406 
6407     return ParseStatus::Success;
6408   }
6409 
6410   return parseRegOrImm(Operands);
6411 }
6412 
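// Mnemonic suffixes force a particular encoding before matching. For example,
// "v_add_f32_e64" forces the 64-bit (VOP3) encoding and is matched as
// "v_add_f32"; "_dpp" and "_sdwa" likewise select the DPP and SDWA variants.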
6413 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6414   // Clear any forced encodings from the previous instruction.
6415   setForcedEncodingSize(0);
6416   setForcedDPP(false);
6417   setForcedSDWA(false);
6418 
6419   if (Name.ends_with("_e64_dpp")) {
6420     setForcedDPP(true);
6421     setForcedEncodingSize(64);
6422     return Name.substr(0, Name.size() - 8);
6423   }
6424   if (Name.ends_with("_e64")) {
6425     setForcedEncodingSize(64);
6426     return Name.substr(0, Name.size() - 4);
6427   }
6428   if (Name.ends_with("_e32")) {
6429     setForcedEncodingSize(32);
6430     return Name.substr(0, Name.size() - 4);
6431   }
6432   if (Name.ends_with("_dpp")) {
6433     setForcedDPP(true);
6434     return Name.substr(0, Name.size() - 4);
6435   }
6436   if (Name.ends_with("_sdwa")) {
6437     setForcedSDWA(true);
6438     return Name.substr(0, Name.size() - 5);
6439   }
6440   return Name;
6441 }
6442 
6443 static void applyMnemonicAliases(StringRef &Mnemonic,
6444                                  const FeatureBitset &Features,
6445                                  unsigned VariantID);
6446 
6447 bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6448                                        StringRef Name, SMLoc NameLoc,
6449                                        OperandVector &Operands) {
6450   // Add the instruction mnemonic
6451   Name = parseMnemonicSuffix(Name);
6452 
6453   // If the target architecture uses MnemonicAlias, call it here to parse
6454   // operands correctly.
6455   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6456 
6457   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6458 
6459   bool IsMIMG = Name.starts_with("image_");
6460 
6461   while (!trySkipToken(AsmToken::EndOfStatement)) {
6462     OperandMode Mode = OperandMode_Default;
6463     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6464       Mode = OperandMode_NSA;
6465     ParseStatus Res = parseOperand(Operands, Name, Mode);
6466 
6467     if (!Res.isSuccess()) {
6468       checkUnsupportedInstruction(Name, NameLoc);
6469       if (!Parser.hasPendingError()) {
6470         // FIXME: use real operand location rather than the current location.
6471         StringRef Msg = Res.isFailure() ? "failed parsing operand."
6472                                         : "not a valid operand.";
6473         Error(getLoc(), Msg);
6474       }
6475       while (!trySkipToken(AsmToken::EndOfStatement)) {
6476         lex();
6477       }
6478       return true;
6479     }
6480 
6481     // Eat the comma or space if there is one.
6482     trySkipToken(AsmToken::Comma);
6483   }
6484 
6485   return false;
6486 }
6487 
6488 //===----------------------------------------------------------------------===//
6489 // Utility functions
6490 //===----------------------------------------------------------------------===//
6491 
6492 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6493                                           OperandVector &Operands) {
6494   SMLoc S = getLoc();
6495   if (!trySkipId(Name))
6496     return ParseStatus::NoMatch;
6497 
6498   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6499   return ParseStatus::Success;
6500 }
6501 
6502 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6503                                                 int64_t &IntVal) {
6504 
6505   if (!trySkipId(Prefix, AsmToken::Colon))
6506     return ParseStatus::NoMatch;
6507 
6508   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6509 }
6510 
6511 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6512     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6513     std::function<bool(int64_t &)> ConvertResult) {
6514   SMLoc S = getLoc();
6515   int64_t Value = 0;
6516 
6517   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6518   if (!Res.isSuccess())
6519     return Res;
6520 
6521   if (ConvertResult && !ConvertResult(Value)) {
6522     Error(S, "invalid " + StringRef(Prefix) + " value.");
6523   }
6524 
6525   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6526   return ParseStatus::Success;
6527 }
6528 
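// Parses a prefixed array of 0/1 flags such as "neg:[1,0,0]" (up to four
// elements) and packs them into a bitmask, with bit I holding element I.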
6529 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6530     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6531     bool (*ConvertResult)(int64_t &)) {
6532   SMLoc S = getLoc();
6533   if (!trySkipId(Prefix, AsmToken::Colon))
6534     return ParseStatus::NoMatch;
6535 
6536   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6537     return ParseStatus::Failure;
6538 
6539   unsigned Val = 0;
6540   const unsigned MaxSize = 4;
6541 
6542   // FIXME: How to verify the number of elements matches the number of src
6543   // operands?
6544   for (int I = 0; ; ++I) {
6545     int64_t Op;
6546     SMLoc Loc = getLoc();
6547     if (!parseExpr(Op))
6548       return ParseStatus::Failure;
6549 
6550     if (Op != 0 && Op != 1)
6551       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6552 
6553     Val |= (Op << I);
6554 
6555     if (trySkipToken(AsmToken::RBrac))
6556       break;
6557 
6558     if (I + 1 == MaxSize)
6559       return Error(getLoc(), "expected a closing square bracket");
6560 
6561     if (!skipToken(AsmToken::Comma, "expected a comma"))
6562       return ParseStatus::Failure;
6563   }
6564 
6565   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6566   return ParseStatus::Success;
6567 }
6568 
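// Parses a boolean modifier given by name, e.g. "r128" sets the bit while
// "nor128" (the name with a "no" prefix) clears it.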
6569 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6570                                            OperandVector &Operands,
6571                                            AMDGPUOperand::ImmTy ImmTy) {
6572   int64_t Bit;
6573   SMLoc S = getLoc();
6574 
6575   if (trySkipId(Name)) {
6576     Bit = 1;
6577   } else if (trySkipId("no", Name)) {
6578     Bit = 0;
6579   } else {
6580     return ParseStatus::NoMatch;
6581   }
6582 
6583   if (Name == "r128" && !hasMIMG_R128())
6584     return Error(S, "r128 modifier is not supported on this GPU");
6585   if (Name == "a16" && !hasA16())
6586     return Error(S, "a16 modifier is not supported on this GPU");
6587 
6588   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6589     ImmTy = AMDGPUOperand::ImmTyR128A16;
6590 
6591   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6592   return ParseStatus::Success;
6593 }
6594 
6595 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6596                                       bool &Disabling) const {
6597   Disabling = Id.consume_front("no");
6598 
6599   if (isGFX940() && !Mnemo.starts_with("s_")) {
6600     return StringSwitch<unsigned>(Id)
6601         .Case("nt", AMDGPU::CPol::NT)
6602         .Case("sc0", AMDGPU::CPol::SC0)
6603         .Case("sc1", AMDGPU::CPol::SC1)
6604         .Default(0);
6605   }
6606 
6607   return StringSwitch<unsigned>(Id)
6608       .Case("dlc", AMDGPU::CPol::DLC)
6609       .Case("glc", AMDGPU::CPol::GLC)
6610       .Case("scc", AMDGPU::CPol::SCC)
6611       .Case("slc", AMDGPU::CPol::SLC)
6612       .Default(0);
6613 }
6614 
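// Cache policy syntax differs by generation. Pre-GFX12 targets use flag-style
// modifiers such as "glc slc dlc" (or "noglc" etc. to disable), while GFX12+
// uses "th:" and "scope:" fields; a plausible GFX12 example (mnemonic shown
// for illustration only) is:
//   global_load_b32 v0, v[0:1], off th:TH_LOAD_NT scope:SCOPE_SYS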
6615 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6616   if (isGFX12Plus()) {
6617     SMLoc StringLoc = getLoc();
6618 
6619     int64_t CPolVal = 0;
6620     ParseStatus ResTH = ParseStatus::NoMatch;
6621     ParseStatus ResScope = ParseStatus::NoMatch;
6622 
6623     for (;;) {
6624       if (ResTH.isNoMatch()) {
6625         int64_t TH;
6626         ResTH = parseTH(Operands, TH);
6627         if (ResTH.isFailure())
6628           return ResTH;
6629         if (ResTH.isSuccess()) {
6630           CPolVal |= TH;
6631           continue;
6632         }
6633       }
6634 
6635       if (ResScope.isNoMatch()) {
6636         int64_t Scope;
6637         ResScope = parseScope(Operands, Scope);
6638         if (ResScope.isFailure())
6639           return ResScope;
6640         if (ResScope.isSuccess()) {
6641           CPolVal |= Scope;
6642           continue;
6643         }
6644       }
6645 
6646       break;
6647     }
6648 
6649     if (ResTH.isNoMatch() && ResScope.isNoMatch())
6650       return ParseStatus::NoMatch;
6651 
6652     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6653                                                 AMDGPUOperand::ImmTyCPol));
6654     return ParseStatus::Success;
6655   }
6656 
6657   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6658   SMLoc OpLoc = getLoc();
6659   unsigned Enabled = 0, Seen = 0;
6660   for (;;) {
6661     SMLoc S = getLoc();
6662     bool Disabling;
6663     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6664     if (!CPol)
6665       break;
6666 
6667     lex();
6668 
6669     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6670       return Error(S, "dlc modifier is not supported on this GPU");
6671 
6672     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6673       return Error(S, "scc modifier is not supported on this GPU");
6674 
6675     if (Seen & CPol)
6676       return Error(S, "duplicate cache policy modifier");
6677 
6678     if (!Disabling)
6679       Enabled |= CPol;
6680 
6681     Seen |= CPol;
6682   }
6683 
6684   if (!Seen)
6685     return ParseStatus::NoMatch;
6686 
6687   Operands.push_back(
6688       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6689   return ParseStatus::Success;
6690 }
6691 
6692 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6693                                         int64_t &Scope) {
6694   static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
6695                                     CPol::SCOPE_DEV, CPol::SCOPE_SYS};
6696 
6697   ParseStatus Res = parseStringOrIntWithPrefix(
6698       Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
6699       Scope);
6700 
6701   if (Res.isSuccess())
6702     Scope = Scopes[Scope];
6703 
6704   return Res;
6705 }
6706 
6707 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6708   TH = AMDGPU::CPol::TH_RT; // default
6709 
6710   StringRef Value;
6711   SMLoc StringLoc;
6712   ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6713   if (!Res.isSuccess())
6714     return Res;
6715 
6716   if (Value == "TH_DEFAULT")
6717     TH = AMDGPU::CPol::TH_RT;
6718   else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6719            Value == "TH_LOAD_NT_WB") {
6720     return Error(StringLoc, "invalid th value");
6721   } else if (Value.consume_front("TH_ATOMIC_")) {
6722     TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6723   } else if (Value.consume_front("TH_LOAD_")) {
6724     TH = AMDGPU::CPol::TH_TYPE_LOAD;
6725   } else if (Value.consume_front("TH_STORE_")) {
6726     TH = AMDGPU::CPol::TH_TYPE_STORE;
6727   } else {
6728     return Error(StringLoc, "invalid th value");
6729   }
6730 
6731   if (Value == "BYPASS")
6732     TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6733 
6734   if (TH != 0) {
6735     if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6736       TH |= StringSwitch<int64_t>(Value)
6737                 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6738                 .Case("RT", AMDGPU::CPol::TH_RT)
6739                 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6740                 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6741                 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6742                                        AMDGPU::CPol::TH_ATOMIC_RETURN)
6743                 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6744                 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6745                                         AMDGPU::CPol::TH_ATOMIC_NT)
6746                 .Default(0xffffffff);
6747     else
6748       TH |= StringSwitch<int64_t>(Value)
6749                 .Case("RT", AMDGPU::CPol::TH_RT)
6750                 .Case("NT", AMDGPU::CPol::TH_NT)
6751                 .Case("HT", AMDGPU::CPol::TH_HT)
6752                 .Case("LU", AMDGPU::CPol::TH_LU)
6753                 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6754                 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6755                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6756                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6757                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6758                 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6759                 .Default(0xffffffff);
6760   }
6761 
6762   if (TH == 0xffffffff)
6763     return Error(StringLoc, "invalid th value");
6764 
6765   return ParseStatus::Success;
6766 }
6767 
6768 static void addOptionalImmOperand(
6769   MCInst& Inst, const OperandVector& Operands,
6770   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6771   AMDGPUOperand::ImmTy ImmT,
6772   int64_t Default = 0) {
6773   auto i = OptionalIdx.find(ImmT);
6774   if (i != OptionalIdx.end()) {
6775     unsigned Idx = i->second;
6776     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6777   } else {
6778     Inst.addOperand(MCOperand::createImm(Default));
6779   }
6780 }
6781 
6782 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6783                                                    StringRef &Value,
6784                                                    SMLoc &StringLoc) {
6785   if (!trySkipId(Prefix, AsmToken::Colon))
6786     return ParseStatus::NoMatch;
6787 
6788   StringLoc = getLoc();
6789   return parseId(Value, "expected an identifier") ? ParseStatus::Success
6790                                                   : ParseStatus::Failure;
6791 }
6792 
6793 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6794     OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
6795     int64_t &IntVal) {
6796   if (!trySkipId(Name, AsmToken::Colon))
6797     return ParseStatus::NoMatch;
6798 
6799   SMLoc StringLoc = getLoc();
6800 
6801   StringRef Value;
6802   if (isToken(AsmToken::Identifier)) {
6803     Value = getTokenStr();
6804     lex();
6805 
6806     for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
6807       if (Value == Ids[IntVal])
6808         break;
6809   } else if (!parseExpr(IntVal))
6810     return ParseStatus::Failure;
6811 
6812   if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
6813     return Error(StringLoc, "invalid " + Twine(Name) + " value");
6814 
6815   return ParseStatus::Success;
6816 }
6817 
6818 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6819     OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
6820     AMDGPUOperand::ImmTy Type) {
6821   SMLoc S = getLoc();
6822   int64_t IntVal;
6823 
6824   ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
6825   if (Res.isSuccess())
6826     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
6827 
6828   return Res;
6829 }
6830 
6831 //===----------------------------------------------------------------------===//
6832 // MTBUF format
6833 //===----------------------------------------------------------------------===//
6834 
6835 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6836                                   int64_t MaxVal,
6837                                   int64_t &Fmt) {
6838   int64_t Val;
6839   SMLoc Loc = getLoc();
6840 
6841   auto Res = parseIntWithPrefix(Pref, Val);
6842   if (Res.isFailure())
6843     return false;
6844   if (Res.isNoMatch())
6845     return true;
6846 
6847   if (Val < 0 || Val > MaxVal) {
6848     Error(Loc, Twine("out of range ", StringRef(Pref)));
6849     return false;
6850   }
6851 
6852   Fmt = Val;
6853   return true;
6854 }
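// Parses an "index_key:N" operand; 16-bit keys accept 0..1 and 8-bit keys
// accept 0..3, matching the range checks below.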
6855 
6856 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6857                                               AMDGPUOperand::ImmTy ImmTy) {
6858   const char *Pref = "index_key";
6859   int64_t ImmVal = 0;
6860   SMLoc Loc = getLoc();
6861   auto Res = parseIntWithPrefix(Pref, ImmVal);
6862   if (!Res.isSuccess())
6863     return Res;
6864 
6865   if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6866     return Error(Loc, Twine("out of range ", StringRef(Pref)));
6867 
6868   if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6869     return Error(Loc, Twine("out of range ", StringRef(Pref)));
6870 
6871   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6872   return ParseStatus::Success;
6873 }
6874 
6875 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6876   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6877 }
6878 
6879 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6880   return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6881 }
6882 
6883 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6884 // values to live in a joint format operand in the MCInst encoding.
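// For example, "dfmt:1, nfmt:2" (either field may be omitted, and they may
// appear in either order) is folded into one immediate via encodeDfmtNfmt.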
6885 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6886   using namespace llvm::AMDGPU::MTBUFFormat;
6887 
6888   int64_t Dfmt = DFMT_UNDEF;
6889   int64_t Nfmt = NFMT_UNDEF;
6890 
6891   // dfmt and nfmt can appear in either order, and each is optional.
6892   for (int I = 0; I < 2; ++I) {
6893     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6894       return ParseStatus::Failure;
6895 
6896     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6897       return ParseStatus::Failure;
6898 
6899     // Skip optional comma between dfmt/nfmt
6900     // but guard against 2 commas following each other.
6901     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6902         !peekToken().is(AsmToken::Comma)) {
6903       trySkipToken(AsmToken::Comma);
6904     }
6905   }
6906 
6907   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6908     return ParseStatus::NoMatch;
6909 
6910   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6911   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6912 
6913   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6914   return ParseStatus::Success;
6915 }
6916 
6917 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6918   using namespace llvm::AMDGPU::MTBUFFormat;
6919 
6920   int64_t Fmt = UFMT_UNDEF;
6921 
6922   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6923     return ParseStatus::Failure;
6924 
6925   if (Fmt == UFMT_UNDEF)
6926     return ParseStatus::NoMatch;
6927 
6928   Format = Fmt;
6929   return ParseStatus::Success;
6930 }
6931 
6932 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6933                                     int64_t &Nfmt,
6934                                     StringRef FormatStr,
6935                                     SMLoc Loc) {
6936   using namespace llvm::AMDGPU::MTBUFFormat;
6937   int64_t Format;
6938 
6939   Format = getDfmt(FormatStr);
6940   if (Format != DFMT_UNDEF) {
6941     Dfmt = Format;
6942     return true;
6943   }
6944 
6945   Format = getNfmt(FormatStr, getSTI());
6946   if (Format != NFMT_UNDEF) {
6947     Nfmt = Format;
6948     return true;
6949   }
6950 
6951   Error(Loc, "unsupported format");
6952   return false;
6953 }
6954 
6955 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6956                                                       SMLoc FormatLoc,
6957                                                       int64_t &Format) {
6958   using namespace llvm::AMDGPU::MTBUFFormat;
6959 
6960   int64_t Dfmt = DFMT_UNDEF;
6961   int64_t Nfmt = NFMT_UNDEF;
6962   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6963     return ParseStatus::Failure;
6964 
6965   if (trySkipToken(AsmToken::Comma)) {
6966     StringRef Str;
6967     SMLoc Loc = getLoc();
6968     if (!parseId(Str, "expected a format string") ||
6969         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6970       return ParseStatus::Failure;
6971     if (Dfmt == DFMT_UNDEF)
6972       return Error(Loc, "duplicate numeric format");
6973     if (Nfmt == NFMT_UNDEF)
6974       return Error(Loc, "duplicate data format");
6975   }
6976 
6977   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6978   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6979 
6980   if (isGFX10Plus()) {
6981     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6982     if (Ufmt == UFMT_UNDEF)
6983       return Error(FormatLoc, "unsupported format");
6984     Format = Ufmt;
6985   } else {
6986     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6987   }
6988 
6989   return ParseStatus::Success;
6990 }
6991 
6992 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6993                                                         SMLoc Loc,
6994                                                         int64_t &Format) {
6995   using namespace llvm::AMDGPU::MTBUFFormat;
6996 
6997   auto Id = getUnifiedFormat(FormatStr, getSTI());
6998   if (Id == UFMT_UNDEF)
6999     return ParseStatus::NoMatch;
7000 
7001   if (!isGFX10Plus())
7002     return Error(Loc, "unified format is not supported on this GPU");
7003 
7004   Format = Id;
7005   return ParseStatus::Success;
7006 }
7007 
7008 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7009   using namespace llvm::AMDGPU::MTBUFFormat;
7010   SMLoc Loc = getLoc();
7011 
7012   if (!parseExpr(Format))
7013     return ParseStatus::Failure;
7014   if (!isValidFormatEncoding(Format, getSTI()))
7015     return Error(Loc, "out of range format");
7016 
7017   return ParseStatus::Success;
7018 }
7019 
7020 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7021   using namespace llvm::AMDGPU::MTBUFFormat;
7022 
7023   if (!trySkipId("format", AsmToken::Colon))
7024     return ParseStatus::NoMatch;
7025 
7026   if (trySkipToken(AsmToken::LBrac)) {
7027     StringRef FormatStr;
7028     SMLoc Loc = getLoc();
7029     if (!parseId(FormatStr, "expected a format string"))
7030       return ParseStatus::Failure;
7031 
7032     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7033     if (Res.isNoMatch())
7034       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7035     if (!Res.isSuccess())
7036       return Res;
7037 
7038     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7039       return ParseStatus::Failure;
7040 
7041     return ParseStatus::Success;
7042   }
7043 
7044   return parseNumericFormat(Format);
7045 }
7046 
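// Parses the MTBUF format operand in either numeric form (dfmt/nfmt, or a raw
// "format:" value on GFX10+) or symbolic form, e.g. something like
// "format:[BUF_FMT_32_FLOAT]" on GFX10+ (symbol name illustrative).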
7047 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7048   using namespace llvm::AMDGPU::MTBUFFormat;
7049 
7050   int64_t Format = getDefaultFormatEncoding(getSTI());
7051   ParseStatus Res;
7052   SMLoc Loc = getLoc();
7053 
7054   // Parse legacy format syntax.
7055   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7056   if (Res.isFailure())
7057     return Res;
7058 
7059   bool FormatFound = Res.isSuccess();
7060 
7061   Operands.push_back(
7062     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
7063 
7064   if (FormatFound)
7065     trySkipToken(AsmToken::Comma);
7066 
7067   if (isToken(AsmToken::EndOfStatement)) {
7068     // We are expecting an soffset operand,
7069     // but let the matcher handle the error.
7070     return ParseStatus::Success;
7071   }
7072 
7073   // Parse soffset.
7074   Res = parseRegOrImm(Operands);
7075   if (!Res.isSuccess())
7076     return Res;
7077 
7078   trySkipToken(AsmToken::Comma);
7079 
7080   if (!FormatFound) {
7081     Res = parseSymbolicOrNumericFormat(Format);
7082     if (Res.isFailure())
7083       return Res;
7084     if (Res.isSuccess()) {
7085       auto Size = Operands.size();
7086       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7087       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7088       Op.setImm(Format);
7089     }
7090     return ParseStatus::Success;
7091   }
7092 
7093   if (isId("format") && peekToken().is(AsmToken::Colon))
7094     return Error(getLoc(), "duplicate format");
7095   return ParseStatus::Success;
7096 }
7097 
7098 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7099   ParseStatus Res =
7100       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7101   if (Res.isNoMatch()) {
7102     Res = parseIntWithPrefix("inst_offset", Operands,
7103                              AMDGPUOperand::ImmTyInstOffset);
7104   }
7105   return Res;
7106 }
7107 
7108 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7109   ParseStatus Res =
7110       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7111   if (Res.isNoMatch())
7112     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7113   return Res;
7114 }
7115 
7116 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7117   ParseStatus Res =
7118       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7119   if (Res.isNoMatch()) {
7120     Res =
7121         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7122   }
7123   return Res;
7124 }
7125 
7126 //===----------------------------------------------------------------------===//
7127 // Exp
7128 //===----------------------------------------------------------------------===//
7129 
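// Converts a parsed export instruction, e.g. "exp mrt0 v0, v1, off, off done",
// into an MCInst: each non-off source enables one bit of the "en" mask (two
// bits per source when "compr" is present).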
7130 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7131   OptionalImmIndexMap OptionalIdx;
7132 
7133   unsigned OperandIdx[4];
7134   unsigned EnMask = 0;
7135   int SrcIdx = 0;
7136 
7137   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7138     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7139 
7140     // Add the register arguments
7141     if (Op.isReg()) {
7142       assert(SrcIdx < 4);
7143       OperandIdx[SrcIdx] = Inst.size();
7144       Op.addRegOperands(Inst, 1);
7145       ++SrcIdx;
7146       continue;
7147     }
7148 
7149     if (Op.isOff()) {
7150       assert(SrcIdx < 4);
7151       OperandIdx[SrcIdx] = Inst.size();
7152       Inst.addOperand(MCOperand::createReg(MCRegister()));
7153       ++SrcIdx;
7154       continue;
7155     }
7156 
7157     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7158       Op.addImmOperands(Inst, 1);
7159       continue;
7160     }
7161 
7162     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7163       continue;
7164 
7165     // Handle optional arguments
7166     OptionalIdx[Op.getImmTy()] = i;
7167   }
7168 
7169   assert(SrcIdx == 4);
7170 
7171   bool Compr = false;
7172   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7173     Compr = true;
7174     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
7175     Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
7176     Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
7177   }
7178 
7179   for (auto i = 0; i < SrcIdx; ++i) {
7180     if (Inst.getOperand(OperandIdx[i]).getReg()) {
7181       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
7182     }
7183   }
7184 
7185   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
7186   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
7187 
7188   Inst.addOperand(MCOperand::createImm(EnMask));
7189 }
7190 
7191 //===----------------------------------------------------------------------===//
7192 // s_waitcnt
7193 //===----------------------------------------------------------------------===//
7194 
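// A legacy waitcnt operand combines several counters into one immediate, e.g.
// "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"; counters may also be separated by
// '&' or ',', and a "_sat" suffix clamps oversized values instead of erroring.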
7195 static bool
7196 encodeCnt(
7197   const AMDGPU::IsaVersion ISA,
7198   int64_t &IntVal,
7199   int64_t CntVal,
7200   bool Saturate,
7201   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7202   unsigned (*decode)(const IsaVersion &Version, unsigned))
7203 {
7204   bool Failed = false;
7205 
7206   IntVal = encode(ISA, IntVal, CntVal);
7207   if (CntVal != decode(ISA, IntVal)) {
7208     if (Saturate) {
7209       IntVal = encode(ISA, IntVal, -1);
7210     } else {
7211       Failed = true;
7212     }
7213   }
7214   return Failed;
7215 }
7216 
7217 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7218 
7219   SMLoc CntLoc = getLoc();
7220   StringRef CntName = getTokenStr();
7221 
7222   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7223       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7224     return false;
7225 
7226   int64_t CntVal;
7227   SMLoc ValLoc = getLoc();
7228   if (!parseExpr(CntVal))
7229     return false;
7230 
7231   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7232 
7233   bool Failed = true;
7234   bool Sat = CntName.ends_with("_sat");
7235 
7236   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7237     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7238   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7239     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7240   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7241     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7242   } else {
7243     Error(CntLoc, "invalid counter name " + CntName);
7244     return false;
7245   }
7246 
7247   if (Failed) {
7248     Error(ValLoc, "too large value for " + CntName);
7249     return false;
7250   }
7251 
7252   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7253     return false;
7254 
7255   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7256     if (isToken(AsmToken::EndOfStatement)) {
7257       Error(getLoc(), "expected a counter name");
7258       return false;
7259     }
7260   }
7261 
7262   return true;
7263 }
7264 
7265 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7266   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7267   int64_t Waitcnt = getWaitcntBitMask(ISA);
7268   SMLoc S = getLoc();
7269 
7270   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7271     while (!isToken(AsmToken::EndOfStatement)) {
7272       if (!parseCnt(Waitcnt))
7273         return ParseStatus::Failure;
7274     }
7275   } else {
7276     if (!parseExpr(Waitcnt))
7277       return ParseStatus::Failure;
7278   }
7279 
7280   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7281   return ParseStatus::Success;
7282 }
7283 
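// Parses one field of an s_delay_alu operand, e.g.
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// where instid0, instskip and instid1 occupy bit offsets 0, 4 and 7.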
7284 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7285   SMLoc FieldLoc = getLoc();
7286   StringRef FieldName = getTokenStr();
7287   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7288       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7289     return false;
7290 
7291   SMLoc ValueLoc = getLoc();
7292   StringRef ValueName = getTokenStr();
7293   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7294       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7295     return false;
7296 
7297   unsigned Shift;
7298   if (FieldName == "instid0") {
7299     Shift = 0;
7300   } else if (FieldName == "instskip") {
7301     Shift = 4;
7302   } else if (FieldName == "instid1") {
7303     Shift = 7;
7304   } else {
7305     Error(FieldLoc, "invalid field name " + FieldName);
7306     return false;
7307   }
7308 
7309   int Value;
7310   if (Shift == 4) {
7311     // Parse values for instskip.
7312     Value = StringSwitch<int>(ValueName)
7313                 .Case("SAME", 0)
7314                 .Case("NEXT", 1)
7315                 .Case("SKIP_1", 2)
7316                 .Case("SKIP_2", 3)
7317                 .Case("SKIP_3", 4)
7318                 .Case("SKIP_4", 5)
7319                 .Default(-1);
7320   } else {
7321     // Parse values for instid0 and instid1.
7322     Value = StringSwitch<int>(ValueName)
7323                 .Case("NO_DEP", 0)
7324                 .Case("VALU_DEP_1", 1)
7325                 .Case("VALU_DEP_2", 2)
7326                 .Case("VALU_DEP_3", 3)
7327                 .Case("VALU_DEP_4", 4)
7328                 .Case("TRANS32_DEP_1", 5)
7329                 .Case("TRANS32_DEP_2", 6)
7330                 .Case("TRANS32_DEP_3", 7)
7331                 .Case("FMA_ACCUM_CYCLE_1", 8)
7332                 .Case("SALU_CYCLE_1", 9)
7333                 .Case("SALU_CYCLE_2", 10)
7334                 .Case("SALU_CYCLE_3", 11)
7335                 .Default(-1);
7336   }
7337   if (Value < 0) {
7338     Error(ValueLoc, "invalid value name " + ValueName);
7339     return false;
7340   }
7341 
7342   Delay |= Value << Shift;
7343   return true;
7344 }
7345 
7346 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7347   int64_t Delay = 0;
7348   SMLoc S = getLoc();
7349 
7350   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7351     do {
7352       if (!parseDelay(Delay))
7353         return ParseStatus::Failure;
7354     } while (trySkipToken(AsmToken::Pipe));
7355   } else {
7356     if (!parseExpr(Delay))
7357       return ParseStatus::Failure;
7358   }
7359 
7360   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7361   return ParseStatus::Success;
7362 }
7363 
7364 bool
7365 AMDGPUOperand::isSWaitCnt() const {
7366   return isImm();
7367 }
7368 
7369 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7370 
7371 //===----------------------------------------------------------------------===//
7372 // DepCtr
7373 //===----------------------------------------------------------------------===//
7374 
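// Dependency counters are written as named fields; a plausible form is
//   s_waitcnt_depctr depctr_va_vdst(0) depctr_vm_vsrc(0)
// (field names are target-dependent and resolved by encodeDepCtr below).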
7375 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7376                                   StringRef DepCtrName) {
7377   switch (ErrorId) {
7378   case OPR_ID_UNKNOWN:
7379     Error(Loc, Twine("invalid counter name ", DepCtrName));
7380     return;
7381   case OPR_ID_UNSUPPORTED:
7382     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7383     return;
7384   case OPR_ID_DUPLICATE:
7385     Error(Loc, Twine("duplicate counter name ", DepCtrName));
7386     return;
7387   case OPR_VAL_INVALID:
7388     Error(Loc, Twine("invalid value for ", DepCtrName));
7389     return;
7390   default:
7391     assert(false);
7392   }
7393 }
7394 
7395 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7396 
7397   using namespace llvm::AMDGPU::DepCtr;
7398 
7399   SMLoc DepCtrLoc = getLoc();
7400   StringRef DepCtrName = getTokenStr();
7401 
7402   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7403       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7404     return false;
7405 
7406   int64_t ExprVal;
7407   if (!parseExpr(ExprVal))
7408     return false;
7409 
7410   unsigned PrevOprMask = UsedOprMask;
7411   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7412 
7413   if (CntVal < 0) {
7414     depCtrError(DepCtrLoc, CntVal, DepCtrName);
7415     return false;
7416   }
7417 
7418   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7419     return false;
7420 
7421   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7422     if (isToken(AsmToken::EndOfStatement)) {
7423       Error(getLoc(), "expected a counter name");
7424       return false;
7425     }
7426   }
7427 
7428   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7429   DepCtr = (DepCtr & ~CntValMask) | CntVal;
7430   return true;
7431 }
7432 
7433 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7434   using namespace llvm::AMDGPU::DepCtr;
7435 
7436   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7437   SMLoc Loc = getLoc();
7438 
7439   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7440     unsigned UsedOprMask = 0;
7441     while (!isToken(AsmToken::EndOfStatement)) {
7442       if (!parseDepCtr(DepCtr, UsedOprMask))
7443         return ParseStatus::Failure;
7444     }
7445   } else {
7446     if (!parseExpr(DepCtr))
7447       return ParseStatus::Failure;
7448   }
7449 
7450   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7451   return ParseStatus::Success;
7452 }
7453 
7454 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7455 
7456 //===----------------------------------------------------------------------===//
7457 // hwreg
7458 //===----------------------------------------------------------------------===//
7459 
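// A hwreg operand may be written as a macro, e.g. "hwreg(HW_REG_MODE, 0, 32)"
// (register name or numeric id, optional bit offset and width), or as a
// structured immediate handled by parseHwreg below.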
7460 ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7461                                             OperandInfoTy &Offset,
7462                                             OperandInfoTy &Width) {
7463   using namespace llvm::AMDGPU::Hwreg;
7464 
7465   if (!trySkipId("hwreg", AsmToken::LParen))
7466     return ParseStatus::NoMatch;
7467 
7468   // The register may be specified by name or using a numeric code
7469   HwReg.Loc = getLoc();
7470   if (isToken(AsmToken::Identifier) &&
7471       (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7472     HwReg.IsSymbolic = true;
7473     lex(); // skip register name
7474   } else if (!parseExpr(HwReg.Val, "a register name")) {
7475     return ParseStatus::Failure;
7476   }
7477 
7478   if (trySkipToken(AsmToken::RParen))
7479     return ParseStatus::Success;
7480 
7481   // parse optional params
7482   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7483     return ParseStatus::Failure;
7484 
7485   Offset.Loc = getLoc();
7486   if (!parseExpr(Offset.Val))
7487     return ParseStatus::Failure;
7488 
7489   if (!skipToken(AsmToken::Comma, "expected a comma"))
7490     return ParseStatus::Failure;
7491 
7492   Width.Loc = getLoc();
7493   if (!parseExpr(Width.Val) ||
7494       !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7495     return ParseStatus::Failure;
7496 
7497   return ParseStatus::Success;
7498 }
7499 
7500 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7501   using namespace llvm::AMDGPU::Hwreg;
7502 
7503   int64_t ImmVal = 0;
7504   SMLoc Loc = getLoc();
7505 
7506   StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7507                           HwregId::Default);
7508   StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7509                            HwregOffset::Default);
7510   struct : StructuredOpField {
7511     using StructuredOpField::StructuredOpField;
7512     bool validate(AMDGPUAsmParser &Parser) const override {
7513       if (!isUIntN(Width, Val - 1))
7514         return Error(Parser, "only values from 1 to 32 are legal");
7515       return true;
7516     }
7517   } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7518   ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
7519 
7520   if (Res.isNoMatch())
7521     Res = parseHwregFunc(HwReg, Offset, Width);
7522 
7523   if (Res.isSuccess()) {
7524     if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
7525       return ParseStatus::Failure;
7526     ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
7527   }
7528 
7529   if (Res.isNoMatch() &&
7530       parseExpr(ImmVal, "a hwreg macro, structured immediate"))
7531     Res = ParseStatus::Success;
7532 
7533   if (!Res.isSuccess())
7534     return ParseStatus::Failure;
7535 
7536   if (!isUInt<16>(ImmVal))
7537     return Error(Loc, "invalid immediate: only 16-bit values are legal");
7538   Operands.push_back(
7539       AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7540   return ParseStatus::Success;
7541 }
7542 
7543 bool AMDGPUOperand::isHwreg() const {
7544   return isImmTy(ImmTyHwreg);
7545 }
7546 
7547 //===----------------------------------------------------------------------===//
7548 // sendmsg
7549 //===----------------------------------------------------------------------===//
7550 
7551 bool
7552 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7553                                   OperandInfoTy &Op,
7554                                   OperandInfoTy &Stream) {
7555   using namespace llvm::AMDGPU::SendMsg;
7556 
7557   Msg.Loc = getLoc();
7558   if (isToken(AsmToken::Identifier) &&
7559       (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7560     Msg.IsSymbolic = true;
7561     lex(); // skip message name
7562   } else if (!parseExpr(Msg.Val, "a message name")) {
7563     return false;
7564   }
7565 
7566   if (trySkipToken(AsmToken::Comma)) {
7567     Op.IsDefined = true;
7568     Op.Loc = getLoc();
7569     if (isToken(AsmToken::Identifier) &&
7570         (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
7571             OPR_ID_UNKNOWN) {
7572       lex(); // skip operation name
7573     } else if (!parseExpr(Op.Val, "an operation name")) {
7574       return false;
7575     }
7576 
7577     if (trySkipToken(AsmToken::Comma)) {
7578       Stream.IsDefined = true;
7579       Stream.Loc = getLoc();
7580       if (!parseExpr(Stream.Val))
7581         return false;
7582     }
7583   }
7584 
7585   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7586 }
7587 
7588 bool
7589 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7590                                  const OperandInfoTy &Op,
7591                                  const OperandInfoTy &Stream) {
7592   using namespace llvm::AMDGPU::SendMsg;
7593 
7594   // Validation strictness depends on whether the message is specified
7595   // in symbolic or in numeric form. In the latter case we only check
7596   // that the value can be encoded.
7597   bool Strict = Msg.IsSymbolic;
7598 
7599   if (Strict) {
7600     if (Msg.Val == OPR_ID_UNSUPPORTED) {
7601       Error(Msg.Loc, "specified message id is not supported on this GPU");
7602       return false;
7603     }
7604   } else {
7605     if (!isValidMsgId(Msg.Val, getSTI())) {
7606       Error(Msg.Loc, "invalid message id");
7607       return false;
7608     }
7609   }
7610   if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
7611     if (Op.IsDefined) {
7612       Error(Op.Loc, "message does not support operations");
7613     } else {
7614       Error(Msg.Loc, "missing message operation");
7615     }
7616     return false;
7617   }
7618   if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
7619     if (Op.Val == OPR_ID_UNSUPPORTED)
7620       Error(Op.Loc, "specified operation id is not supported on this GPU");
7621     else
7622       Error(Op.Loc, "invalid operation id");
7623     return false;
7624   }
7625   if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
7626       Stream.IsDefined) {
7627     Error(Stream.Loc, "message operation does not support streams");
7628     return false;
7629   }
7630   if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
7631     Error(Stream.Loc, "invalid message stream id");
7632     return false;
7633   }
7634   return true;
7635 }
7636 
7637 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7638   using namespace llvm::AMDGPU::SendMsg;
7639 
7640   int64_t ImmVal = 0;
7641   SMLoc Loc = getLoc();
7642 
7643   if (trySkipId("sendmsg", AsmToken::LParen)) {
7644     OperandInfoTy Msg(OPR_ID_UNKNOWN);
7645     OperandInfoTy Op(OP_NONE_);
7646     OperandInfoTy Stream(STREAM_ID_NONE_);
7647     if (parseSendMsgBody(Msg, Op, Stream) &&
7648         validateSendMsg(Msg, Op, Stream)) {
7649       ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
7650     } else {
7651       return ParseStatus::Failure;
7652     }
7653   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7654     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7655       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7656   } else {
7657     return ParseStatus::Failure;
7658   }
7659 
7660   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7661   return ParseStatus::Success;
7662 }
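// For illustration, the sendmsg operand accepts:
//   sendmsg(<msg>)
//   sendmsg(<msg>, <op>)
//   sendmsg(<msg>, <op>, <stream>)
//   <16-bit immediate>
// where <msg> and <op> may be symbolic names (resolved via getMsgId and
// getMsgOpId) or numeric expressions.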
7663 
7664 bool AMDGPUOperand::isSendMsg() const {
7665   return isImmTy(ImmTySendMsg);
7666 }
7667 
7668 //===----------------------------------------------------------------------===//
7669 // v_interp
7670 //===----------------------------------------------------------------------===//
7671 
7672 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7673   StringRef Str;
7674   SMLoc S = getLoc();
7675 
7676   if (!parseId(Str))
7677     return ParseStatus::NoMatch;
7678 
7679   int Slot = StringSwitch<int>(Str)
7680     .Case("p10", 0)
7681     .Case("p20", 1)
7682     .Case("p0", 2)
7683     .Default(-1);
7684 
7685   if (Slot == -1)
7686     return Error(S, "invalid interpolation slot");
7687 
7688   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7689                                               AMDGPUOperand::ImmTyInterpSlot));
7690   return ParseStatus::Success;
7691 }
7692 
7693 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7694   StringRef Str;
7695   SMLoc S = getLoc();
7696 
7697   if (!parseId(Str))
7698     return ParseStatus::NoMatch;
7699 
7700   if (!Str.starts_with("attr"))
7701     return Error(S, "invalid interpolation attribute");
7702 
7703   StringRef Chan = Str.take_back(2);
7704   int AttrChan = StringSwitch<int>(Chan)
7705     .Case(".x", 0)
7706     .Case(".y", 1)
7707     .Case(".z", 2)
7708     .Case(".w", 3)
7709     .Default(-1);
7710   if (AttrChan == -1)
7711     return Error(S, "invalid or missing interpolation attribute channel");
7712 
7713   Str = Str.drop_back(2).drop_front(4);
7714 
7715   uint8_t Attr;
7716   if (Str.getAsInteger(10, Attr))
7717     return Error(S, "invalid or missing interpolation attribute number");
7718 
7719   if (Attr > 32)
7720     return Error(S, "out of bounds interpolation attribute number");
7721 
7722   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7723 
7724   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7725                                               AMDGPUOperand::ImmTyInterpAttr));
7726   Operands.push_back(AMDGPUOperand::CreateImm(
7727       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7728   return ParseStatus::Success;
7729 }
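// For illustration: interpolation attributes are written attr<N>.<chan>,
// e.g. "attr0.x" parses to Attr = 0, AttrChan = 0 and "attr31.w" to
// Attr = 31, AttrChan = 3, per the channel switch above.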
7730 
7731 //===----------------------------------------------------------------------===//
7732 // exp
7733 //===----------------------------------------------------------------------===//
7734 
7735 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7736   using namespace llvm::AMDGPU::Exp;
7737 
7738   StringRef Str;
7739   SMLoc S = getLoc();
7740 
7741   if (!parseId(Str))
7742     return ParseStatus::NoMatch;
7743 
7744   unsigned Id = getTgtId(Str);
7745   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7746     return Error(S, (Id == ET_INVALID)
7747                         ? "invalid exp target"
7748                         : "exp target is not supported on this GPU");
7749 
7750   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7751                                               AMDGPUOperand::ImmTyExpTgt));
7752   return ParseStatus::Success;
7753 }
7754 
7755 //===----------------------------------------------------------------------===//
7756 // parser helpers
7757 //===----------------------------------------------------------------------===//
7758 
7759 bool
7760 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7761   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7762 }
7763 
7764 bool
7765 AMDGPUAsmParser::isId(const StringRef Id) const {
7766   return isId(getToken(), Id);
7767 }
7768 
7769 bool
7770 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7771   return getTokenKind() == Kind;
7772 }
7773 
7774 StringRef AMDGPUAsmParser::getId() const {
7775   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7776 }
7777 
7778 bool
7779 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7780   if (isId(Id)) {
7781     lex();
7782     return true;
7783   }
7784   return false;
7785 }
7786 
7787 bool
7788 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7789   if (isToken(AsmToken::Identifier)) {
7790     StringRef Tok = getTokenStr();
7791     if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7792       lex();
7793       return true;
7794     }
7795   }
7796   return false;
7797 }
7798 
7799 bool
7800 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7801   if (isId(Id) && peekToken().is(Kind)) {
7802     lex();
7803     lex();
7804     return true;
7805   }
7806   return false;
7807 }
7808 
7809 bool
7810 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7811   if (isToken(Kind)) {
7812     lex();
7813     return true;
7814   }
7815   return false;
7816 }
7817 
7818 bool
7819 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7820                            const StringRef ErrMsg) {
7821   if (!trySkipToken(Kind)) {
7822     Error(getLoc(), ErrMsg);
7823     return false;
7824   }
7825   return true;
7826 }
7827 
7828 bool
7829 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7830   SMLoc S = getLoc();
7831 
7832   const MCExpr *Expr;
7833   if (Parser.parseExpression(Expr))
7834     return false;
7835 
7836   if (Expr->evaluateAsAbsolute(Imm))
7837     return true;
7838 
7839   if (Expected.empty()) {
7840     Error(S, "expected absolute expression");
7841   } else {
7842     Error(S, Twine("expected ", Expected) +
7843              Twine(" or an absolute expression"));
7844   }
7845   return false;
7846 }
7847 
7848 bool
7849 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7850   SMLoc S = getLoc();
7851 
7852   const MCExpr *Expr;
7853   if (Parser.parseExpression(Expr))
7854     return false;
7855 
7856   int64_t IntVal;
7857   if (Expr->evaluateAsAbsolute(IntVal)) {
7858     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7859   } else {
7860     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7861   }
7862   return true;
7863 }
7864 
7865 bool
7866 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7867   if (isToken(AsmToken::String)) {
7868     Val = getToken().getStringContents();
7869     lex();
7870     return true;
7871   }
7872   Error(getLoc(), ErrMsg);
7873   return false;
7874 }
7875 
7876 bool
7877 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7878   if (isToken(AsmToken::Identifier)) {
7879     Val = getTokenStr();
7880     lex();
7881     return true;
7882   }
7883   if (!ErrMsg.empty())
7884     Error(getLoc(), ErrMsg);
7885   return false;
7886 }
7887 
7888 AsmToken
7889 AMDGPUAsmParser::getToken() const {
7890   return Parser.getTok();
7891 }
7892 
7893 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7894   return isToken(AsmToken::EndOfStatement)
7895              ? getToken()
7896              : getLexer().peekTok(ShouldSkipSpace);
7897 }
7898 
7899 void
7900 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7901   auto TokCount = getLexer().peekTokens(Tokens);
7902 
7903   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7904     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7905 }
7906 
7907 AsmToken::TokenKind
7908 AMDGPUAsmParser::getTokenKind() const {
7909   return getLexer().getKind();
7910 }
7911 
7912 SMLoc
7913 AMDGPUAsmParser::getLoc() const {
7914   return getToken().getLoc();
7915 }
7916 
7917 StringRef
7918 AMDGPUAsmParser::getTokenStr() const {
7919   return getToken().getString();
7920 }
7921 
7922 void
7923 AMDGPUAsmParser::lex() {
7924   Parser.Lex();
7925 }
7926 
7927 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7928   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7929 }
7930 
7931 SMLoc
7932 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7933                                const OperandVector &Operands) const {
7934   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7935     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7936     if (Test(Op))
7937       return Op.getStartLoc();
7938   }
7939   return getInstLoc(Operands);
7940 }
7941 
7942 SMLoc
7943 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7944                            const OperandVector &Operands) const {
7945   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7946   return getOperandLoc(Test, Operands);
7947 }
7948 
7949 SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
7950                                  const OperandVector &Operands) const {
7951   auto Test = [=](const AMDGPUOperand& Op) {
7952     return Op.isRegKind() && Op.getReg() == Reg;
7953   };
7954   return getOperandLoc(Test, Operands);
7955 }
7956 
7957 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7958                                  bool SearchMandatoryLiterals) const {
7959   auto Test = [](const AMDGPUOperand& Op) {
7960     return Op.IsImmKindLiteral() || Op.isExpr();
7961   };
7962   SMLoc Loc = getOperandLoc(Test, Operands);
7963   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7964     Loc = getMandatoryLitLoc(Operands);
7965   return Loc;
7966 }
7967 
7968 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7969   auto Test = [](const AMDGPUOperand &Op) {
7970     return Op.IsImmKindMandatoryLiteral();
7971   };
7972   return getOperandLoc(Test, Operands);
7973 }
7974 
7975 SMLoc
7976 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7977   auto Test = [](const AMDGPUOperand& Op) {
7978     return Op.isImmKindConst();
7979   };
7980   return getOperandLoc(Test, Operands);
7981 }
7982 
7983 ParseStatus
7984 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
7985   if (!trySkipToken(AsmToken::LCurly))
7986     return ParseStatus::NoMatch;
7987 
7988   bool First = true;
7989   while (!trySkipToken(AsmToken::RCurly)) {
7990     if (!First &&
7991         !skipToken(AsmToken::Comma, "comma or closing brace expected"))
7992       return ParseStatus::Failure;
7993 
7994     StringRef Id = getTokenStr();
7995     SMLoc IdLoc = getLoc();
7996     if (!skipToken(AsmToken::Identifier, "field name expected") ||
7997         !skipToken(AsmToken::Colon, "colon expected"))
7998       return ParseStatus::Failure;
7999 
8000     const auto *I =
8001         find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
8002     if (I == Fields.end())
8003       return Error(IdLoc, "unknown field");
8004     if ((*I)->IsDefined)
8005       return Error(IdLoc, "duplicate field");
8006 
8007     // TODO: Support symbolic values.
8008     (*I)->Loc = getLoc();
8009     if (!parseExpr((*I)->Val))
8010       return ParseStatus::Failure;
8011     (*I)->IsDefined = true;
8012 
8013     First = false;
8014   }
8015   return ParseStatus::Success;
8016 }
8017 
8018 bool AMDGPUAsmParser::validateStructuredOpFields(
8019     ArrayRef<const StructuredOpField *> Fields) {
8020   return all_of(Fields, [this](const StructuredOpField *F) {
8021     return F->validate(*this);
8022   });
8023 }
8024 
8025 //===----------------------------------------------------------------------===//
8026 // swizzle
8027 //===----------------------------------------------------------------------===//
8028 
8029 LLVM_READNONE
8030 static unsigned
8031 encodeBitmaskPerm(const unsigned AndMask,
8032                   const unsigned OrMask,
8033                   const unsigned XorMask) {
8034   using namespace llvm::AMDGPU::Swizzle;
8035 
8036   return BITMASK_PERM_ENC |
8037          (AndMask << BITMASK_AND_SHIFT) |
8038          (OrMask  << BITMASK_OR_SHIFT)  |
8039          (XorMask << BITMASK_XOR_SHIFT);
8040 }
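// Sketch of how the helpers below use this encoding, assuming the usual
// ds_swizzle BITMASK_PERM semantics where a thread reads from lane
// ((lane & AndMask) | OrMask) ^ XorMask:
//   broadcast: AndMask keeps the group-selecting high bits, OrMask selects
//              the lane within each group, XorMask = 0
//   reverse:   AndMask = all bits, OrMask = 0, XorMask = GroupSize - 1
//   swap:      AndMask = all bits, OrMask = 0, XorMask = GroupSize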
8041 
8042 bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8043                                           const unsigned MaxVal,
8044                                           const Twine &ErrMsg, SMLoc &Loc) {
8045   if (!skipToken(AsmToken::Comma, "expected a comma")) {
8046     return false;
8047   }
8048   Loc = getLoc();
8049   if (!parseExpr(Op)) {
8050     return false;
8051   }
8052   if (Op < MinVal || Op > MaxVal) {
8053     Error(Loc, ErrMsg);
8054     return false;
8055   }
8056 
8057   return true;
8058 }
8059 
8060 bool
8061 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8062                                       const unsigned MinVal,
8063                                       const unsigned MaxVal,
8064                                       const StringRef ErrMsg) {
8065   SMLoc Loc;
8066   for (unsigned i = 0; i < OpNum; ++i) {
8067     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8068       return false;
8069   }
8070 
8071   return true;
8072 }
8073 
8074 bool
8075 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8076   using namespace llvm::AMDGPU::Swizzle;
8077 
8078   int64_t Lane[LANE_NUM];
8079   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8080                            "expected a 2-bit lane id")) {
8081     Imm = QUAD_PERM_ENC;
8082     for (unsigned I = 0; I < LANE_NUM; ++I) {
8083       Imm |= Lane[I] << (LANE_SHIFT * I);
8084     }
8085     return true;
8086   }
8087   return false;
8088 }
8089 
8090 bool
8091 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8092   using namespace llvm::AMDGPU::Swizzle;
8093 
8094   SMLoc Loc;
8095   int64_t GroupSize;
8096   int64_t LaneIdx;
8097 
8098   if (!parseSwizzleOperand(GroupSize,
8099                            2, 32,
8100                            "group size must be in the interval [2,32]",
8101                            Loc)) {
8102     return false;
8103   }
8104   if (!isPowerOf2_64(GroupSize)) {
8105     Error(Loc, "group size must be a power of two");
8106     return false;
8107   }
8108   if (parseSwizzleOperand(LaneIdx,
8109                           0, GroupSize - 1,
8110                           "lane id must be in the interval [0,group size - 1]",
8111                           Loc)) {
8112     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8113     return true;
8114   }
8115   return false;
8116 }
8117 
8118 bool
8119 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8120   using namespace llvm::AMDGPU::Swizzle;
8121 
8122   SMLoc Loc;
8123   int64_t GroupSize;
8124 
8125   if (!parseSwizzleOperand(GroupSize,
8126                            2, 32,
8127                            "group size must be in the interval [2,32]",
8128                            Loc)) {
8129     return false;
8130   }
8131   if (!isPowerOf2_64(GroupSize)) {
8132     Error(Loc, "group size must be a power of two");
8133     return false;
8134   }
8135 
8136   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8137   return true;
8138 }
8139 
8140 bool
8141 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8142   using namespace llvm::AMDGPU::Swizzle;
8143 
8144   SMLoc Loc;
8145   int64_t GroupSize;
8146 
8147   if (!parseSwizzleOperand(GroupSize,
8148                            1, 16,
8149                            "group size must be in the interval [1,16]",
8150                            Loc)) {
8151     return false;
8152   }
8153   if (!isPowerOf2_64(GroupSize)) {
8154     Error(Loc, "group size must be a power of two");
8155     return false;
8156   }
8157 
8158   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8159   return true;
8160 }
8161 
8162 bool
8163 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8164   using namespace llvm::AMDGPU::Swizzle;
8165 
8166   if (!skipToken(AsmToken::Comma, "expected a comma")) {
8167     return false;
8168   }
8169 
8170   StringRef Ctl;
8171   SMLoc StrLoc = getLoc();
8172   if (!parseString(Ctl)) {
8173     return false;
8174   }
8175   if (Ctl.size() != BITMASK_WIDTH) {
8176     Error(StrLoc, "expected a 5-character mask");
8177     return false;
8178   }
8179 
8180   unsigned AndMask = 0;
8181   unsigned OrMask = 0;
8182   unsigned XorMask = 0;
8183 
8184   for (size_t i = 0; i < Ctl.size(); ++i) {
8185     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8186     switch(Ctl[i]) {
8187     default:
8188       Error(StrLoc, "invalid mask");
8189       return false;
8190     case '0':
8191       break;
8192     case '1':
8193       OrMask |= Mask;
8194       break;
8195     case 'p':
8196       AndMask |= Mask;
8197       break;
8198     case 'i':
8199       AndMask |= Mask;
8200       XorMask |= Mask;
8201       break;
8202     }
8203   }
8204 
8205   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8206   return true;
8207 }
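// For illustration: each of the 5 characters controls one lane-id bit,
// MSB first: '0' forces the bit to 0, '1' forces it to 1, 'p' preserves it,
// and 'i' inverts it. E.g. the mask "p0i10" yields AndMask = 0b10100,
// OrMask = 0b00010 and XorMask = 0b00100 via the switch above.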
8208 
8209 bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8210   using namespace llvm::AMDGPU::Swizzle;
8211 
8212   if (!AMDGPU::isGFX9Plus(getSTI())) {
8213     Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8214     return false;
8215   }
8216 
8217   int64_t Swizzle;
8218   SMLoc Loc;
8219   if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8220                            "FFT swizzle must be in the interval [0," +
8221                                Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8222                            Loc))
8223     return false;
8224 
8225   Imm = FFT_MODE_ENC | Swizzle;
8226   return true;
8227 }
8228 
8229 bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8230   using namespace llvm::AMDGPU::Swizzle;
8231 
8232   if (!AMDGPU::isGFX9Plus(getSTI())) {
8233     Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8234     return false;
8235   }
8236 
8237   SMLoc Loc;
8238   int64_t Direction;
8239 
8240   if (!parseSwizzleOperand(Direction, 0, 1,
8241                            "direction must be 0 (left) or 1 (right)", Loc))
8242     return false;
8243 
8244   int64_t RotateSize;
8245   if (!parseSwizzleOperand(
8246           RotateSize, 0, ROTATE_MAX_SIZE,
8247           "number of threads to rotate must be in the interval [0," +
8248               Twine(ROTATE_MAX_SIZE) + Twine(']'),
8249           Loc))
8250     return false;
8251 
8252   Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8253         (RotateSize << ROTATE_SIZE_SHIFT);
8254   return true;
8255 }
8256 
8257 bool
8258 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8259 
8260   SMLoc OffsetLoc = getLoc();
8261 
8262   if (!parseExpr(Imm, "a swizzle macro")) {
8263     return false;
8264   }
8265   if (!isUInt<16>(Imm)) {
8266     Error(OffsetLoc, "expected a 16-bit offset");
8267     return false;
8268   }
8269   return true;
8270 }
8271 
8272 bool
8273 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8274   using namespace llvm::AMDGPU::Swizzle;
8275 
8276   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
8277 
8278     SMLoc ModeLoc = getLoc();
8279     bool Ok = false;
8280 
8281     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
8282       Ok = parseSwizzleQuadPerm(Imm);
8283     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
8284       Ok = parseSwizzleBitmaskPerm(Imm);
8285     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
8286       Ok = parseSwizzleBroadcast(Imm);
8287     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
8288       Ok = parseSwizzleSwap(Imm);
8289     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
8290       Ok = parseSwizzleReverse(Imm);
8291     } else if (trySkipId(IdSymbolic[ID_FFT])) {
8292       Ok = parseSwizzleFFT(Imm);
8293     } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
8294       Ok = parseSwizzleRotate(Imm);
8295     } else {
8296       Error(ModeLoc, "expected a swizzle mode");
8297     }
8298 
8299     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
8300   }
8301 
8302   return false;
8303 }
8304 
8305 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8306   SMLoc S = getLoc();
8307   int64_t Imm = 0;
8308 
8309   if (trySkipId("offset")) {
8310 
8311     bool Ok = false;
8312     if (skipToken(AsmToken::Colon, "expected a colon")) {
8313       if (trySkipId("swizzle")) {
8314         Ok = parseSwizzleMacro(Imm);
8315       } else {
8316         Ok = parseSwizzleOffset(Imm);
8317       }
8318     }
8319 
8320     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8321 
8322     return Ok ? ParseStatus::Success : ParseStatus::Failure;
8323   }
8324   return ParseStatus::NoMatch;
8325 }
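// For illustration, the full operand looks like one of:
//   offset:swizzle(<MODE>, ...)  - symbolic macro handled by parseSwizzleMacro,
//                                  with mode names from the IdSymbolic[] table
//   offset:<16-bit value>        - raw, pre-encoded swizzle offset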
8326 
8327 bool
8328 AMDGPUOperand::isSwizzle() const {
8329   return isImmTy(ImmTySwizzle);
8330 }
8331 
8332 //===----------------------------------------------------------------------===//
8333 // VGPR Index Mode
8334 //===----------------------------------------------------------------------===//
8335 
8336 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8337 
8338   using namespace llvm::AMDGPU::VGPRIndexMode;
8339 
8340   if (trySkipToken(AsmToken::RParen)) {
8341     return OFF;
8342   }
8343 
8344   int64_t Imm = 0;
8345 
8346   while (true) {
8347     unsigned Mode = 0;
8348     SMLoc S = getLoc();
8349 
8350     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8351       if (trySkipId(IdSymbolic[ModeId])) {
8352         Mode = 1 << ModeId;
8353         break;
8354       }
8355     }
8356 
8357     if (Mode == 0) {
8358       Error(S, (Imm == 0)?
8359                "expected a VGPR index mode or a closing parenthesis" :
8360                "expected a VGPR index mode");
8361       return UNDEF;
8362     }
8363 
8364     if (Imm & Mode) {
8365       Error(S, "duplicate VGPR index mode");
8366       return UNDEF;
8367     }
8368     Imm |= Mode;
8369 
8370     if (trySkipToken(AsmToken::RParen))
8371       break;
8372     if (!skipToken(AsmToken::Comma,
8373                    "expected a comma or a closing parenthesis"))
8374       return UNDEF;
8375   }
8376 
8377   return Imm;
8378 }
8379 
8380 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8381 
8382   using namespace llvm::AMDGPU::VGPRIndexMode;
8383 
8384   int64_t Imm = 0;
8385   SMLoc S = getLoc();
8386 
8387   if (trySkipId("gpr_idx", AsmToken::LParen)) {
8388     Imm = parseGPRIdxMacro();
8389     if (Imm == UNDEF)
8390       return ParseStatus::Failure;
8391   } else {
8392     if (getParser().parseAbsoluteExpression(Imm))
8393       return ParseStatus::Failure;
8394     if (Imm < 0 || !isUInt<4>(Imm))
8395       return Error(S, "invalid immediate: only 4-bit values are legal");
8396   }
8397 
8398   Operands.push_back(
8399       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
8400   return ParseStatus::Success;
8401 }
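// For illustration: this operand is either a gpr_idx(...) macro listing one or
// more index modes separated by commas (names come from the VGPRIndexMode
// IdSymbolic[] table used above, e.g. something like gpr_idx(SRC0, DST);
// the exact spellings are defined there), or a raw 4-bit immediate.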
8402 
8403 bool AMDGPUOperand::isGPRIdxMode() const {
8404   return isImmTy(ImmTyGprIdxMode);
8405 }
8406 
8407 //===----------------------------------------------------------------------===//
8408 // sopp branch targets
8409 //===----------------------------------------------------------------------===//
8410 
8411 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8412 
8413   // Make sure we are not parsing something
8414   // that looks like a label or an expression but is not.
8415   // This will improve error messages.
8416   if (isRegister() || isModifier())
8417     return ParseStatus::NoMatch;
8418 
8419   if (!parseExpr(Operands))
8420     return ParseStatus::Failure;
8421 
8422   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8423   assert(Opr.isImm() || Opr.isExpr());
8424   SMLoc Loc = Opr.getStartLoc();
8425 
8426   // Currently we do not support arbitrary expressions as branch targets.
8427   // Only labels and absolute expressions are accepted.
8428   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8429     Error(Loc, "expected an absolute expression or a label");
8430   } else if (Opr.isImm() && !Opr.isS16Imm()) {
8431     Error(Loc, "expected a 16-bit signed jump offset");
8432   }
8433 
8434   return ParseStatus::Success;
8435 }
8436 
8437 //===----------------------------------------------------------------------===//
8438 // Boolean holding registers
8439 //===----------------------------------------------------------------------===//
8440 
8441 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8442   return parseReg(Operands);
8443 }
8444 
8445 //===----------------------------------------------------------------------===//
8446 // mubuf
8447 //===----------------------------------------------------------------------===//
8448 
8449 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8450                                    const OperandVector &Operands,
8451                                    bool IsAtomic) {
8452   OptionalImmIndexMap OptionalIdx;
8453   unsigned FirstOperandIdx = 1;
8454   bool IsAtomicReturn = false;
8455 
8456   if (IsAtomic) {
8457     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
8458                       SIInstrFlags::IsAtomicRet;
8459   }
8460 
8461   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8462     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8463 
8464     // Add the register arguments
8465     if (Op.isReg()) {
8466       Op.addRegOperands(Inst, 1);
8467       // Insert a tied src for atomic return dst.
8468       // This cannot be postponed, as subsequent calls to
8469       // addImmOperands rely on the correct number of MC operands.
8470       if (IsAtomicReturn && i == FirstOperandIdx)
8471         Op.addRegOperands(Inst, 1);
8472       continue;
8473     }
8474 
8475     // Handle the case where soffset is an immediate
8476     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8477       Op.addImmOperands(Inst, 1);
8478       continue;
8479     }
8480 
8481     // Handle tokens like 'offen' which are sometimes hard-coded into the
8482     // asm string.  There are no MCInst operands for these.
8483     if (Op.isToken()) {
8484       continue;
8485     }
8486     assert(Op.isImm());
8487 
8488     // Handle optional arguments
8489     OptionalIdx[Op.getImmTy()] = i;
8490   }
8491 
8492   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8493   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8494 }
8495 
8496 //===----------------------------------------------------------------------===//
8497 // smrd
8498 //===----------------------------------------------------------------------===//
8499 
8500 bool AMDGPUOperand::isSMRDOffset8() const {
8501   return isImmLiteral() && isUInt<8>(getImm());
8502 }
8503 
8504 bool AMDGPUOperand::isSMEMOffset() const {
8505   // Offset range is checked later by validator.
8506   return isImmLiteral();
8507 }
8508 
8509 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8510   // 32-bit literals are only supported on CI, and we only want to use them
8511   // when the offset does not fit in 8 bits.
8512   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8513 }
8514 
8515 //===----------------------------------------------------------------------===//
8516 // vop3
8517 //===----------------------------------------------------------------------===//
8518 
8519 static bool ConvertOmodMul(int64_t &Mul) {
8520   if (Mul != 1 && Mul != 2 && Mul != 4)
8521     return false;
8522 
8523   Mul >>= 1;
8524   return true;
8525 }
8526 
8527 static bool ConvertOmodDiv(int64_t &Div) {
8528   if (Div == 1) {
8529     Div = 0;
8530     return true;
8531   }
8532 
8533   if (Div == 2) {
8534     Div = 3;
8535     return true;
8536   }
8537 
8538   return false;
8539 }
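// Worked mapping for the two helpers above (parsed value -> encoded omod):
//   mul:1 -> 0, mul:2 -> 1, mul:4 -> 2   (ConvertOmodMul shifts right by one)
//   div:1 -> 0, div:2 -> 3               (ConvertOmodDiv)
// Any other value is rejected.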
8540 
8541 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8542 // This is intentional and ensures compatibility with sp3.
8543 // See bug 35397 for details.
8544 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8545   if (BoundCtrl == 0 || BoundCtrl == 1) {
8546     if (!isGFX11Plus())
8547       BoundCtrl = 1;
8548     return true;
8549   }
8550   return false;
8551 }
8552 
8553 void AMDGPUAsmParser::onBeginOfFile() {
8554   if (!getParser().getStreamer().getTargetStreamer() ||
8555       getSTI().getTargetTriple().getArch() == Triple::r600)
8556     return;
8557 
8558   if (!getTargetStreamer().getTargetID())
8559     getTargetStreamer().initializeTargetID(getSTI(),
8560                                            getSTI().getFeatureString());
8561 
8562   if (isHsaAbi(getSTI()))
8563     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8564 }
8565 
8566 /// Parse AMDGPU specific expressions.
8567 ///
8568 ///  expr ::= or(expr, ...) |
8569 ///           max(expr, ...)
8570 ///
8571 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8572   using AGVK = AMDGPUMCExpr::VariantKind;
8573 
8574   if (isToken(AsmToken::Identifier)) {
8575     StringRef TokenId = getTokenStr();
8576     AGVK VK = StringSwitch<AGVK>(TokenId)
8577                   .Case("max", AGVK::AGVK_Max)
8578                   .Case("or", AGVK::AGVK_Or)
8579                   .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
8580                   .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
8581                   .Case("alignto", AGVK::AGVK_AlignTo)
8582                   .Case("occupancy", AGVK::AGVK_Occupancy)
8583                   .Default(AGVK::AGVK_None);
8584 
8585     if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
8586       SmallVector<const MCExpr *, 4> Exprs;
8587       uint64_t CommaCount = 0;
8588       lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8589       lex(); // Eat '('
8590       while (true) {
8591         if (trySkipToken(AsmToken::RParen)) {
8592           if (Exprs.empty()) {
8593             Error(getToken().getLoc(),
8594                   "empty " + Twine(TokenId) + " expression");
8595             return true;
8596           }
8597           if (CommaCount + 1 != Exprs.size()) {
8598             Error(getToken().getLoc(),
8599                   "mismatch of commas in " + Twine(TokenId) + " expression");
8600             return true;
8601           }
8602           Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
8603           return false;
8604         }
8605         const MCExpr *Expr;
8606         if (getParser().parseExpression(Expr, EndLoc))
8607           return true;
8608         Exprs.push_back(Expr);
8609         bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
8610         if (LastTokenWasComma)
8611           CommaCount++;
8612         if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
8613           Error(getToken().getLoc(),
8614                 "unexpected token in " + Twine(TokenId) + " expression");
8615           return true;
8616         }
8617       }
8618     }
8619   }
8620   return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
8621 }
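// For illustration (hypothetical symbol names): these variant expressions may
// appear wherever an assembler expression is accepted, e.g.
//   .set total_sgpr, max(func_a.sgpr, func_b.sgpr)
//   .set uses_vcc, or(func_a.uses_vcc, func_b.uses_vcc)
// The loop above enforces a non-empty, comma-separated argument list.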
8622 
8623 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8624   StringRef Name = getTokenStr();
8625   if (Name == "mul") {
8626     return parseIntWithPrefix("mul", Operands,
8627                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8628   }
8629 
8630   if (Name == "div") {
8631     return parseIntWithPrefix("div", Operands,
8632                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8633   }
8634 
8635   return ParseStatus::NoMatch;
8636 }
8637 
8638 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8639 // the number of src operands present, then copies that bit into src0_modifiers.
8640 static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8641   int Opc = Inst.getOpcode();
8642   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8643   if (OpSelIdx == -1)
8644     return;
8645 
8646   int SrcNum;
8647   const int Ops[] = { AMDGPU::OpName::src0,
8648                       AMDGPU::OpName::src1,
8649                       AMDGPU::OpName::src2 };
8650   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8651        ++SrcNum)
8652     ;
8653   assert(SrcNum > 0);
8654 
8655   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8656 
8657   int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
8658   if (DstIdx == -1)
8659     return;
8660 
8661   const MCOperand &DstOp = Inst.getOperand(DstIdx);
8662   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8663   uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8664   if (DstOp.isReg() &&
8665       MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
8666     if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
8667       ModVal |= SISrcMods::DST_OP_SEL;
8668   } else {
8669     if ((OpSel & (1 << SrcNum)) != 0)
8670       ModVal |= SISrcMods::DST_OP_SEL;
8671   }
8672   Inst.getOperand(ModIdx).setImm(ModVal);
8673 }
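// For illustration: with the convention above, an instruction with N source
// operands keeps its destination op_sel flag in bit N of the op_sel operand
// (bit 3 for a 3-source VOP3 op). That bit (or, for a 16-bit VGPR
// destination, the register's hi half) is mirrored into DST_OP_SEL of
// src0_modifiers.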
8674 
8675 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8676                                    const OperandVector &Operands) {
8677   cvtVOP3P(Inst, Operands);
8678   cvtVOP3DstOpSelOnly(Inst, *getMRI());
8679 }
8680 
8681 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8682                                    OptionalImmIndexMap &OptionalIdx) {
8683   cvtVOP3P(Inst, Operands, OptionalIdx);
8684   cvtVOP3DstOpSelOnly(Inst, *getMRI());
8685 }
8686 
8687 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8688   return
8689       // 1. This operand is input modifiers
8690       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8691       // 2. This is not last operand
8692       && Desc.NumOperands > (OpNum + 1)
8693       // 3. Next operand is register class
8694       && Desc.operands()[OpNum + 1].RegClass != -1
8695       // 4. Next register is not tied to any other operand
8696       && Desc.getOperandConstraint(OpNum + 1,
8697                                    MCOI::OperandConstraint::TIED_TO) == -1;
8698 }
8699 
8700 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8701 {
8702   OptionalImmIndexMap OptionalIdx;
8703   unsigned Opc = Inst.getOpcode();
8704 
8705   unsigned I = 1;
8706   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8707   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8708     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8709   }
8710 
8711   for (unsigned E = Operands.size(); I != E; ++I) {
8712     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8713     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8714       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8715     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8716                Op.isInterpAttrChan()) {
8717       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8718     } else if (Op.isImmModifier()) {
8719       OptionalIdx[Op.getImmTy()] = I;
8720     } else {
8721       llvm_unreachable("unhandled operand type");
8722     }
8723   }
8724 
8725   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8726     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8727                           AMDGPUOperand::ImmTyHigh);
8728 
8729   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8730     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8731                           AMDGPUOperand::ImmTyClamp);
8732 
8733   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8734     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8735                           AMDGPUOperand::ImmTyOModSI);
8736 }
8737 
8738 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8739 {
8740   OptionalImmIndexMap OptionalIdx;
8741   unsigned Opc = Inst.getOpcode();
8742 
8743   unsigned I = 1;
8744   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8745   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8746     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8747   }
8748 
8749   for (unsigned E = Operands.size(); I != E; ++I) {
8750     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8751     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8752       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8753     } else if (Op.isImmModifier()) {
8754       OptionalIdx[Op.getImmTy()] = I;
8755     } else {
8756       llvm_unreachable("unhandled operand type");
8757     }
8758   }
8759 
8760   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
8761 
8762   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8763   if (OpSelIdx != -1)
8764     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8765 
8766   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8767 
8768   if (OpSelIdx == -1)
8769     return;
8770 
8771   const int Ops[] = { AMDGPU::OpName::src0,
8772                       AMDGPU::OpName::src1,
8773                       AMDGPU::OpName::src2 };
8774   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8775                          AMDGPU::OpName::src1_modifiers,
8776                          AMDGPU::OpName::src2_modifiers };
8777 
8778   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8779 
8780   for (int J = 0; J < 3; ++J) {
8781     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8782     if (OpIdx == -1)
8783       break;
8784 
8785     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8786     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8787 
8788     if ((OpSel & (1 << J)) != 0)
8789       ModVal |= SISrcMods::OP_SEL_0;
8790     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8791         (OpSel & (1 << 3)) != 0)
8792       ModVal |= SISrcMods::DST_OP_SEL;
8793 
8794     Inst.getOperand(ModIdx).setImm(ModVal);
8795   }
8796 }
8797 
8798 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8799                               OptionalImmIndexMap &OptionalIdx) {
8800   unsigned Opc = Inst.getOpcode();
8801 
8802   unsigned I = 1;
8803   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8804   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8805     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8806   }
8807 
8808   for (unsigned E = Operands.size(); I != E; ++I) {
8809     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8810     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8811       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8812     } else if (Op.isImmModifier()) {
8813       OptionalIdx[Op.getImmTy()] = I;
8814     } else {
8815       Op.addRegOrImmOperands(Inst, 1);
8816     }
8817   }
8818 
8819   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
8820     if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
8821       Inst.addOperand(Inst.getOperand(0));
8822     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8823                           AMDGPUOperand::ImmTyByteSel);
8824   }
8825 
8826   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8827     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8828                           AMDGPUOperand::ImmTyClamp);
8829 
8830   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8831     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8832                           AMDGPUOperand::ImmTyOModSI);
8833 
8834   // Special case for v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8835   // these have a src2 register operand that is tied to the dst operand.
8836   // The assembler does not allow modifiers on this operand, so
8837   // src2_modifiers must be 0.
8838   if (isMAC(Opc)) {
8839     auto *it = Inst.begin();
8840     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8841     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8842     ++it;
8843     // Copy the operand to ensure it's not invalidated when Inst grows.
8844     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8845   }
8846 }
8847 
8848 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8849   OptionalImmIndexMap OptionalIdx;
8850   cvtVOP3(Inst, Operands, OptionalIdx);
8851 }
8852 
8853 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8854                                OptionalImmIndexMap &OptIdx) {
8855   const int Opc = Inst.getOpcode();
8856   const MCInstrDesc &Desc = MII.get(Opc);
8857 
8858   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8859 
8860   if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
8861       Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
8862       Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8863       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8864       Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
8865       Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
8866     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8867     Inst.addOperand(Inst.getOperand(0));
8868   }
8869 
8870   // Adding the vdst_in operand is already handled for these DPP
8871   // instructions in cvtVOP3DPP.
8872   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8873       !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8874         Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8875         Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8876         Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 ||
8877         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
8878         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
8879         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
8880         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
8881     Inst.addOperand(Inst.getOperand(0));
8882   }
8883 
8884   int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
8885   if (BitOp3Idx != -1) {
8886     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
8887   }
8888 
8889   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
8890   // instruction, and then figure out where to actually put the modifiers.
8891 
8892   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8893   if (OpSelIdx != -1) {
8894     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8895   }
8896 
8897   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8898   if (OpSelHiIdx != -1) {
8899     int DefaultVal = IsPacked ? -1 : 0;
8900     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8901                           DefaultVal);
8902   }
8903 
8904   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8905   if (NegLoIdx != -1)
8906     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8907 
8908   int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8909   if (NegHiIdx != -1)
8910     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8911 
8912   const int Ops[] = { AMDGPU::OpName::src0,
8913                       AMDGPU::OpName::src1,
8914                       AMDGPU::OpName::src2 };
8915   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8916                          AMDGPU::OpName::src1_modifiers,
8917                          AMDGPU::OpName::src2_modifiers };
8918 
8919   unsigned OpSel = 0;
8920   unsigned OpSelHi = 0;
8921   unsigned NegLo = 0;
8922   unsigned NegHi = 0;
8923 
8924   if (OpSelIdx != -1)
8925     OpSel = Inst.getOperand(OpSelIdx).getImm();
8926 
8927   if (OpSelHiIdx != -1)
8928     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8929 
8930   if (NegLoIdx != -1)
8931     NegLo = Inst.getOperand(NegLoIdx).getImm();
8932 
8933   if (NegHiIdx != -1)
8934     NegHi = Inst.getOperand(NegHiIdx).getImm();
8935 
8936   for (int J = 0; J < 3; ++J) {
8937     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8938     if (OpIdx == -1)
8939       break;
8940 
8941     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8942 
8943     if (ModIdx == -1)
8944       continue;
8945 
8946     uint32_t ModVal = 0;
8947 
8948     const MCOperand &SrcOp = Inst.getOperand(OpIdx);
8949     if (SrcOp.isReg() && getMRI()
8950                              ->getRegClass(AMDGPU::VGPR_16RegClassID)
8951                              .contains(SrcOp.getReg())) {
8952       bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
8953       if (VGPRSuffixIsHi)
8954         ModVal |= SISrcMods::OP_SEL_0;
8955     } else {
8956       if ((OpSel & (1 << J)) != 0)
8957         ModVal |= SISrcMods::OP_SEL_0;
8958     }
8959 
8960     if ((OpSelHi & (1 << J)) != 0)
8961       ModVal |= SISrcMods::OP_SEL_1;
8962 
8963     if ((NegLo & (1 << J)) != 0)
8964       ModVal |= SISrcMods::NEG;
8965 
8966     if ((NegHi & (1 << J)) != 0)
8967       ModVal |= SISrcMods::NEG_HI;
8968 
8969     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8970   }
8971 }
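// For illustration of the loop above: for source operand J, op_sel bit J maps
// to OP_SEL_0, op_sel_hi bit J to OP_SEL_1, neg_lo bit J to NEG, and neg_hi
// bit J to NEG_HI in the corresponding srcJ_modifiers. Sources in the
// VGPR_16 class take OP_SEL_0 from the register's hi/lo half instead of the
// op_sel operand.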
8972 
8973 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8974   OptionalImmIndexMap OptIdx;
8975   cvtVOP3(Inst, Operands, OptIdx);
8976   cvtVOP3P(Inst, Operands, OptIdx);
8977 }
8978 
8979 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8980                                   unsigned i, unsigned Opc, unsigned OpName) {
8981   if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8982     ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8983   else
8984     ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8985 }
8986 
8987 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8988   unsigned Opc = Inst.getOpcode();
8989 
8990   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8991   addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8992   addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8993   ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8994   ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8995 
8996   OptionalImmIndexMap OptIdx;
8997   for (unsigned i = 5; i < Operands.size(); ++i) {
8998     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8999     OptIdx[Op.getImmTy()] = i;
9000   }
9001 
9002   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
9003     addOptionalImmOperand(Inst, Operands, OptIdx,
9004                           AMDGPUOperand::ImmTyIndexKey8bit);
9005 
9006   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
9007     addOptionalImmOperand(Inst, Operands, OptIdx,
9008                           AMDGPUOperand::ImmTyIndexKey16bit);
9009 
9010   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9011     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
9012 
9013   cvtVOP3P(Inst, Operands, OptIdx);
9014 }
9015 
9016 //===----------------------------------------------------------------------===//
9017 // VOPD
9018 //===----------------------------------------------------------------------===//
9019 
9020 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9021   if (!hasVOPD(getSTI()))
9022     return ParseStatus::NoMatch;
9023 
9024   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9025     SMLoc S = getLoc();
9026     lex();
9027     lex();
9028     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9029     SMLoc OpYLoc = getLoc();
9030     StringRef OpYName;
9031     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9032       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9033       return ParseStatus::Success;
9034     }
9035     return Error(OpYLoc, "expected a VOPDY instruction after ::");
9036   }
9037   return ParseStatus::NoMatch;
9038 }
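// For illustration: a VOPD instruction is written as two halves joined by
// "::", e.g. something like
//   v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4
// (the exact dual mnemonics come from the VOPD instruction definitions).
// This routine only consumes the "::" token and the OpY mnemonic; the OpY
// operands are parsed by the normal operand loop afterwards.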
9039 
9040 // Create VOPD MCInst operands using parsed assembler operands.
9041 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9042   auto addOp = [&](uint16_t ParsedOprIdx) {
9043     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9044     if (Op.isReg()) {
9045       Op.addRegOperands(Inst, 1);
9046       return;
9047     }
9048     if (Op.isImm()) {
9049       Op.addImmOperands(Inst, 1);
9050       return;
9051     }
9052     llvm_unreachable("Unhandled operand type in cvtVOPD");
9053   };
9054 
9055   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9056 
9057   // MCInst operands are ordered as follows:
9058   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9059 
9060   for (auto CompIdx : VOPD::COMPONENTS) {
9061     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9062   }
9063 
9064   for (auto CompIdx : VOPD::COMPONENTS) {
9065     const auto &CInfo = InstInfo[CompIdx];
9066     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9067     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9068       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9069     if (CInfo.hasSrc2Acc())
9070       addOp(CInfo.getIndexOfDstInParsedOperands());
9071   }
9072 }
9073 
9074 //===----------------------------------------------------------------------===//
9075 // dpp
9076 //===----------------------------------------------------------------------===//
9077 
9078 bool AMDGPUOperand::isDPP8() const {
9079   return isImmTy(ImmTyDPP8);
9080 }
9081 
9082 bool AMDGPUOperand::isDPPCtrl() const {
9083   using namespace AMDGPU::DPP;
9084 
9085   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9086   if (result) {
9087     int64_t Imm = getImm();
9088     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9089            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9090            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9091            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9092            (Imm == DppCtrl::WAVE_SHL1) ||
9093            (Imm == DppCtrl::WAVE_ROL1) ||
9094            (Imm == DppCtrl::WAVE_SHR1) ||
9095            (Imm == DppCtrl::WAVE_ROR1) ||
9096            (Imm == DppCtrl::ROW_MIRROR) ||
9097            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9098            (Imm == DppCtrl::BCAST15) ||
9099            (Imm == DppCtrl::BCAST31) ||
9100            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9101            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9102   }
9103   return false;
9104 }
9105 
9106 //===----------------------------------------------------------------------===//
9107 // mAI
9108 //===----------------------------------------------------------------------===//
9109 
9110 bool AMDGPUOperand::isBLGP() const {
9111   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
9112 }
9113 
9114 bool AMDGPUOperand::isS16Imm() const {
9115   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9116 }
9117 
9118 bool AMDGPUOperand::isU16Imm() const {
9119   return isImmLiteral() && isUInt<16>(getImm());
9120 }
9121 
9122 //===----------------------------------------------------------------------===//
9123 // dim
9124 //===----------------------------------------------------------------------===//
9125 
9126 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9127   // We want to allow "dim:1D" etc.,
9128   // but the initial 1 is tokenized as an integer.
9129   std::string Token;
9130   if (isToken(AsmToken::Integer)) {
9131     SMLoc Loc = getToken().getEndLoc();
9132     Token = std::string(getTokenStr());
9133     lex();
9134     if (getLoc() != Loc)
9135       return false;
9136   }
9137 
9138   StringRef Suffix;
9139   if (!parseId(Suffix))
9140     return false;
9141   Token += Suffix;
9142 
9143   StringRef DimId = Token;
9144   if (DimId.starts_with("SQ_RSRC_IMG_"))
9145     DimId = DimId.drop_front(12);
9146 
9147   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9148   if (!DimInfo)
9149     return false;
9150 
9151   Encoding = DimInfo->Encoding;
9152   return true;
9153 }
9154 
9155 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9156   if (!isGFX10Plus())
9157     return ParseStatus::NoMatch;
9158 
9159   SMLoc S = getLoc();
9160 
9161   if (!trySkipId("dim", AsmToken::Colon))
9162     return ParseStatus::NoMatch;
9163 
9164   unsigned Encoding;
9165   SMLoc Loc = getLoc();
9166   if (!parseDimId(Encoding))
9167     return Error(Loc, "invalid dim value");
9168 
9169   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9170                                               AMDGPUOperand::ImmTyDim));
9171   return ParseStatus::Success;
9172 }
9173 
9174 //===----------------------------------------------------------------------===//
9175 // dpp
9176 //===----------------------------------------------------------------------===//
9177 
9178 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9179   SMLoc S = getLoc();
9180 
9181   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9182     return ParseStatus::NoMatch;
9183 
9184   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
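       // Each value selects a source lane within a group of eight lanes, e.g.
       // dpp8:[7,6,5,4,3,2,1,0] reverses the lanes within each group.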
9185 
9186   int64_t Sels[8];
9187 
9188   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9189     return ParseStatus::Failure;
9190 
9191   for (size_t i = 0; i < 8; ++i) {
9192     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9193       return ParseStatus::Failure;
9194 
9195     SMLoc Loc = getLoc();
9196     if (getParser().parseAbsoluteExpression(Sels[i]))
9197       return ParseStatus::Failure;
9198     if (0 > Sels[i] || 7 < Sels[i])
9199       return Error(Loc, "expected a 3-bit value");
9200   }
9201 
9202   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9203     return ParseStatus::Failure;
9204 
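       // Pack the eight 3-bit selectors into a single immediate, with the
       // selector for lane 0 in the lowest bits.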
9205   unsigned DPP8 = 0;
9206   for (size_t i = 0; i < 8; ++i)
9207     DPP8 |= (Sels[i] << (i * 3));
9208 
9209   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9210   return ParseStatus::Success;
9211 }
9212 
9213 bool
9214 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9215                                     const OperandVector &Operands) {
9216   if (Ctrl == "row_newbcast")
9217     return isGFX90A();
9218 
9219   if (Ctrl == "row_share" ||
9220       Ctrl == "row_xmask")
9221     return isGFX10Plus();
9222 
9223   if (Ctrl == "wave_shl" ||
9224       Ctrl == "wave_shr" ||
9225       Ctrl == "wave_rol" ||
9226       Ctrl == "wave_ror" ||
9227       Ctrl == "row_bcast")
9228     return isVI() || isGFX9();
9229 
9230   return Ctrl == "row_mirror" ||
9231          Ctrl == "row_half_mirror" ||
9232          Ctrl == "quad_perm" ||
9233          Ctrl == "row_shl" ||
9234          Ctrl == "row_shr" ||
9235          Ctrl == "row_ror";
9236 }
9237 
9238 int64_t
9239 AMDGPUAsmParser::parseDPPCtrlPerm() {
9240   // quad_perm:[%d,%d,%d,%d]
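       // Each value selects a source lane within a group of four lanes, e.g.
       // quad_perm:[0,1,2,3] is the identity permutation; the four 2-bit
       // values are packed into an 8-bit control value below.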
9241 
9242   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9243     return -1;
9244 
9245   int64_t Val = 0;
9246   for (int i = 0; i < 4; ++i) {
9247     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9248       return -1;
9249 
9250     int64_t Temp;
9251     SMLoc Loc = getLoc();
9252     if (getParser().parseAbsoluteExpression(Temp))
9253       return -1;
9254     if (Temp < 0 || Temp > 3) {
9255       Error(Loc, "expected a 2-bit value");
9256       return -1;
9257     }
9258 
9259     Val += (Temp << i * 2);
9260   }
9261 
9262   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9263     return -1;
9264 
9265   return Val;
9266 }
9267 
9268 int64_t
9269 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9270   using namespace AMDGPU::DPP;
9271 
9272   // sel:%d
9273 
9274   int64_t Val;
9275   SMLoc Loc = getLoc();
9276 
9277   if (getParser().parseAbsoluteExpression(Val))
9278     return -1;
9279 
9280   struct DppCtrlCheck {
9281     int64_t Ctrl;
9282     int Lo;
9283     int Hi;
9284   };
9285 
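       // Ctrl is the base encoding and Lo/Hi bound the value that may follow
       // the colon; when Lo == Hi the base encoding is used as is, otherwise
       // the parsed value is OR-ed into the base encoding (see below).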
9286   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9287     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
9288     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
9289     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
9290     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
9291     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
9292     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
9293     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
9294     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
9295     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
9296     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
9297     .Default({-1, 0, 0});
9298 
9299   bool Valid;
9300   if (Check.Ctrl == -1) {
9301     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9302     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9303   } else {
9304     Valid = Check.Lo <= Val && Val <= Check.Hi;
9305     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9306   }
9307 
9308   if (!Valid) {
9309     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9310     return -1;
9311   }
9312 
9313   return Val;
9314 }
9315 
9316 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9317   using namespace AMDGPU::DPP;
9318 
9319   if (!isToken(AsmToken::Identifier) ||
9320       !isSupportedDPPCtrl(getTokenStr(), Operands))
9321     return ParseStatus::NoMatch;
9322 
9323   SMLoc S = getLoc();
9324   int64_t Val = -1;
9325   StringRef Ctrl;
9326 
9327   parseId(Ctrl);
9328 
9329   if (Ctrl == "row_mirror") {
9330     Val = DppCtrl::ROW_MIRROR;
9331   } else if (Ctrl == "row_half_mirror") {
9332     Val = DppCtrl::ROW_HALF_MIRROR;
9333   } else {
9334     if (skipToken(AsmToken::Colon, "expected a colon")) {
9335       if (Ctrl == "quad_perm") {
9336         Val = parseDPPCtrlPerm();
9337       } else {
9338         Val = parseDPPCtrlSel(Ctrl);
9339       }
9340     }
9341   }
9342 
9343   if (Val == -1)
9344     return ParseStatus::Failure;
9345 
9346   Operands.push_back(
9347     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9348   return ParseStatus::Success;
9349 }
9350 
9351 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9352                                  bool IsDPP8) {
9353   OptionalImmIndexMap OptionalIdx;
9354   unsigned Opc = Inst.getOpcode();
9355   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9356 
9357   // MAC instructions are special because they have an 'old'
9358   // operand which is not tied to dst (but is assumed to be).
9359   // They also have a dummy, unused src2_modifiers operand.
9360   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
9361   int Src2ModIdx =
9362       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
9363   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9364                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9365 
9366   unsigned I = 1;
9367   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9368     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9369   }
9370 
9371   int Fi = 0;
9372   for (unsigned E = Operands.size(); I != E; ++I) {
9373 
9374     if (IsMAC) {
9375       int NumOperands = Inst.getNumOperands();
9376       if (OldIdx == NumOperands) {
9377         // Handle old operand
9378         constexpr int DST_IDX = 0;
9379         Inst.addOperand(Inst.getOperand(DST_IDX));
9380       } else if (Src2ModIdx == NumOperands) {
9381         // Add unused dummy src2_modifiers
9382         Inst.addOperand(MCOperand::createImm(0));
9383       }
9384     }
9385 
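         // When the vdst_in slot is reached, replicate the already-added dst
         // register operand (operand 0).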
9386     int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
9387     if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9388       Inst.addOperand(Inst.getOperand(0));
9389     }
9390 
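         // For the CVT_SR opcodes below, insert a zero src2_modifiers operand
         // and an empty src2 register placeholder when the src2_modifiers slot
         // is next to be filled.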
9391     bool IsVOP3CvtSrDpp =
9392         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9393         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9394         Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9395         Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9396     if (IsVOP3CvtSrDpp) {
9397       if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9398         Inst.addOperand(MCOperand::createImm(0));
9399         Inst.addOperand(MCOperand::createReg(MCRegister()));
9400       }
9401     }
9402 
9403     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9404                                             MCOI::TIED_TO);
9405     if (TiedTo != -1) {
9406       assert((unsigned)TiedTo < Inst.getNumOperands());
9407       // handle tied old or src2 for MAC instructions
9408       Inst.addOperand(Inst.getOperand(TiedTo));
9409     }
9410     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9411     // Add the register arguments
9412     if (IsDPP8 && Op.isDppFI()) {
9413       Fi = Op.getImm();
9414     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9415       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9416     } else if (Op.isReg()) {
9417       Op.addRegOperands(Inst, 1);
9418     } else if (Op.isImm() &&
9419                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9420       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9421       Op.addImmOperands(Inst, 1);
9422     } else if (Op.isImm()) {
9423       OptionalIdx[Op.getImmTy()] = I;
9424     } else {
9425       llvm_unreachable("unhandled operand type");
9426     }
9427   }
9428 
9429   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
9430     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9431                           AMDGPUOperand::ImmTyByteSel);
9432 
9433   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9434     addOptionalImmOperand(Inst, Operands, OptionalIdx,
9435                           AMDGPUOperand::ImmTyClamp);
9436 
9437   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9438     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9439 
9440   if (Desc.TSFlags & SIInstrFlags::VOP3P)
9441     cvtVOP3P(Inst, Operands, OptionalIdx);
9442   else if (Desc.TSFlags & SIInstrFlags::VOP3)
9443     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9444   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9445     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9446   }
9447 
9448   if (IsDPP8) {
9449     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
9450     using namespace llvm::AMDGPU::DPP;
9451     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9452   } else {
9453     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
9454     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9455     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9456     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9457 
9458     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
9459       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9460                             AMDGPUOperand::ImmTyDppFI);
9461   }
9462 }
9463 
9464 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9465   OptionalImmIndexMap OptionalIdx;
9466 
9467   unsigned I = 1;
9468   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9469   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9470     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9471   }
9472 
9473   int Fi = 0;
9474   for (unsigned E = Operands.size(); I != E; ++I) {
9475     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
9476                                             MCOI::TIED_TO);
9477     if (TiedTo != -1) {
9478       assert((unsigned)TiedTo < Inst.getNumOperands());
9479       // handle tied old or src2 for MAC instructions
9480       Inst.addOperand(Inst.getOperand(TiedTo));
9481     }
9482     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9483     // Add the register arguments
9484     if (Op.isReg() && validateVccOperand(Op.getReg())) {
9485       // VOP2b DPP instructions (v_add_u32, v_sub_u32 ...) use a "vcc" token.
9486       // Skip it.
9487       continue;
9488     }
9489 
9490     if (IsDPP8) {
9491       if (Op.isDPP8()) {
9492         Op.addImmOperands(Inst, 1);
9493       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9494         Op.addRegWithFPInputModsOperands(Inst, 2);
9495       } else if (Op.isDppFI()) {
9496         Fi = Op.getImm();
9497       } else if (Op.isReg()) {
9498         Op.addRegOperands(Inst, 1);
9499       } else {
9500         llvm_unreachable("Invalid operand type");
9501       }
9502     } else {
9503       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9504         Op.addRegWithFPInputModsOperands(Inst, 2);
9505       } else if (Op.isReg()) {
9506         Op.addRegOperands(Inst, 1);
9507       } else if (Op.isDPPCtrl()) {
9508         Op.addImmOperands(Inst, 1);
9509       } else if (Op.isImm()) {
9510         // Handle optional arguments
9511         OptionalIdx[Op.getImmTy()] = I;
9512       } else {
9513         llvm_unreachable("Invalid operand type");
9514       }
9515     }
9516   }
9517 
9518   if (IsDPP8) {
9519     using namespace llvm::AMDGPU::DPP;
9520     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9521   } else {
9522     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9523     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9524     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9525     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9526       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9527                             AMDGPUOperand::ImmTyDppFI);
9528     }
9529   }
9530 }
9531 
9532 //===----------------------------------------------------------------------===//
9533 // sdwa
9534 //===----------------------------------------------------------------------===//
9535 
9536 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9537                                           StringRef Prefix,
9538                                           AMDGPUOperand::ImmTy Type) {
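     // Accepts a symbolic selector after the prefix, e.g. dst_sel:WORD_1 when
     // Prefix is "dst_sel"; parseStringOrIntWithPrefix also accepts the
     // equivalent integer value.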
9539   return parseStringOrIntWithPrefix(
9540       Operands, Prefix,
9541       {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
9542       Type);
9543 }
9544 
9545 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9546   return parseStringOrIntWithPrefix(
9547       Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
9548       AMDGPUOperand::ImmTySDWADstUnused);
9549 }
9550 
9551 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9552   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9553 }
9554 
9555 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9556   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9557 }
9558 
9559 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9560   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9561 }
9562 
9563 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9564   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9565 }
9566 
9567 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9568   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9569 }
9570 
9571 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9572                               uint64_t BasicInstType,
9573                               bool SkipDstVcc,
9574                               bool SkipSrcVcc) {
9575   using namespace llvm::AMDGPU::SDWA;
9576 
9577   OptionalImmIndexMap OptionalIdx;
9578   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9579   bool SkippedVcc = false;
9580 
9581   unsigned I = 1;
9582   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9583   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9584     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9585   }
9586 
9587   for (unsigned E = Operands.size(); I != E; ++I) {
9588     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9589     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9590         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
9591       // VOP2b SDWA instructions (v_add_u32, v_sub_u32 ...) use a "vcc" token as dst.
9592       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9593       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
9594       // Skip VCC only if we did not skip it on the previous iteration.
9595       // Note that src0 and src1 each occupy 2 slots because of modifiers.
9596       if (BasicInstType == SIInstrFlags::VOP2 &&
9597           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9598            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9599         SkippedVcc = true;
9600         continue;
9601       }
9602       if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9603         SkippedVcc = true;
9604         continue;
9605       }
9606     }
9607     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9608       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9609     } else if (Op.isImm()) {
9610       // Handle optional arguments
9611       OptionalIdx[Op.getImmTy()] = I;
9612     } else {
9613       llvm_unreachable("Invalid operand type");
9614     }
9615     SkippedVcc = false;
9616   }
9617 
9618   const unsigned Opc = Inst.getOpcode();
9619   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9620       Opc != AMDGPU::V_NOP_sdwa_vi) {
9621     // v_nop_sdwa on vi/gfx9/gfx10 has no optional sdwa arguments.
9622     switch (BasicInstType) {
9623     case SIInstrFlags::VOP1:
9624       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9625         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9626                               AMDGPUOperand::ImmTyClamp, 0);
9627 
9628       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9629         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9630                               AMDGPUOperand::ImmTyOModSI, 0);
9631 
9632       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9633         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9634                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9635 
9636       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9637         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9638                               AMDGPUOperand::ImmTySDWADstUnused,
9639                               DstUnused::UNUSED_PRESERVE);
9640 
9641       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9642       break;
9643 
9644     case SIInstrFlags::VOP2:
9645       addOptionalImmOperand(Inst, Operands, OptionalIdx,
9646                             AMDGPUOperand::ImmTyClamp, 0);
9647 
9648       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9649         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9650 
9651       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9652       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9653       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9654       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9655       break;
9656 
9657     case SIInstrFlags::VOPC:
9658       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9659         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9660                               AMDGPUOperand::ImmTyClamp, 0);
9661       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9662       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9663       break;
9664 
9665     default:
9666       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9667     }
9668   }
9669 
9670   // Special case for v_mac_{f16, f32}:
9671   // they have a src2 register operand that is tied to the dst operand.
9672   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9673       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9674     auto *it = Inst.begin();
9675     std::advance(
9676       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9677     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9678   }
9679 }
9680 
9681 /// Force static initialization.
9682 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9683   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9684   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9685 }
9686 
9687 #define GET_REGISTER_MATCHER
9688 #define GET_MATCHER_IMPLEMENTATION
9689 #define GET_MNEMONIC_SPELL_CHECKER
9690 #define GET_MNEMONIC_CHECKER
9691 #include "AMDGPUGenAsmMatcher.inc"
9692 
9693 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9694                                                 unsigned MCK) {
9695   switch (MCK) {
9696   case MCK_addr64:
9697     return parseTokenOp("addr64", Operands);
9698   case MCK_done:
9699     return parseTokenOp("done", Operands);
9700   case MCK_idxen:
9701     return parseTokenOp("idxen", Operands);
9702   case MCK_lds:
9703     return parseTokenOp("lds", Operands);
9704   case MCK_offen:
9705     return parseTokenOp("offen", Operands);
9706   case MCK_off:
9707     return parseTokenOp("off", Operands);
9708   case MCK_row_95_en:
9709     return parseTokenOp("row_en", Operands);
9710   case MCK_gds:
9711     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9712   case MCK_tfe:
9713     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9714   }
9715   return tryCustomParseOperand(Operands, MCK);
9716 }
9717 
9718 // This function should be defined after the auto-generated include so that
9719 // the MatchClassKind enum is defined.
9720 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9721                                                      unsigned Kind) {
9722   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9723   // But MatchInstructionImpl() expects to see a token and fails to validate
9724   // the operand. This method checks whether we were given an immediate
9725   // operand but are expected to provide the corresponding token.
9726   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9727   switch (Kind) {
9728   case MCK_addr64:
9729     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9730   case MCK_gds:
9731     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9732   case MCK_lds:
9733     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9734   case MCK_idxen:
9735     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9736   case MCK_offen:
9737     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9738   case MCK_tfe:
9739     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9740   case MCK_SSrc_b32:
9741     // When operands have expression values, they will return true for isToken,
9742     // because it is not possible to distinguish between a token and an
9743     // expression at parse time. MatchInstructionImpl() will always try to
9744     // match an operand as a token when isToken returns true, and when the
9745     // name of the expression is not a valid token, the match will fail,
9746     // so we need to handle it here.
9747     return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
9748   case MCK_SSrc_f32:
9749     return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
9750   case MCK_SOPPBrTarget:
9751     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9752   case MCK_VReg32OrOff:
9753     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9754   case MCK_InterpSlot:
9755     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9756   case MCK_InterpAttr:
9757     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9758   case MCK_InterpAttrChan:
9759     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9760   case MCK_SReg_64:
9761   case MCK_SReg_64_XEXEC:
9762     // Null is defined as a 32-bit register, but
9763     // it should also be allowed for 64-bit and larger operands.
9764     // The following code enables it for SReg_64 and larger operands
9765     // used as source and destination. Remaining source
9766     // operands are handled in isInlinableImm.
9767   case MCK_SReg_96:
9768   case MCK_SReg_128:
9769   case MCK_SReg_256:
9770   case MCK_SReg_512:
9771     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9772   default:
9773     return Match_InvalidOperand;
9774   }
9775 }
9776 
9777 //===----------------------------------------------------------------------===//
9778 // endpgm
9779 //===----------------------------------------------------------------------===//
9780 
9781 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9782   SMLoc S = getLoc();
9783   int64_t Imm = 0;
9784 
9785   if (!parseExpr(Imm)) {
9786     // The operand is optional; if not present, default to 0.
9787     Imm = 0;
9788   }
9789 
9790   if (!isUInt<16>(Imm))
9791     return Error(S, "expected a 16-bit value");
9792 
9793   Operands.push_back(
9794       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9795   return ParseStatus::Success;
9796 }
9797 
9798 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9799 
9800 //===----------------------------------------------------------------------===//
9801 // Split Barrier
9802 //===----------------------------------------------------------------------===//
9803 
9804 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9805