//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

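    // Encode the active modifiers as an SISrcMods bitmask, suitable for use as
    // an immediate modifier operand in the MCInst.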
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

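  // Operand payload; exactly one member is active at a time, selected by Kind.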
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

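  // Record a use of register index i: advance the unused-index minimum and
  // publish the running count through the .kernel.sgpr_count /
  // .kernel.vgpr_count context symbols.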
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
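    // Reset the unused-index minimums; "using" index -1 creates the count
    // symbols with an initial value of 0.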
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth);
  unsigned ParseRegularReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseSpecialReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseRegList(RegisterKind &RegKind,
                        unsigned &RegNum,
                        unsigned &RegWidth);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the semantics of the target type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss, but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

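// A value can be safely truncated to Size bits if it is representable as
// either an unsigned or a signed integer of that width.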
1522 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1523   return isUIntN(Size, Val) || isIntN(Size, Val);
1524 }
1525 
1526 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1527 
1528   // This is a hack to enable named inline values like
1529   // shared_base with both 32-bit and 64-bit operands.
1530   // Note that these values are defined as
1531   // 32-bit operands only.
1532   if (isInlineValue()) {
1533     return true;
1534   }
1535 
1536   if (!isImmTy(ImmTyNone)) {
1537     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1538     return false;
1539   }
1540   // TODO: We should avoid using host float here. It would be better to
1541   // check the float bit values which is what a few other places do.
1542   // We've had bot failures before due to weird NaN support on mips hosts.
1543 
1544   APInt Literal(64, Imm.Val);
1545 
1546   if (Imm.IsFPImm) { // We got fp literal token
1547     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1548       return AMDGPU::isInlinableLiteral64(Imm.Val,
1549                                           AsmParser->hasInv2PiInlineImm());
1550     }
1551 
1552     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1553     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1554       return false;
1555 
1556     if (type.getScalarSizeInBits() == 16) {
1557       return AMDGPU::isInlinableLiteral16(
1558         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1559         AsmParser->hasInv2PiInlineImm());
1560     }
1561 
1562     // Check if single precision literal is inlinable
1563     return AMDGPU::isInlinableLiteral32(
1564       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1565       AsmParser->hasInv2PiInlineImm());
1566   }
1567 
1568   // We got int literal token.
1569   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1570     return AMDGPU::isInlinableLiteral64(Imm.Val,
1571                                         AsmParser->hasInv2PiInlineImm());
1572   }
1573 
1574   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1575     return false;
1576   }
1577 
1578   if (type.getScalarSizeInBits() == 16) {
1579     return AMDGPU::isInlinableLiteral16(
1580       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1581       AsmParser->hasInv2PiInlineImm());
1582   }
1583 
1584   return AMDGPU::isInlinableLiteral32(
1585     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1586     AsmParser->hasInv2PiInlineImm());
1587 }
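// A rough sketch of the cases handled above (illustrative, not
// exhaustive): for a 32-bit operand the integer literal 64 and the fp
// literal 0.5 are inlinable, whereas 65 or 0.3 have no inline encoding
// and must be emitted as literal constants.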
1588 
1589 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1590   // Check that this immediate can be added as literal
1591   if (!isImmTy(ImmTyNone)) {
1592     return false;
1593   }
1594 
1595   if (!Imm.IsFPImm) {
1596     // We got int literal token.
1597 
1598     if (type == MVT::f64 && hasFPModifiers()) {
1599       // Applying fp modifiers to int literals cannot preserve the same semantics
1600       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1601       // disable these cases.
1602       return false;
1603     }
1604 
1605     unsigned Size = type.getSizeInBits();
1606     if (Size == 64)
1607       Size = 32;
1608 
1609     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1610     // types.
1611     return isSafeTruncation(Imm.Val, Size);
1612   }
1613 
1614   // We got fp literal token
1615   if (type == MVT::f64) { // Expected 64-bit fp operand
1616     // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1617     return true;
1618   }
1619 
1620   if (type == MVT::i64) { // Expected 64-bit int operand
1621     // We don't allow fp literals in 64-bit integer instructions. It is
1622     // unclear how we should encode them.
1623     return false;
1624   }
1625 
1626   // We allow fp literals with f16x2 operands assuming that the specified
1627   // literal goes into the lower half and the upper half is zero. We also
1628     // require that the literal can be losslessly converted to f16.
1629   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1630                      (type == MVT::v2i16)? MVT::i16 : type;
1631 
1632   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1633   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1634 }
1635 
1636 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1637   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1638 }
1639 
1640 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1641   if (AsmParser->isVI())
1642     return isVReg32();
1643   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1644     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1645   else
1646     return false;
1647 }
1648 
1649 bool AMDGPUOperand::isSDWAFP16Operand() const {
1650   return isSDWAOperand(MVT::f16);
1651 }
1652 
1653 bool AMDGPUOperand::isSDWAFP32Operand() const {
1654   return isSDWAOperand(MVT::f32);
1655 }
1656 
1657 bool AMDGPUOperand::isSDWAInt16Operand() const {
1658   return isSDWAOperand(MVT::i16);
1659 }
1660 
1661 bool AMDGPUOperand::isSDWAInt32Operand() const {
1662   return isSDWAOperand(MVT::i32);
1663 }
1664 
1665 bool AMDGPUOperand::isBoolReg() const {
1666   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1667          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1668 }
1669 
1670 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1671 {
1672   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1673   assert(Size == 2 || Size == 4 || Size == 8);
1674 
1675   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1676 
1677   if (Imm.Mods.Abs) {
1678     Val &= ~FpSignMask;
1679   }
1680   if (Imm.Mods.Neg) {
1681     Val ^= FpSignMask;
1682   }
1683 
1684   return Val;
1685 }
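// Worked example (illustrative): for a 32-bit operand FpSignMask is
// 0x80000000, so with Val = 0xBF800000 (-1.0f) the 'abs' modifier
// clears the sign bit to give 0x3F800000 (1.0f), and 'neg' would then
// toggle it back to 0xBF800000.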
1686 
1687 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1688   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1689                              Inst.getNumOperands())) {
1690     addLiteralImmOperand(Inst, Imm.Val,
1691                          ApplyModifiers &
1692                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1693   } else {
1694     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1695     Inst.addOperand(MCOperand::createImm(Imm.Val));
1696   }
1697 }
1698 
1699 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1700   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1701   auto OpNum = Inst.getNumOperands();
1702   // Check that this operand accepts literals
1703   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1704 
1705   if (ApplyModifiers) {
1706     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1707     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1708     Val = applyInputFPModifiers(Val, Size);
1709   }
1710 
1711   APInt Literal(64, Val);
1712   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1713 
1714   if (Imm.IsFPImm) { // We got fp literal token
1715     switch (OpTy) {
1716     case AMDGPU::OPERAND_REG_IMM_INT64:
1717     case AMDGPU::OPERAND_REG_IMM_FP64:
1718     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1719     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1720       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1721                                        AsmParser->hasInv2PiInlineImm())) {
1722         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1723         return;
1724       }
1725 
1726       // Non-inlineable
1727       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1728         // For fp operands we check if low 32 bits are zeros
1729         if (Literal.getLoBits(32) != 0) {
1730           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1731           "Can't encode literal as exact 64-bit floating-point operand. "
1732           "Low 32-bits will be set to zero");
1733         }
1734 
1735         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1736         return;
1737       }
1738 
1739       // We don't allow fp literals in 64-bit integer instructions. It is
1740       // unclear how we should encode them. This case should be checked earlier
1741       // in predicate methods (isLiteralImm())
1742       llvm_unreachable("fp literal in 64-bit integer instruction.");
1743 
1744     case AMDGPU::OPERAND_REG_IMM_INT32:
1745     case AMDGPU::OPERAND_REG_IMM_FP32:
1746     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1747     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1748     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1749     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1750     case AMDGPU::OPERAND_REG_IMM_INT16:
1751     case AMDGPU::OPERAND_REG_IMM_FP16:
1752     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1753     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1754     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1755     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1756     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1757     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1758     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1759     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1760     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1761     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1762       bool lost;
1763       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1764       // Convert literal to single precision
1765       FPLiteral.convert(*getOpFltSemantics(OpTy),
1766                         APFloat::rmNearestTiesToEven, &lost);
1767       // We allow precision lost but not overflow or underflow. This should be
1768       // checked earlier in isLiteralImm()
1769 
1770       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1771       Inst.addOperand(MCOperand::createImm(ImmVal));
1772       return;
1773     }
1774     default:
1775       llvm_unreachable("invalid operand size");
1776     }
1777 
1778     return;
1779   }
1780 
1781   // We got int literal token.
1782   // Only sign extend inline immediates.
1783   switch (OpTy) {
1784   case AMDGPU::OPERAND_REG_IMM_INT32:
1785   case AMDGPU::OPERAND_REG_IMM_FP32:
1786   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1787   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1788   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1789   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1790   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1791   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1792     if (isSafeTruncation(Val, 32) &&
1793         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1794                                      AsmParser->hasInv2PiInlineImm())) {
1795       Inst.addOperand(MCOperand::createImm(Val));
1796       return;
1797     }
1798 
1799     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1800     return;
1801 
1802   case AMDGPU::OPERAND_REG_IMM_INT64:
1803   case AMDGPU::OPERAND_REG_IMM_FP64:
1804   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1805   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1806     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1807       Inst.addOperand(MCOperand::createImm(Val));
1808       return;
1809     }
1810 
1811     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1812     return;
1813 
1814   case AMDGPU::OPERAND_REG_IMM_INT16:
1815   case AMDGPU::OPERAND_REG_IMM_FP16:
1816   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1817   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1818   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1820     if (isSafeTruncation(Val, 16) &&
1821         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1822                                      AsmParser->hasInv2PiInlineImm())) {
1823       Inst.addOperand(MCOperand::createImm(Val));
1824       return;
1825     }
1826 
1827     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1828     return;
1829 
1830   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1831   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1832   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1833   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1834     assert(isSafeTruncation(Val, 16));
1835     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1836                                         AsmParser->hasInv2PiInlineImm()));
1837 
1838     Inst.addOperand(MCOperand::createImm(Val));
1839     return;
1840   }
1841   default:
1842     llvm_unreachable("invalid operand size");
1843   }
1844 }
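// Example of the 64-bit fp path above (illustrative): the literal 1.5
// has the IEEE double encoding 0x3FF8000000000000; its low 32 bits are
// zero, so it is emitted as the 32-bit value 0x3FF80000 without a
// warning, whereas 1.1 (low 32 bits non-zero) would trigger the
// truncation warning.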
1845 
1846 template <unsigned Bitwidth>
1847 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1848   APInt Literal(64, Imm.Val);
1849 
1850   if (!Imm.IsFPImm) {
1851     // We got int literal token.
1852     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1853     return;
1854   }
1855 
1856   bool Lost;
1857   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1858   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1859                     APFloat::rmNearestTiesToEven, &Lost);
1860   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1861 }
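// Sketch (illustrative): with Bitwidth == 32 an fp literal token such
// as 1.5 is converted to its IEEE single encoding 0x3FC00000 before
// being added as the k-immediate, while an integer token is passed
// through with only its low 32 bits kept.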
1862 
1863 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1864   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1865 }
1866 
1867 static bool isInlineValue(unsigned Reg) {
1868   switch (Reg) {
1869   case AMDGPU::SRC_SHARED_BASE:
1870   case AMDGPU::SRC_SHARED_LIMIT:
1871   case AMDGPU::SRC_PRIVATE_BASE:
1872   case AMDGPU::SRC_PRIVATE_LIMIT:
1873   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1874     return true;
1875   case AMDGPU::SRC_VCCZ:
1876   case AMDGPU::SRC_EXECZ:
1877   case AMDGPU::SRC_SCC:
1878     return true;
1879   case AMDGPU::SGPR_NULL:
1880     return true;
1881   default:
1882     return false;
1883   }
1884 }
1885 
1886 bool AMDGPUOperand::isInlineValue() const {
1887   return isRegKind() && ::isInlineValue(getReg());
1888 }
1889 
1890 //===----------------------------------------------------------------------===//
1891 // AsmParser
1892 //===----------------------------------------------------------------------===//
1893 
1894 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1895   if (Is == IS_VGPR) {
1896     switch (RegWidth) {
1897       default: return -1;
1898       case 1: return AMDGPU::VGPR_32RegClassID;
1899       case 2: return AMDGPU::VReg_64RegClassID;
1900       case 3: return AMDGPU::VReg_96RegClassID;
1901       case 4: return AMDGPU::VReg_128RegClassID;
1902       case 5: return AMDGPU::VReg_160RegClassID;
1903       case 8: return AMDGPU::VReg_256RegClassID;
1904       case 16: return AMDGPU::VReg_512RegClassID;
1905       case 32: return AMDGPU::VReg_1024RegClassID;
1906     }
1907   } else if (Is == IS_TTMP) {
1908     switch (RegWidth) {
1909       default: return -1;
1910       case 1: return AMDGPU::TTMP_32RegClassID;
1911       case 2: return AMDGPU::TTMP_64RegClassID;
1912       case 4: return AMDGPU::TTMP_128RegClassID;
1913       case 8: return AMDGPU::TTMP_256RegClassID;
1914       case 16: return AMDGPU::TTMP_512RegClassID;
1915     }
1916   } else if (Is == IS_SGPR) {
1917     switch (RegWidth) {
1918       default: return -1;
1919       case 1: return AMDGPU::SGPR_32RegClassID;
1920       case 2: return AMDGPU::SGPR_64RegClassID;
1921       case 4: return AMDGPU::SGPR_128RegClassID;
1922       case 8: return AMDGPU::SGPR_256RegClassID;
1923       case 16: return AMDGPU::SGPR_512RegClassID;
1924     }
1925   } else if (Is == IS_AGPR) {
1926     switch (RegWidth) {
1927       default: return -1;
1928       case 1: return AMDGPU::AGPR_32RegClassID;
1929       case 2: return AMDGPU::AReg_64RegClassID;
1930       case 4: return AMDGPU::AReg_128RegClassID;
1931       case 16: return AMDGPU::AReg_512RegClassID;
1932       case 32: return AMDGPU::AReg_1024RegClassID;
1933     }
1934   }
1935   return -1;
1936 }
1937 
1938 static unsigned getSpecialRegForName(StringRef RegName) {
1939   return StringSwitch<unsigned>(RegName)
1940     .Case("exec", AMDGPU::EXEC)
1941     .Case("vcc", AMDGPU::VCC)
1942     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1943     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1944     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1945     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1946     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1947     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1948     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1949     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1950     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1951     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1952     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1953     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1954     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1955     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1956     .Case("m0", AMDGPU::M0)
1957     .Case("vccz", AMDGPU::SRC_VCCZ)
1958     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1959     .Case("execz", AMDGPU::SRC_EXECZ)
1960     .Case("src_execz", AMDGPU::SRC_EXECZ)
1961     .Case("scc", AMDGPU::SRC_SCC)
1962     .Case("src_scc", AMDGPU::SRC_SCC)
1963     .Case("tba", AMDGPU::TBA)
1964     .Case("tma", AMDGPU::TMA)
1965     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1966     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1967     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1968     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1969     .Case("vcc_lo", AMDGPU::VCC_LO)
1970     .Case("vcc_hi", AMDGPU::VCC_HI)
1971     .Case("exec_lo", AMDGPU::EXEC_LO)
1972     .Case("exec_hi", AMDGPU::EXEC_HI)
1973     .Case("tma_lo", AMDGPU::TMA_LO)
1974     .Case("tma_hi", AMDGPU::TMA_HI)
1975     .Case("tba_lo", AMDGPU::TBA_LO)
1976     .Case("tba_hi", AMDGPU::TBA_HI)
1977     .Case("null", AMDGPU::SGPR_NULL)
1978     .Default(AMDGPU::NoRegister);
1979 }
1980 
1981 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1982                                     SMLoc &EndLoc) {
1983   auto R = parseRegister();
1984   if (!R) return true;
1985   assert(R->isReg());
1986   RegNo = R->getReg();
1987   StartLoc = R->getStartLoc();
1988   EndLoc = R->getEndLoc();
1989   return false;
1990 }
1991 
1992 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1993                                             RegisterKind RegKind, unsigned Reg1) {
1994   switch (RegKind) {
1995   case IS_SPECIAL:
1996     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1997       Reg = AMDGPU::EXEC;
1998       RegWidth = 2;
1999       return true;
2000     }
2001     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2002       Reg = AMDGPU::FLAT_SCR;
2003       RegWidth = 2;
2004       return true;
2005     }
2006     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2007       Reg = AMDGPU::XNACK_MASK;
2008       RegWidth = 2;
2009       return true;
2010     }
2011     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2012       Reg = AMDGPU::VCC;
2013       RegWidth = 2;
2014       return true;
2015     }
2016     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2017       Reg = AMDGPU::TBA;
2018       RegWidth = 2;
2019       return true;
2020     }
2021     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2022       Reg = AMDGPU::TMA;
2023       RegWidth = 2;
2024       return true;
2025     }
2026     return false;
2027   case IS_VGPR:
2028   case IS_SGPR:
2029   case IS_AGPR:
2030   case IS_TTMP:
2031     if (Reg1 != Reg + RegWidth) {
2032       return false;
2033     }
2034     RegWidth++;
2035     return true;
2036   default:
2037     llvm_unreachable("unexpected register kind");
2038   }
2039 }
2040 
2041 struct RegInfo {
2042   StringLiteral Name;
2043   RegisterKind Kind;
2044 };
2045 
2046 static constexpr RegInfo RegularRegisters[] = {
2047   {{"v"},    IS_VGPR},
2048   {{"s"},    IS_SGPR},
2049   {{"ttmp"}, IS_TTMP},
2050   {{"acc"},  IS_AGPR},
2051   {{"a"},    IS_AGPR},
2052 };
2053 
2054 static bool isRegularReg(RegisterKind Kind) {
2055   return Kind == IS_VGPR ||
2056          Kind == IS_SGPR ||
2057          Kind == IS_TTMP ||
2058          Kind == IS_AGPR;
2059 }
2060 
2061 static const RegInfo* getRegularRegInfo(StringRef Str) {
2062   for (const RegInfo &Reg : RegularRegisters)
2063     if (Str.startswith(Reg.Name))
2064       return &Reg;
2065   return nullptr;
2066 }
2067 
2068 static bool getRegNum(StringRef Str, unsigned& Num) {
2069   return !Str.getAsInteger(10, Num);
2070 }
2071 
2072 bool
2073 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2074                             const AsmToken &NextToken) const {
2075 
2076   // A list of consecutive registers: [s0,s1,s2,s3]
2077   if (Token.is(AsmToken::LBrac))
2078     return true;
2079 
2080   if (!Token.is(AsmToken::Identifier))
2081     return false;
2082 
2083   // A single register like s0 or a range of registers like s[0:1]
2084 
2085   StringRef Str = Token.getString();
2086   const RegInfo *Reg = getRegularRegInfo(Str);
2087   if (Reg) {
2088     StringRef RegName = Reg->Name;
2089     StringRef RegSuffix = Str.substr(RegName.size());
2090     if (!RegSuffix.empty()) {
2091       unsigned Num;
2092       // A single register with an index: rXX
2093       if (getRegNum(RegSuffix, Num))
2094         return true;
2095     } else {
2096       // A range of registers: r[XX:YY].
2097       if (NextToken.is(AsmToken::LBrac))
2098         return true;
2099     }
2100   }
2101 
2102   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2103 }
2104 
2105 bool
2106 AMDGPUAsmParser::isRegister()
2107 {
2108   return isRegister(getToken(), peekToken());
2109 }
2110 
2111 unsigned
2112 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2113                                unsigned RegNum,
2114                                unsigned RegWidth) {
2115 
2116   assert(isRegularReg(RegKind));
2117 
2118   unsigned AlignSize = 1;
2119   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2120     // SGPR and TTMP registers must be aligned.
2121     // Max required alignment is 4 dwords.
2122     AlignSize = std::min(RegWidth, 4u);
2123   }
2124 
2125   if (RegNum % AlignSize != 0)
2126     return AMDGPU::NoRegister;
2127 
2128   unsigned RegIdx = RegNum / AlignSize;
2129   int RCID = getRegClass(RegKind, RegWidth);
2130   if (RCID == -1)
2131     return AMDGPU::NoRegister;
2132 
2133   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2134   const MCRegisterClass RC = TRI->getRegClass(RCID);
2135   if (RegIdx >= RC.getNumRegs())
2136     return AMDGPU::NoRegister;
2137 
2138   return RC.getRegister(RegIdx);
2139 }
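// Worked example (illustrative): "s[4:7]" gives RegKind = IS_SGPR,
// RegNum = 4 and RegWidth = 4, so AlignSize = 4, RegIdx = 1 and the
// result is the second register of SGPR_128RegClassID, i.e. s[4:7].
// "s[2:5]" is rejected because RegNum 2 is not aligned to 4 dwords.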
2140 
2141 bool
2142 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2143   int64_t RegLo, RegHi;
2144   if (!trySkipToken(AsmToken::LBrac))
2145     return false;
2146 
2147   if (!parseExpr(RegLo))
2148     return false;
2149 
2150   if (trySkipToken(AsmToken::Colon)) {
2151     if (!parseExpr(RegHi))
2152       return false;
2153   } else {
2154     RegHi = RegLo;
2155   }
2156 
2157   if (!trySkipToken(AsmToken::RBrac))
2158     return false;
2159 
2160   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2161     return false;
2162 
2163   Num = static_cast<unsigned>(RegLo);
2164   Width = (RegHi - RegLo) + 1;
2165   return true;
2166 }
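// Illustrative inputs: "[0:3]" yields Num = 0 and Width = 4, while a
// single index such as "[5]" yields Num = 5 and Width = 1.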
2167 
2168 unsigned
2169 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2170                                  unsigned &RegNum,
2171                                  unsigned &RegWidth) {
2172   assert(isToken(AsmToken::Identifier));
2173   unsigned Reg = getSpecialRegForName(getTokenStr());
2174   if (Reg) {
2175     RegNum = 0;
2176     RegWidth = 1;
2177     RegKind = IS_SPECIAL;
2178     lex(); // skip register name
2179   }
2180   return Reg;
2181 }
2182 
2183 unsigned
2184 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2185                                  unsigned &RegNum,
2186                                  unsigned &RegWidth) {
2187   assert(isToken(AsmToken::Identifier));
2188   StringRef RegName = getTokenStr();
2189 
2190   const RegInfo *RI = getRegularRegInfo(RegName);
2191   if (!RI)
2192     return AMDGPU::NoRegister;
2193   lex(); // skip register name
2194 
2195   RegKind = RI->Kind;
2196   StringRef RegSuffix = RegName.substr(RI->Name.size());
2197   if (!RegSuffix.empty()) {
2198     // Single 32-bit register: vXX.
2199     if (!getRegNum(RegSuffix, RegNum))
2200       return AMDGPU::NoRegister;
2201     RegWidth = 1;
2202   } else {
2203     // Range of registers: v[XX:YY]. ":YY" is optional.
2204     if (!ParseRegRange(RegNum, RegWidth))
2205       return AMDGPU::NoRegister;
2206   }
2207 
2208   return getRegularReg(RegKind, RegNum, RegWidth);
2209 }
2210 
2211 unsigned
2212 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2213                               unsigned &RegNum,
2214                               unsigned &RegWidth) {
2215   unsigned Reg = AMDGPU::NoRegister;
2216 
2217   if (!trySkipToken(AsmToken::LBrac))
2218     return AMDGPU::NoRegister;
2219 
2220   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2221 
2222   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2223     return AMDGPU::NoRegister;
2224   if (RegWidth != 1)
2225     return AMDGPU::NoRegister;
2226 
2227   for (; trySkipToken(AsmToken::Comma); ) {
2228     RegisterKind NextRegKind;
2229     unsigned NextReg, NextRegNum, NextRegWidth;
2230 
2231     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2232       return AMDGPU::NoRegister;
2233     if (NextRegWidth != 1)
2234       return AMDGPU::NoRegister;
2235     if (NextRegKind != RegKind)
2236       return AMDGPU::NoRegister;
2237     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2238       return AMDGPU::NoRegister;
2239   }
2240 
2241   if (!trySkipToken(AsmToken::RBrac))
2242     return AMDGPU::NoRegister;
2243 
2244   if (isRegularReg(RegKind))
2245     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2246 
2247   return Reg;
2248 }
2249 
2250 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2251                                           unsigned &Reg,
2252                                           unsigned &RegNum,
2253                                           unsigned &RegWidth) {
2254   Reg = AMDGPU::NoRegister;
2255 
2256   if (isToken(AsmToken::Identifier)) {
2257     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2258     if (Reg == AMDGPU::NoRegister)
2259       Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2260   } else {
2261     Reg = ParseRegList(RegKind, RegNum, RegWidth);
2262   }
2263 
2264   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2265   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2266 }
2267 
2268 Optional<StringRef>
2269 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2270   switch (RegKind) {
2271   case IS_VGPR:
2272     return StringRef(".amdgcn.next_free_vgpr");
2273   case IS_SGPR:
2274     return StringRef(".amdgcn.next_free_sgpr");
2275   default:
2276     return None;
2277   }
2278 }
2279 
2280 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2281   auto SymbolName = getGprCountSymbolName(RegKind);
2282   assert(SymbolName && "initializing invalid register kind");
2283   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2284   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2285 }
2286 
2287 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2288                                             unsigned DwordRegIndex,
2289                                             unsigned RegWidth) {
2290   // Symbols are only defined for GCN targets
2291   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2292     return true;
2293 
2294   auto SymbolName = getGprCountSymbolName(RegKind);
2295   if (!SymbolName)
2296     return true;
2297   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2298 
2299   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2300   int64_t OldCount;
2301 
2302   if (!Sym->isVariable())
2303     return !Error(getParser().getTok().getLoc(),
2304                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2305   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2306     return !Error(
2307         getParser().getTok().getLoc(),
2308         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2309 
2310   if (OldCount <= NewMax)
2311     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2312 
2313   return true;
2314 }
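// Example (illustrative): a reference to v[4:7] reaches this function
// with DwordRegIndex = 4 and RegWidth = 4, so NewMax = 7 and
// .amdgcn.next_free_vgpr is raised to 8 unless it already exceeds that.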
2315 
2316 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2317   const auto &Tok = Parser.getTok();
2318   SMLoc StartLoc = Tok.getLoc();
2319   SMLoc EndLoc = Tok.getEndLoc();
2320   RegisterKind RegKind;
2321   unsigned Reg, RegNum, RegWidth;
2322 
2323   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2324     // FIXME: improve error messages (bug 41303).
2325     Error(StartLoc, "not a valid operand.");
2326     return nullptr;
2327   }
2328   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2329     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2330       return nullptr;
2331   } else
2332     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2333   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2334 }
2335 
2336 OperandMatchResultTy
2337 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2338   // TODO: add syntactic sugar for 1/(2*PI)
2339 
2340   assert(!isRegister());
2341   assert(!isModifier());
2342 
2343   const auto& Tok = getToken();
2344   const auto& NextTok = peekToken();
2345   bool IsReal = Tok.is(AsmToken::Real);
2346   SMLoc S = getLoc();
2347   bool Negate = false;
2348 
2349   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2350     lex();
2351     IsReal = true;
2352     Negate = true;
2353   }
2354 
2355   if (IsReal) {
2356     // Floating-point expressions are not supported.
2357     // Only floating-point literals with an optional
2358     // sign are allowed.
2359 
2360     StringRef Num = getTokenStr();
2361     lex();
2362 
2363     APFloat RealVal(APFloat::IEEEdouble());
2364     auto roundMode = APFloat::rmNearestTiesToEven;
2365     if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2366       return MatchOperand_ParseFail;
2367     }
2368     if (Negate)
2369       RealVal.changeSign();
2370 
2371     Operands.push_back(
2372       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2373                                AMDGPUOperand::ImmTyNone, true));
2374 
2375     return MatchOperand_Success;
2376 
2377   } else {
2378     int64_t IntVal;
2379     const MCExpr *Expr;
2380     SMLoc S = getLoc();
2381 
2382     if (HasSP3AbsModifier) {
2383       // This is a workaround for handling expressions
2384       // as arguments of the SP3 'abs' modifier, for example:
2385       //     |1.0|
2386       //     |-1|
2387       //     |1+x|
2388       // This syntax is not compatible with the syntax of standard
2389       // MC expressions (due to the trailing '|').
2390       SMLoc EndLoc;
2391       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2392         return MatchOperand_ParseFail;
2393     } else {
2394       if (Parser.parseExpression(Expr))
2395         return MatchOperand_ParseFail;
2396     }
2397 
2398     if (Expr->evaluateAsAbsolute(IntVal)) {
2399       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2400     } else {
2401       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2402     }
2403 
2404     return MatchOperand_Success;
2405   }
2406 
2407   return MatchOperand_NoMatch;
2408 }
2409 
2410 OperandMatchResultTy
2411 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2412   if (!isRegister())
2413     return MatchOperand_NoMatch;
2414 
2415   if (auto R = parseRegister()) {
2416     assert(R->isReg());
2417     Operands.push_back(std::move(R));
2418     return MatchOperand_Success;
2419   }
2420   return MatchOperand_ParseFail;
2421 }
2422 
2423 OperandMatchResultTy
2424 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2425   auto res = parseReg(Operands);
2426   if (res != MatchOperand_NoMatch) {
2427     return res;
2428   } else if (isModifier()) {
2429     return MatchOperand_NoMatch;
2430   } else {
2431     return parseImm(Operands, HasSP3AbsMod);
2432   }
2433 }
2434 
2435 bool
2436 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2437   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2438     const auto &str = Token.getString();
2439     return str == "abs" || str == "neg" || str == "sext";
2440   }
2441   return false;
2442 }
2443 
2444 bool
2445 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2446   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2447 }
2448 
2449 bool
2450 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2451   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2452 }
2453 
2454 bool
2455 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2456   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2457 }
2458 
2459 // Check if this is an operand modifier or an opcode modifier
2460 // which may look like an expression but is not. We should
2461 // avoid parsing these modifiers as expressions. Currently
2462 // recognized sequences are:
2463 //   |...|
2464 //   abs(...)
2465 //   neg(...)
2466 //   sext(...)
2467 //   -reg
2468 //   -|...|
2469 //   -abs(...)
2470 //   name:...
2471 // Note that simple opcode modifiers like 'gds' may be parsed as
2472 // expressions; this is a special case. See getExpressionAsToken.
2473 //
2474 bool
2475 AMDGPUAsmParser::isModifier() {
2476 
2477   AsmToken Tok = getToken();
2478   AsmToken NextToken[2];
2479   peekTokens(NextToken);
2480 
2481   return isOperandModifier(Tok, NextToken[0]) ||
2482          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2483          isOpcodeModifierWithVal(Tok, NextToken[0]);
2484 }
2485 
2486 // Check if the current token is an SP3 'neg' modifier.
2487 // Currently this modifier is allowed in the following contexts:
2488 //
2489 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2490 // 2. Before an 'abs' modifier: -abs(...)
2491 // 3. Before an SP3 'abs' modifier: -|...|
2492 //
2493 // In all other cases "-" is handled as a part
2494 // of an expression that follows the sign.
2495 //
2496 // Note: When "-" is followed by an integer literal,
2497 // it is interpreted as integer negation rather than
2498 // a floating-point NEG modifier applied to the literal.
2499 // Besides being counter-intuitive, such use of a floating-point
2500 // NEG modifier would have resulted in a different meaning
2501 // of integer literals used with VOP1/2/C and VOP3,
2502 // for example:
2503 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2504 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2505 // Negative fp literals with a preceding "-" are
2506 // handled likewise for uniformity.
2507 //
2508 bool
2509 AMDGPUAsmParser::parseSP3NegModifier() {
2510 
2511   AsmToken NextToken[2];
2512   peekTokens(NextToken);
2513 
2514   if (isToken(AsmToken::Minus) &&
2515       (isRegister(NextToken[0], NextToken[1]) ||
2516        NextToken[0].is(AsmToken::Pipe) ||
2517        isId(NextToken[0], "abs"))) {
2518     lex();
2519     return true;
2520   }
2521 
2522   return false;
2523 }
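// For instance (illustrative), in "-v0" or "-|v0|" the leading "-" is
// consumed here as an SP3 neg modifier, whereas in "-1" it is left for
// the expression parser to handle as integer negation.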
2524 
2525 OperandMatchResultTy
2526 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2527                                               bool AllowImm) {
2528   bool Neg, SP3Neg;
2529   bool Abs, SP3Abs;
2530   SMLoc Loc;
2531 
2532   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2533   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2534     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2535     return MatchOperand_ParseFail;
2536   }
2537 
2538   SP3Neg = parseSP3NegModifier();
2539 
2540   Loc = getLoc();
2541   Neg = trySkipId("neg");
2542   if (Neg && SP3Neg) {
2543     Error(Loc, "expected register or immediate");
2544     return MatchOperand_ParseFail;
2545   }
2546   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2547     return MatchOperand_ParseFail;
2548 
2549   Abs = trySkipId("abs");
2550   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2551     return MatchOperand_ParseFail;
2552 
2553   Loc = getLoc();
2554   SP3Abs = trySkipToken(AsmToken::Pipe);
2555   if (Abs && SP3Abs) {
2556     Error(Loc, "expected register or immediate");
2557     return MatchOperand_ParseFail;
2558   }
2559 
2560   OperandMatchResultTy Res;
2561   if (AllowImm) {
2562     Res = parseRegOrImm(Operands, SP3Abs);
2563   } else {
2564     Res = parseReg(Operands);
2565   }
2566   if (Res != MatchOperand_Success) {
2567     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2568   }
2569 
2570   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2571     return MatchOperand_ParseFail;
2572   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2573     return MatchOperand_ParseFail;
2574   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2575     return MatchOperand_ParseFail;
2576 
2577   AMDGPUOperand::Modifiers Mods;
2578   Mods.Abs = Abs || SP3Abs;
2579   Mods.Neg = Neg || SP3Neg;
2580 
2581   if (Mods.hasFPModifiers()) {
2582     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2583     if (Op.isExpr()) {
2584       Error(Op.getStartLoc(), "expected an absolute expression");
2585       return MatchOperand_ParseFail;
2586     }
2587     Op.setModifiers(Mods);
2588   }
2589   return MatchOperand_Success;
2590 }
2591 
2592 OperandMatchResultTy
2593 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2594                                                bool AllowImm) {
2595   bool Sext = trySkipId("sext");
2596   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2597     return MatchOperand_ParseFail;
2598 
2599   OperandMatchResultTy Res;
2600   if (AllowImm) {
2601     Res = parseRegOrImm(Operands);
2602   } else {
2603     Res = parseReg(Operands);
2604   }
2605   if (Res != MatchOperand_Success) {
2606     return Sext? MatchOperand_ParseFail : Res;
2607   }
2608 
2609   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2610     return MatchOperand_ParseFail;
2611 
2612   AMDGPUOperand::Modifiers Mods;
2613   Mods.Sext = Sext;
2614 
2615   if (Mods.hasIntModifiers()) {
2616     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2617     if (Op.isExpr()) {
2618       Error(Op.getStartLoc(), "expected an absolute expression");
2619       return MatchOperand_ParseFail;
2620     }
2621     Op.setModifiers(Mods);
2622   }
2623 
2624   return MatchOperand_Success;
2625 }
2626 
2627 OperandMatchResultTy
2628 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2629   return parseRegOrImmWithFPInputMods(Operands, false);
2630 }
2631 
2632 OperandMatchResultTy
2633 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2634   return parseRegOrImmWithIntInputMods(Operands, false);
2635 }
2636 
2637 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2638   auto Loc = getLoc();
2639   if (trySkipId("off")) {
2640     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2641                                                 AMDGPUOperand::ImmTyOff, false));
2642     return MatchOperand_Success;
2643   }
2644 
2645   if (!isRegister())
2646     return MatchOperand_NoMatch;
2647 
2648   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2649   if (Reg) {
2650     Operands.push_back(std::move(Reg));
2651     return MatchOperand_Success;
2652   }
2653 
2654   return MatchOperand_ParseFail;
2655 
2656 }
2657 
2658 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2659   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2660 
2661   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2662       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2663       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2664       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2665     return Match_InvalidOperand;
2666 
2667   if ((TSFlags & SIInstrFlags::VOP3) &&
2668       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2669       getForcedEncodingSize() != 64)
2670     return Match_PreferE32;
2671 
2672   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2673       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2674     // v_mac_f32/16 allow only dst_sel == DWORD;
2675     auto OpNum =
2676         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2677     const auto &Op = Inst.getOperand(OpNum);
2678     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2679       return Match_InvalidOperand;
2680     }
2681   }
2682 
2683   return Match_Success;
2684 }
2685 
2686 // What asm variants we should check
2687 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2688   if (getForcedEncodingSize() == 32) {
2689     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2690     return makeArrayRef(Variants);
2691   }
2692 
2693   if (isForcedVOP3()) {
2694     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2695     return makeArrayRef(Variants);
2696   }
2697 
2698   if (isForcedSDWA()) {
2699     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2700                                         AMDGPUAsmVariants::SDWA9};
2701     return makeArrayRef(Variants);
2702   }
2703 
2704   if (isForcedDPP()) {
2705     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2706     return makeArrayRef(Variants);
2707   }
2708 
2709   static const unsigned Variants[] = {
2710     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2711     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2712   };
2713 
2714   return makeArrayRef(Variants);
2715 }
2716 
2717 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2718   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2719   const unsigned Num = Desc.getNumImplicitUses();
2720   for (unsigned i = 0; i < Num; ++i) {
2721     unsigned Reg = Desc.ImplicitUses[i];
2722     switch (Reg) {
2723     case AMDGPU::FLAT_SCR:
2724     case AMDGPU::VCC:
2725     case AMDGPU::VCC_LO:
2726     case AMDGPU::VCC_HI:
2727     case AMDGPU::M0:
2728       return Reg;
2729     default:
2730       break;
2731     }
2732   }
2733   return AMDGPU::NoRegister;
2734 }
2735 
2736 // NB: This code is correct only when used to check constant
2737 // bus limitations because GFX7 supports no f16 inline constants.
2738 // Note that there are no cases when a GFX7 opcode violates
2739 // constant bus limitations due to the use of an f16 constant.
2740 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2741                                        unsigned OpIdx) const {
2742   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2743 
2744   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2745     return false;
2746   }
2747 
2748   const MCOperand &MO = Inst.getOperand(OpIdx);
2749 
2750   int64_t Val = MO.getImm();
2751   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2752 
2753   switch (OpSize) { // expected operand size
2754   case 8:
2755     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2756   case 4:
2757     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2758   case 2: {
2759     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2760     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2761         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2762         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2763         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2764         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2765         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2766       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2767     } else {
2768       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2769     }
2770   }
2771   default:
2772     llvm_unreachable("invalid operand size");
2773   }
2774 }
2775 
2776 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2777   if (!isGFX10())
2778     return 1;
2779 
2780   switch (Opcode) {
2781   // 64-bit shift instructions can use only one scalar value input
2782   case AMDGPU::V_LSHLREV_B64:
2783   case AMDGPU::V_LSHLREV_B64_gfx10:
2784   case AMDGPU::V_LSHL_B64:
2785   case AMDGPU::V_LSHRREV_B64:
2786   case AMDGPU::V_LSHRREV_B64_gfx10:
2787   case AMDGPU::V_LSHR_B64:
2788   case AMDGPU::V_ASHRREV_I64:
2789   case AMDGPU::V_ASHRREV_I64_gfx10:
2790   case AMDGPU::V_ASHR_I64:
2791     return 1;
2792   default:
2793     return 2;
2794   }
2795 }
2796 
2797 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2798   const MCOperand &MO = Inst.getOperand(OpIdx);
2799   if (MO.isImm()) {
2800     return !isInlineConstant(Inst, OpIdx);
2801   } else if (MO.isReg()) {
2802     auto Reg = MO.getReg();
2803     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2804     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2805   } else {
2806     return true;
2807   }
2808 }
2809 
2810 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2811   const unsigned Opcode = Inst.getOpcode();
2812   const MCInstrDesc &Desc = MII.get(Opcode);
2813   unsigned ConstantBusUseCount = 0;
2814   unsigned NumLiterals = 0;
2815   unsigned LiteralSize;
2816 
2817   if (Desc.TSFlags &
2818       (SIInstrFlags::VOPC |
2819        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2820        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2821        SIInstrFlags::SDWA)) {
2822     // Check special imm operands (used by madmk, etc)
2823     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2824       ++ConstantBusUseCount;
2825     }
2826 
2827     SmallDenseSet<unsigned> SGPRsUsed;
2828     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2829     if (SGPRUsed != AMDGPU::NoRegister) {
2830       SGPRsUsed.insert(SGPRUsed);
2831       ++ConstantBusUseCount;
2832     }
2833 
2834     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2835     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2836     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2837 
2838     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2839 
2840     for (int OpIdx : OpIndices) {
2841       if (OpIdx == -1) break;
2842 
2843       const MCOperand &MO = Inst.getOperand(OpIdx);
2844       if (usesConstantBus(Inst, OpIdx)) {
2845         if (MO.isReg()) {
2846           const unsigned Reg = mc2PseudoReg(MO.getReg());
2847           // Pairs of registers with a partial intersection like these
2848           //   s0, s[0:1]
2849           //   flat_scratch_lo, flat_scratch
2850           //   flat_scratch_lo, flat_scratch_hi
2851           // are theoretically valid but they are disabled anyway.
2852           // Note that this code mimics SIInstrInfo::verifyInstruction
2853           if (!SGPRsUsed.count(Reg)) {
2854             SGPRsUsed.insert(Reg);
2855             ++ConstantBusUseCount;
2856           }
2857         } else { // Expression or a literal
2858 
2859           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2860             continue; // special operand like VINTERP attr_chan
2861 
2862           // An instruction may use only one literal.
2863           // This has been validated in a previous step.
2864           // See validateVOP3Literal.
2865           // This literal may be used as more than one operand.
2866           // If all these operands are of the same size,
2867           // this literal counts as one scalar value.
2868           // Otherwise it counts as 2 scalar values.
2869           // See "GFX10 Shader Programming", section 3.6.2.3.
2870 
2871           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2872           if (Size < 4) Size = 4;
2873 
2874           if (NumLiterals == 0) {
2875             NumLiterals = 1;
2876             LiteralSize = Size;
2877           } else if (LiteralSize != Size) {
2878             NumLiterals = 2;
2879           }
2880         }
2881       }
2882     }
2883   }
2884   ConstantBusUseCount += NumLiterals;
2885 
2886   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2887 }
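// Worked example (illustrative, GFX10 where the default limit is 2):
//   v_add_f32_e64 v0, s0, s1 // two distinct SGPRs -> 2 constant bus uses, accepted
//   v_add_f32_e64 v0, s0, s0 // the same SGPR is counted only once -> 1 use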
2888 
2889 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2890   const unsigned Opcode = Inst.getOpcode();
2891   const MCInstrDesc &Desc = MII.get(Opcode);
2892 
2893   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2894   if (DstIdx == -1 ||
2895       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2896     return true;
2897   }
2898 
2899   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2900 
2901   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2902   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2903   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2904 
2905   assert(DstIdx != -1);
2906   const MCOperand &Dst = Inst.getOperand(DstIdx);
2907   assert(Dst.isReg());
2908   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2909 
2910   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2911 
2912   for (int SrcIdx : SrcIndices) {
2913     if (SrcIdx == -1) break;
2914     const MCOperand &Src = Inst.getOperand(SrcIdx);
2915     if (Src.isReg()) {
2916       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2917       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2918         return false;
2919       }
2920     }
2921   }
2922 
2923   return true;
2924 }
2925 
2926 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2927 
2928   const unsigned Opc = Inst.getOpcode();
2929   const MCInstrDesc &Desc = MII.get(Opc);
2930 
2931   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2932     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2933     assert(ClampIdx != -1);
2934     return Inst.getOperand(ClampIdx).getImm() == 0;
2935   }
2936 
2937   return true;
2938 }
2939 
2940 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2941 
2942   const unsigned Opc = Inst.getOpcode();
2943   const MCInstrDesc &Desc = MII.get(Opc);
2944 
2945   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2946     return true;
2947 
2948   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2949   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2951 
2952   assert(VDataIdx != -1);
2953   assert(DMaskIdx != -1);
2954   assert(TFEIdx != -1);
2955 
2956   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2957   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2958   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2959   if (DMask == 0)
2960     DMask = 1;
2961 
2962   unsigned DataSize =
2963     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2964   if (hasPackedD16()) {
2965     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2966     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2967       DataSize = (DataSize + 1) / 2;
2968   }
2969 
2970   return (VDataSize / 4) == DataSize + TFESize;
2971 }
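// Worked example (illustrative): with dmask = 0x7 (three components)
// and tfe enabled, the expected size is 3 + 1 = 4 dwords, so a 128-bit
// vdata register such as v[0:3] validates while v[0:2] would not.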
2972 
2973 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2974   const unsigned Opc = Inst.getOpcode();
2975   const MCInstrDesc &Desc = MII.get(Opc);
2976 
2977   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2978     return true;
2979 
2980   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2981   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2982       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2983   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2984   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2985   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2986 
2987   assert(VAddr0Idx != -1);
2988   assert(SrsrcIdx != -1);
2989   assert(DimIdx != -1);
2990   assert(SrsrcIdx > VAddr0Idx);
2991 
2992   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2993   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2994   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2995   unsigned VAddrSize =
2996       IsNSA ? SrsrcIdx - VAddr0Idx
2997             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2998 
2999   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3000                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3001                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3002                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3003   if (!IsNSA) {
3004     if (AddrSize > 8)
3005       AddrSize = 16;
3006     else if (AddrSize > 4)
3007       AddrSize = 8;
3008   }
3009 
3010   return VAddrSize == AddrSize;
3011 }
3012 
3013 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3014 
3015   const unsigned Opc = Inst.getOpcode();
3016   const MCInstrDesc &Desc = MII.get(Opc);
3017 
3018   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3019     return true;
3020   if (!Desc.mayLoad() || !Desc.mayStore())
3021     return true; // Not atomic
3022 
3023   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3024   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3025 
3026   // This is an incomplete check because image_atomic_cmpswap
3027   // may only use 0x3 and 0xf while other atomic operations
3028   // may use 0x1 and 0x3. However, these limitations are
3029   // verified when we check that dmask matches dst size.
3030   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3031 }
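// For instance (illustrative), image_atomic_add returns a single dword
// and uses dmask 0x1, while image_atomic_cmpswap operates on a pair and
// uses dmask 0x3; dmask values other than 0x1, 0x3 and 0xf are rejected.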
3032 
3033 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3034 
3035   const unsigned Opc = Inst.getOpcode();
3036   const MCInstrDesc &Desc = MII.get(Opc);
3037 
3038   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3039     return true;
3040 
3041   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3042   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3043 
3044   // GATHER4 instructions use dmask in a different fashion compared to
3045   // other MIMG instructions. The only useful DMASK values are
3046   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3047   // (red,red,red,red) etc.) The ISA document doesn't mention
3048   // this.
3049   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3050 }
3051 
3052 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3053 
3054   const unsigned Opc = Inst.getOpcode();
3055   const MCInstrDesc &Desc = MII.get(Opc);
3056 
3057   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3058     return true;
3059 
3060   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3061   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3062     if (isCI() || isSI())
3063       return false;
3064   }
3065 
3066   return true;
3067 }
3068 
3069 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3070   const unsigned Opc = Inst.getOpcode();
3071   const MCInstrDesc &Desc = MII.get(Opc);
3072 
3073   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3074     return true;
3075 
3076   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3077   if (DimIdx < 0)
3078     return true;
3079 
3080   long Imm = Inst.getOperand(DimIdx).getImm();
3081   if (Imm < 0 || Imm >= 8)
3082     return false;
3083 
3084   return true;
3085 }
3086 
3087 static bool IsRevOpcode(const unsigned Opcode)
3088 {
3089   switch (Opcode) {
3090   case AMDGPU::V_SUBREV_F32_e32:
3091   case AMDGPU::V_SUBREV_F32_e64:
3092   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3093   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3094   case AMDGPU::V_SUBREV_F32_e32_vi:
3095   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3096   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3097   case AMDGPU::V_SUBREV_F32_e64_vi:
3098 
3099   case AMDGPU::V_SUBREV_I32_e32:
3100   case AMDGPU::V_SUBREV_I32_e64:
3101   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3102   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3103 
3104   case AMDGPU::V_SUBBREV_U32_e32:
3105   case AMDGPU::V_SUBBREV_U32_e64:
3106   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3107   case AMDGPU::V_SUBBREV_U32_e32_vi:
3108   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3109   case AMDGPU::V_SUBBREV_U32_e64_vi:
3110 
3111   case AMDGPU::V_SUBREV_U32_e32:
3112   case AMDGPU::V_SUBREV_U32_e64:
3113   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3114   case AMDGPU::V_SUBREV_U32_e32_vi:
3115   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3116   case AMDGPU::V_SUBREV_U32_e64_vi:
3117 
3118   case AMDGPU::V_SUBREV_F16_e32:
3119   case AMDGPU::V_SUBREV_F16_e64:
3120   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3121   case AMDGPU::V_SUBREV_F16_e32_vi:
3122   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3123   case AMDGPU::V_SUBREV_F16_e64_vi:
3124 
3125   case AMDGPU::V_SUBREV_U16_e32:
3126   case AMDGPU::V_SUBREV_U16_e64:
3127   case AMDGPU::V_SUBREV_U16_e32_vi:
3128   case AMDGPU::V_SUBREV_U16_e64_vi:
3129 
3130   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3131   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3132   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3133 
3134   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3135   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3136 
3137   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3138   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3139 
3140   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3141   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3142 
3143   case AMDGPU::V_LSHRREV_B32_e32:
3144   case AMDGPU::V_LSHRREV_B32_e64:
3145   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3146   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3147   case AMDGPU::V_LSHRREV_B32_e32_vi:
3148   case AMDGPU::V_LSHRREV_B32_e64_vi:
3149   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3150   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3151 
3152   case AMDGPU::V_ASHRREV_I32_e32:
3153   case AMDGPU::V_ASHRREV_I32_e64:
3154   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3155   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3156   case AMDGPU::V_ASHRREV_I32_e32_vi:
3157   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3158   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3159   case AMDGPU::V_ASHRREV_I32_e64_vi:
3160 
3161   case AMDGPU::V_LSHLREV_B32_e32:
3162   case AMDGPU::V_LSHLREV_B32_e64:
3163   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3164   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3165   case AMDGPU::V_LSHLREV_B32_e32_vi:
3166   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3167   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3168   case AMDGPU::V_LSHLREV_B32_e64_vi:
3169 
3170   case AMDGPU::V_LSHLREV_B16_e32:
3171   case AMDGPU::V_LSHLREV_B16_e64:
3172   case AMDGPU::V_LSHLREV_B16_e32_vi:
3173   case AMDGPU::V_LSHLREV_B16_e64_vi:
3174   case AMDGPU::V_LSHLREV_B16_gfx10:
3175 
3176   case AMDGPU::V_LSHRREV_B16_e32:
3177   case AMDGPU::V_LSHRREV_B16_e64:
3178   case AMDGPU::V_LSHRREV_B16_e32_vi:
3179   case AMDGPU::V_LSHRREV_B16_e64_vi:
3180   case AMDGPU::V_LSHRREV_B16_gfx10:
3181 
3182   case AMDGPU::V_ASHRREV_I16_e32:
3183   case AMDGPU::V_ASHRREV_I16_e64:
3184   case AMDGPU::V_ASHRREV_I16_e32_vi:
3185   case AMDGPU::V_ASHRREV_I16_e64_vi:
3186   case AMDGPU::V_ASHRREV_I16_gfx10:
3187 
3188   case AMDGPU::V_LSHLREV_B64:
3189   case AMDGPU::V_LSHLREV_B64_gfx10:
3190   case AMDGPU::V_LSHLREV_B64_vi:
3191 
3192   case AMDGPU::V_LSHRREV_B64:
3193   case AMDGPU::V_LSHRREV_B64_gfx10:
3194   case AMDGPU::V_LSHRREV_B64_vi:
3195 
3196   case AMDGPU::V_ASHRREV_I64:
3197   case AMDGPU::V_ASHRREV_I64_gfx10:
3198   case AMDGPU::V_ASHRREV_I64_vi:
3199 
3200   case AMDGPU::V_PK_LSHLREV_B16:
3201   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3202   case AMDGPU::V_PK_LSHLREV_B16_vi:
3203 
3204   case AMDGPU::V_PK_LSHRREV_B16:
3205   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3206   case AMDGPU::V_PK_LSHRREV_B16_vi:
3207   case AMDGPU::V_PK_ASHRREV_I16:
3208   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3209   case AMDGPU::V_PK_ASHRREV_I16_vi:
3210     return true;
3211   default:
3212     return false;
3213   }
3214 }
3215 
3216 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3217 
3218   using namespace SIInstrFlags;
3219   const unsigned Opcode = Inst.getOpcode();
3220   const MCInstrDesc &Desc = MII.get(Opcode);
3221 
3222   // The lds_direct register is defined so that it can be used only with
3223   // 9-bit src operands. Ignore encodings which do not accept these.
3224   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3225     return true;
3226 
3227   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3228   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3229   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3230 
3231   const int SrcIndices[] = { Src1Idx, Src2Idx };
3232 
3233   // lds_direct cannot be specified as either src1 or src2.
3234   for (int SrcIdx : SrcIndices) {
3235     if (SrcIdx == -1) break;
3236     const MCOperand &Src = Inst.getOperand(SrcIdx);
3237     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3238       return false;
3239     }
3240   }
3241 
3242   if (Src0Idx == -1)
3243     return true;
3244 
3245   const MCOperand &Src = Inst.getOperand(Src0Idx);
3246   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3247     return true;
3248 
3249   // lds_direct is specified as src0. Check additional limitations.
3250   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3251 }
3252 
3253 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3254   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3255     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3256     if (Op.isFlatOffset())
3257       return Op.getStartLoc();
3258   }
3259   return getLoc();
3260 }
3261 
3262 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3263                                          const OperandVector &Operands) {
3264   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3265   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3266     return true;
3267 
3268   auto Opcode = Inst.getOpcode();
3269   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3270   assert(OpNum != -1);
3271 
3272   const auto &Op = Inst.getOperand(OpNum);
3273   if (!hasFlatOffsets() && Op.getImm() != 0) {
3274     Error(getFlatOffsetLoc(Operands),
3275           "flat offset modifier is not supported on this GPU");
3276     return false;
3277   }
3278 
3279   // The address offset is 12-bit signed on GFX10 and 13-bit on GFX9.
3280   // For the FLAT segment the offset must be positive;
3281   // its MSB is ignored and forced to zero.
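       // For example (derived from the checks below): on GFX9 a global/scratch
       // offset must fit in 13 signed bits (-4096..4095) and a FLAT offset in
       // 12 unsigned bits (0..4095); on GFX10 the ranges are -2048..2047 and
       // 0..2047 respectively.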
3282   unsigned OffsetSize = isGFX9() ? 13 : 12;
3283   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3284     if (!isIntN(OffsetSize, Op.getImm())) {
3285       Error(getFlatOffsetLoc(Operands),
3286             isGFX9() ? "expected a 13-bit signed offset" :
3287                        "expected a 12-bit signed offset");
3288       return false;
3289     }
3290   } else {
3291     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3292       Error(getFlatOffsetLoc(Operands),
3293             isGFX9() ? "expected a 12-bit unsigned offset" :
3294                        "expected an 11-bit unsigned offset");
3295       return false;
3296     }
3297   }
3298 
3299   return true;
3300 }
3301 
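     // SOP2/SOPC instructions can encode at most one 32-bit literal; identical
     // literal values share that single slot. For example (illustrative),
     // "s_add_u32 s0, 0x11111111, 0x11111111" passes this check (one distinct
     // literal) while "s_add_u32 s0, 0x11111111, 0x22222222" is rejected.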
3302 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3303   unsigned Opcode = Inst.getOpcode();
3304   const MCInstrDesc &Desc = MII.get(Opcode);
3305   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3306     return true;
3307 
3308   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3309   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3310 
3311   const int OpIndices[] = { Src0Idx, Src1Idx };
3312 
3313   unsigned NumExprs = 0;
3314   unsigned NumLiterals = 0;
3315   uint32_t LiteralValue;
3316 
3317   for (int OpIdx : OpIndices) {
3318     if (OpIdx == -1) break;
3319 
3320     const MCOperand &MO = Inst.getOperand(OpIdx);
3321     // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3322     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3323       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3324         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3325         if (NumLiterals == 0 || LiteralValue != Value) {
3326           LiteralValue = Value;
3327           ++NumLiterals;
3328         }
3329       } else if (MO.isExpr()) {
3330         ++NumExprs;
3331       }
3332     }
3333   }
3334 
3335   return NumLiterals + NumExprs <= 1;
3336 }
3337 
3338 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3339   const unsigned Opc = Inst.getOpcode();
3340   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3341       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3342     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3343     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3344 
3345     if (OpSel & ~3)
3346       return false;
3347   }
3348   return true;
3349 }
3350 
3351 // Check if VCC register matches wavefront size
3352 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3353   auto FB = getFeatureBits();
3354   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3355     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3356 }
3357 
3358 // A VOP3 literal is allowed only on GFX10+, and only one may be used.
3359 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3360   unsigned Opcode = Inst.getOpcode();
3361   const MCInstrDesc &Desc = MII.get(Opcode);
3362   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3363     return true;
3364 
3365   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3366   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3367   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3368 
3369   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3370 
3371   unsigned NumExprs = 0;
3372   unsigned NumLiterals = 0;
3373   uint32_t LiteralValue;
3374 
3375   for (int OpIdx : OpIndices) {
3376     if (OpIdx == -1) break;
3377 
3378     const MCOperand &MO = Inst.getOperand(OpIdx);
3379     if (!MO.isImm() && !MO.isExpr())
3380       continue;
3381     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3382       continue;
3383 
3384     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3385         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3386       return false;
3387 
3388     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3389       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3390       if (NumLiterals == 0 || LiteralValue != Value) {
3391         LiteralValue = Value;
3392         ++NumLiterals;
3393       }
3394     } else if (MO.isExpr()) {
3395       ++NumExprs;
3396     }
3397   }
3398   NumLiterals += NumExprs;
3399 
3400   return !NumLiterals ||
3401          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3402 }
3403 
3404 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3405                                           const SMLoc &IDLoc,
3406                                           const OperandVector &Operands) {
3407   if (!validateLdsDirect(Inst)) {
3408     Error(IDLoc,
3409       "invalid use of lds_direct");
3410     return false;
3411   }
3412   if (!validateSOPLiteral(Inst)) {
3413     Error(IDLoc,
3414       "only one literal operand is allowed");
3415     return false;
3416   }
3417   if (!validateVOP3Literal(Inst)) {
3418     Error(IDLoc,
3419       "invalid literal operand");
3420     return false;
3421   }
3422   if (!validateConstantBusLimitations(Inst)) {
3423     Error(IDLoc,
3424       "invalid operand (violates constant bus restrictions)");
3425     return false;
3426   }
3427   if (!validateEarlyClobberLimitations(Inst)) {
3428     Error(IDLoc,
3429       "destination must be different than all sources");
3430     return false;
3431   }
3432   if (!validateIntClampSupported(Inst)) {
3433     Error(IDLoc,
3434       "integer clamping is not supported on this GPU");
3435     return false;
3436   }
3437   if (!validateOpSel(Inst)) {
3438     Error(IDLoc,
3439       "invalid op_sel operand");
3440     return false;
3441   }
3442   // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3443   if (!validateMIMGD16(Inst)) {
3444     Error(IDLoc,
3445       "d16 modifier is not supported on this GPU");
3446     return false;
3447   }
3448   if (!validateMIMGDim(Inst)) {
3449     Error(IDLoc, "dim modifier is required on this GPU");
3450     return false;
3451   }
3452   if (!validateMIMGDataSize(Inst)) {
3453     Error(IDLoc,
3454       "image data size does not match dmask and tfe");
3455     return false;
3456   }
3457   if (!validateMIMGAddrSize(Inst)) {
3458     Error(IDLoc,
3459       "image address size does not match dim and a16");
3460     return false;
3461   }
3462   if (!validateMIMGAtomicDMask(Inst)) {
3463     Error(IDLoc,
3464       "invalid atomic image dmask");
3465     return false;
3466   }
3467   if (!validateMIMGGatherDMask(Inst)) {
3468     Error(IDLoc,
3469       "invalid image_gather dmask: only one bit must be set");
3470     return false;
3471   }
3472   if (!validateFlatOffset(Inst, Operands)) {
3473     return false;
3474   }
3475 
3476   return true;
3477 }
3478 
3479 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3480                                             const FeatureBitset &FBS,
3481                                             unsigned VariantID = 0);
3482 
3483 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3484                                               OperandVector &Operands,
3485                                               MCStreamer &Out,
3486                                               uint64_t &ErrorInfo,
3487                                               bool MatchingInlineAsm) {
3488   MCInst Inst;
3489   unsigned Result = Match_Success;
3490   for (auto Variant : getMatchedVariants()) {
3491     uint64_t EI;
3492     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3493                                   Variant);
3494     // We order match statuses from least to most specific and use the most
3495     // specific status as the result:
3496     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3497     if ((R == Match_Success) ||
3498         (R == Match_PreferE32) ||
3499         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3500         (R == Match_InvalidOperand && Result != Match_MissingFeature
3501                                    && Result != Match_PreferE32) ||
3502         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3503                                    && Result != Match_MissingFeature
3504                                    && Result != Match_PreferE32)) {
3505       Result = R;
3506       ErrorInfo = EI;
3507     }
3508     if (R == Match_Success)
3509       break;
3510   }
3511 
3512   switch (Result) {
3513   default: break;
3514   case Match_Success:
3515     if (!validateInstruction(Inst, IDLoc, Operands)) {
3516       return true;
3517     }
3518     Inst.setLoc(IDLoc);
3519     Out.EmitInstruction(Inst, getSTI());
3520     return false;
3521 
3522   case Match_MissingFeature:
3523     return Error(IDLoc, "instruction not supported on this GPU");
3524 
3525   case Match_MnemonicFail: {
3526     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3527     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3528         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3529     return Error(IDLoc, "invalid instruction" + Suggestion,
3530                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3531   }
3532 
3533   case Match_InvalidOperand: {
3534     SMLoc ErrorLoc = IDLoc;
3535     if (ErrorInfo != ~0ULL) {
3536       if (ErrorInfo >= Operands.size()) {
3537         return Error(IDLoc, "too few operands for instruction");
3538       }
3539       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3540       if (ErrorLoc == SMLoc())
3541         ErrorLoc = IDLoc;
3542     }
3543     return Error(ErrorLoc, "invalid operand for instruction");
3544   }
3545 
3546   case Match_PreferE32:
3547     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3548                         "should be encoded as e32");
3549   }
3550   llvm_unreachable("Implement any new match types added!");
3551 }
3552 
3553 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3554   int64_t Tmp = -1;
3555   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3556     return true;
3557   }
3558   if (getParser().parseAbsoluteExpression(Tmp)) {
3559     return true;
3560   }
3561   Ret = static_cast<uint32_t>(Tmp);
3562   return false;
3563 }
3564 
3565 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3566                                                uint32_t &Minor) {
3567   if (ParseAsAbsoluteExpression(Major))
3568     return TokError("invalid major version");
3569 
3570   if (getLexer().isNot(AsmToken::Comma))
3571     return TokError("minor version number required, comma expected");
3572   Lex();
3573 
3574   if (ParseAsAbsoluteExpression(Minor))
3575     return TokError("invalid minor version");
3576 
3577   return false;
3578 }
3579 
3580 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3581   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3582     return TokError("directive only supported for amdgcn architecture");
3583 
3584   std::string Target;
3585 
3586   SMLoc TargetStart = getTok().getLoc();
3587   if (getParser().parseEscapedString(Target))
3588     return true;
3589   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3590 
3591   std::string ExpectedTarget;
3592   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3593   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3594 
3595   if (Target != ExpectedTargetOS.str())
3596     return getParser().Error(TargetRange.Start, "target must match options",
3597                              TargetRange);
3598 
3599   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3600   return false;
3601 }
3602 
3603 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3604   return getParser().Error(Range.Start, "value out of range", Range);
3605 }
3606 
3607 bool AMDGPUAsmParser::calculateGPRBlocks(
3608     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3609     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3610     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3611     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3612   // TODO(scott.linder): These calculations are duplicated from
3613   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3614   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3615 
3616   unsigned NumVGPRs = NextFreeVGPR;
3617   unsigned NumSGPRs = NextFreeSGPR;
3618 
3619   if (Version.Major >= 10)
3620     NumSGPRs = 0;
3621   else {
3622     unsigned MaxAddressableNumSGPRs =
3623         IsaInfo::getAddressableNumSGPRs(&getSTI());
3624 
3625     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3626         NumSGPRs > MaxAddressableNumSGPRs)
3627       return OutOfRangeError(SGPRRange);
3628 
3629     NumSGPRs +=
3630         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3631 
3632     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3633         NumSGPRs > MaxAddressableNumSGPRs)
3634       return OutOfRangeError(SGPRRange);
3635 
3636     if (Features.test(FeatureSGPRInitBug))
3637       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3638   }
3639 
3640   VGPRBlocks =
3641       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3642   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3643 
3644   return false;
3645 }
3646 
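     // Parses a .amdhsa_kernel descriptor block. A minimal example
     // (illustrative):
     //   .amdhsa_kernel my_kernel
     //     .amdhsa_next_free_vgpr 8
     //     .amdhsa_next_free_sgpr 16
     //   .end_amdhsa_kernel
     // Only the next_free_vgpr/sgpr directives are mandatory; every other field
     // keeps the default from getDefaultAmdhsaKernelDescriptor().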
3647 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3648   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3649     return TokError("directive only supported for amdgcn architecture");
3650 
3651   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3652     return TokError("directive only supported for amdhsa OS");
3653 
3654   StringRef KernelName;
3655   if (getParser().parseIdentifier(KernelName))
3656     return true;
3657 
3658   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3659 
3660   StringSet<> Seen;
3661 
3662   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3663 
3664   SMRange VGPRRange;
3665   uint64_t NextFreeVGPR = 0;
3666   SMRange SGPRRange;
3667   uint64_t NextFreeSGPR = 0;
3668   unsigned UserSGPRCount = 0;
3669   bool ReserveVCC = true;
3670   bool ReserveFlatScr = true;
3671   bool ReserveXNACK = hasXNACK();
3672   Optional<bool> EnableWavefrontSize32;
3673 
3674   while (true) {
3675     while (getLexer().is(AsmToken::EndOfStatement))
3676       Lex();
3677 
3678     if (getLexer().isNot(AsmToken::Identifier))
3679       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3680 
3681     StringRef ID = getTok().getIdentifier();
3682     SMRange IDRange = getTok().getLocRange();
3683     Lex();
3684 
3685     if (ID == ".end_amdhsa_kernel")
3686       break;
3687 
3688     if (Seen.find(ID) != Seen.end())
3689       return TokError(".amdhsa_ directives cannot be repeated");
3690     Seen.insert(ID);
3691 
3692     SMLoc ValStart = getTok().getLoc();
3693     int64_t IVal;
3694     if (getParser().parseAbsoluteExpression(IVal))
3695       return true;
3696     SMLoc ValEnd = getTok().getLoc();
3697     SMRange ValRange = SMRange(ValStart, ValEnd);
3698 
3699     if (IVal < 0)
3700       return OutOfRangeError(ValRange);
3701 
3702     uint64_t Val = IVal;
3703 
3704 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3705   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3706     return OutOfRangeError(RANGE);                                             \
3707   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3708 
3709     if (ID == ".amdhsa_group_segment_fixed_size") {
3710       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3711         return OutOfRangeError(ValRange);
3712       KD.group_segment_fixed_size = Val;
3713     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3714       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3715         return OutOfRangeError(ValRange);
3716       KD.private_segment_fixed_size = Val;
3717     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3718       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3719                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3720                        Val, ValRange);
3721       if (Val)
3722         UserSGPRCount += 4;
3723     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3724       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3725                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3726                        ValRange);
3727       if (Val)
3728         UserSGPRCount += 2;
3729     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3730       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3731                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3732                        ValRange);
3733       if (Val)
3734         UserSGPRCount += 2;
3735     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3736       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3737                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3738                        Val, ValRange);
3739       if (Val)
3740         UserSGPRCount += 2;
3741     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3742       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3743                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3744                        ValRange);
3745       if (Val)
3746         UserSGPRCount += 2;
3747     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3748       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3749                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3750                        ValRange);
3751       if (Val)
3752         UserSGPRCount += 2;
3753     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3754       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3755                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3756                        Val, ValRange);
3757       if (Val)
3758         UserSGPRCount += 1;
3759     } else if (ID == ".amdhsa_wavefront_size32") {
3760       if (IVersion.Major < 10)
3761         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3762                                  IDRange);
3763       EnableWavefrontSize32 = Val;
3764       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3765                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3766                        Val, ValRange);
3767     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3768       PARSE_BITS_ENTRY(
3769           KD.compute_pgm_rsrc2,
3770           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3771           ValRange);
3772     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3773       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3774                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3775                        ValRange);
3776     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3777       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3778                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3779                        ValRange);
3780     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3781       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3782                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3783                        ValRange);
3784     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3785       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3786                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3787                        ValRange);
3788     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3789       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3790                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3791                        ValRange);
3792     } else if (ID == ".amdhsa_next_free_vgpr") {
3793       VGPRRange = ValRange;
3794       NextFreeVGPR = Val;
3795     } else if (ID == ".amdhsa_next_free_sgpr") {
3796       SGPRRange = ValRange;
3797       NextFreeSGPR = Val;
3798     } else if (ID == ".amdhsa_reserve_vcc") {
3799       if (!isUInt<1>(Val))
3800         return OutOfRangeError(ValRange);
3801       ReserveVCC = Val;
3802     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3803       if (IVersion.Major < 7)
3804         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3805                                  IDRange);
3806       if (!isUInt<1>(Val))
3807         return OutOfRangeError(ValRange);
3808       ReserveFlatScr = Val;
3809     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3810       if (IVersion.Major < 8)
3811         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3812                                  IDRange);
3813       if (!isUInt<1>(Val))
3814         return OutOfRangeError(ValRange);
3815       ReserveXNACK = Val;
3816     } else if (ID == ".amdhsa_float_round_mode_32") {
3817       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3818                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3819     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3820       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3821                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3822     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3823       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3824                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3825     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3826       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3827                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3828                        ValRange);
3829     } else if (ID == ".amdhsa_dx10_clamp") {
3830       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3831                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3832     } else if (ID == ".amdhsa_ieee_mode") {
3833       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3834                        Val, ValRange);
3835     } else if (ID == ".amdhsa_fp16_overflow") {
3836       if (IVersion.Major < 9)
3837         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3838                                  IDRange);
3839       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3840                        ValRange);
3841     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3842       if (IVersion.Major < 10)
3843         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3844                                  IDRange);
3845       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3846                        ValRange);
3847     } else if (ID == ".amdhsa_memory_ordered") {
3848       if (IVersion.Major < 10)
3849         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3850                                  IDRange);
3851       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3852                        ValRange);
3853     } else if (ID == ".amdhsa_forward_progress") {
3854       if (IVersion.Major < 10)
3855         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3856                                  IDRange);
3857       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3858                        ValRange);
3859     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3860       PARSE_BITS_ENTRY(
3861           KD.compute_pgm_rsrc2,
3862           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3863           ValRange);
3864     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3865       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3866                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3867                        Val, ValRange);
3868     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3869       PARSE_BITS_ENTRY(
3870           KD.compute_pgm_rsrc2,
3871           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3872           ValRange);
3873     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3874       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3875                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3876                        Val, ValRange);
3877     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3878       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3879                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3880                        Val, ValRange);
3881     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3882       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3883                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3884                        Val, ValRange);
3885     } else if (ID == ".amdhsa_exception_int_div_zero") {
3886       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3887                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3888                        Val, ValRange);
3889     } else {
3890       return getParser().Error(IDRange.Start,
3891                                "unknown .amdhsa_kernel directive", IDRange);
3892     }
3893 
3894 #undef PARSE_BITS_ENTRY
3895   }
3896 
3897   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3898     return TokError(".amdhsa_next_free_vgpr directive is required");
3899 
3900   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3901     return TokError(".amdhsa_next_free_sgpr directive is required");
3902 
3903   unsigned VGPRBlocks;
3904   unsigned SGPRBlocks;
3905   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3906                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3907                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3908                          SGPRBlocks))
3909     return true;
3910 
3911   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3912           VGPRBlocks))
3913     return OutOfRangeError(VGPRRange);
3914   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3915                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3916 
3917   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3918           SGPRBlocks))
3919     return OutOfRangeError(SGPRRange);
3920   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3921                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3922                   SGPRBlocks);
3923 
3924   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3925     return TokError("too many user SGPRs enabled");
3926   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3927                   UserSGPRCount);
3928 
3929   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3930       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3931       ReserveFlatScr, ReserveXNACK);
3932   return false;
3933 }
3934 
3935 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3936   uint32_t Major;
3937   uint32_t Minor;
3938 
3939   if (ParseDirectiveMajorMinor(Major, Minor))
3940     return true;
3941 
3942   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3943   return false;
3944 }
3945 
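     // With no arguments this directive emits the ISA version of the targeted
     // GPU; otherwise it takes major, minor, stepping, vendor and arch, e.g.
     // (illustrative values): .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"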
3946 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3947   uint32_t Major;
3948   uint32_t Minor;
3949   uint32_t Stepping;
3950   StringRef VendorName;
3951   StringRef ArchName;
3952 
3953   // If this directive has no arguments, then use the ISA version for the
3954   // targeted GPU.
3955   if (getLexer().is(AsmToken::EndOfStatement)) {
3956     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3957     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3958                                                       ISA.Stepping,
3959                                                       "AMD", "AMDGPU");
3960     return false;
3961   }
3962 
3963   if (ParseDirectiveMajorMinor(Major, Minor))
3964     return true;
3965 
3966   if (getLexer().isNot(AsmToken::Comma))
3967     return TokError("stepping version number required, comma expected");
3968   Lex();
3969 
3970   if (ParseAsAbsoluteExpression(Stepping))
3971     return TokError("invalid stepping version");
3972 
3973   if (getLexer().isNot(AsmToken::Comma))
3974     return TokError("vendor name required, comma expected");
3975   Lex();
3976 
3977   if (getLexer().isNot(AsmToken::String))
3978     return TokError("invalid vendor name");
3979 
3980   VendorName = getLexer().getTok().getStringContents();
3981   Lex();
3982 
3983   if (getLexer().isNot(AsmToken::Comma))
3984     return TokError("arch name required, comma expected");
3985   Lex();
3986 
3987   if (getLexer().isNot(AsmToken::String))
3988     return TokError("invalid arch name");
3989 
3990   ArchName = getLexer().getTok().getStringContents();
3991   Lex();
3992 
3993   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3994                                                     VendorName, ArchName);
3995   return false;
3996 }
3997 
3998 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3999                                                amd_kernel_code_t &Header) {
4000   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4001   // assembly for backwards compatibility.
4002   if (ID == "max_scratch_backing_memory_byte_size") {
4003     Parser.eatToEndOfStatement();
4004     return false;
4005   }
4006 
4007   SmallString<40> ErrStr;
4008   raw_svector_ostream Err(ErrStr);
4009   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4010     return TokError(Err.str());
4011   }
4012   Lex();
4013 
4014   if (ID == "enable_wavefront_size32") {
4015     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4016       if (!isGFX10())
4017         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4018       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4019         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4020     } else {
4021       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4022         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4023     }
4024   }
4025 
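       // wavefront_size holds the log2 of the wave size, so 5 selects wave32
       // and 6 selects wave64 (inferred from the feature checks below).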
4026   if (ID == "wavefront_size") {
4027     if (Header.wavefront_size == 5) {
4028       if (!isGFX10())
4029         return TokError("wavefront_size=5 is only allowed on GFX10+");
4030       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4031         return TokError("wavefront_size=5 requires +WavefrontSize32");
4032     } else if (Header.wavefront_size == 6) {
4033       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4034         return TokError("wavefront_size=6 requires +WavefrontSize64");
4035     }
4036   }
4037 
4038   if (ID == "enable_wgp_mode") {
4039     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4040       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4041   }
4042 
4043   if (ID == "enable_mem_ordered") {
4044     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4045       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4046   }
4047 
4048   if (ID == "enable_fwd_progress") {
4049     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4050       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4051   }
4052 
4053   return false;
4054 }
4055 
4056 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4057   amd_kernel_code_t Header;
4058   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4059 
4060   while (true) {
4061     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4062     // will set the current token to EndOfStatement.
4063     while (getLexer().is(AsmToken::EndOfStatement))
4064       Lex();
4065 
4066     if (getLexer().isNot(AsmToken::Identifier))
4067       return TokError("expected value identifier or .end_amd_kernel_code_t");
4068 
4069     StringRef ID = getLexer().getTok().getIdentifier();
4070     Lex();
4071 
4072     if (ID == ".end_amd_kernel_code_t")
4073       break;
4074 
4075     if (ParseAMDKernelCodeTValue(ID, Header))
4076       return true;
4077   }
4078 
4079   getTargetStreamer().EmitAMDKernelCodeT(Header);
4080 
4081   return false;
4082 }
4083 
4084 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4085   if (getLexer().isNot(AsmToken::Identifier))
4086     return TokError("expected symbol name");
4087 
4088   StringRef KernelName = Parser.getTok().getString();
4089 
4090   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4091                                            ELF::STT_AMDGPU_HSA_KERNEL);
4092   Lex();
4093   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4094     KernelScope.initialize(getContext());
4095   return false;
4096 }
4097 
4098 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4099   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4100     return Error(getParser().getTok().getLoc(),
4101                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4102                  "architectures");
4103   }
4104 
4105   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4106 
4107   std::string ISAVersionStringFromSTI;
4108   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4109   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4110 
4111   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4112     return Error(getParser().getTok().getLoc(),
4113                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4114                  "arguments specified through the command line");
4115   }
4116 
4117   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4118   Lex();
4119 
4120   return false;
4121 }
4122 
4123 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4124   const char *AssemblerDirectiveBegin;
4125   const char *AssemblerDirectiveEnd;
4126   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4127       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4128           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4129                             HSAMD::V3::AssemblerDirectiveEnd)
4130           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4131                             HSAMD::AssemblerDirectiveEnd);
4132 
4133   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4134     return Error(getParser().getTok().getLoc(),
4135                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4136                  "not available on non-amdhsa OSes")).str());
4137   }
4138 
4139   std::string HSAMetadataString;
4140   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4141                           HSAMetadataString))
4142     return true;
4143 
4144   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4145     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4146       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4147   } else {
4148     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4149       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4150   }
4151 
4152   return false;
4153 }
4154 
4155 /// Common code to parse out a block of text (typically YAML) between start and
4156 /// end directives.
4157 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4158                                           const char *AssemblerDirectiveEnd,
4159                                           std::string &CollectString) {
4160 
4161   raw_string_ostream CollectStream(CollectString);
4162 
4163   getLexer().setSkipSpace(false);
4164 
4165   bool FoundEnd = false;
4166   while (!getLexer().is(AsmToken::Eof)) {
4167     while (getLexer().is(AsmToken::Space)) {
4168       CollectStream << getLexer().getTok().getString();
4169       Lex();
4170     }
4171 
4172     if (getLexer().is(AsmToken::Identifier)) {
4173       StringRef ID = getLexer().getTok().getIdentifier();
4174       if (ID == AssemblerDirectiveEnd) {
4175         Lex();
4176         FoundEnd = true;
4177         break;
4178       }
4179     }
4180 
4181     CollectStream << Parser.parseStringToEndOfStatement()
4182                   << getContext().getAsmInfo()->getSeparatorString();
4183 
4184     Parser.eatToEndOfStatement();
4185   }
4186 
4187   getLexer().setSkipSpace(true);
4188 
4189   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4190     return TokError(Twine("expected directive ") +
4191                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4192   }
4193 
4194   CollectStream.flush();
4195   return false;
4196 }
4197 
4198 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4199 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4200   std::string String;
4201   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4202                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4203     return true;
4204 
4205   auto PALMetadata = getTargetStreamer().getPALMetadata();
4206   if (!PALMetadata->setFromString(String))
4207     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4208   return false;
4209 }
4210 
4211 /// Parse the assembler directive for old linear-format PAL metadata.
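     /// The payload is a comma-separated list of alternating register/value
     /// pairs, e.g. (illustrative) reg0, value0, reg1, value1.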
4212 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4213   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4214     return Error(getParser().getTok().getLoc(),
4215                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4216                  "not available on non-amdpal OSes")).str());
4217   }
4218 
4219   auto PALMetadata = getTargetStreamer().getPALMetadata();
4220   PALMetadata->setLegacy();
4221   for (;;) {
4222     uint32_t Key, Value;
4223     if (ParseAsAbsoluteExpression(Key)) {
4224       return TokError(Twine("invalid value in ") +
4225                       Twine(PALMD::AssemblerDirective));
4226     }
4227     if (getLexer().isNot(AsmToken::Comma)) {
4228       return TokError(Twine("expected an even number of values in ") +
4229                       Twine(PALMD::AssemblerDirective));
4230     }
4231     Lex();
4232     if (ParseAsAbsoluteExpression(Value)) {
4233       return TokError(Twine("invalid value in ") +
4234                       Twine(PALMD::AssemblerDirective));
4235     }
4236     PALMetadata->setRegister(Key, Value);
4237     if (getLexer().isNot(AsmToken::Comma))
4238       break;
4239     Lex();
4240   }
4241   return false;
4242 }
4243 
4244 /// ParseDirectiveAMDGPULDS
4245 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
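     ///  e.g. (illustrative): .amdgpu_lds my_lds_buffer, 4096, 16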
4246 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4247   if (getParser().checkForValidSection())
4248     return true;
4249 
4250   StringRef Name;
4251   SMLoc NameLoc = getLexer().getLoc();
4252   if (getParser().parseIdentifier(Name))
4253     return TokError("expected identifier in directive");
4254 
4255   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4256   if (parseToken(AsmToken::Comma, "expected ','"))
4257     return true;
4258 
4259   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4260 
4261   int64_t Size;
4262   SMLoc SizeLoc = getLexer().getLoc();
4263   if (getParser().parseAbsoluteExpression(Size))
4264     return true;
4265   if (Size < 0)
4266     return Error(SizeLoc, "size must be non-negative");
4267   if (Size > LocalMemorySize)
4268     return Error(SizeLoc, "size is too large");
4269 
4270   int64_t Align = 4;
4271   if (getLexer().is(AsmToken::Comma)) {
4272     Lex();
4273     SMLoc AlignLoc = getLexer().getLoc();
4274     if (getParser().parseAbsoluteExpression(Align))
4275       return true;
4276     if (Align < 0 || !isPowerOf2_64(Align))
4277       return Error(AlignLoc, "alignment must be a power of two");
4278 
4279     // Alignment larger than the size of LDS is possible in theory, as long
4280     // as the linker manages to place the symbol at address 0, but we do want
4281     // to make sure the alignment fits nicely into a 32-bit integer.
4282     if (Align >= 1u << 31)
4283       return Error(AlignLoc, "alignment is too large");
4284   }
4285 
4286   if (parseToken(AsmToken::EndOfStatement,
4287                  "unexpected token in '.amdgpu_lds' directive"))
4288     return true;
4289 
4290   Symbol->redefineIfPossible();
4291   if (!Symbol->isUndefined())
4292     return Error(NameLoc, "invalid symbol redefinition");
4293 
4294   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4295   return false;
4296 }
4297 
4298 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4299   StringRef IDVal = DirectiveID.getString();
4300 
4301   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4302     if (IDVal == ".amdgcn_target")
4303       return ParseDirectiveAMDGCNTarget();
4304 
4305     if (IDVal == ".amdhsa_kernel")
4306       return ParseDirectiveAMDHSAKernel();
4307 
4308     // TODO: Restructure/combine with PAL metadata directive.
4309     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4310       return ParseDirectiveHSAMetadata();
4311   } else {
4312     if (IDVal == ".hsa_code_object_version")
4313       return ParseDirectiveHSACodeObjectVersion();
4314 
4315     if (IDVal == ".hsa_code_object_isa")
4316       return ParseDirectiveHSACodeObjectISA();
4317 
4318     if (IDVal == ".amd_kernel_code_t")
4319       return ParseDirectiveAMDKernelCodeT();
4320 
4321     if (IDVal == ".amdgpu_hsa_kernel")
4322       return ParseDirectiveAMDGPUHsaKernel();
4323 
4324     if (IDVal == ".amd_amdgpu_isa")
4325       return ParseDirectiveISAVersion();
4326 
4327     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4328       return ParseDirectiveHSAMetadata();
4329   }
4330 
4331   if (IDVal == ".amdgpu_lds")
4332     return ParseDirectiveAMDGPULDS();
4333 
4334   if (IDVal == PALMD::AssemblerDirectiveBegin)
4335     return ParseDirectivePALMetadataBegin();
4336 
4337   if (IDVal == PALMD::AssemblerDirective)
4338     return ParseDirectivePALMetadata();
4339 
4340   return true;
4341 }
4342 
4343 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4344                                            unsigned RegNo) const {
4345 
4346   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4347        R.isValid(); ++R) {
4348     if (*R == RegNo)
4349       return isGFX9() || isGFX10();
4350   }
4351 
4352   // GFX10 has 2 more SGPRs 104 and 105.
4353   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4354        R.isValid(); ++R) {
4355     if (*R == RegNo)
4356       return hasSGPR104_SGPR105();
4357   }
4358 
4359   switch (RegNo) {
4360   case AMDGPU::SRC_SHARED_BASE:
4361   case AMDGPU::SRC_SHARED_LIMIT:
4362   case AMDGPU::SRC_PRIVATE_BASE:
4363   case AMDGPU::SRC_PRIVATE_LIMIT:
4364   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4365     return !isCI() && !isSI() && !isVI();
4366   case AMDGPU::TBA:
4367   case AMDGPU::TBA_LO:
4368   case AMDGPU::TBA_HI:
4369   case AMDGPU::TMA:
4370   case AMDGPU::TMA_LO:
4371   case AMDGPU::TMA_HI:
4372     return !isGFX9() && !isGFX10();
4373   case AMDGPU::XNACK_MASK:
4374   case AMDGPU::XNACK_MASK_LO:
4375   case AMDGPU::XNACK_MASK_HI:
4376     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4377   case AMDGPU::SGPR_NULL:
4378     return isGFX10();
4379   default:
4380     break;
4381   }
4382 
4383   if (isCI())
4384     return true;
4385 
4386   if (isSI() || isGFX10()) {
4387     // No flat_scr on SI.
4388     // On GFX10 flat scratch is not a valid register operand and can only be
4389     // accessed with s_setreg/s_getreg.
4390     switch (RegNo) {
4391     case AMDGPU::FLAT_SCR:
4392     case AMDGPU::FLAT_SCR_LO:
4393     case AMDGPU::FLAT_SCR_HI:
4394       return false;
4395     default:
4396       return true;
4397     }
4398   }
4399 
4400   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4401   // SI/CI have.
4402   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4403        R.isValid(); ++R) {
4404     if (*R == RegNo)
4405       return hasSGPR102_SGPR103();
4406   }
4407 
4408   return true;
4409 }
4410 
4411 OperandMatchResultTy
4412 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4413                               OperandMode Mode) {
4414   // Try to parse with a custom parser
4415   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4416 
4417   // If we successfully parsed the operand or if there was an error parsing,
4418   // we are done.
4419   //
4420   // If we are parsing after we reach EndOfStatement then this means we
4421   // are appending default values to the Operands list.  This is only done
4422   // by custom parser, so we shouldn't continue on to the generic parsing.
4423   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4424       getLexer().is(AsmToken::EndOfStatement))
4425     return ResTy;
4426 
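       // GFX10 MIMG instructions may use the NSA (non-sequential address)
       // encoding, where the address is written as a bracketed register list,
       // e.g. (illustrative) image_sample v[0:3], [v4, v6, v9], s[0:7], ...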
4427   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4428     unsigned Prefix = Operands.size();
4429     SMLoc LBraceLoc = getTok().getLoc();
4430     Parser.Lex(); // eat the '['
4431 
4432     for (;;) {
4433       ResTy = parseReg(Operands);
4434       if (ResTy != MatchOperand_Success)
4435         return ResTy;
4436 
4437       if (getLexer().is(AsmToken::RBrac))
4438         break;
4439 
4440       if (getLexer().isNot(AsmToken::Comma))
4441         return MatchOperand_ParseFail;
4442       Parser.Lex();
4443     }
4444 
4445     if (Operands.size() - Prefix > 1) {
4446       Operands.insert(Operands.begin() + Prefix,
4447                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4448       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4449                                                     getTok().getLoc()));
4450     }
4451 
4452     Parser.Lex(); // eat the ']'
4453     return MatchOperand_Success;
4454   }
4455 
4456   return parseRegOrImm(Operands);
4457 }
4458 
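     // Strip a forced-encoding suffix from the mnemonic and record it; e.g.
     // (illustrative) "v_add_f32_e64" forces the 64-bit encoding and is matched
     // as "v_add_f32", while a "_sdwa" suffix forces SDWA matching.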
4459 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4460   // Clear any forced encodings from the previous instruction.
4461   setForcedEncodingSize(0);
4462   setForcedDPP(false);
4463   setForcedSDWA(false);
4464 
4465   if (Name.endswith("_e64")) {
4466     setForcedEncodingSize(64);
4467     return Name.substr(0, Name.size() - 4);
4468   } else if (Name.endswith("_e32")) {
4469     setForcedEncodingSize(32);
4470     return Name.substr(0, Name.size() - 4);
4471   } else if (Name.endswith("_dpp")) {
4472     setForcedDPP(true);
4473     return Name.substr(0, Name.size() - 4);
4474   } else if (Name.endswith("_sdwa")) {
4475     setForcedSDWA(true);
4476     return Name.substr(0, Name.size() - 5);
4477   }
4478   return Name;
4479 }
4480 
4481 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4482                                        StringRef Name,
4483                                        SMLoc NameLoc, OperandVector &Operands) {
4484   // Add the instruction mnemonic
4485   Name = parseMnemonicSuffix(Name);
4486   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4487 
4488   bool IsMIMG = Name.startswith("image_");
4489 
4490   while (!getLexer().is(AsmToken::EndOfStatement)) {
4491     OperandMode Mode = OperandMode_Default;
4492     if (IsMIMG && isGFX10() && Operands.size() == 2)
4493       Mode = OperandMode_NSA;
4494     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4495 
4496     // Eat the comma or space if there is one.
4497     if (getLexer().is(AsmToken::Comma))
4498       Parser.Lex();
4499 
4500     switch (Res) {
4501       case MatchOperand_Success: break;
4502       case MatchOperand_ParseFail:
4503         // FIXME: use real operand location rather than the current location.
4504         Error(getLexer().getLoc(), "failed parsing operand.");
4505         while (!getLexer().is(AsmToken::EndOfStatement)) {
4506           Parser.Lex();
4507         }
4508         return true;
4509       case MatchOperand_NoMatch:
4510         // FIXME: use real operand location rather than the current location.
4511         Error(getLexer().getLoc(), "not a valid operand.");
4512         while (!getLexer().is(AsmToken::EndOfStatement)) {
4513           Parser.Lex();
4514         }
4515         return true;
4516     }
4517   }
4518 
4519   return false;
4520 }
4521 
4522 //===----------------------------------------------------------------------===//
4523 // Utility functions
4524 //===----------------------------------------------------------------------===//
4525 
4526 OperandMatchResultTy
4527 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4528 
4529   if (!trySkipId(Prefix, AsmToken::Colon))
4530     return MatchOperand_NoMatch;
4531 
4532   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4533 }
4534 
4535 OperandMatchResultTy
4536 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4537                                     AMDGPUOperand::ImmTy ImmTy,
4538                                     bool (*ConvertResult)(int64_t&)) {
4539   SMLoc S = getLoc();
4540   int64_t Value = 0;
4541 
4542   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4543   if (Res != MatchOperand_Success)
4544     return Res;
4545 
4546   if (ConvertResult && !ConvertResult(Value)) {
4547     Error(S, "invalid " + StringRef(Prefix) + " value.");
4548   }
4549 
4550   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4551   return MatchOperand_Success;
4552 }
4553 
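     // Parse a prefixed array of 0/1 flags such as (illustrative)
     // op_sel:[0,1,1,0]; element I is packed into bit I of the resulting
     // immediate (0b0110 in this example).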
4554 OperandMatchResultTy
4555 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4556                                              OperandVector &Operands,
4557                                              AMDGPUOperand::ImmTy ImmTy,
4558                                              bool (*ConvertResult)(int64_t&)) {
4559   SMLoc S = getLoc();
4560   if (!trySkipId(Prefix, AsmToken::Colon))
4561     return MatchOperand_NoMatch;
4562 
4563   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4564     return MatchOperand_ParseFail;
4565 
4566   unsigned Val = 0;
4567   const unsigned MaxSize = 4;
4568 
4569   // FIXME: How to verify the number of elements matches the number of src
4570   // operands?
4571   for (int I = 0; ; ++I) {
4572     int64_t Op;
4573     SMLoc Loc = getLoc();
4574     if (!parseExpr(Op))
4575       return MatchOperand_ParseFail;
4576 
4577     if (Op != 0 && Op != 1) {
4578       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4579       return MatchOperand_ParseFail;
4580     }
4581 
4582     Val |= (Op << I);
4583 
4584     if (trySkipToken(AsmToken::RBrac))
4585       break;
4586 
4587     if (I + 1 == MaxSize) {
4588       Error(getLoc(), "expected a closing square bracket");
4589       return MatchOperand_ParseFail;
4590     }
4591 
4592     if (!skipToken(AsmToken::Comma, "expected a comma"))
4593       return MatchOperand_ParseFail;
4594   }
4595 
4596   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4597   return MatchOperand_Success;
4598 }
4599 
4600 OperandMatchResultTy
4601 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4602                                AMDGPUOperand::ImmTy ImmTy) {
4603   int64_t Bit = 0;
4604   SMLoc S = Parser.getTok().getLoc();
4605 
4606   // If we are already at the end of the statement, this is a default argument,
4607   // so use the default value.
4608   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4609     switch(getLexer().getKind()) {
4610       case AsmToken::Identifier: {
4611         StringRef Tok = Parser.getTok().getString();
4612         if (Tok == Name) {
4613           if (Tok == "r128" && isGFX9())
4614             Error(S, "r128 modifier is not supported on this GPU");
4615           if (Tok == "a16" && !isGFX9() && !isGFX10())
4616             Error(S, "a16 modifier is not supported on this GPU");
4617           Bit = 1;
4618           Parser.Lex();
4619         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4620           Bit = 0;
4621           Parser.Lex();
4622         } else {
4623           return MatchOperand_NoMatch;
4624         }
4625         break;
4626       }
4627       default:
4628         return MatchOperand_NoMatch;
4629     }
4630   }
4631 
4632   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4633     return MatchOperand_ParseFail;
4634 
4635   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4636   return MatchOperand_Success;
4637 }
4638 
4639 static void addOptionalImmOperand(
4640   MCInst& Inst, const OperandVector& Operands,
4641   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4642   AMDGPUOperand::ImmTy ImmT,
4643   int64_t Default = 0) {
4644   auto i = OptionalIdx.find(ImmT);
4645   if (i != OptionalIdx.end()) {
4646     unsigned Idx = i->second;
4647     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4648   } else {
4649     Inst.addOperand(MCOperand::createImm(Default));
4650   }
4651 }
4652 
4653 OperandMatchResultTy
4654 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4655   if (getLexer().isNot(AsmToken::Identifier)) {
4656     return MatchOperand_NoMatch;
4657   }
4658   StringRef Tok = Parser.getTok().getString();
4659   if (Tok != Prefix) {
4660     return MatchOperand_NoMatch;
4661   }
4662 
4663   Parser.Lex();
4664   if (getLexer().isNot(AsmToken::Colon)) {
4665     return MatchOperand_ParseFail;
4666   }
4667 
4668   Parser.Lex();
4669   if (getLexer().isNot(AsmToken::Identifier)) {
4670     return MatchOperand_ParseFail;
4671   }
4672 
4673   Value = Parser.getTok().getString();
4674   return MatchOperand_Success;
4675 }
4676 
4677 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4678 // values to live in a joint format operand in the MCInst encoding.
4679 OperandMatchResultTy
4680 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4681   SMLoc S = Parser.getTok().getLoc();
4682   int64_t Dfmt = 0, Nfmt = 0;
4683   // dfmt and nfmt can appear in either order, and each is optional.
4684   bool GotDfmt = false, GotNfmt = false;
4685   while (!GotDfmt || !GotNfmt) {
4686     if (!GotDfmt) {
4687       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4688       if (Res != MatchOperand_NoMatch) {
4689         if (Res != MatchOperand_Success)
4690           return Res;
4691         if (Dfmt >= 16) {
4692           Error(Parser.getTok().getLoc(), "out of range dfmt");
4693           return MatchOperand_ParseFail;
4694         }
4695         GotDfmt = true;
4696         Parser.Lex();
4697         continue;
4698       }
4699     }
4700     if (!GotNfmt) {
4701       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4702       if (Res != MatchOperand_NoMatch) {
4703         if (Res != MatchOperand_Success)
4704           return Res;
4705         if (Nfmt >= 8) {
4706           Error(Parser.getTok().getLoc(), "out of range nfmt");
4707           return MatchOperand_ParseFail;
4708         }
4709         GotNfmt = true;
4710         Parser.Lex();
4711         continue;
4712       }
4713     }
4714     break;
4715   }
4716   if (!GotDfmt && !GotNfmt)
4717     return MatchOperand_NoMatch;
4718   auto Format = Dfmt | Nfmt << 4;
4719   Operands.push_back(
4720       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4721   return MatchOperand_Success;
4722 }
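
     // Worked example (illustrative): "dfmt:4, nfmt:2" yields the joint operand
     // Format = 4 | (2 << 4) = 0x24; the range checks above keep dfmt within its
     // 4-bit field and nfmt within its 3-bit field.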
4723 
4724 //===----------------------------------------------------------------------===//
4725 // ds
4726 //===----------------------------------------------------------------------===//
4727 
4728 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4729                                     const OperandVector &Operands) {
4730   OptionalImmIndexMap OptionalIdx;
4731 
4732   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4733     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4734 
4735     // Add the register arguments
4736     if (Op.isReg()) {
4737       Op.addRegOperands(Inst, 1);
4738       continue;
4739     }
4740 
4741     // Handle optional arguments
4742     OptionalIdx[Op.getImmTy()] = i;
4743   }
4744 
4745   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4746   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4747   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4748 
4749   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4750 }
4751 
4752 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4753                                 bool IsGdsHardcoded) {
4754   OptionalImmIndexMap OptionalIdx;
4755 
4756   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4757     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4758 
4759     // Add the register arguments
4760     if (Op.isReg()) {
4761       Op.addRegOperands(Inst, 1);
4762       continue;
4763     }
4764 
4765     if (Op.isToken() && Op.getToken() == "gds") {
4766       IsGdsHardcoded = true;
4767       continue;
4768     }
4769 
4770     // Handle optional arguments
4771     OptionalIdx[Op.getImmTy()] = i;
4772   }
4773 
4774   AMDGPUOperand::ImmTy OffsetType =
4775     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4776      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4777      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4778                                                       AMDGPUOperand::ImmTyOffset;
4779 
4780   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4781 
4782   if (!IsGdsHardcoded) {
4783     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4784   }
4785   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4786 }
4787 
4788 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4789   OptionalImmIndexMap OptionalIdx;
4790 
4791   unsigned OperandIdx[4];
4792   unsigned EnMask = 0;
4793   int SrcIdx = 0;
4794 
4795   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4796     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4797 
4798     // Add the register arguments
4799     if (Op.isReg()) {
4800       assert(SrcIdx < 4);
4801       OperandIdx[SrcIdx] = Inst.size();
4802       Op.addRegOperands(Inst, 1);
4803       ++SrcIdx;
4804       continue;
4805     }
4806 
4807     if (Op.isOff()) {
4808       assert(SrcIdx < 4);
4809       OperandIdx[SrcIdx] = Inst.size();
4810       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4811       ++SrcIdx;
4812       continue;
4813     }
4814 
4815     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4816       Op.addImmOperands(Inst, 1);
4817       continue;
4818     }
4819 
4820     if (Op.isToken() && Op.getToken() == "done")
4821       continue;
4822 
4823     // Handle optional arguments
4824     OptionalIdx[Op.getImmTy()] = i;
4825   }
4826 
4827   assert(SrcIdx == 4);
4828 
4829   bool Compr = false;
4830   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4831     Compr = true;
4832     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4833     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4834     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4835   }
4836 
4837   for (auto i = 0; i < SrcIdx; ++i) {
4838     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4839       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4840     }
4841   }
4842 
4843   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4844   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4845 
4846   Inst.addOperand(MCOperand::createImm(EnMask));
4847 }
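
     // Worked example (illustrative): for "exp mrt0 v0, v1, off, off done" only
     // the first two sources are real registers, so EnMask = 0b0011. When 'compr'
     // is present, sources are packed in pairs and each present pair contributes
     // two bits (0x3 << i * 2) instead of one.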
4848 
4849 //===----------------------------------------------------------------------===//
4850 // s_waitcnt
4851 //===----------------------------------------------------------------------===//
4852 
4853 static bool
4854 encodeCnt(
4855   const AMDGPU::IsaVersion ISA,
4856   int64_t &IntVal,
4857   int64_t CntVal,
4858   bool Saturate,
4859   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4860   unsigned (*decode)(const IsaVersion &Version, unsigned))
4861 {
4862   bool Failed = false;
4863 
4864   IntVal = encode(ISA, IntVal, CntVal);
4865   if (CntVal != decode(ISA, IntVal)) {
4866     if (Saturate) {
4867       IntVal = encode(ISA, IntVal, -1);
4868     } else {
4869       Failed = true;
4870     }
4871   }
4872   return Failed;
4873 }
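
     // Worked example (illustrative): if the vmcnt field of the target ISA only
     // holds values 0..15, then vmcnt(100) fails the encode/decode round-trip
     // check above and is reported as an error, whereas vmcnt_sat(100) saturates
     // the field to all ones via encode(ISA, IntVal, -1).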
4874 
4875 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4876 
4877   SMLoc CntLoc = getLoc();
4878   StringRef CntName = getTokenStr();
4879 
4880   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4881       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4882     return false;
4883 
4884   int64_t CntVal;
4885   SMLoc ValLoc = getLoc();
4886   if (!parseExpr(CntVal))
4887     return false;
4888 
4889   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4890 
4891   bool Failed = true;
4892   bool Sat = CntName.endswith("_sat");
4893 
4894   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4895     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4896   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4897     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4898   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4899     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4900   } else {
4901     Error(CntLoc, "invalid counter name " + CntName);
4902     return false;
4903   }
4904 
4905   if (Failed) {
4906     Error(ValLoc, "value is too large for " + CntName);
4907     return false;
4908   }
4909 
4910   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4911     return false;
4912 
4913   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4914     if (isToken(AsmToken::EndOfStatement)) {
4915       Error(getLoc(), "expected a counter name");
4916       return false;
4917     }
4918   }
4919 
4920   return true;
4921 }
4922 
4923 OperandMatchResultTy
4924 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4925   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4926   int64_t Waitcnt = getWaitcntBitMask(ISA);
4927   SMLoc S = getLoc();
4928 
4929   // If parse failed, do not return error code
4930   // to avoid excessive error messages.
4931   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4932     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4933   } else {
4934     parseExpr(Waitcnt);
4935   }
4936 
4937   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4938   return MatchOperand_Success;
4939 }
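
     // Examples of s_waitcnt operands accepted above (illustrative):
     //   s_waitcnt vmcnt(0) & lgkmcnt(0)    // counters may be joined by '&' or ','
     //   s_waitcnt expcnt(2), vmcnt_sat(63) // "_sat" saturates oversized values
     //   s_waitcnt 0                        // a plain expression is also accepted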
4940 
4941 bool
4942 AMDGPUOperand::isSWaitCnt() const {
4943   return isImm();
4944 }
4945 
4946 //===----------------------------------------------------------------------===//
4947 // hwreg
4948 //===----------------------------------------------------------------------===//
4949 
4950 bool
4951 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4952                                 int64_t &Offset,
4953                                 int64_t &Width) {
4954   using namespace llvm::AMDGPU::Hwreg;
4955 
4956   // The register may be specified by name or using a numeric code
4957   if (isToken(AsmToken::Identifier) &&
4958       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4959     HwReg.IsSymbolic = true;
4960     lex(); // skip message name
4961   } else if (!parseExpr(HwReg.Id)) {
4962     return false;
4963   }
4964 
4965   if (trySkipToken(AsmToken::RParen))
4966     return true;
4967 
4968   // parse optional params
4969   return
4970     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4971     parseExpr(Offset) &&
4972     skipToken(AsmToken::Comma, "expected a comma") &&
4973     parseExpr(Width) &&
4974     skipToken(AsmToken::RParen, "expected a closing parenthesis");
4975 }
4976 
4977 bool
4978 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4979                                const int64_t Offset,
4980                                const int64_t Width,
4981                                const SMLoc Loc) {
4982 
4983   using namespace llvm::AMDGPU::Hwreg;
4984 
4985   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4986     Error(Loc, "specified hardware register is not supported on this GPU");
4987     return false;
4988   } else if (!isValidHwreg(HwReg.Id)) {
4989     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4990     return false;
4991   } else if (!isValidHwregOffset(Offset)) {
4992     Error(Loc, "invalid bit offset: only 5-bit values are legal");
4993     return false;
4994   } else if (!isValidHwregWidth(Width)) {
4995     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4996     return false;
4997   }
4998   return true;
4999 }
5000 
5001 OperandMatchResultTy
5002 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5003   using namespace llvm::AMDGPU::Hwreg;
5004 
5005   int64_t ImmVal = 0;
5006   SMLoc Loc = getLoc();
5007 
5008   // If parse failed, do not return error code
5009   // to avoid excessive error messages.
5010   if (trySkipId("hwreg", AsmToken::LParen)) {
5011     OperandInfoTy HwReg(ID_UNKNOWN_);
5012     int64_t Offset = OFFSET_DEFAULT_;
5013     int64_t Width = WIDTH_DEFAULT_;
5014     if (parseHwregBody(HwReg, Offset, Width) &&
5015         validateHwreg(HwReg, Offset, Width, Loc)) {
5016       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5017     }
5018   } else if (parseExpr(ImmVal)) {
5019     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5020       Error(Loc, "invalid immediate: only 16-bit values are legal");
5021   }
5022 
5023   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5024   return MatchOperand_Success;
5025 }
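
     // Examples of accepted hwreg operands (illustrative):
     //   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 3, 5) // symbolic name, offset, width
     //   s_getreg_b32 s0, hwreg(6)                    // numeric id, default offset/width
     //   s_getreg_b32 s0, 0x1806                      // raw 16-bit immediate
     // encodeHwreg() packs the 6-bit id, the 5-bit bit offset and (width - 1) into
     // the 16-bit immediate, matching the ranges checked in validateHwreg() above.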
5026 
5027 bool AMDGPUOperand::isHwreg() const {
5028   return isImmTy(ImmTyHwreg);
5029 }
5030 
5031 //===----------------------------------------------------------------------===//
5032 // sendmsg
5033 //===----------------------------------------------------------------------===//
5034 
5035 bool
5036 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5037                                   OperandInfoTy &Op,
5038                                   OperandInfoTy &Stream) {
5039   using namespace llvm::AMDGPU::SendMsg;
5040 
5041   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5042     Msg.IsSymbolic = true;
5043     lex(); // skip message name
5044   } else if (!parseExpr(Msg.Id)) {
5045     return false;
5046   }
5047 
5048   if (trySkipToken(AsmToken::Comma)) {
5049     Op.IsDefined = true;
5050     if (isToken(AsmToken::Identifier) &&
5051         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5052       lex(); // skip operation name
5053     } else if (!parseExpr(Op.Id)) {
5054       return false;
5055     }
5056 
5057     if (trySkipToken(AsmToken::Comma)) {
5058       Stream.IsDefined = true;
5059       if (!parseExpr(Stream.Id))
5060         return false;
5061     }
5062   }
5063 
5064   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5065 }
5066 
5067 bool
5068 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5069                                  const OperandInfoTy &Op,
5070                                  const OperandInfoTy &Stream,
5071                                  const SMLoc S) {
5072   using namespace llvm::AMDGPU::SendMsg;
5073 
5074   // Validation strictness depends on whether message is specified
5075   // in a symbolic or in a numeric form. In the latter case
5076   // only the possibility of encoding is checked.
5077   bool Strict = Msg.IsSymbolic;
5078 
5079   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5080     Error(S, "invalid message id");
5081     return false;
5082   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5083     Error(S, Op.IsDefined ?
5084              "message does not support operations" :
5085              "missing message operation");
5086     return false;
5087   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5088     Error(S, "invalid operation id");
5089     return false;
5090   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5091     Error(S, "message operation does not support streams");
5092     return false;
5093   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5094     Error(S, "invalid message stream id");
5095     return false;
5096   }
5097   return true;
5098 }
5099 
5100 OperandMatchResultTy
5101 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5102   using namespace llvm::AMDGPU::SendMsg;
5103 
5104   int64_t ImmVal = 0;
5105   SMLoc Loc = getLoc();
5106 
5107   // If parse failed, do not return error code
5108   // to avoid excessive error messages.
5109   if (trySkipId("sendmsg", AsmToken::LParen)) {
5110     OperandInfoTy Msg(ID_UNKNOWN_);
5111     OperandInfoTy Op(OP_NONE_);
5112     OperandInfoTy Stream(STREAM_ID_NONE_);
5113     if (parseSendMsgBody(Msg, Op, Stream) &&
5114         validateSendMsg(Msg, Op, Stream, Loc)) {
5115       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5116     }
5117   } else if (parseExpr(ImmVal)) {
5118     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5119       Error(Loc, "invalid immediate: only 16-bit values are legal");
5120   }
5121 
5122   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5123   return MatchOperand_Success;
5124 }
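
     // Examples of accepted sendmsg operands (illustrative):
     //   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) // symbolic msg, op and stream id
     //   s_sendmsg sendmsg(MSG_INTERRUPT)         // op and stream id are optional
     //   s_sendmsg 0x22                           // raw 16-bit immediate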
5125 
5126 bool AMDGPUOperand::isSendMsg() const {
5127   return isImmTy(ImmTySendMsg);
5128 }
5129 
5130 //===----------------------------------------------------------------------===//
5131 // v_interp
5132 //===----------------------------------------------------------------------===//
5133 
5134 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5135   if (getLexer().getKind() != AsmToken::Identifier)
5136     return MatchOperand_NoMatch;
5137 
5138   StringRef Str = Parser.getTok().getString();
5139   int Slot = StringSwitch<int>(Str)
5140     .Case("p10", 0)
5141     .Case("p20", 1)
5142     .Case("p0", 2)
5143     .Default(-1);
5144 
5145   SMLoc S = Parser.getTok().getLoc();
5146   if (Slot == -1)
5147     return MatchOperand_ParseFail;
5148 
5149   Parser.Lex();
5150   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5151                                               AMDGPUOperand::ImmTyInterpSlot));
5152   return MatchOperand_Success;
5153 }
5154 
5155 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5156   if (getLexer().getKind() != AsmToken::Identifier)
5157     return MatchOperand_NoMatch;
5158 
5159   StringRef Str = Parser.getTok().getString();
5160   if (!Str.startswith("attr"))
5161     return MatchOperand_NoMatch;
5162 
5163   StringRef Chan = Str.take_back(2);
5164   int AttrChan = StringSwitch<int>(Chan)
5165     .Case(".x", 0)
5166     .Case(".y", 1)
5167     .Case(".z", 2)
5168     .Case(".w", 3)
5169     .Default(-1);
5170   if (AttrChan == -1)
5171     return MatchOperand_ParseFail;
5172 
5173   Str = Str.drop_back(2).drop_front(4);
5174 
5175   uint8_t Attr;
5176   if (Str.getAsInteger(10, Attr))
5177     return MatchOperand_ParseFail;
5178 
5179   SMLoc S = Parser.getTok().getLoc();
5180   Parser.Lex();
5181   if (Attr > 63) {
5182     Error(S, "out of bounds attr");
5183     return MatchOperand_Success;
5184   }
5185 
5186   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5187 
5188   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5189                                               AMDGPUOperand::ImmTyInterpAttr));
5190   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5191                                               AMDGPUOperand::ImmTyAttrChan));
5192   return MatchOperand_Success;
5193 }
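
     // Example (illustrative): in "v_interp_p1_f32 v0, v1, attr12.x" the token
     // "attr12.x" is split into an attribute index of 12 (must not exceed 63) and
     // channel 0 (.x); .y, .z and .w map to channels 1, 2 and 3.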
5194 
5195 //===----------------------------------------------------------------------===//
5196 // exp
5197 //===----------------------------------------------------------------------===//
5198 
5199 void AMDGPUAsmParser::errorExpTgt() {
5200   Error(Parser.getTok().getLoc(), "invalid exp target");
5201 }
5202 
5203 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5204                                                       uint8_t &Val) {
5205   if (Str == "null") {
5206     Val = 9;
5207     return MatchOperand_Success;
5208   }
5209 
5210   if (Str.startswith("mrt")) {
5211     Str = Str.drop_front(3);
5212     if (Str == "z") { // == mrtz
5213       Val = 8;
5214       return MatchOperand_Success;
5215     }
5216 
5217     if (Str.getAsInteger(10, Val))
5218       return MatchOperand_ParseFail;
5219 
5220     if (Val > 7)
5221       errorExpTgt();
5222 
5223     return MatchOperand_Success;
5224   }
5225 
5226   if (Str.startswith("pos")) {
5227     Str = Str.drop_front(3);
5228     if (Str.getAsInteger(10, Val))
5229       return MatchOperand_ParseFail;
5230 
5231     if (Val > 4 || (Val == 4 && !isGFX10()))
5232       errorExpTgt();
5233 
5234     Val += 12;
5235     return MatchOperand_Success;
5236   }
5237 
5238   if (isGFX10() && Str == "prim") {
5239     Val = 20;
5240     return MatchOperand_Success;
5241   }
5242 
5243   if (Str.startswith("param")) {
5244     Str = Str.drop_front(5);
5245     if (Str.getAsInteger(10, Val))
5246       return MatchOperand_ParseFail;
5247 
5248     if (Val >= 32)
5249       errorExpTgt();
5250 
5251     Val += 32;
5252     return MatchOperand_Success;
5253   }
5254 
5255   if (Str.startswith("invalid_target_")) {
5256     Str = Str.drop_front(15);
5257     if (Str.getAsInteger(10, Val))
5258       return MatchOperand_ParseFail;
5259 
5260     errorExpTgt();
5261     return MatchOperand_Success;
5262   }
5263 
5264   return MatchOperand_NoMatch;
5265 }
5266 
5267 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5268   uint8_t Val;
5269   StringRef Str = Parser.getTok().getString();
5270 
5271   auto Res = parseExpTgtImpl(Str, Val);
5272   if (Res != MatchOperand_Success)
5273     return Res;
5274 
5275   SMLoc S = Parser.getTok().getLoc();
5276   Parser.Lex();
5277 
5278   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5279                                               AMDGPUOperand::ImmTyExpTgt));
5280   return MatchOperand_Success;
5281 }
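
     // Summary of the mapping implemented above (illustrative): mrt0..mrt7 -> 0..7,
     // mrtz -> 8, null -> 9, pos0..pos3 -> 12..15 (pos4 -> 16, gfx10 only),
     // prim -> 20 (gfx10 only), param0..param31 -> 32..63.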
5282 
5283 //===----------------------------------------------------------------------===//
5284 // parser helpers
5285 //===----------------------------------------------------------------------===//
5286 
5287 bool
5288 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5289   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5290 }
5291 
5292 bool
5293 AMDGPUAsmParser::isId(const StringRef Id) const {
5294   return isId(getToken(), Id);
5295 }
5296 
5297 bool
5298 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5299   return getTokenKind() == Kind;
5300 }
5301 
5302 bool
5303 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5304   if (isId(Id)) {
5305     lex();
5306     return true;
5307   }
5308   return false;
5309 }
5310 
5311 bool
5312 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5313   if (isId(Id) && peekToken().is(Kind)) {
5314     lex();
5315     lex();
5316     return true;
5317   }
5318   return false;
5319 }
5320 
5321 bool
5322 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5323   if (isToken(Kind)) {
5324     lex();
5325     return true;
5326   }
5327   return false;
5328 }
5329 
5330 bool
5331 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5332                            const StringRef ErrMsg) {
5333   if (!trySkipToken(Kind)) {
5334     Error(getLoc(), ErrMsg);
5335     return false;
5336   }
5337   return true;
5338 }
5339 
5340 bool
5341 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5342   return !getParser().parseAbsoluteExpression(Imm);
5343 }
5344 
5345 bool
5346 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5347   SMLoc S = getLoc();
5348 
5349   const MCExpr *Expr;
5350   if (Parser.parseExpression(Expr))
5351     return false;
5352 
5353   int64_t IntVal;
5354   if (Expr->evaluateAsAbsolute(IntVal)) {
5355     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5356   } else {
5357     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5358   }
5359   return true;
5360 }
5361 
5362 bool
5363 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5364   if (isToken(AsmToken::String)) {
5365     Val = getToken().getStringContents();
5366     lex();
5367     return true;
5368   } else {
5369     Error(getLoc(), ErrMsg);
5370     return false;
5371   }
5372 }
5373 
5374 AsmToken
5375 AMDGPUAsmParser::getToken() const {
5376   return Parser.getTok();
5377 }
5378 
5379 AsmToken
5380 AMDGPUAsmParser::peekToken() {
5381   return getLexer().peekTok();
5382 }
5383 
5384 void
5385 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5386   auto TokCount = getLexer().peekTokens(Tokens);
5387 
5388   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5389     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5390 }
5391 
5392 AsmToken::TokenKind
5393 AMDGPUAsmParser::getTokenKind() const {
5394   return getLexer().getKind();
5395 }
5396 
5397 SMLoc
5398 AMDGPUAsmParser::getLoc() const {
5399   return getToken().getLoc();
5400 }
5401 
5402 StringRef
5403 AMDGPUAsmParser::getTokenStr() const {
5404   return getToken().getString();
5405 }
5406 
5407 void
5408 AMDGPUAsmParser::lex() {
5409   Parser.Lex();
5410 }
5411 
5412 //===----------------------------------------------------------------------===//
5413 // swizzle
5414 //===----------------------------------------------------------------------===//
5415 
5416 LLVM_READNONE
5417 static unsigned
5418 encodeBitmaskPerm(const unsigned AndMask,
5419                   const unsigned OrMask,
5420                   const unsigned XorMask) {
5421   using namespace llvm::AMDGPU::Swizzle;
5422 
5423   return BITMASK_PERM_ENC |
5424          (AndMask << BITMASK_AND_SHIFT) |
5425          (OrMask  << BITMASK_OR_SHIFT)  |
5426          (XorMask << BITMASK_XOR_SHIFT);
5427 }
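
     // A bitmask swizzle effectively makes lane i read from lane
     // ((i & AndMask) | OrMask) ^ XorMask. Worked example (illustrative):
     // reversing lanes within groups of 4 uses AndMask = BITMASK_MAX, OrMask = 0,
     // XorMask = 3, as built by parseSwizzleReverse() below.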
5428 
5429 bool
5430 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5431                                       const unsigned MinVal,
5432                                       const unsigned MaxVal,
5433                                       const StringRef ErrMsg) {
5434   for (unsigned i = 0; i < OpNum; ++i) {
5435     if (!skipToken(AsmToken::Comma, "expected a comma")) {
5436       return false;
5437     }
5438     SMLoc ExprLoc = Parser.getTok().getLoc();
5439     if (!parseExpr(Op[i])) {
5440       return false;
5441     }
5442     if (Op[i] < MinVal || Op[i] > MaxVal) {
5443       Error(ExprLoc, ErrMsg);
5444       return false;
5445     }
5446   }
5447 
5448   return true;
5449 }
5450 
5451 bool
5452 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5453   using namespace llvm::AMDGPU::Swizzle;
5454 
5455   int64_t Lane[LANE_NUM];
5456   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5457                            "expected a 2-bit lane id")) {
5458     Imm = QUAD_PERM_ENC;
5459     for (unsigned I = 0; I < LANE_NUM; ++I) {
5460       Imm |= Lane[I] << (LANE_SHIFT * I);
5461     }
5462     return true;
5463   }
5464   return false;
5465 }
5466 
5467 bool
5468 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5469   using namespace llvm::AMDGPU::Swizzle;
5470 
5471   SMLoc S = Parser.getTok().getLoc();
5472   int64_t GroupSize;
5473   int64_t LaneIdx;
5474 
5475   if (!parseSwizzleOperands(1, &GroupSize,
5476                             2, 32,
5477                             "group size must be in the interval [2,32]")) {
5478     return false;
5479   }
5480   if (!isPowerOf2_64(GroupSize)) {
5481     Error(S, "group size must be a power of two");
5482     return false;
5483   }
5484   if (parseSwizzleOperands(1, &LaneIdx,
5485                            0, GroupSize - 1,
5486                            "lane id must be in the interval [0,group size - 1]")) {
5487     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5488     return true;
5489   }
5490   return false;
5491 }
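
     // Worked example (illustrative, assuming BITMASK_MAX == 0x1f): broadcasting
     // lane 1 of every group of 4 lanes uses AndMask = 0x1f - 4 + 1 = 0x1c (keep
     // the group base), OrMask = 1 (select the lane within the group) and
     // XorMask = 0.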
5492 
5493 bool
5494 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5495   using namespace llvm::AMDGPU::Swizzle;
5496 
5497   SMLoc S = Parser.getTok().getLoc();
5498   int64_t GroupSize;
5499 
5500   if (!parseSwizzleOperands(1, &GroupSize,
5501       2, 32, "group size must be in the interval [2,32]")) {
5502     return false;
5503   }
5504   if (!isPowerOf2_64(GroupSize)) {
5505     Error(S, "group size must be a power of two");
5506     return false;
5507   }
5508 
5509   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5510   return true;
5511 }
5512 
5513 bool
5514 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5515   using namespace llvm::AMDGPU::Swizzle;
5516 
5517   SMLoc S = Parser.getTok().getLoc();
5518   int64_t GroupSize;
5519 
5520   if (!parseSwizzleOperands(1, &GroupSize,
5521       1, 16, "group size must be in the interval [1,16]")) {
5522     return false;
5523   }
5524   if (!isPowerOf2_64(GroupSize)) {
5525     Error(S, "group size must be a power of two");
5526     return false;
5527   }
5528 
5529   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5530   return true;
5531 }
5532 
5533 bool
5534 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5535   using namespace llvm::AMDGPU::Swizzle;
5536 
5537   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5538     return false;
5539   }
5540 
5541   StringRef Ctl;
5542   SMLoc StrLoc = Parser.getTok().getLoc();
5543   if (!parseString(Ctl)) {
5544     return false;
5545   }
5546   if (Ctl.size() != BITMASK_WIDTH) {
5547     Error(StrLoc, "expected a 5-character mask");
5548     return false;
5549   }
5550 
5551   unsigned AndMask = 0;
5552   unsigned OrMask = 0;
5553   unsigned XorMask = 0;
5554 
5555   for (size_t i = 0; i < Ctl.size(); ++i) {
5556     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5557     switch(Ctl[i]) {
5558     default:
5559       Error(StrLoc, "invalid mask");
5560       return false;
5561     case '0':
5562       break;
5563     case '1':
5564       OrMask |= Mask;
5565       break;
5566     case 'p':
5567       AndMask |= Mask;
5568       break;
5569     case 'i':
5570       AndMask |= Mask;
5571       XorMask |= Mask;
5572       break;
5573     }
5574   }
5575 
5576   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5577   return true;
5578 }
5579 
5580 bool
5581 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5582 
5583   SMLoc OffsetLoc = Parser.getTok().getLoc();
5584 
5585   if (!parseExpr(Imm)) {
5586     return false;
5587   }
5588   if (!isUInt<16>(Imm)) {
5589     Error(OffsetLoc, "expected a 16-bit offset");
5590     return false;
5591   }
5592   return true;
5593 }
5594 
5595 bool
5596 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5597   using namespace llvm::AMDGPU::Swizzle;
5598 
5599   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5600 
5601     SMLoc ModeLoc = Parser.getTok().getLoc();
5602     bool Ok = false;
5603 
5604     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5605       Ok = parseSwizzleQuadPerm(Imm);
5606     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5607       Ok = parseSwizzleBitmaskPerm(Imm);
5608     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5609       Ok = parseSwizzleBroadcast(Imm);
5610     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5611       Ok = parseSwizzleSwap(Imm);
5612     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5613       Ok = parseSwizzleReverse(Imm);
5614     } else {
5615       Error(ModeLoc, "expected a swizzle mode");
5616     }
5617 
5618     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5619   }
5620 
5621   return false;
5622 }
5623 
5624 OperandMatchResultTy
5625 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5626   SMLoc S = Parser.getTok().getLoc();
5627   int64_t Imm = 0;
5628 
5629   if (trySkipId("offset")) {
5630 
5631     bool Ok = false;
5632     if (skipToken(AsmToken::Colon, "expected a colon")) {
5633       if (trySkipId("swizzle")) {
5634         Ok = parseSwizzleMacro(Imm);
5635       } else {
5636         Ok = parseSwizzleOffset(Imm);
5637       }
5638     }
5639 
5640     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5641 
5642     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5643   } else {
5644     // Swizzle "offset" operand is optional.
5645     // If it is omitted, try parsing other optional operands.
5646     return parseOptionalOpr(Operands);
5647   }
5648 }
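
     // Examples of accepted swizzle operands (illustrative):
     //   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
     //   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
     //   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 4, 1)
     //   ds_swizzle_b32 v0, v1 offset:0x8000   // a raw 16-bit offset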
5649 
5650 bool
5651 AMDGPUOperand::isSwizzle() const {
5652   return isImmTy(ImmTySwizzle);
5653 }
5654 
5655 //===----------------------------------------------------------------------===//
5656 // VGPR Index Mode
5657 //===----------------------------------------------------------------------===//
5658 
5659 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5660 
5661   using namespace llvm::AMDGPU::VGPRIndexMode;
5662 
5663   if (trySkipToken(AsmToken::RParen)) {
5664     return OFF;
5665   }
5666 
5667   int64_t Imm = 0;
5668 
5669   while (true) {
5670     unsigned Mode = 0;
5671     SMLoc S = Parser.getTok().getLoc();
5672 
5673     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5674       if (trySkipId(IdSymbolic[ModeId])) {
5675         Mode = 1 << ModeId;
5676         break;
5677       }
5678     }
5679 
5680     if (Mode == 0) {
5681       Error(S, (Imm == 0)?
5682                "expected a VGPR index mode or a closing parenthesis" :
5683                "expected a VGPR index mode");
5684       break;
5685     }
5686 
5687     if (Imm & Mode) {
5688       Error(S, "duplicate VGPR index mode");
5689       break;
5690     }
5691     Imm |= Mode;
5692 
5693     if (trySkipToken(AsmToken::RParen))
5694       break;
5695     if (!skipToken(AsmToken::Comma,
5696                    "expected a comma or a closing parenthesis"))
5697       break;
5698   }
5699 
5700   return Imm;
5701 }
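
     // Example (illustrative): "s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)" builds a
     // bitmask with one bit per listed mode (here 0x9); a raw 4-bit immediate such
     // as "s_set_gpr_idx_on s0, 9" is also accepted by parseGPRIdxMode() below.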
5702 
5703 OperandMatchResultTy
5704 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5705 
5706   int64_t Imm = 0;
5707   SMLoc S = Parser.getTok().getLoc();
5708 
5709   if (getLexer().getKind() == AsmToken::Identifier &&
5710       Parser.getTok().getString() == "gpr_idx" &&
5711       getLexer().peekTok().is(AsmToken::LParen)) {
5712 
5713     Parser.Lex();
5714     Parser.Lex();
5715 
5716     // If parse failed, trigger an error but do not return error code
5717     // to avoid excessive error messages.
5718     Imm = parseGPRIdxMacro();
5719 
5720   } else {
5721     if (getParser().parseAbsoluteExpression(Imm))
5722       return MatchOperand_NoMatch;
5723     if (Imm < 0 || !isUInt<4>(Imm)) {
5724       Error(S, "invalid immediate: only 4-bit values are legal");
5725     }
5726   }
5727 
5728   Operands.push_back(
5729       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5730   return MatchOperand_Success;
5731 }
5732 
5733 bool AMDGPUOperand::isGPRIdxMode() const {
5734   return isImmTy(ImmTyGprIdxMode);
5735 }
5736 
5737 //===----------------------------------------------------------------------===//
5738 // sopp branch targets
5739 //===----------------------------------------------------------------------===//
5740 
5741 OperandMatchResultTy
5742 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5743 
5744   // Make sure we are not parsing something
5745   // that looks like a label or an expression but is not.
5746   // This will improve error messages.
5747   if (isRegister() || isModifier())
5748     return MatchOperand_NoMatch;
5749 
5750   if (parseExpr(Operands)) {
5751 
5752     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5753     assert(Opr.isImm() || Opr.isExpr());
5754     SMLoc Loc = Opr.getStartLoc();
5755 
5756     // Currently we do not support arbitrary expressions as branch targets.
5757     // Only labels and absolute expressions are accepted.
5758     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5759       Error(Loc, "expected an absolute expression or a label");
5760     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5761       Error(Loc, "expected a 16-bit signed jump offset");
5762     }
5763   }
5764 
5765   return MatchOperand_Success; // avoid excessive error messages
5766 }
5767 
5768 //===----------------------------------------------------------------------===//
5769 // Boolean holding registers
5770 //===----------------------------------------------------------------------===//
5771 
5772 OperandMatchResultTy
5773 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5774   return parseReg(Operands);
5775 }
5776 
5777 //===----------------------------------------------------------------------===//
5778 // mubuf
5779 //===----------------------------------------------------------------------===//
5780 
5781 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5782   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5783 }
5784 
5785 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5786   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5787 }
5788 
5789 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5790   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5791 }
5792 
5793 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5794                                const OperandVector &Operands,
5795                                bool IsAtomic,
5796                                bool IsAtomicReturn,
5797                                bool IsLds) {
5798   bool IsLdsOpcode = IsLds;
5799   bool HasLdsModifier = false;
5800   OptionalImmIndexMap OptionalIdx;
5801   assert(IsAtomicReturn ? IsAtomic : true);
5802   unsigned FirstOperandIdx = 1;
5803 
5804   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5805     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5806 
5807     // Add the register arguments
5808     if (Op.isReg()) {
5809       Op.addRegOperands(Inst, 1);
5810       // Insert a tied src for atomic return dst.
5811       // This cannot be postponed as subsequent calls to
5812       // addImmOperands rely on correct number of MC operands.
5813       if (IsAtomicReturn && i == FirstOperandIdx)
5814         Op.addRegOperands(Inst, 1);
5815       continue;
5816     }
5817 
5818     // Handle the case where soffset is an immediate
5819     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5820       Op.addImmOperands(Inst, 1);
5821       continue;
5822     }
5823 
5824     HasLdsModifier |= Op.isLDS();
5825 
5826     // Handle tokens like 'offen' which are sometimes hard-coded into the
5827     // asm string.  There are no MCInst operands for these.
5828     if (Op.isToken()) {
5829       continue;
5830     }
5831     assert(Op.isImm());
5832 
5833     // Handle optional arguments
5834     OptionalIdx[Op.getImmTy()] = i;
5835   }
5836 
5837   // This is a workaround for an llvm quirk which may result in an
5838   // incorrect instruction selection. The lds and non-lds versions of
5839   // MUBUF instructions are identical except that lds versions
5840   // have a mandatory 'lds' modifier. However, this modifier follows
5841   // the optional modifiers, and the llvm asm matcher regards the 'lds'
5842   // modifier as an optional one. As a result, an lds version
5843   // of an opcode may be selected even if it has no 'lds' modifier.
5844   if (IsLdsOpcode && !HasLdsModifier) {
5845     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5846     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5847       Inst.setOpcode(NoLdsOpcode);
5848       IsLdsOpcode = false;
5849     }
5850   }
5851 
5852   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5853   if (!IsAtomic) { // glc is hard-coded.
5854     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5855   }
5856   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5857 
5858   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5859     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5860   }
5861 
5862   if (isGFX10())
5863     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5864 }
5865 
5866 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5867   OptionalImmIndexMap OptionalIdx;
5868 
5869   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5870     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5871 
5872     // Add the register arguments
5873     if (Op.isReg()) {
5874       Op.addRegOperands(Inst, 1);
5875       continue;
5876     }
5877 
5878     // Handle the case where soffset is an immediate
5879     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5880       Op.addImmOperands(Inst, 1);
5881       continue;
5882     }
5883 
5884     // Handle tokens like 'offen' which are sometimes hard-coded into the
5885     // asm string.  There are no MCInst operands for these.
5886     if (Op.isToken()) {
5887       continue;
5888     }
5889     assert(Op.isImm());
5890 
5891     // Handle optional arguments
5892     OptionalIdx[Op.getImmTy()] = i;
5893   }
5894 
5895   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5896                         AMDGPUOperand::ImmTyOffset);
5897   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5898   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5899   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5900   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5901 
5902   if (isGFX10())
5903     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5904 }
5905 
5906 //===----------------------------------------------------------------------===//
5907 // mimg
5908 //===----------------------------------------------------------------------===//
5909 
5910 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5911                               bool IsAtomic) {
5912   unsigned I = 1;
5913   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5914   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5915     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5916   }
5917 
5918   if (IsAtomic) {
5919     // Add src, same as dst
5920     assert(Desc.getNumDefs() == 1);
5921     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5922   }
5923 
5924   OptionalImmIndexMap OptionalIdx;
5925 
5926   for (unsigned E = Operands.size(); I != E; ++I) {
5927     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5928 
5929     // Add the register arguments
5930     if (Op.isReg()) {
5931       Op.addRegOperands(Inst, 1);
5932     } else if (Op.isImmModifier()) {
5933       OptionalIdx[Op.getImmTy()] = I;
5934     } else if (!Op.isToken()) {
5935       llvm_unreachable("unexpected operand type");
5936     }
5937   }
5938 
5939   bool IsGFX10 = isGFX10();
5940 
5941   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5942   if (IsGFX10)
5943     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5944   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5945   if (IsGFX10)
5946     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5947   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5948   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5949   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5950   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5951   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5952   if (!IsGFX10)
5953     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5954   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5955 }
5956 
5957 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5958   cvtMIMG(Inst, Operands, true);
5959 }
5960 
5961 //===----------------------------------------------------------------------===//
5962 // smrd
5963 //===----------------------------------------------------------------------===//
5964 
5965 bool AMDGPUOperand::isSMRDOffset8() const {
5966   return isImm() && isUInt<8>(getImm());
5967 }
5968 
5969 bool AMDGPUOperand::isSMRDOffset20() const {
5970   return isImm() && isUInt<20>(getImm());
5971 }
5972 
5973 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5974   // 32-bit literals are only supported on CI and we only want to use them
5975   // when the offset does not fit in 8 bits.
5976   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5977 }
5978 
5979 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5980   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5981 }
5982 
5983 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5984   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5985 }
5986 
5987 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5988   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5989 }
5990 
5991 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5992   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5993 }
5994 
5995 //===----------------------------------------------------------------------===//
5996 // vop3
5997 //===----------------------------------------------------------------------===//
5998 
5999 static bool ConvertOmodMul(int64_t &Mul) {
6000   if (Mul != 1 && Mul != 2 && Mul != 4)
6001     return false;
6002 
6003   Mul >>= 1;
6004   return true;
6005 }
6006 
6007 static bool ConvertOmodDiv(int64_t &Div) {
6008   if (Div == 1) {
6009     Div = 0;
6010     return true;
6011   }
6012 
6013   if (Div == 2) {
6014     Div = 3;
6015     return true;
6016   }
6017 
6018   return false;
6019 }
6020 
6021 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6022   if (BoundCtrl == 0) {
6023     BoundCtrl = 1;
6024     return true;
6025   }
6026 
6027   if (BoundCtrl == -1) {
6028     BoundCtrl = 0;
6029     return true;
6030   }
6031 
6032   return false;
6033 }
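
     // These converters map asm syntax to encoding values (illustrative examples):
     // "mul:1" -> 0, "mul:2" -> 1, "mul:4" -> 2 and "div:2" -> 3 for the omod
     // field; "bound_ctrl:0" -> 1 and "bound_ctrl:-1" -> 0 for DPP.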
6034 
6035 // Note: the order in this table matches the order of operands in AsmString.
6036 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6037   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6038   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6039   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6040   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6041   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6042   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6043   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6044   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6045   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6046   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6047   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6048   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6049   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6050   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6051   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6052   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6053   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6054   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6055   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6056   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6057   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6058   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6059   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6060   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6061   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6062   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6063   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6064   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6065   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6066   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6067   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6068   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6069   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6070   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6071   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6072   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6073   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6074   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6075   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6076   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6077   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6078   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6079   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6080   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6081 };
6082 
6083 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6084 
6085   OperandMatchResultTy res = parseOptionalOpr(Operands);
6086 
6087   // This is a hack to enable hardcoded mandatory operands which follow
6088   // optional operands.
6089   //
6090   // The current design assumes that all operands after the first optional operand
6091   // are also optional. However, the definitions of some instructions violate
6092   // this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
6093   //
6094   // To alleviate this problem, we have to (implicitly) parse extra operands
6095   // to make sure the autogenerated parser of custom operands never hits hardcoded
6096   // mandatory operands.
6097 
6098   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6099     if (res != MatchOperand_Success ||
6100         isToken(AsmToken::EndOfStatement))
6101       break;
6102 
6103     trySkipToken(AsmToken::Comma);
6104     res = parseOptionalOpr(Operands);
6105   }
6106 
6107   return res;
6108 }
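
     // Example of the situation handled above (illustrative): in an atomic such as
     // "flat_atomic_add v0, v[1:2], v2 offset:16 glc", the trailing 'glc' is a
     // hardcoded mandatory operand that follows optional ones, so extra operands
     // are parsed here to keep the autogenerated matcher from stopping early.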
6109 
6110 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6111   OperandMatchResultTy res;
6112   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6113     // try to parse any optional operand here
6114     if (Op.IsBit) {
6115       res = parseNamedBit(Op.Name, Operands, Op.Type);
6116     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6117       res = parseOModOperand(Operands);
6118     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6119                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6120                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6121       res = parseSDWASel(Operands, Op.Name, Op.Type);
6122     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6123       res = parseSDWADstUnused(Operands);
6124     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6125                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6126                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6127                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6128       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6129                                         Op.ConvertResult);
6130     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6131       res = parseDim(Operands);
6132     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6133       res = parseDfmtNfmt(Operands);
6134     } else {
6135       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6136     }
6137     if (res != MatchOperand_NoMatch) {
6138       return res;
6139     }
6140   }
6141   return MatchOperand_NoMatch;
6142 }
6143 
6144 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6145   StringRef Name = Parser.getTok().getString();
6146   if (Name == "mul") {
6147     return parseIntWithPrefix("mul", Operands,
6148                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6149   }
6150 
6151   if (Name == "div") {
6152     return parseIntWithPrefix("div", Operands,
6153                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6154   }
6155 
6156   return MatchOperand_NoMatch;
6157 }
6158 
6159 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6160   cvtVOP3P(Inst, Operands);
6161 
6162   int Opc = Inst.getOpcode();
6163 
6164   int SrcNum;
6165   const int Ops[] = { AMDGPU::OpName::src0,
6166                       AMDGPU::OpName::src1,
6167                       AMDGPU::OpName::src2 };
6168   for (SrcNum = 0;
6169        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6170        ++SrcNum);
6171   assert(SrcNum > 0);
6172 
6173   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6174   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6175 
6176   if ((OpSel & (1 << SrcNum)) != 0) {
6177     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6178     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6179     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6180   }
6181 }
6182 
6183 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6184       // 1. This operand is an input-modifiers operand
6185   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6186       // 2. This is not the last operand
6187       && Desc.NumOperands > (OpNum + 1)
6188       // 3. The next operand has a register class
6189       && Desc.OpInfo[OpNum + 1].RegClass != -1
6190       // 4. The next operand is not tied to any other operand
6191       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6192 }
6193 
6194 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6195 {
6196   OptionalImmIndexMap OptionalIdx;
6197   unsigned Opc = Inst.getOpcode();
6198 
6199   unsigned I = 1;
6200   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6201   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6202     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6203   }
6204 
6205   for (unsigned E = Operands.size(); I != E; ++I) {
6206     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6207     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6208       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6209     } else if (Op.isInterpSlot() ||
6210                Op.isInterpAttr() ||
6211                Op.isAttrChan()) {
6212       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6213     } else if (Op.isImmModifier()) {
6214       OptionalIdx[Op.getImmTy()] = I;
6215     } else {
6216       llvm_unreachable("unhandled operand type");
6217     }
6218   }
6219 
6220   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6221     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6222   }
6223 
6224   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6225     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6226   }
6227 
6228   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6229     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6230   }
6231 }
6232 
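// Common VOP3 conversion. Defs are added first, then sources: if the opcode
// has src0_modifiers, each source is added together with its FP input
// modifiers; otherwise sources are added as plain registers or immediates.
// Optional clamp/omod operands and the tied src2 of v_mac/v_fmac are handled
// at the end.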
6233 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6234                               OptionalImmIndexMap &OptionalIdx) {
6235   unsigned Opc = Inst.getOpcode();
6236 
6237   unsigned I = 1;
6238   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6239   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6240     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6241   }
6242 
6243   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6244     // This instruction has src modifiers
6245     for (unsigned E = Operands.size(); I != E; ++I) {
6246       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6247       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6248         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6249       } else if (Op.isImmModifier()) {
6250         OptionalIdx[Op.getImmTy()] = I;
6251       } else if (Op.isRegOrImm()) {
6252         Op.addRegOrImmOperands(Inst, 1);
6253       } else {
6254         llvm_unreachable("unhandled operand type");
6255       }
6256     }
6257   } else {
6258     // No src modifiers
6259     for (unsigned E = Operands.size(); I != E; ++I) {
6260       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6261       if (Op.isMod()) {
6262         OptionalIdx[Op.getImmTy()] = I;
6263       } else {
6264         Op.addRegOrImmOperands(Inst, 1);
6265       }
6266     }
6267   }
6268 
6269   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6270     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6271   }
6272 
6273   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6274     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6275   }
6276 
6277   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6278   // they have a src2 register operand that is tied to the dst operand.
6279   // The assembler does not allow modifiers for this operand, so src2_modifiers
6280   // must be 0.
6281   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6282       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6283       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6284       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6285       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6286       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6287       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6288     auto it = Inst.begin();
6289     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6290     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6291     ++it;
6292     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6293   }
6294 }
6295 
6296 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6297   OptionalImmIndexMap OptionalIdx;
6298   cvtVOP3(Inst, Operands, OptionalIdx);
6299 }
6300 
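// Converts packed VOP3P instructions. Reuses the VOP3 conversion and then
// translates the parsed op_sel, op_sel_hi, neg_lo and neg_hi masks into the
// per-source OP_SEL_0 / OP_SEL_1 / NEG / NEG_HI bits of srcN_modifiers.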
6301 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6302                                const OperandVector &Operands) {
6303   OptionalImmIndexMap OptIdx;
6304   const int Opc = Inst.getOpcode();
6305   const MCInstrDesc &Desc = MII.get(Opc);
6306 
6307   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6308 
6309   cvtVOP3(Inst, Operands, OptIdx);
6310 
6311   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6312     assert(!IsPacked);
6313     Inst.addOperand(Inst.getOperand(0));
6314   }
6315 
6316   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
6317   // instruction, and then figure out where to actually put the modifiers.
6318 
6319   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6320 
6321   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6322   if (OpSelHiIdx != -1) {
6323     int DefaultVal = IsPacked ? -1 : 0;
6324     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6325                           DefaultVal);
6326   }
6327 
6328   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6329   if (NegLoIdx != -1) {
6330     assert(IsPacked);
6331     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6332     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6333   }
6334 
6335   const int Ops[] = { AMDGPU::OpName::src0,
6336                       AMDGPU::OpName::src1,
6337                       AMDGPU::OpName::src2 };
6338   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6339                          AMDGPU::OpName::src1_modifiers,
6340                          AMDGPU::OpName::src2_modifiers };
6341 
6342   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6343 
6344   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6345   unsigned OpSelHi = 0;
6346   unsigned NegLo = 0;
6347   unsigned NegHi = 0;
6348 
6349   if (OpSelHiIdx != -1) {
6350     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6351   }
6352 
6353   if (NegLoIdx != -1) {
6354     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6355     NegLo = Inst.getOperand(NegLoIdx).getImm();
6356     NegHi = Inst.getOperand(NegHiIdx).getImm();
6357   }
6358 
6359   for (int J = 0; J < 3; ++J) {
6360     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6361     if (OpIdx == -1)
6362       break;
6363 
6364     uint32_t ModVal = 0;
6365 
6366     if ((OpSel & (1 << J)) != 0)
6367       ModVal |= SISrcMods::OP_SEL_0;
6368 
6369     if ((OpSelHi & (1 << J)) != 0)
6370       ModVal |= SISrcMods::OP_SEL_1;
6371 
6372     if ((NegLo & (1 << J)) != 0)
6373       ModVal |= SISrcMods::NEG;
6374 
6375     if ((NegHi & (1 << J)) != 0)
6376       ModVal |= SISrcMods::NEG_HI;
6377 
6378     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6379 
6380     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6381   }
6382 }
6383 
6384 //===----------------------------------------------------------------------===//
6385 // dpp
6386 //===----------------------------------------------------------------------===//
6387 
6388 bool AMDGPUOperand::isDPP8() const {
6389   return isImmTy(ImmTyDPP8);
6390 }
6391 
6392 bool AMDGPUOperand::isDPPCtrl() const {
6393   using namespace AMDGPU::DPP;
6394 
6395   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6396   if (result) {
6397     int64_t Imm = getImm();
6398     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6399            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6400            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6401            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6402            (Imm == DppCtrl::WAVE_SHL1) ||
6403            (Imm == DppCtrl::WAVE_ROL1) ||
6404            (Imm == DppCtrl::WAVE_SHR1) ||
6405            (Imm == DppCtrl::WAVE_ROR1) ||
6406            (Imm == DppCtrl::ROW_MIRROR) ||
6407            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6408            (Imm == DppCtrl::BCAST15) ||
6409            (Imm == DppCtrl::BCAST31) ||
6410            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6411            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6412   }
6413   return false;
6414 }
6415 
6416 //===----------------------------------------------------------------------===//
6417 // mAI
6418 //===----------------------------------------------------------------------===//
6419 
6420 bool AMDGPUOperand::isBLGP() const {
6421   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6422 }
6423 
6424 bool AMDGPUOperand::isCBSZ() const {
6425   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6426 }
6427 
6428 bool AMDGPUOperand::isABID() const {
6429   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6430 }
6431 
6432 bool AMDGPUOperand::isS16Imm() const {
6433   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6434 }
6435 
6436 bool AMDGPUOperand::isU16Imm() const {
6437   return isImm() && isUInt<16>(getImm());
6438 }
6439 
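// Parses the GFX10 MIMG "dim" operand, e.g. "dim:SQ_RSRC_IMG_2D" or the short
// form "dim:2D". Leading digits are tokenized as an integer, so they are
// re-joined with the following identifier before the dim-info lookup.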
6440 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6441   if (!isGFX10())
6442     return MatchOperand_NoMatch;
6443 
6444   SMLoc S = Parser.getTok().getLoc();
6445 
6446   if (getLexer().isNot(AsmToken::Identifier))
6447     return MatchOperand_NoMatch;
6448   if (getLexer().getTok().getString() != "dim")
6449     return MatchOperand_NoMatch;
6450 
6451   Parser.Lex();
6452   if (getLexer().isNot(AsmToken::Colon))
6453     return MatchOperand_ParseFail;
6454 
6455   Parser.Lex();
6456 
6457   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6458   // integer.
6459   std::string Token;
6460   if (getLexer().is(AsmToken::Integer)) {
6461     SMLoc Loc = getLexer().getTok().getEndLoc();
6462     Token = getLexer().getTok().getString();
6463     Parser.Lex();
6464     if (getLexer().getTok().getLoc() != Loc)
6465       return MatchOperand_ParseFail;
6466   }
6467   if (getLexer().isNot(AsmToken::Identifier))
6468     return MatchOperand_ParseFail;
6469   Token += getLexer().getTok().getString();
6470 
6471   StringRef DimId = Token;
6472   if (DimId.startswith("SQ_RSRC_IMG_"))
6473     DimId = DimId.substr(12);
6474 
6475   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6476   if (!DimInfo)
6477     return MatchOperand_ParseFail;
6478 
6479   Parser.Lex();
6480 
6481   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6482                                               AMDGPUOperand::ImmTyDim));
6483   return MatchOperand_Success;
6484 }
6485 
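// Parses the GFX10 "dpp8:[a,b,c,d,e,f,g,h]" operand. Each of the eight lane
// selects must be in the range 0..7; they are packed into a single immediate,
// three bits per select. Any prefix other than "dpp8" falls through to
// parseDPPCtrl.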
6486 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6487   SMLoc S = Parser.getTok().getLoc();
6488   StringRef Prefix;
6489 
6490   if (getLexer().getKind() == AsmToken::Identifier) {
6491     Prefix = Parser.getTok().getString();
6492   } else {
6493     return MatchOperand_NoMatch;
6494   }
6495 
6496   if (Prefix != "dpp8")
6497     return parseDPPCtrl(Operands);
6498   if (!isGFX10())
6499     return MatchOperand_NoMatch;
6500 
6501   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6502 
6503   int64_t Sels[8];
6504 
6505   Parser.Lex();
6506   if (getLexer().isNot(AsmToken::Colon))
6507     return MatchOperand_ParseFail;
6508 
6509   Parser.Lex();
6510   if (getLexer().isNot(AsmToken::LBrac))
6511     return MatchOperand_ParseFail;
6512 
6513   Parser.Lex();
6514   if (getParser().parseAbsoluteExpression(Sels[0]))
6515     return MatchOperand_ParseFail;
6516   if (0 > Sels[0] || 7 < Sels[0])
6517     return MatchOperand_ParseFail;
6518 
6519   for (size_t i = 1; i < 8; ++i) {
6520     if (getLexer().isNot(AsmToken::Comma))
6521       return MatchOperand_ParseFail;
6522 
6523     Parser.Lex();
6524     if (getParser().parseAbsoluteExpression(Sels[i]))
6525       return MatchOperand_ParseFail;
6526     if (0 > Sels[i] || 7 < Sels[i])
6527       return MatchOperand_ParseFail;
6528   }
6529 
6530   if (getLexer().isNot(AsmToken::RBrac))
6531     return MatchOperand_ParseFail;
6532   Parser.Lex();
6533 
6534   unsigned DPP8 = 0;
6535   for (size_t i = 0; i < 8; ++i)
6536     DPP8 |= (Sels[i] << (i * 3));
6537 
6538   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6539   return MatchOperand_Success;
6540 }
6541 
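// Parses the classic dpp_ctrl operand: row_mirror, row_half_mirror,
// quad_perm:[a,b,c,d], and the "prefix:value" forms (row_shl, row_shr,
// row_ror, wave_*, row_bcast, row_share, row_xmask). The wave_* and row_bcast
// controls are accepted on VI/GFX9 only, while row_share and row_xmask are
// GFX10 only.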
6542 OperandMatchResultTy
6543 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6544   using namespace AMDGPU::DPP;
6545 
6546   SMLoc S = Parser.getTok().getLoc();
6547   StringRef Prefix;
6548   int64_t Int;
6549 
6550   if (getLexer().getKind() == AsmToken::Identifier) {
6551     Prefix = Parser.getTok().getString();
6552   } else {
6553     return MatchOperand_NoMatch;
6554   }
6555 
6556   if (Prefix == "row_mirror") {
6557     Int = DppCtrl::ROW_MIRROR;
6558     Parser.Lex();
6559   } else if (Prefix == "row_half_mirror") {
6560     Int = DppCtrl::ROW_HALF_MIRROR;
6561     Parser.Lex();
6562   } else {
6563     // Check the prefix to prevent parseDPPCtrl from consuming invalid tokens
6564     if (Prefix != "quad_perm"
6565         && Prefix != "row_shl"
6566         && Prefix != "row_shr"
6567         && Prefix != "row_ror"
6568         && Prefix != "wave_shl"
6569         && Prefix != "wave_rol"
6570         && Prefix != "wave_shr"
6571         && Prefix != "wave_ror"
6572         && Prefix != "row_bcast"
6573         && Prefix != "row_share"
6574         && Prefix != "row_xmask") {
6575       return MatchOperand_NoMatch;
6576     }
6577 
6578     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6579       return MatchOperand_NoMatch;
6580 
6581     if (!isVI() && !isGFX9() &&
6582         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6583          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6584          Prefix == "row_bcast"))
6585       return MatchOperand_NoMatch;
6586 
6587     Parser.Lex();
6588     if (getLexer().isNot(AsmToken::Colon))
6589       return MatchOperand_ParseFail;
6590 
6591     if (Prefix == "quad_perm") {
6592       // quad_perm:[%d,%d,%d,%d]
6593       Parser.Lex();
6594       if (getLexer().isNot(AsmToken::LBrac))
6595         return MatchOperand_ParseFail;
6596       Parser.Lex();
6597 
6598       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6599         return MatchOperand_ParseFail;
6600 
6601       for (int i = 0; i < 3; ++i) {
6602         if (getLexer().isNot(AsmToken::Comma))
6603           return MatchOperand_ParseFail;
6604         Parser.Lex();
6605 
6606         int64_t Temp;
6607         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6608           return MatchOperand_ParseFail;
6609         const int shift = i*2 + 2;
6610         Int += (Temp << shift);
6611       }
6612 
6613       if (getLexer().isNot(AsmToken::RBrac))
6614         return MatchOperand_ParseFail;
6615       Parser.Lex();
6616     } else {
6617       // sel:%d
6618       Parser.Lex();
6619       if (getParser().parseAbsoluteExpression(Int))
6620         return MatchOperand_ParseFail;
6621 
6622       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6623         Int |= DppCtrl::ROW_SHL0;
6624       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6625         Int |= DppCtrl::ROW_SHR0;
6626       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6627         Int |= DppCtrl::ROW_ROR0;
6628       } else if (Prefix == "wave_shl" && 1 == Int) {
6629         Int = DppCtrl::WAVE_SHL1;
6630       } else if (Prefix == "wave_rol" && 1 == Int) {
6631         Int = DppCtrl::WAVE_ROL1;
6632       } else if (Prefix == "wave_shr" && 1 == Int) {
6633         Int = DppCtrl::WAVE_SHR1;
6634       } else if (Prefix == "wave_ror" && 1 == Int) {
6635         Int = DppCtrl::WAVE_ROR1;
6636       } else if (Prefix == "row_bcast") {
6637         if (Int == 15) {
6638           Int = DppCtrl::BCAST15;
6639         } else if (Int == 31) {
6640           Int = DppCtrl::BCAST31;
6641         } else {
6642           return MatchOperand_ParseFail;
6643         }
6644       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6645         Int |= DppCtrl::ROW_SHARE_FIRST;
6646       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6647         Int |= DppCtrl::ROW_XMASK_FIRST;
6648       } else {
6649         return MatchOperand_ParseFail;
6650       }
6651     }
6652   }
6653 
6654   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6655   return MatchOperand_Success;
6656 }
6657 
6658 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6659   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6660 }
6661 
6662 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6663   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6664 }
6665 
6666 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6667   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6668 }
6669 
6670 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6671   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6672 }
6673 
6674 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6675   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6676 }
6677 
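// Converts DPP and DPP8 instructions. Tied operands (the "old" or src2 of MAC
// instructions) are duplicated from the already-added operands, the VOP2b
// "vcc" token is skipped, and then either the packed dpp8 selects plus FI or
// the dpp_ctrl/row_mask/bank_mask/bound_ctrl (and fi, when present)
// immediates are appended.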
6678 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6679   OptionalImmIndexMap OptionalIdx;
6680 
6681   unsigned I = 1;
6682   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6683   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6684     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6685   }
6686 
6687   int Fi = 0;
6688   for (unsigned E = Operands.size(); I != E; ++I) {
6689     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6690                                             MCOI::TIED_TO);
6691     if (TiedTo != -1) {
6692       assert((unsigned)TiedTo < Inst.getNumOperands());
6693       // Handle the tied "old" or src2 operand for MAC instructions.
6694       Inst.addOperand(Inst.getOperand(TiedTo));
6695     }
6696     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6697     // Add the register arguments
6698     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6699       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
6700       // Skip it.
6701       continue;
6702     }
6703 
6704     if (IsDPP8) {
6705       if (Op.isDPP8()) {
6706         Op.addImmOperands(Inst, 1);
6707       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6708         Op.addRegWithFPInputModsOperands(Inst, 2);
6709       } else if (Op.isFI()) {
6710         Fi = Op.getImm();
6711       } else if (Op.isReg()) {
6712         Op.addRegOperands(Inst, 1);
6713       } else {
6714         llvm_unreachable("Invalid operand type");
6715       }
6716     } else {
6717       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6718         Op.addRegWithFPInputModsOperands(Inst, 2);
6719       } else if (Op.isDPPCtrl()) {
6720         Op.addImmOperands(Inst, 1);
6721       } else if (Op.isImm()) {
6722         // Handle optional arguments
6723         OptionalIdx[Op.getImmTy()] = I;
6724       } else {
6725         llvm_unreachable("Invalid operand type");
6726       }
6727     }
6728   }
6729 
6730   if (IsDPP8) {
6731     using namespace llvm::AMDGPU::DPP;
6732     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6733   } else {
6734     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6735     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6736     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6737     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6738       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6739     }
6740   }
6741 }
6742 
6743 //===----------------------------------------------------------------------===//
6744 // sdwa
6745 //===----------------------------------------------------------------------===//
6746 
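// Parses an SDWA select operand such as "dst_sel:WORD_1" or "src0_sel:BYTE_0";
// Prefix supplies the expected keyword and Type the resulting immediate kind.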
6747 OperandMatchResultTy
6748 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6749                               AMDGPUOperand::ImmTy Type) {
6750   using namespace llvm::AMDGPU::SDWA;
6751 
6752   SMLoc S = Parser.getTok().getLoc();
6753   StringRef Value;
6754   OperandMatchResultTy res;
6755 
6756   res = parseStringWithPrefix(Prefix, Value);
6757   if (res != MatchOperand_Success) {
6758     return res;
6759   }
6760 
6761   int64_t Int;
6762   Int = StringSwitch<int64_t>(Value)
6763         .Case("BYTE_0", SdwaSel::BYTE_0)
6764         .Case("BYTE_1", SdwaSel::BYTE_1)
6765         .Case("BYTE_2", SdwaSel::BYTE_2)
6766         .Case("BYTE_3", SdwaSel::BYTE_3)
6767         .Case("WORD_0", SdwaSel::WORD_0)
6768         .Case("WORD_1", SdwaSel::WORD_1)
6769         .Case("DWORD", SdwaSel::DWORD)
6770         .Default(0xffffffff);
6771   Parser.Lex(); // eat last token
6772 
6773   if (Int == 0xffffffff) {
6774     return MatchOperand_ParseFail;
6775   }
6776 
6777   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6778   return MatchOperand_Success;
6779 }
6780 
6781 OperandMatchResultTy
6782 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6783   using namespace llvm::AMDGPU::SDWA;
6784 
6785   SMLoc S = Parser.getTok().getLoc();
6786   StringRef Value;
6787   OperandMatchResultTy res;
6788 
6789   res = parseStringWithPrefix("dst_unused", Value);
6790   if (res != MatchOperand_Success) {
6791     return res;
6792   }
6793 
6794   int64_t Int;
6795   Int = StringSwitch<int64_t>(Value)
6796         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6797         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6798         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6799         .Default(0xffffffff);
6800   Parser.Lex(); // eat last token
6801 
6802   if (Int == 0xffffffff) {
6803     return MatchOperand_ParseFail;
6804   }
6805 
6806   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6807   return MatchOperand_Success;
6808 }
6809 
6810 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6811   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6812 }
6813 
6814 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6815   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6816 }
6817 
6818 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6819   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6820 }
6821 
6822 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6823   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6824 }
6825 
6826 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6827   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6828 }
6829 
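// Common SDWA conversion. Optionally skips the "vcc" dst and/or src token of
// VOP2b/VOPC forms, adds sources with their input modifiers, and then appends
// the optional clamp/omod and dst_sel/dst_unused/srcN_sel operands that the
// basic instruction type (VOP1, VOP2 or VOPC) expects.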
6830 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6831                               uint64_t BasicInstType,
6832                               bool SkipDstVcc,
6833                               bool SkipSrcVcc) {
6834   using namespace llvm::AMDGPU::SDWA;
6835 
6836   OptionalImmIndexMap OptionalIdx;
6837   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6838   bool SkippedVcc = false;
6839 
6840   unsigned I = 1;
6841   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6842   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6843     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6844   }
6845 
6846   for (unsigned E = Operands.size(); I != E; ++I) {
6847     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6848     if (SkipVcc && !SkippedVcc && Op.isReg() &&
6849         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6850       // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
6851       // Skip it if it's the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6852       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
6853       // Skip VCC only if we didn't skip it on the previous iteration.
6854       // Note that src0 and src1 occupy 2 slots each because of modifiers.
6855       if (BasicInstType == SIInstrFlags::VOP2 &&
6856           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6857            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6858         SkippedVcc = true;
6859         continue;
6860       } else if (BasicInstType == SIInstrFlags::VOPC &&
6861                  Inst.getNumOperands() == 0) {
6862         SkippedVcc = true;
6863         continue;
6864       }
6865     }
6866     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6867       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6868     } else if (Op.isImm()) {
6869       // Handle optional arguments
6870       OptionalIdx[Op.getImmTy()] = I;
6871     } else {
6872       llvm_unreachable("Invalid operand type");
6873     }
6874     SkippedVcc = false;
6875   }
6876 
6877   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6878       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6879       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6880     // V_NOP_sdwa (vi/gfx9/gfx10) has no optional SDWA arguments.
6881     switch (BasicInstType) {
6882     case SIInstrFlags::VOP1:
6883       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6884       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6885         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6886       }
6887       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6888       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6889       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6890       break;
6891 
6892     case SIInstrFlags::VOP2:
6893       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6894       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6895         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6896       }
6897       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6898       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6899       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6900       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6901       break;
6902 
6903     case SIInstrFlags::VOPC:
6904       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6905         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6906       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6907       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6908       break;
6909 
6910     default:
6911       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6912     }
6913   }
6914 
6915   // Special case v_mac_{f16, f32}:
6916   // they have a src2 register operand that is tied to the dst operand.
6917   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6918       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6919     auto it = Inst.begin();
6920     std::advance(
6921       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6922     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6923   }
6924 }
6925 
6926 //===----------------------------------------------------------------------===//
6927 // mAI
6928 //===----------------------------------------------------------------------===//
6929 
6930 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6931   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6932 }
6933 
6934 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6935   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6936 }
6937 
6938 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6939   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6940 }
6941 
6942 /// Force static initialization.
6943 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6944   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6945   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6946 }
6947 
6948 #define GET_REGISTER_MATCHER
6949 #define GET_MATCHER_IMPLEMENTATION
6950 #define GET_MNEMONIC_SPELL_CHECKER
6951 #include "AMDGPUGenAsmMatcher.inc"
6952 
6953 // This function should be defined after the auto-generated include so that the
6954 // MatchClassKind enum is defined.
6955 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6956                                                      unsigned Kind) {
6957   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6958   // But MatchInstructionImpl() expects to see a token and fails to validate the
6959   // operand. This method checks whether we were given an immediate operand but
6960   // expected the corresponding token.
6961   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6962   switch (Kind) {
6963   case MCK_addr64:
6964     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6965   case MCK_gds:
6966     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6967   case MCK_lds:
6968     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6969   case MCK_glc:
6970     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6971   case MCK_idxen:
6972     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6973   case MCK_offen:
6974     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6975   case MCK_SSrcB32:
6976     // When operands have expression values, they will return true for isToken,
6977     // because it is not possible to distinguish between a token and an
6978     // expression at parse time. MatchInstructionImpl() always tries to match
6979     // an operand as a token when isToken returns true, and when the name of
6980     // the expression is not a valid token the match fails, so we need to
6981     // handle it here.
6982     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6983   case MCK_SSrcF32:
6984     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6985   case MCK_SoppBrTarget:
6986     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6987   case MCK_VReg32OrOff:
6988     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6989   case MCK_InterpSlot:
6990     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6991   case MCK_Attr:
6992     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6993   case MCK_AttrChan:
6994     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6995   case MCK_SReg_64:
6996   case MCK_SReg_64_XEXEC:
6997     // Null is defined as a 32-bit register, but
6998     // it should also be accepted for 64-bit operands.
6999     // The following code enables it for SReg_64 operands
7000     // used as source and destination. Remaining source
7001     // operands are handled in isInlinableImm.
7002     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7003   default:
7004     return Match_InvalidOperand;
7005   }
7006 }
7007 
7008 //===----------------------------------------------------------------------===//
7009 // endpgm
7010 //===----------------------------------------------------------------------===//
7011 
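// Parses the optional immediate of s_endpgm; when it is omitted the value
// defaults to 0, and an explicit value must fit in 16 bits.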
7012 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7013   SMLoc S = Parser.getTok().getLoc();
7014   int64_t Imm = 0;
7015 
7016   if (!parseExpr(Imm)) {
7017     // The operand is optional; if not present, default to 0.
7018     Imm = 0;
7019   }
7020 
7021   if (!isUInt<16>(Imm)) {
7022     Error(S, "expected a 16-bit value");
7023     return MatchOperand_ParseFail;
7024   }
7025 
7026   Operands.push_back(
7027       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7028   return MatchOperand_Success;
7029 }
7030 
7031 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7032