1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/MC/MCAsmInfo.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/AMDGPUMetadata.h"
32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/MachineValueType.h"
35 #include "llvm/Support/TargetParser.h"
36 #include "llvm/Support/TargetRegistry.h"
37
38 using namespace llvm;
39 using namespace llvm::AMDGPU;
40 using namespace llvm::amdhsa;
41
42 namespace {
43
44 class AMDGPUAsmParser;
45
46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47
48 //===----------------------------------------------------------------------===//
49 // Operand
50 //===----------------------------------------------------------------------===//
51
52 class AMDGPUOperand : public MCParsedAsmOperand {
53 enum KindTy {
54 Token,
55 Immediate,
56 Register,
57 Expression
58 } Kind;
59
60 SMLoc StartLoc, EndLoc;
61 const AMDGPUAsmParser *AsmParser;
62
63 public:
AMDGPUOperand(KindTy Kind_,const AMDGPUAsmParser * AsmParser_)64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66
67 using Ptr = std::unique_ptr<AMDGPUOperand>;
68
69 struct Modifiers {
70 bool Abs = false;
71 bool Neg = false;
72 bool Sext = false;
73
hasFPModifiers__anon649c30870111::AMDGPUOperand::Modifiers74 bool hasFPModifiers() const { return Abs || Neg; }
hasIntModifiers__anon649c30870111::AMDGPUOperand::Modifiers75 bool hasIntModifiers() const { return Sext; }
hasModifiers__anon649c30870111::AMDGPUOperand::Modifiers76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77
getFPModifiersOperand__anon649c30870111::AMDGPUOperand::Modifiers78 int64_t getFPModifiersOperand() const {
79 int64_t Operand = 0;
80 Operand |= Abs ? SISrcMods::ABS : 0u;
81 Operand |= Neg ? SISrcMods::NEG : 0u;
82 return Operand;
83 }
84
getIntModifiersOperand__anon649c30870111::AMDGPUOperand::Modifiers85 int64_t getIntModifiersOperand() const {
86 int64_t Operand = 0;
87 Operand |= Sext ? SISrcMods::SEXT : 0u;
88 return Operand;
89 }
90
getModifiersOperand__anon649c30870111::AMDGPUOperand::Modifiers91 int64_t getModifiersOperand() const {
92 assert(!(hasFPModifiers() && hasIntModifiers())
93 && "fp and int modifiers should not be used simultaneously");
94 if (hasFPModifiers()) {
95 return getFPModifiersOperand();
96 } else if (hasIntModifiers()) {
97 return getIntModifiersOperand();
98 } else {
99 return 0;
100 }
101 }
102
103 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104 };
105
106 enum ImmTy {
107 ImmTyNone,
108 ImmTyGDS,
109 ImmTyLDS,
110 ImmTyOffen,
111 ImmTyIdxen,
112 ImmTyAddr64,
113 ImmTyOffset,
114 ImmTyInstOffset,
115 ImmTyOffset0,
116 ImmTyOffset1,
117 ImmTyCPol,
118 ImmTySWZ,
119 ImmTyTFE,
120 ImmTyD16,
121 ImmTyClampSI,
122 ImmTyOModSI,
123 ImmTyDPP8,
124 ImmTyDppCtrl,
125 ImmTyDppRowMask,
126 ImmTyDppBankMask,
127 ImmTyDppBoundCtrl,
128 ImmTyDppFi,
129 ImmTySdwaDstSel,
130 ImmTySdwaSrc0Sel,
131 ImmTySdwaSrc1Sel,
132 ImmTySdwaDstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTySwizzle,
155 ImmTyGprIdxMode,
156 ImmTyHigh,
157 ImmTyBLGP,
158 ImmTyCBSZ,
159 ImmTyABID,
160 ImmTyEndpgm,
161 };
162
163 enum ImmKindTy {
164 ImmKindTyNone,
165 ImmKindTyLiteral,
166 ImmKindTyConst,
167 };
168
169 private:
170 struct TokOp {
171 const char *Data;
172 unsigned Length;
173 };
174
175 struct ImmOp {
176 int64_t Val;
177 ImmTy Type;
178 bool IsFPImm;
179 mutable ImmKindTy Kind;
180 Modifiers Mods;
181 };
182
183 struct RegOp {
184 unsigned RegNo;
185 Modifiers Mods;
186 };
187
188 union {
189 TokOp Tok;
190 ImmOp Imm;
191 RegOp Reg;
192 const MCExpr *Expr;
193 };
194
195 public:
isToken() const196 bool isToken() const override {
197 if (Kind == Token)
198 return true;
199
200 // When parsing operands, we can't always tell if something was meant to be
201 // a token, like 'gds', or an expression that references a global variable.
202 // In this case, we assume the string is an expression, and if we need to
203 // interpret is a token, then we treat the symbol name as the token.
204 return isSymbolRefExpr();
205 }
206
isSymbolRefExpr() const207 bool isSymbolRefExpr() const {
208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209 }
210
isImm() const211 bool isImm() const override {
212 return Kind == Immediate;
213 }
214
setImmKindNone() const215 void setImmKindNone() const {
216 assert(isImm());
217 Imm.Kind = ImmKindTyNone;
218 }
219
setImmKindLiteral() const220 void setImmKindLiteral() const {
221 assert(isImm());
222 Imm.Kind = ImmKindTyLiteral;
223 }
224
setImmKindConst() const225 void setImmKindConst() const {
226 assert(isImm());
227 Imm.Kind = ImmKindTyConst;
228 }
229
IsImmKindLiteral() const230 bool IsImmKindLiteral() const {
231 return isImm() && Imm.Kind == ImmKindTyLiteral;
232 }
233
isImmKindConst() const234 bool isImmKindConst() const {
235 return isImm() && Imm.Kind == ImmKindTyConst;
236 }
237
238 bool isInlinableImm(MVT type) const;
239 bool isLiteralImm(MVT type) const;
240
isRegKind() const241 bool isRegKind() const {
242 return Kind == Register;
243 }
244
isReg() const245 bool isReg() const override {
246 return isRegKind() && !hasModifiers();
247 }
248
isRegOrImmWithInputMods(unsigned RCID,MVT type) const249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251 }
252
isRegOrImmWithInt16InputMods() const253 bool isRegOrImmWithInt16InputMods() const {
254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255 }
256
isRegOrImmWithInt32InputMods() const257 bool isRegOrImmWithInt32InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259 }
260
isRegOrImmWithInt64InputMods() const261 bool isRegOrImmWithInt64InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263 }
264
isRegOrImmWithFP16InputMods() const265 bool isRegOrImmWithFP16InputMods() const {
266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267 }
268
isRegOrImmWithFP32InputMods() const269 bool isRegOrImmWithFP32InputMods() const {
270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271 }
272
isRegOrImmWithFP64InputMods() const273 bool isRegOrImmWithFP64InputMods() const {
274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275 }
276
isVReg() const277 bool isVReg() const {
278 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279 isRegClass(AMDGPU::VReg_64RegClassID) ||
280 isRegClass(AMDGPU::VReg_96RegClassID) ||
281 isRegClass(AMDGPU::VReg_128RegClassID) ||
282 isRegClass(AMDGPU::VReg_160RegClassID) ||
283 isRegClass(AMDGPU::VReg_192RegClassID) ||
284 isRegClass(AMDGPU::VReg_256RegClassID) ||
285 isRegClass(AMDGPU::VReg_512RegClassID) ||
286 isRegClass(AMDGPU::VReg_1024RegClassID);
287 }
288
isVReg32() const289 bool isVReg32() const {
290 return isRegClass(AMDGPU::VGPR_32RegClassID);
291 }
292
isVReg32OrOff() const293 bool isVReg32OrOff() const {
294 return isOff() || isVReg32();
295 }
296
isNull() const297 bool isNull() const {
298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299 }
300
301 bool isVRegWithInputMods() const;
302
303 bool isSDWAOperand(MVT type) const;
304 bool isSDWAFP16Operand() const;
305 bool isSDWAFP32Operand() const;
306 bool isSDWAInt16Operand() const;
307 bool isSDWAInt32Operand() const;
308
isImmTy(ImmTy ImmT) const309 bool isImmTy(ImmTy ImmT) const {
310 return isImm() && Imm.Type == ImmT;
311 }
312
isImmModifier() const313 bool isImmModifier() const {
314 return isImm() && Imm.Type != ImmTyNone;
315 }
316
isClampSI() const317 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
isOModSI() const318 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
isDMask() const319 bool isDMask() const { return isImmTy(ImmTyDMask); }
isDim() const320 bool isDim() const { return isImmTy(ImmTyDim); }
isUNorm() const321 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
isDA() const322 bool isDA() const { return isImmTy(ImmTyDA); }
isR128A16() const323 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
isGFX10A16() const324 bool isGFX10A16() const { return isImmTy(ImmTyA16); }
isLWE() const325 bool isLWE() const { return isImmTy(ImmTyLWE); }
isOff() const326 bool isOff() const { return isImmTy(ImmTyOff); }
isExpTgt() const327 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
isExpVM() const328 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
isExpCompr() const329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
isOffen() const330 bool isOffen() const { return isImmTy(ImmTyOffen); }
isIdxen() const331 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
isAddr64() const332 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
isOffset() const333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
isOffset0() const334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
isOffset1() const335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336
isFlatOffset() const337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
isGDS() const338 bool isGDS() const { return isImmTy(ImmTyGDS); }
isLDS() const339 bool isLDS() const { return isImmTy(ImmTyLDS); }
isCPol() const340 bool isCPol() const { return isImmTy(ImmTyCPol); }
isSWZ() const341 bool isSWZ() const { return isImmTy(ImmTySWZ); }
isTFE() const342 bool isTFE() const { return isImmTy(ImmTyTFE); }
isD16() const343 bool isD16() const { return isImmTy(ImmTyD16); }
isFORMAT() const344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
isBankMask() const345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
isRowMask() const346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
isBoundCtrl() const347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
isFI() const348 bool isFI() const { return isImmTy(ImmTyDppFi); }
isSDWADstSel() const349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
isSDWASrc0Sel() const350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
isSDWASrc1Sel() const351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
isSDWADstUnused() const352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
isInterpSlot() const353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
isInterpAttr() const354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
isAttrChan() const355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
isOpSel() const356 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
isOpSelHi() const357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
isNegLo() const358 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
isNegHi() const359 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
isHigh() const360 bool isHigh() const { return isImmTy(ImmTyHigh); }
361
isMod() const362 bool isMod() const {
363 return isClampSI() || isOModSI();
364 }
365
isRegOrImm() const366 bool isRegOrImm() const {
367 return isReg() || isImm();
368 }
369
370 bool isRegClass(unsigned RCID) const;
371
372 bool isInlineValue() const;
373
isRegOrInlineNoMods(unsigned RCID,MVT type) const374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376 }
377
isSCSrcB16() const378 bool isSCSrcB16() const {
379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380 }
381
isSCSrcV2B16() const382 bool isSCSrcV2B16() const {
383 return isSCSrcB16();
384 }
385
isSCSrcB32() const386 bool isSCSrcB32() const {
387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388 }
389
isSCSrcB64() const390 bool isSCSrcB64() const {
391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392 }
393
394 bool isBoolReg() const;
395
isSCSrcF16() const396 bool isSCSrcF16() const {
397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398 }
399
isSCSrcV2F16() const400 bool isSCSrcV2F16() const {
401 return isSCSrcF16();
402 }
403
isSCSrcF32() const404 bool isSCSrcF32() const {
405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406 }
407
isSCSrcF64() const408 bool isSCSrcF64() const {
409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410 }
411
isSSrcB32() const412 bool isSSrcB32() const {
413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414 }
415
isSSrcB16() const416 bool isSSrcB16() const {
417 return isSCSrcB16() || isLiteralImm(MVT::i16);
418 }
419
isSSrcV2B16() const420 bool isSSrcV2B16() const {
421 llvm_unreachable("cannot happen");
422 return isSSrcB16();
423 }
424
isSSrcB64() const425 bool isSSrcB64() const {
426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427 // See isVSrc64().
428 return isSCSrcB64() || isLiteralImm(MVT::i64);
429 }
430
isSSrcF32() const431 bool isSSrcF32() const {
432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433 }
434
isSSrcF64() const435 bool isSSrcF64() const {
436 return isSCSrcB64() || isLiteralImm(MVT::f64);
437 }
438
isSSrcF16() const439 bool isSSrcF16() const {
440 return isSCSrcB16() || isLiteralImm(MVT::f16);
441 }
442
isSSrcV2F16() const443 bool isSSrcV2F16() const {
444 llvm_unreachable("cannot happen");
445 return isSSrcF16();
446 }
447
isSSrcV2FP32() const448 bool isSSrcV2FP32() const {
449 llvm_unreachable("cannot happen");
450 return isSSrcF32();
451 }
452
isSCSrcV2FP32() const453 bool isSCSrcV2FP32() const {
454 llvm_unreachable("cannot happen");
455 return isSCSrcF32();
456 }
457
isSSrcV2INT32() const458 bool isSSrcV2INT32() const {
459 llvm_unreachable("cannot happen");
460 return isSSrcB32();
461 }
462
isSCSrcV2INT32() const463 bool isSCSrcV2INT32() const {
464 llvm_unreachable("cannot happen");
465 return isSCSrcB32();
466 }
467
isSSrcOrLdsB32() const468 bool isSSrcOrLdsB32() const {
469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470 isLiteralImm(MVT::i32) || isExpr();
471 }
472
isVCSrcB32() const473 bool isVCSrcB32() const {
474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475 }
476
isVCSrcB64() const477 bool isVCSrcB64() const {
478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479 }
480
isVCSrcB16() const481 bool isVCSrcB16() const {
482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483 }
484
isVCSrcV2B16() const485 bool isVCSrcV2B16() const {
486 return isVCSrcB16();
487 }
488
isVCSrcF32() const489 bool isVCSrcF32() const {
490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491 }
492
isVCSrcF64() const493 bool isVCSrcF64() const {
494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495 }
496
isVCSrcF16() const497 bool isVCSrcF16() const {
498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499 }
500
isVCSrcV2F16() const501 bool isVCSrcV2F16() const {
502 return isVCSrcF16();
503 }
504
isVSrcB32() const505 bool isVSrcB32() const {
506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507 }
508
isVSrcB64() const509 bool isVSrcB64() const {
510 return isVCSrcF64() || isLiteralImm(MVT::i64);
511 }
512
isVSrcB16() const513 bool isVSrcB16() const {
514 return isVCSrcB16() || isLiteralImm(MVT::i16);
515 }
516
isVSrcV2B16() const517 bool isVSrcV2B16() const {
518 return isVSrcB16() || isLiteralImm(MVT::v2i16);
519 }
520
isVCSrcV2FP32() const521 bool isVCSrcV2FP32() const {
522 return isVCSrcF64();
523 }
524
isVSrcV2FP32() const525 bool isVSrcV2FP32() const {
526 return isVSrcF64() || isLiteralImm(MVT::v2f32);
527 }
528
isVCSrcV2INT32() const529 bool isVCSrcV2INT32() const {
530 return isVCSrcB64();
531 }
532
isVSrcV2INT32() const533 bool isVSrcV2INT32() const {
534 return isVSrcB64() || isLiteralImm(MVT::v2i32);
535 }
536
isVSrcF32() const537 bool isVSrcF32() const {
538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539 }
540
isVSrcF64() const541 bool isVSrcF64() const {
542 return isVCSrcF64() || isLiteralImm(MVT::f64);
543 }
544
isVSrcF16() const545 bool isVSrcF16() const {
546 return isVCSrcF16() || isLiteralImm(MVT::f16);
547 }
548
isVSrcV2F16() const549 bool isVSrcV2F16() const {
550 return isVSrcF16() || isLiteralImm(MVT::v2f16);
551 }
552
isVISrcB32() const553 bool isVISrcB32() const {
554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555 }
556
isVISrcB16() const557 bool isVISrcB16() const {
558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559 }
560
isVISrcV2B16() const561 bool isVISrcV2B16() const {
562 return isVISrcB16();
563 }
564
isVISrcF32() const565 bool isVISrcF32() const {
566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567 }
568
isVISrcF16() const569 bool isVISrcF16() const {
570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571 }
572
isVISrcV2F16() const573 bool isVISrcV2F16() const {
574 return isVISrcF16() || isVISrcB32();
575 }
576
isVISrc_64B64() const577 bool isVISrc_64B64() const {
578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579 }
580
isVISrc_64F64() const581 bool isVISrc_64F64() const {
582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583 }
584
isVISrc_64V2FP32() const585 bool isVISrc_64V2FP32() const {
586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587 }
588
isVISrc_64V2INT32() const589 bool isVISrc_64V2INT32() const {
590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591 }
592
isVISrc_256B64() const593 bool isVISrc_256B64() const {
594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595 }
596
isVISrc_256F64() const597 bool isVISrc_256F64() const {
598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599 }
600
isVISrc_128B16() const601 bool isVISrc_128B16() const {
602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603 }
604
isVISrc_128V2B16() const605 bool isVISrc_128V2B16() const {
606 return isVISrc_128B16();
607 }
608
isVISrc_128B32() const609 bool isVISrc_128B32() const {
610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611 }
612
isVISrc_128F32() const613 bool isVISrc_128F32() const {
614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615 }
616
isVISrc_256V2FP32() const617 bool isVISrc_256V2FP32() const {
618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619 }
620
isVISrc_256V2INT32() const621 bool isVISrc_256V2INT32() const {
622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623 }
624
isVISrc_512B32() const625 bool isVISrc_512B32() const {
626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627 }
628
isVISrc_512B16() const629 bool isVISrc_512B16() const {
630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631 }
632
isVISrc_512V2B16() const633 bool isVISrc_512V2B16() const {
634 return isVISrc_512B16();
635 }
636
isVISrc_512F32() const637 bool isVISrc_512F32() const {
638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639 }
640
isVISrc_512F16() const641 bool isVISrc_512F16() const {
642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643 }
644
isVISrc_512V2F16() const645 bool isVISrc_512V2F16() const {
646 return isVISrc_512F16() || isVISrc_512B32();
647 }
648
isVISrc_1024B32() const649 bool isVISrc_1024B32() const {
650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651 }
652
isVISrc_1024B16() const653 bool isVISrc_1024B16() const {
654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655 }
656
isVISrc_1024V2B16() const657 bool isVISrc_1024V2B16() const {
658 return isVISrc_1024B16();
659 }
660
isVISrc_1024F32() const661 bool isVISrc_1024F32() const {
662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663 }
664
isVISrc_1024F16() const665 bool isVISrc_1024F16() const {
666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667 }
668
isVISrc_1024V2F16() const669 bool isVISrc_1024V2F16() const {
670 return isVISrc_1024F16() || isVISrc_1024B32();
671 }
672
isAISrcB32() const673 bool isAISrcB32() const {
674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675 }
676
isAISrcB16() const677 bool isAISrcB16() const {
678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679 }
680
isAISrcV2B16() const681 bool isAISrcV2B16() const {
682 return isAISrcB16();
683 }
684
isAISrcF32() const685 bool isAISrcF32() const {
686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687 }
688
isAISrcF16() const689 bool isAISrcF16() const {
690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691 }
692
isAISrcV2F16() const693 bool isAISrcV2F16() const {
694 return isAISrcF16() || isAISrcB32();
695 }
696
isAISrc_64B64() const697 bool isAISrc_64B64() const {
698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699 }
700
isAISrc_64F64() const701 bool isAISrc_64F64() const {
702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703 }
704
isAISrc_128B32() const705 bool isAISrc_128B32() const {
706 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707 }
708
isAISrc_128B16() const709 bool isAISrc_128B16() const {
710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711 }
712
isAISrc_128V2B16() const713 bool isAISrc_128V2B16() const {
714 return isAISrc_128B16();
715 }
716
isAISrc_128F32() const717 bool isAISrc_128F32() const {
718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719 }
720
isAISrc_128F16() const721 bool isAISrc_128F16() const {
722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723 }
724
isAISrc_128V2F16() const725 bool isAISrc_128V2F16() const {
726 return isAISrc_128F16() || isAISrc_128B32();
727 }
728
isVISrc_128F16() const729 bool isVISrc_128F16() const {
730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731 }
732
isVISrc_128V2F16() const733 bool isVISrc_128V2F16() const {
734 return isVISrc_128F16() || isVISrc_128B32();
735 }
736
isAISrc_256B64() const737 bool isAISrc_256B64() const {
738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739 }
740
isAISrc_256F64() const741 bool isAISrc_256F64() const {
742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743 }
744
isAISrc_512B32() const745 bool isAISrc_512B32() const {
746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747 }
748
isAISrc_512B16() const749 bool isAISrc_512B16() const {
750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751 }
752
isAISrc_512V2B16() const753 bool isAISrc_512V2B16() const {
754 return isAISrc_512B16();
755 }
756
isAISrc_512F32() const757 bool isAISrc_512F32() const {
758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759 }
760
isAISrc_512F16() const761 bool isAISrc_512F16() const {
762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763 }
764
isAISrc_512V2F16() const765 bool isAISrc_512V2F16() const {
766 return isAISrc_512F16() || isAISrc_512B32();
767 }
768
isAISrc_1024B32() const769 bool isAISrc_1024B32() const {
770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771 }
772
isAISrc_1024B16() const773 bool isAISrc_1024B16() const {
774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775 }
776
isAISrc_1024V2B16() const777 bool isAISrc_1024V2B16() const {
778 return isAISrc_1024B16();
779 }
780
isAISrc_1024F32() const781 bool isAISrc_1024F32() const {
782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783 }
784
isAISrc_1024F16() const785 bool isAISrc_1024F16() const {
786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787 }
788
isAISrc_1024V2F16() const789 bool isAISrc_1024V2F16() const {
790 return isAISrc_1024F16() || isAISrc_1024B32();
791 }
792
isKImmFP32() const793 bool isKImmFP32() const {
794 return isLiteralImm(MVT::f32);
795 }
796
isKImmFP16() const797 bool isKImmFP16() const {
798 return isLiteralImm(MVT::f16);
799 }
800
isMem() const801 bool isMem() const override {
802 return false;
803 }
804
isExpr() const805 bool isExpr() const {
806 return Kind == Expression;
807 }
808
isSoppBrTarget() const809 bool isSoppBrTarget() const {
810 return isExpr() || isImm();
811 }
812
813 bool isSWaitCnt() const;
814 bool isHwreg() const;
815 bool isSendMsg() const;
816 bool isSwizzle() const;
817 bool isSMRDOffset8() const;
818 bool isSMEMOffset() const;
819 bool isSMRDLiteralOffset() const;
820 bool isDPP8() const;
821 bool isDPPCtrl() const;
822 bool isBLGP() const;
823 bool isCBSZ() const;
824 bool isABID() const;
825 bool isGPRIdxMode() const;
826 bool isS16Imm() const;
827 bool isU16Imm() const;
828 bool isEndpgm() const;
829
getExpressionAsToken() const830 StringRef getExpressionAsToken() const {
831 assert(isExpr());
832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833 return S->getSymbol().getName();
834 }
835
getToken() const836 StringRef getToken() const {
837 assert(isToken());
838
839 if (Kind == Expression)
840 return getExpressionAsToken();
841
842 return StringRef(Tok.Data, Tok.Length);
843 }
844
getImm() const845 int64_t getImm() const {
846 assert(isImm());
847 return Imm.Val;
848 }
849
setImm(int64_t Val)850 void setImm(int64_t Val) {
851 assert(isImm());
852 Imm.Val = Val;
853 }
854
getImmTy() const855 ImmTy getImmTy() const {
856 assert(isImm());
857 return Imm.Type;
858 }
859
getReg() const860 unsigned getReg() const override {
861 assert(isRegKind());
862 return Reg.RegNo;
863 }
864
getStartLoc() const865 SMLoc getStartLoc() const override {
866 return StartLoc;
867 }
868
getEndLoc() const869 SMLoc getEndLoc() const override {
870 return EndLoc;
871 }
872
getLocRange() const873 SMRange getLocRange() const {
874 return SMRange(StartLoc, EndLoc);
875 }
876
getModifiers() const877 Modifiers getModifiers() const {
878 assert(isRegKind() || isImmTy(ImmTyNone));
879 return isRegKind() ? Reg.Mods : Imm.Mods;
880 }
881
setModifiers(Modifiers Mods)882 void setModifiers(Modifiers Mods) {
883 assert(isRegKind() || isImmTy(ImmTyNone));
884 if (isRegKind())
885 Reg.Mods = Mods;
886 else
887 Imm.Mods = Mods;
888 }
889
hasModifiers() const890 bool hasModifiers() const {
891 return getModifiers().hasModifiers();
892 }
893
hasFPModifiers() const894 bool hasFPModifiers() const {
895 return getModifiers().hasFPModifiers();
896 }
897
hasIntModifiers() const898 bool hasIntModifiers() const {
899 return getModifiers().hasIntModifiers();
900 }
901
902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903
904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905
906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907
908 template <unsigned Bitwidth>
909 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910
addKImmFP16Operands(MCInst & Inst,unsigned N) const911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912 addKImmFPOperands<16>(Inst, N);
913 }
914
addKImmFP32Operands(MCInst & Inst,unsigned N) const915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916 addKImmFPOperands<32>(Inst, N);
917 }
918
919 void addRegOperands(MCInst &Inst, unsigned N) const;
920
addBoolRegOperands(MCInst & Inst,unsigned N) const921 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922 addRegOperands(Inst, N);
923 }
924
addRegOrImmOperands(MCInst & Inst,unsigned N) const925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926 if (isRegKind())
927 addRegOperands(Inst, N);
928 else if (isExpr())
929 Inst.addOperand(MCOperand::createExpr(Expr));
930 else
931 addImmOperands(Inst, N);
932 }
933
addRegOrImmWithInputModsOperands(MCInst & Inst,unsigned N) const934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935 Modifiers Mods = getModifiers();
936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937 if (isRegKind()) {
938 addRegOperands(Inst, N);
939 } else {
940 addImmOperands(Inst, N, false);
941 }
942 }
943
addRegOrImmWithFPInputModsOperands(MCInst & Inst,unsigned N) const944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945 assert(!hasIntModifiers());
946 addRegOrImmWithInputModsOperands(Inst, N);
947 }
948
addRegOrImmWithIntInputModsOperands(MCInst & Inst,unsigned N) const949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950 assert(!hasFPModifiers());
951 addRegOrImmWithInputModsOperands(Inst, N);
952 }
953
addRegWithInputModsOperands(MCInst & Inst,unsigned N) const954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955 Modifiers Mods = getModifiers();
956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957 assert(isRegKind());
958 addRegOperands(Inst, N);
959 }
960
addRegWithFPInputModsOperands(MCInst & Inst,unsigned N) const961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962 assert(!hasIntModifiers());
963 addRegWithInputModsOperands(Inst, N);
964 }
965
addRegWithIntInputModsOperands(MCInst & Inst,unsigned N) const966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967 assert(!hasFPModifiers());
968 addRegWithInputModsOperands(Inst, N);
969 }
970
addSoppBrTargetOperands(MCInst & Inst,unsigned N) const971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972 if (isImm())
973 addImmOperands(Inst, N);
974 else {
975 assert(isExpr());
976 Inst.addOperand(MCOperand::createExpr(Expr));
977 }
978 }
979
printImmTy(raw_ostream & OS,ImmTy Type)980 static void printImmTy(raw_ostream& OS, ImmTy Type) {
981 switch (Type) {
982 case ImmTyNone: OS << "None"; break;
983 case ImmTyGDS: OS << "GDS"; break;
984 case ImmTyLDS: OS << "LDS"; break;
985 case ImmTyOffen: OS << "Offen"; break;
986 case ImmTyIdxen: OS << "Idxen"; break;
987 case ImmTyAddr64: OS << "Addr64"; break;
988 case ImmTyOffset: OS << "Offset"; break;
989 case ImmTyInstOffset: OS << "InstOffset"; break;
990 case ImmTyOffset0: OS << "Offset0"; break;
991 case ImmTyOffset1: OS << "Offset1"; break;
992 case ImmTyCPol: OS << "CPol"; break;
993 case ImmTySWZ: OS << "SWZ"; break;
994 case ImmTyTFE: OS << "TFE"; break;
995 case ImmTyD16: OS << "D16"; break;
996 case ImmTyFORMAT: OS << "FORMAT"; break;
997 case ImmTyClampSI: OS << "ClampSI"; break;
998 case ImmTyOModSI: OS << "OModSI"; break;
999 case ImmTyDPP8: OS << "DPP8"; break;
1000 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004 case ImmTyDppFi: OS << "FI"; break;
1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009 case ImmTyDMask: OS << "DMask"; break;
1010 case ImmTyDim: OS << "Dim"; break;
1011 case ImmTyUNorm: OS << "UNorm"; break;
1012 case ImmTyDA: OS << "DA"; break;
1013 case ImmTyR128A16: OS << "R128A16"; break;
1014 case ImmTyA16: OS << "A16"; break;
1015 case ImmTyLWE: OS << "LWE"; break;
1016 case ImmTyOff: OS << "Off"; break;
1017 case ImmTyExpTgt: OS << "ExpTgt"; break;
1018 case ImmTyExpCompr: OS << "ExpCompr"; break;
1019 case ImmTyExpVM: OS << "ExpVM"; break;
1020 case ImmTyHwreg: OS << "Hwreg"; break;
1021 case ImmTySendMsg: OS << "SendMsg"; break;
1022 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024 case ImmTyAttrChan: OS << "AttrChan"; break;
1025 case ImmTyOpSel: OS << "OpSel"; break;
1026 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027 case ImmTyNegLo: OS << "NegLo"; break;
1028 case ImmTyNegHi: OS << "NegHi"; break;
1029 case ImmTySwizzle: OS << "Swizzle"; break;
1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031 case ImmTyHigh: OS << "High"; break;
1032 case ImmTyBLGP: OS << "BLGP"; break;
1033 case ImmTyCBSZ: OS << "CBSZ"; break;
1034 case ImmTyABID: OS << "ABID"; break;
1035 case ImmTyEndpgm: OS << "Endpgm"; break;
1036 }
1037 }
1038
print(raw_ostream & OS) const1039 void print(raw_ostream &OS) const override {
1040 switch (Kind) {
1041 case Register:
1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043 break;
1044 case Immediate:
1045 OS << '<' << getImm();
1046 if (getImmTy() != ImmTyNone) {
1047 OS << " type: "; printImmTy(OS, getImmTy());
1048 }
1049 OS << " mods: " << Imm.Mods << '>';
1050 break;
1051 case Token:
1052 OS << '\'' << getToken() << '\'';
1053 break;
1054 case Expression:
1055 OS << "<expr " << *Expr << '>';
1056 break;
1057 }
1058 }
1059
CreateImm(const AMDGPUAsmParser * AsmParser,int64_t Val,SMLoc Loc,ImmTy Type=ImmTyNone,bool IsFPImm=false)1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061 int64_t Val, SMLoc Loc,
1062 ImmTy Type = ImmTyNone,
1063 bool IsFPImm = false) {
1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065 Op->Imm.Val = Val;
1066 Op->Imm.IsFPImm = IsFPImm;
1067 Op->Imm.Kind = ImmKindTyNone;
1068 Op->Imm.Type = Type;
1069 Op->Imm.Mods = Modifiers();
1070 Op->StartLoc = Loc;
1071 Op->EndLoc = Loc;
1072 return Op;
1073 }
1074
CreateToken(const AMDGPUAsmParser * AsmParser,StringRef Str,SMLoc Loc,bool HasExplicitEncodingSize=true)1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076 StringRef Str, SMLoc Loc,
1077 bool HasExplicitEncodingSize = true) {
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079 Res->Tok.Data = Str.data();
1080 Res->Tok.Length = Str.size();
1081 Res->StartLoc = Loc;
1082 Res->EndLoc = Loc;
1083 return Res;
1084 }
1085
CreateReg(const AMDGPUAsmParser * AsmParser,unsigned RegNo,SMLoc S,SMLoc E)1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087 unsigned RegNo, SMLoc S,
1088 SMLoc E) {
1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090 Op->Reg.RegNo = RegNo;
1091 Op->Reg.Mods = Modifiers();
1092 Op->StartLoc = S;
1093 Op->EndLoc = E;
1094 return Op;
1095 }
1096
CreateExpr(const AMDGPUAsmParser * AsmParser,const class MCExpr * Expr,SMLoc S)1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098 const class MCExpr *Expr, SMLoc S) {
1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100 Op->Expr = Expr;
1101 Op->StartLoc = S;
1102 Op->EndLoc = S;
1103 return Op;
1104 }
1105 };
1106
operator <<(raw_ostream & OS,AMDGPUOperand::Modifiers Mods)1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109 return OS;
1110 }
1111
1112 //===----------------------------------------------------------------------===//
1113 // AsmParser
1114 //===----------------------------------------------------------------------===//
1115
1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118 // .amdgpu_hsa_kernel or at EOF.
1119 class KernelScopeInfo {
1120 int SgprIndexUnusedMin = -1;
1121 int VgprIndexUnusedMin = -1;
1122 MCContext *Ctx = nullptr;
1123
usesSgprAt(int i)1124 void usesSgprAt(int i) {
1125 if (i >= SgprIndexUnusedMin) {
1126 SgprIndexUnusedMin = ++i;
1127 if (Ctx) {
1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130 }
1131 }
1132 }
1133
usesVgprAt(int i)1134 void usesVgprAt(int i) {
1135 if (i >= VgprIndexUnusedMin) {
1136 VgprIndexUnusedMin = ++i;
1137 if (Ctx) {
1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140 }
1141 }
1142 }
1143
1144 public:
1145 KernelScopeInfo() = default;
1146
initialize(MCContext & Context)1147 void initialize(MCContext &Context) {
1148 Ctx = &Context;
1149 usesSgprAt(SgprIndexUnusedMin = -1);
1150 usesVgprAt(VgprIndexUnusedMin = -1);
1151 }
1152
usesRegister(RegisterKind RegKind,unsigned DwordRegIndex,unsigned RegWidth)1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154 switch (RegKind) {
1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156 case IS_AGPR: // fall through
1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158 default: break;
1159 }
1160 }
1161 };
1162
1163 class AMDGPUAsmParser : public MCTargetAsmParser {
1164 MCAsmParser &Parser;
1165
1166 // Number of extra operands parsed after the first optional operand.
1167 // This may be necessary to skip hardcoded mandatory operands.
1168 static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169
1170 unsigned ForcedEncodingSize = 0;
1171 bool ForcedDPP = false;
1172 bool ForcedSDWA = false;
1173 KernelScopeInfo KernelScope;
1174 unsigned CPolSeen;
1175
1176 /// @name Auto-generated Match Functions
1177 /// {
1178
1179 #define GET_ASSEMBLER_HEADER
1180 #include "AMDGPUGenAsmMatcher.inc"
1181
1182 /// }
1183
1184 private:
1185 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186 bool OutOfRangeError(SMRange Range);
1187 /// Calculate VGPR/SGPR blocks required for given target, reserved
1188 /// registers, and user-specified NextFreeXGPR values.
1189 ///
1190 /// \param Features [in] Target features, used for bug corrections.
1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195 /// descriptor field, if valid.
1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200 /// \param VGPRBlocks [out] Result VGPR block count.
1201 /// \param SGPRBlocks [out] Result SGPR block count.
1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203 bool FlatScrUsed, bool XNACKUsed,
1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205 SMRange VGPRRange, unsigned NextFreeSGPR,
1206 SMRange SGPRRange, unsigned &VGPRBlocks,
1207 unsigned &SGPRBlocks);
1208 bool ParseDirectiveAMDGCNTarget();
1209 bool ParseDirectiveAMDHSAKernel();
1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211 bool ParseDirectiveHSACodeObjectVersion();
1212 bool ParseDirectiveHSACodeObjectISA();
1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214 bool ParseDirectiveAMDKernelCodeT();
1215 // TODO: Possibly make subtargetHasRegister const.
1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217 bool ParseDirectiveAMDGPUHsaKernel();
1218
1219 bool ParseDirectiveISAVersion();
1220 bool ParseDirectiveHSAMetadata();
1221 bool ParseDirectivePALMetadataBegin();
1222 bool ParseDirectivePALMetadata();
1223 bool ParseDirectiveAMDGPULDS();
1224
1225 /// Common code to parse out a block of text (typically YAML) between start and
1226 /// end directives.
1227 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228 const char *AssemblerDirectiveEnd,
1229 std::string &CollectString);
1230
1231 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234 unsigned &RegNum, unsigned &RegWidth,
1235 bool RestoreOnFailure = false);
1236 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237 unsigned &RegNum, unsigned &RegWidth,
1238 SmallVectorImpl<AsmToken> &Tokens);
1239 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240 unsigned &RegWidth,
1241 SmallVectorImpl<AsmToken> &Tokens);
1242 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243 unsigned &RegWidth,
1244 SmallVectorImpl<AsmToken> &Tokens);
1245 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247 bool ParseRegRange(unsigned& Num, unsigned& Width);
1248 unsigned getRegularReg(RegisterKind RegKind,
1249 unsigned RegNum,
1250 unsigned RegWidth,
1251 SMLoc Loc);
1252
1253 bool isRegister();
1254 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256 void initializeGprCountSymbol(RegisterKind RegKind);
1257 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258 unsigned RegWidth);
1259 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260 bool IsAtomic, bool IsLds = false);
1261 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262 bool IsGdsHardcoded);
1263
1264 public:
1265 enum AMDGPUMatchResultTy {
1266 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267 };
1268 enum OperandMode {
1269 OperandMode_Default,
1270 OperandMode_NSA,
1271 };
1272
1273 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274
AMDGPUAsmParser(const MCSubtargetInfo & STI,MCAsmParser & _Parser,const MCInstrInfo & MII,const MCTargetOptions & Options)1275 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276 const MCInstrInfo &MII,
1277 const MCTargetOptions &Options)
1278 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279 MCAsmParserExtension::Initialize(Parser);
1280
1281 if (getFeatureBits().none()) {
1282 // Set default features.
1283 copySTI().ToggleFeature("southern-islands");
1284 }
1285
1286 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287
1288 {
1289 // TODO: make those pre-defined variables read-only.
1290 // Currently there is none suitable machinery in the core llvm-mc for this.
1291 // MCSymbol::isRedefinable is intended for another purpose, and
1292 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294 MCContext &Ctx = getContext();
1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296 MCSymbol *Sym =
1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303 } else {
1304 MCSymbol *Sym =
1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311 }
1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313 initializeGprCountSymbol(IS_VGPR);
1314 initializeGprCountSymbol(IS_SGPR);
1315 } else
1316 KernelScope.initialize(getContext());
1317 }
1318 }
1319
hasMIMG_R128() const1320 bool hasMIMG_R128() const {
1321 return AMDGPU::hasMIMG_R128(getSTI());
1322 }
1323
hasPackedD16() const1324 bool hasPackedD16() const {
1325 return AMDGPU::hasPackedD16(getSTI());
1326 }
1327
hasGFX10A16() const1328 bool hasGFX10A16() const {
1329 return AMDGPU::hasGFX10A16(getSTI());
1330 }
1331
hasG16() const1332 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333
isSI() const1334 bool isSI() const {
1335 return AMDGPU::isSI(getSTI());
1336 }
1337
isCI() const1338 bool isCI() const {
1339 return AMDGPU::isCI(getSTI());
1340 }
1341
isVI() const1342 bool isVI() const {
1343 return AMDGPU::isVI(getSTI());
1344 }
1345
isGFX9() const1346 bool isGFX9() const {
1347 return AMDGPU::isGFX9(getSTI());
1348 }
1349
isGFX90A() const1350 bool isGFX90A() const {
1351 return AMDGPU::isGFX90A(getSTI());
1352 }
1353
isGFX9Plus() const1354 bool isGFX9Plus() const {
1355 return AMDGPU::isGFX9Plus(getSTI());
1356 }
1357
isGFX10() const1358 bool isGFX10() const {
1359 return AMDGPU::isGFX10(getSTI());
1360 }
1361
isGFX10Plus() const1362 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363
isGFX10_BEncoding() const1364 bool isGFX10_BEncoding() const {
1365 return AMDGPU::isGFX10_BEncoding(getSTI());
1366 }
1367
hasInv2PiInlineImm() const1368 bool hasInv2PiInlineImm() const {
1369 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370 }
1371
hasFlatOffsets() const1372 bool hasFlatOffsets() const {
1373 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374 }
1375
hasArchitectedFlatScratch() const1376 bool hasArchitectedFlatScratch() const {
1377 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378 }
1379
hasSGPR102_SGPR103() const1380 bool hasSGPR102_SGPR103() const {
1381 return !isVI() && !isGFX9();
1382 }
1383
hasSGPR104_SGPR105() const1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385
hasIntClamp() const1386 bool hasIntClamp() const {
1387 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388 }
1389
getTargetStreamer()1390 AMDGPUTargetStreamer &getTargetStreamer() {
1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392 return static_cast<AMDGPUTargetStreamer &>(TS);
1393 }
1394
getMRI() const1395 const MCRegisterInfo *getMRI() const {
1396 // We need this const_cast because for some reason getContext() is not const
1397 // in MCAsmParser.
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399 }
1400
getMII() const1401 const MCInstrInfo *getMII() const {
1402 return &MII;
1403 }
1404
getFeatureBits() const1405 const FeatureBitset &getFeatureBits() const {
1406 return getSTI().getFeatureBits();
1407 }
1408
setForcedEncodingSize(unsigned Size)1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
setForcedDPP(bool ForceDPP_)1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
setForcedSDWA(bool ForceSDWA_)1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412
getForcedEncodingSize() const1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
isForcedVOP3() const1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
isForcedDPP() const1415 bool isForcedDPP() const { return ForcedDPP; }
isForcedSDWA() const1416 bool isForcedSDWA() const { return ForcedSDWA; }
1417 ArrayRef<unsigned> getMatchedVariants() const;
1418 StringRef getMatchedVariantName() const;
1419
1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422 bool RestoreOnFailure);
1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425 SMLoc &EndLoc) override;
1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428 unsigned Kind) override;
1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430 OperandVector &Operands, MCStreamer &Out,
1431 uint64_t &ErrorInfo,
1432 bool MatchingInlineAsm) override;
1433 bool ParseDirective(AsmToken DirectiveID) override;
1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435 OperandMode Mode = OperandMode_Default);
1436 StringRef parseMnemonicSuffix(StringRef Name);
1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438 SMLoc NameLoc, OperandVector &Operands) override;
1439 //bool ProcessInstruction(MCInst &Inst);
1440
1441 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442
1443 OperandMatchResultTy
1444 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446 bool (*ConvertResult)(int64_t &) = nullptr);
1447
1448 OperandMatchResultTy
1449 parseOperandArrayWithPrefix(const char *Prefix,
1450 OperandVector &Operands,
1451 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452 bool (*ConvertResult)(int64_t&) = nullptr);
1453
1454 OperandMatchResultTy
1455 parseNamedBit(StringRef Name, OperandVector &Operands,
1456 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457 OperandMatchResultTy parseCPol(OperandVector &Operands);
1458 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459 StringRef &Value,
1460 SMLoc &StringLoc);
1461
1462 bool isModifier();
1463 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467 bool parseSP3NegModifier();
1468 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469 OperandMatchResultTy parseReg(OperandVector &Operands);
1470 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476 OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477 OperandMatchResultTy parseUfmt(int64_t &Format);
1478 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480 OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482 OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485
1486 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
cvtDS(MCInst & Inst,const OperandVector & Operands)1487 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
cvtDSGds(MCInst & Inst,const OperandVector & Operands)1488 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490
1491 bool parseCnt(int64_t &IntVal);
1492 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494
1495 private:
1496 struct OperandInfoTy {
1497 SMLoc Loc;
1498 int64_t Id;
1499 bool IsSymbolic = false;
1500 bool IsDefined = false;
1501
OperandInfoTy__anon649c30870111::AMDGPUAsmParser::OperandInfoTy1502 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503 };
1504
1505 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506 bool validateSendMsg(const OperandInfoTy &Msg,
1507 const OperandInfoTy &Op,
1508 const OperandInfoTy &Stream);
1509
1510 bool parseHwregBody(OperandInfoTy &HwReg,
1511 OperandInfoTy &Offset,
1512 OperandInfoTy &Width);
1513 bool validateHwreg(const OperandInfoTy &HwReg,
1514 const OperandInfoTy &Offset,
1515 const OperandInfoTy &Width);
1516
1517 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519
1520 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521 const OperandVector &Operands) const;
1522 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524 SMLoc getLitLoc(const OperandVector &Operands) const;
1525 SMLoc getConstLoc(const OperandVector &Operands) const;
1526
1527 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530 bool validateSOPLiteral(const MCInst &Inst) const;
1531 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533 bool validateIntClampSupported(const MCInst &Inst);
1534 bool validateMIMGAtomicDMask(const MCInst &Inst);
1535 bool validateMIMGGatherDMask(const MCInst &Inst);
1536 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537 bool validateMIMGDataSize(const MCInst &Inst);
1538 bool validateMIMGAddrSize(const MCInst &Inst);
1539 bool validateMIMGD16(const MCInst &Inst);
1540 bool validateMIMGDim(const MCInst &Inst);
1541 bool validateMIMGMSAA(const MCInst &Inst);
1542 bool validateOpSel(const MCInst &Inst);
1543 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544 bool validateVccOperand(unsigned Reg) const;
1545 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547 bool validateAGPRLdSt(const MCInst &Inst) const;
1548 bool validateVGPRAlign(const MCInst &Inst) const;
1549 bool validateDivScale(const MCInst &Inst);
1550 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1551 const SMLoc &IDLoc);
1552 Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1553 unsigned getConstantBusLimit(unsigned Opcode) const;
1554 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1555 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1556 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1557
1558 bool isSupportedMnemo(StringRef Mnemo,
1559 const FeatureBitset &FBS);
1560 bool isSupportedMnemo(StringRef Mnemo,
1561 const FeatureBitset &FBS,
1562 ArrayRef<unsigned> Variants);
1563 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1564
1565 bool isId(const StringRef Id) const;
1566 bool isId(const AsmToken &Token, const StringRef Id) const;
1567 bool isToken(const AsmToken::TokenKind Kind) const;
1568 bool trySkipId(const StringRef Id);
1569 bool trySkipId(const StringRef Pref, const StringRef Id);
1570 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1571 bool trySkipToken(const AsmToken::TokenKind Kind);
1572 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1573 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1574 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1575
1576 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1577 AsmToken::TokenKind getTokenKind() const;
1578 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1579 bool parseExpr(OperandVector &Operands);
1580 StringRef getTokenStr() const;
1581 AsmToken peekToken();
1582 AsmToken getToken() const;
1583 SMLoc getLoc() const;
1584 void lex();
1585
1586 public:
1587 void onBeginOfFile() override;
1588
1589 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1590 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1591
1592 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1593 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1594 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1595 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1596 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1597 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1598
1599 bool parseSwizzleOperand(int64_t &Op,
1600 const unsigned MinVal,
1601 const unsigned MaxVal,
1602 const StringRef ErrMsg,
1603 SMLoc &Loc);
1604 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1605 const unsigned MinVal,
1606 const unsigned MaxVal,
1607 const StringRef ErrMsg);
1608 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1609 bool parseSwizzleOffset(int64_t &Imm);
1610 bool parseSwizzleMacro(int64_t &Imm);
1611 bool parseSwizzleQuadPerm(int64_t &Imm);
1612 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1613 bool parseSwizzleBroadcast(int64_t &Imm);
1614 bool parseSwizzleSwap(int64_t &Imm);
1615 bool parseSwizzleReverse(int64_t &Imm);
1616
1617 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1618 int64_t parseGPRIdxMacro();
1619
1620 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1621 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1622 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1623 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1624
1625 AMDGPUOperand::Ptr defaultCPol() const;
1626
1627 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1628 AMDGPUOperand::Ptr defaultSMEMOffset() const;
1629 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1630 AMDGPUOperand::Ptr defaultFlatOffset() const;
1631
1632 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1633
1634 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1635 OptionalImmIndexMap &OptionalIdx);
1636 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1637 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1638 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1639 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1640 OptionalImmIndexMap &OptionalIdx);
1641
1642 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1643
1644 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1645 bool IsAtomic = false);
1646 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1647 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1648
1649 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1650
1651 bool parseDimId(unsigned &Encoding);
1652 OperandMatchResultTy parseDim(OperandVector &Operands);
1653 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1654 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1655 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1656 int64_t parseDPPCtrlSel(StringRef Ctrl);
1657 int64_t parseDPPCtrlPerm();
1658 AMDGPUOperand::Ptr defaultRowMask() const;
1659 AMDGPUOperand::Ptr defaultBankMask() const;
1660 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1661 AMDGPUOperand::Ptr defaultFI() const;
1662 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1663 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1664
1665 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1666 AMDGPUOperand::ImmTy Type);
1667 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1668 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1669 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1670 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1671 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1672 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1673 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1674 uint64_t BasicInstType,
1675 bool SkipDstVcc = false,
1676 bool SkipSrcVcc = false);
1677
1678 AMDGPUOperand::Ptr defaultBLGP() const;
1679 AMDGPUOperand::Ptr defaultCBSZ() const;
1680 AMDGPUOperand::Ptr defaultABID() const;
1681
1682 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1683 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1684 };
1685
1686 struct OptionalOperand {
1687 const char *Name;
1688 AMDGPUOperand::ImmTy Type;
1689 bool IsBit;
1690 bool (*ConvertResult)(int64_t&);
1691 };
1692
1693 } // end anonymous namespace
1694
1695 // May be called with integer type with equivalent bitwidth.
1696 static const fltSemantics *getFltSemantics(unsigned Size) {
1697 switch (Size) {
1698 case 4:
1699 return &APFloat::IEEEsingle();
1700 case 8:
1701 return &APFloat::IEEEdouble();
1702 case 2:
1703 return &APFloat::IEEEhalf();
1704 default:
1705 llvm_unreachable("unsupported fp type");
1706 }
1707 }
1708
1709 static const fltSemantics *getFltSemantics(MVT VT) {
1710 return getFltSemantics(VT.getSizeInBits() / 8);
1711 }
1712
1713 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1714 switch (OperandType) {
1715 case AMDGPU::OPERAND_REG_IMM_INT32:
1716 case AMDGPU::OPERAND_REG_IMM_FP32:
1717 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1718 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1719 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1720 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1721 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1722 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1723 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1724 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1725 return &APFloat::IEEEsingle();
1726 case AMDGPU::OPERAND_REG_IMM_INT64:
1727 case AMDGPU::OPERAND_REG_IMM_FP64:
1728 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1729 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1730 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1731 return &APFloat::IEEEdouble();
1732 case AMDGPU::OPERAND_REG_IMM_INT16:
1733 case AMDGPU::OPERAND_REG_IMM_FP16:
1734 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1735 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1736 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1737 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1738 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1739 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1740 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1741 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1742 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1743 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1744 return &APFloat::IEEEhalf();
1745 default:
1746 llvm_unreachable("unsupported fp type");
1747 }
1748 }
1749
1750 //===----------------------------------------------------------------------===//
1751 // Operand
1752 //===----------------------------------------------------------------------===//
1753
1754 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1755 bool Lost;
1756
1757 // Convert literal to single precision
1758 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1759 APFloat::rmNearestTiesToEven,
1760 &Lost);
1761 // We allow precision loss but not overflow or underflow
1762 if (Status != APFloat::opOK &&
1763 Lost &&
1764 ((Status & APFloat::opOverflow) != 0 ||
1765 (Status & APFloat::opUnderflow) != 0)) {
1766 return false;
1767 }
1768
1769 return true;
1770 }
1771
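// A value can be safely truncated to Size bits if it is representable either
// as an unsigned or as a signed Size-bit integer. For example, with Size == 16
// both 0xFFFF and -1 are safe, while 0x12345 is not.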
1772 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1773 return isUIntN(Size, Val) || isIntN(Size, Val);
1774 }
1775
1776 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1777 if (VT.getScalarType() == MVT::i16) {
1778 // FP immediate values are broken.
1779 return isInlinableIntLiteral(Val);
1780 }
1781
1782 // f16/v2f16 operands work correctly for all values.
1783 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1784 }
1785
1786 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1787
1788 // This is a hack to enable named inline values like
1789 // shared_base with both 32-bit and 64-bit operands.
1790 // Note that these values are defined as
1791 // 32-bit operands only.
1792 if (isInlineValue()) {
1793 return true;
1794 }
1795
1796 if (!isImmTy(ImmTyNone)) {
1797 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1798 return false;
1799 }
1800 // TODO: We should avoid using host float here. It would be better to
1801 // check the float bit values which is what a few other places do.
1802 // We've had bot failures before due to weird NaN support on mips hosts.
1803
1804 APInt Literal(64, Imm.Val);
1805
1806 if (Imm.IsFPImm) { // We got fp literal token
1807 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1808 return AMDGPU::isInlinableLiteral64(Imm.Val,
1809 AsmParser->hasInv2PiInlineImm());
1810 }
1811
1812 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1813 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1814 return false;
1815
1816 if (type.getScalarSizeInBits() == 16) {
1817 return isInlineableLiteralOp16(
1818 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1819 type, AsmParser->hasInv2PiInlineImm());
1820 }
1821
1822 // Check if single precision literal is inlinable
1823 return AMDGPU::isInlinableLiteral32(
1824 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1825 AsmParser->hasInv2PiInlineImm());
1826 }
1827
1828 // We got int literal token.
1829 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1830 return AMDGPU::isInlinableLiteral64(Imm.Val,
1831 AsmParser->hasInv2PiInlineImm());
1832 }
1833
1834 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1835 return false;
1836 }
1837
1838 if (type.getScalarSizeInBits() == 16) {
1839 return isInlineableLiteralOp16(
1840 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1841 type, AsmParser->hasInv2PiInlineImm());
1842 }
1843
1844 return AMDGPU::isInlinableLiteral32(
1845 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1846 AsmParser->hasInv2PiInlineImm());
1847 }
1848
1849 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1850 // Check that this immediate can be added as literal
1851 if (!isImmTy(ImmTyNone)) {
1852 return false;
1853 }
1854
1855 if (!Imm.IsFPImm) {
1856 // We got int literal token.
1857
1858 if (type == MVT::f64 && hasFPModifiers()) {
1859 // FP modifiers cannot be applied to int literals while preserving the same
1860 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1861 // ambiguity, disable these cases.
1862 return false;
1863 }
1864
1865 unsigned Size = type.getSizeInBits();
1866 if (Size == 64)
1867 Size = 32;
1868
1869 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1870 // types.
1871 return isSafeTruncation(Imm.Val, Size);
1872 }
1873
1874 // We got fp literal token
1875 if (type == MVT::f64) { // Expected 64-bit fp operand
1876 // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1877 return true;
1878 }
1879
1880 if (type == MVT::i64) { // Expected 64-bit int operand
1881 // We don't allow fp literals in 64-bit integer instructions. It is
1882 // unclear how we should encode them.
1883 return false;
1884 }
1885
1886 // We allow fp literals with f16x2 operands assuming that the specified
1887 // literal goes into the lower half and the upper half is zero. We also
1888 // require that the literal may be losslessly converted to f16.
1889 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1890 (type == MVT::v2i16)? MVT::i16 :
1891 (type == MVT::v2f32)? MVT::f32 : type;
1892
1893 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1894 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1895 }
1896
1897 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1898 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1899 }
1900
1901 bool AMDGPUOperand::isVRegWithInputMods() const {
1902 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1903 // GFX90A allows DPP on 64-bit operands.
1904 (isRegClass(AMDGPU::VReg_64RegClassID) &&
1905 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1906 }
1907
1908 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1909 if (AsmParser->isVI())
1910 return isVReg32();
1911 else if (AsmParser->isGFX9Plus())
1912 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1913 else
1914 return false;
1915 }
1916
1917 bool AMDGPUOperand::isSDWAFP16Operand() const {
1918 return isSDWAOperand(MVT::f16);
1919 }
1920
1921 bool AMDGPUOperand::isSDWAFP32Operand() const {
1922 return isSDWAOperand(MVT::f32);
1923 }
1924
1925 bool AMDGPUOperand::isSDWAInt16Operand() const {
1926 return isSDWAOperand(MVT::i16);
1927 }
1928
1929 bool AMDGPUOperand::isSDWAInt32Operand() const {
1930 return isSDWAOperand(MVT::i32);
1931 }
1932
1933 bool AMDGPUOperand::isBoolReg() const {
1934 auto FB = AsmParser->getFeatureBits();
1935 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1936 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1937 }
1938
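// Apply parsed 'abs'/'neg' modifiers directly to the bit pattern of an FP
// literal: 'abs' clears the sign bit and 'neg' toggles it. For a 32-bit
// operand, e.g., the sign bit is bit 31 (FpSignMask == 0x80000000).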
1939 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1940 {
1941 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1942 assert(Size == 2 || Size == 4 || Size == 8);
1943
1944 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1945
1946 if (Imm.Mods.Abs) {
1947 Val &= ~FpSignMask;
1948 }
1949 if (Imm.Mods.Neg) {
1950 Val ^= FpSignMask;
1951 }
1952
1953 return Val;
1954 }
1955
1956 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1957 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1958 Inst.getNumOperands())) {
1959 addLiteralImmOperand(Inst, Imm.Val,
1960 ApplyModifiers &
1961 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1962 } else {
1963 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1964 Inst.addOperand(MCOperand::createImm(Imm.Val));
1965 setImmKindNone();
1966 }
1967 }
1968
1969 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1970 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1971 auto OpNum = Inst.getNumOperands();
1972 // Check that this operand accepts literals
1973 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1974
1975 if (ApplyModifiers) {
1976 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1977 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1978 Val = applyInputFPModifiers(Val, Size);
1979 }
1980
1981 APInt Literal(64, Val);
1982 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1983
1984 if (Imm.IsFPImm) { // We got fp literal token
1985 switch (OpTy) {
1986 case AMDGPU::OPERAND_REG_IMM_INT64:
1987 case AMDGPU::OPERAND_REG_IMM_FP64:
1988 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1989 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1990 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1991 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1992 AsmParser->hasInv2PiInlineImm())) {
1993 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1994 setImmKindConst();
1995 return;
1996 }
1997
1998 // Non-inlineable
1999 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2000 // For fp operands we check if low 32 bits are zeros
2001 if (Literal.getLoBits(32) != 0) {
2002 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2003 "Can't encode literal as exact 64-bit floating-point operand. "
2004 "Low 32-bits will be set to zero");
2005 }
2006
2007 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2008 setImmKindLiteral();
2009 return;
2010 }
2011
2012 // We don't allow fp literals in 64-bit integer instructions. It is
2013 // unclear how we should encode them. This case should be checked earlier
2014 // in predicate methods (isLiteralImm())
2015 llvm_unreachable("fp literal in 64-bit integer instruction.");
2016
2017 case AMDGPU::OPERAND_REG_IMM_INT32:
2018 case AMDGPU::OPERAND_REG_IMM_FP32:
2019 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2020 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2021 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2022 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2023 case AMDGPU::OPERAND_REG_IMM_INT16:
2024 case AMDGPU::OPERAND_REG_IMM_FP16:
2025 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2026 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2027 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2028 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2029 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2030 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2031 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2032 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2033 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2034 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2035 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2036 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2037 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2038 case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2039 bool lost;
2040 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2041 // Convert literal to single precision
2042 FPLiteral.convert(*getOpFltSemantics(OpTy),
2043 APFloat::rmNearestTiesToEven, &lost);
2044 // We allow precision loss but not overflow or underflow. This should be
2045 // checked earlier in isLiteralImm()
2046
2047 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2048 Inst.addOperand(MCOperand::createImm(ImmVal));
2049 setImmKindLiteral();
2050 return;
2051 }
2052 default:
2053 llvm_unreachable("invalid operand size");
2054 }
2055
2056 return;
2057 }
2058
2059 // We got int literal token.
2060 // Only sign extend inline immediates.
2061 switch (OpTy) {
2062 case AMDGPU::OPERAND_REG_IMM_INT32:
2063 case AMDGPU::OPERAND_REG_IMM_FP32:
2064 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2065 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2066 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2067 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2068 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2069 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2070 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2071 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2072 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2073 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2074 if (isSafeTruncation(Val, 32) &&
2075 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2076 AsmParser->hasInv2PiInlineImm())) {
2077 Inst.addOperand(MCOperand::createImm(Val));
2078 setImmKindConst();
2079 return;
2080 }
2081
2082 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2083 setImmKindLiteral();
2084 return;
2085
2086 case AMDGPU::OPERAND_REG_IMM_INT64:
2087 case AMDGPU::OPERAND_REG_IMM_FP64:
2088 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2089 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2090 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2091 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2092 Inst.addOperand(MCOperand::createImm(Val));
2093 setImmKindConst();
2094 return;
2095 }
2096
2097 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2098 setImmKindLiteral();
2099 return;
2100
2101 case AMDGPU::OPERAND_REG_IMM_INT16:
2102 case AMDGPU::OPERAND_REG_IMM_FP16:
2103 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2104 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2105 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2106 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2107 if (isSafeTruncation(Val, 16) &&
2108 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2109 AsmParser->hasInv2PiInlineImm())) {
2110 Inst.addOperand(MCOperand::createImm(Val));
2111 setImmKindConst();
2112 return;
2113 }
2114
2115 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2116 setImmKindLiteral();
2117 return;
2118
2119 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2120 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2121 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2122 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2123 assert(isSafeTruncation(Val, 16));
2124 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2125 AsmParser->hasInv2PiInlineImm()));
2126
2127 Inst.addOperand(MCOperand::createImm(Val));
2128 return;
2129 }
2130 default:
2131 llvm_unreachable("invalid operand size");
2132 }
2133 }
2134
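// Encode a literal for a KImm operand of the given bit width. Integer tokens
// are truncated to Bitwidth bits; FP tokens are converted to the matching IEEE
// format, so e.g. a 16-bit KImm receives an f16 bit pattern.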
2135 template <unsigned Bitwidth>
2136 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2137 APInt Literal(64, Imm.Val);
2138 setImmKindNone();
2139
2140 if (!Imm.IsFPImm) {
2141 // We got int literal token.
2142 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2143 return;
2144 }
2145
2146 bool Lost;
2147 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2148 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2149 APFloat::rmNearestTiesToEven, &Lost);
2150 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2151 }
2152
2153 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2154 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2155 }
2156
2157 static bool isInlineValue(unsigned Reg) {
2158 switch (Reg) {
2159 case AMDGPU::SRC_SHARED_BASE:
2160 case AMDGPU::SRC_SHARED_LIMIT:
2161 case AMDGPU::SRC_PRIVATE_BASE:
2162 case AMDGPU::SRC_PRIVATE_LIMIT:
2163 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2164 return true;
2165 case AMDGPU::SRC_VCCZ:
2166 case AMDGPU::SRC_EXECZ:
2167 case AMDGPU::SRC_SCC:
2168 return true;
2169 case AMDGPU::SGPR_NULL:
2170 return true;
2171 default:
2172 return false;
2173 }
2174 }
2175
2176 bool AMDGPUOperand::isInlineValue() const {
2177 return isRegKind() && ::isInlineValue(getReg());
2178 }
2179
2180 //===----------------------------------------------------------------------===//
2181 // AsmParser
2182 //===----------------------------------------------------------------------===//
2183
2184 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2185 if (Is == IS_VGPR) {
2186 switch (RegWidth) {
2187 default: return -1;
2188 case 1: return AMDGPU::VGPR_32RegClassID;
2189 case 2: return AMDGPU::VReg_64RegClassID;
2190 case 3: return AMDGPU::VReg_96RegClassID;
2191 case 4: return AMDGPU::VReg_128RegClassID;
2192 case 5: return AMDGPU::VReg_160RegClassID;
2193 case 6: return AMDGPU::VReg_192RegClassID;
2194 case 8: return AMDGPU::VReg_256RegClassID;
2195 case 16: return AMDGPU::VReg_512RegClassID;
2196 case 32: return AMDGPU::VReg_1024RegClassID;
2197 }
2198 } else if (Is == IS_TTMP) {
2199 switch (RegWidth) {
2200 default: return -1;
2201 case 1: return AMDGPU::TTMP_32RegClassID;
2202 case 2: return AMDGPU::TTMP_64RegClassID;
2203 case 4: return AMDGPU::TTMP_128RegClassID;
2204 case 8: return AMDGPU::TTMP_256RegClassID;
2205 case 16: return AMDGPU::TTMP_512RegClassID;
2206 }
2207 } else if (Is == IS_SGPR) {
2208 switch (RegWidth) {
2209 default: return -1;
2210 case 1: return AMDGPU::SGPR_32RegClassID;
2211 case 2: return AMDGPU::SGPR_64RegClassID;
2212 case 3: return AMDGPU::SGPR_96RegClassID;
2213 case 4: return AMDGPU::SGPR_128RegClassID;
2214 case 5: return AMDGPU::SGPR_160RegClassID;
2215 case 6: return AMDGPU::SGPR_192RegClassID;
2216 case 8: return AMDGPU::SGPR_256RegClassID;
2217 case 16: return AMDGPU::SGPR_512RegClassID;
2218 }
2219 } else if (Is == IS_AGPR) {
2220 switch (RegWidth) {
2221 default: return -1;
2222 case 1: return AMDGPU::AGPR_32RegClassID;
2223 case 2: return AMDGPU::AReg_64RegClassID;
2224 case 3: return AMDGPU::AReg_96RegClassID;
2225 case 4: return AMDGPU::AReg_128RegClassID;
2226 case 5: return AMDGPU::AReg_160RegClassID;
2227 case 6: return AMDGPU::AReg_192RegClassID;
2228 case 8: return AMDGPU::AReg_256RegClassID;
2229 case 16: return AMDGPU::AReg_512RegClassID;
2230 case 32: return AMDGPU::AReg_1024RegClassID;
2231 }
2232 }
2233 return -1;
2234 }
2235
2236 static unsigned getSpecialRegForName(StringRef RegName) {
2237 return StringSwitch<unsigned>(RegName)
2238 .Case("exec", AMDGPU::EXEC)
2239 .Case("vcc", AMDGPU::VCC)
2240 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2241 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2242 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2243 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2244 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2245 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2246 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2247 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2248 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2249 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2250 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2251 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2252 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2253 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2254 .Case("m0", AMDGPU::M0)
2255 .Case("vccz", AMDGPU::SRC_VCCZ)
2256 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2257 .Case("execz", AMDGPU::SRC_EXECZ)
2258 .Case("src_execz", AMDGPU::SRC_EXECZ)
2259 .Case("scc", AMDGPU::SRC_SCC)
2260 .Case("src_scc", AMDGPU::SRC_SCC)
2261 .Case("tba", AMDGPU::TBA)
2262 .Case("tma", AMDGPU::TMA)
2263 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2264 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2265 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2266 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2267 .Case("vcc_lo", AMDGPU::VCC_LO)
2268 .Case("vcc_hi", AMDGPU::VCC_HI)
2269 .Case("exec_lo", AMDGPU::EXEC_LO)
2270 .Case("exec_hi", AMDGPU::EXEC_HI)
2271 .Case("tma_lo", AMDGPU::TMA_LO)
2272 .Case("tma_hi", AMDGPU::TMA_HI)
2273 .Case("tba_lo", AMDGPU::TBA_LO)
2274 .Case("tba_hi", AMDGPU::TBA_HI)
2275 .Case("pc", AMDGPU::PC_REG)
2276 .Case("null", AMDGPU::SGPR_NULL)
2277 .Default(AMDGPU::NoRegister);
2278 }
2279
2280 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2281 SMLoc &EndLoc, bool RestoreOnFailure) {
2282 auto R = parseRegister();
2283 if (!R) return true;
2284 assert(R->isReg());
2285 RegNo = R->getReg();
2286 StartLoc = R->getStartLoc();
2287 EndLoc = R->getEndLoc();
2288 return false;
2289 }
2290
2291 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2292 SMLoc &EndLoc) {
2293 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2294 }
2295
2296 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2297 SMLoc &StartLoc,
2298 SMLoc &EndLoc) {
2299 bool Result =
2300 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2301 bool PendingErrors = getParser().hasPendingError();
2302 getParser().clearPendingErrors();
2303 if (PendingErrors)
2304 return MatchOperand_ParseFail;
2305 if (Result)
2306 return MatchOperand_NoMatch;
2307 return MatchOperand_Success;
2308 }
2309
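// Fold the next register of a bracketed list (e.g. [s0,s1,s2,s3]) into the
// register accumulated so far. Special register pairs such as exec_lo/exec_hi
// combine into the full register (EXEC); regular registers must have
// consecutive indices, each one growing RegWidth by 1.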
2310 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2311 RegisterKind RegKind, unsigned Reg1,
2312 SMLoc Loc) {
2313 switch (RegKind) {
2314 case IS_SPECIAL:
2315 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2316 Reg = AMDGPU::EXEC;
2317 RegWidth = 2;
2318 return true;
2319 }
2320 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2321 Reg = AMDGPU::FLAT_SCR;
2322 RegWidth = 2;
2323 return true;
2324 }
2325 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2326 Reg = AMDGPU::XNACK_MASK;
2327 RegWidth = 2;
2328 return true;
2329 }
2330 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2331 Reg = AMDGPU::VCC;
2332 RegWidth = 2;
2333 return true;
2334 }
2335 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2336 Reg = AMDGPU::TBA;
2337 RegWidth = 2;
2338 return true;
2339 }
2340 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2341 Reg = AMDGPU::TMA;
2342 RegWidth = 2;
2343 return true;
2344 }
2345 Error(Loc, "register does not fit in the list");
2346 return false;
2347 case IS_VGPR:
2348 case IS_SGPR:
2349 case IS_AGPR:
2350 case IS_TTMP:
2351 if (Reg1 != Reg + RegWidth) {
2352 Error(Loc, "registers in a list must have consecutive indices");
2353 return false;
2354 }
2355 RegWidth++;
2356 return true;
2357 default:
2358 llvm_unreachable("unexpected register kind");
2359 }
2360 }
2361
2362 struct RegInfo {
2363 StringLiteral Name;
2364 RegisterKind Kind;
2365 };
2366
2367 static constexpr RegInfo RegularRegisters[] = {
2368 {{"v"}, IS_VGPR},
2369 {{"s"}, IS_SGPR},
2370 {{"ttmp"}, IS_TTMP},
2371 {{"acc"}, IS_AGPR},
2372 {{"a"}, IS_AGPR},
2373 };
2374
2375 static bool isRegularReg(RegisterKind Kind) {
2376 return Kind == IS_VGPR ||
2377 Kind == IS_SGPR ||
2378 Kind == IS_TTMP ||
2379 Kind == IS_AGPR;
2380 }
2381
2382 static const RegInfo* getRegularRegInfo(StringRef Str) {
2383 for (const RegInfo &Reg : RegularRegisters)
2384 if (Str.startswith(Reg.Name))
2385 return &Reg;
2386 return nullptr;
2387 }
2388
2389 static bool getRegNum(StringRef Str, unsigned& Num) {
2390 return !Str.getAsInteger(10, Num);
2391 }
2392
2393 bool
2394 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2395 const AsmToken &NextToken) const {
2396
2397 // A list of consecutive registers: [s0,s1,s2,s3]
2398 if (Token.is(AsmToken::LBrac))
2399 return true;
2400
2401 if (!Token.is(AsmToken::Identifier))
2402 return false;
2403
2404 // A single register like s0 or a range of registers like s[0:1]
2405
2406 StringRef Str = Token.getString();
2407 const RegInfo *Reg = getRegularRegInfo(Str);
2408 if (Reg) {
2409 StringRef RegName = Reg->Name;
2410 StringRef RegSuffix = Str.substr(RegName.size());
2411 if (!RegSuffix.empty()) {
2412 unsigned Num;
2413 // A single register with an index: rXX
2414 if (getRegNum(RegSuffix, Num))
2415 return true;
2416 } else {
2417 // A range of registers: r[XX:YY].
2418 if (NextToken.is(AsmToken::LBrac))
2419 return true;
2420 }
2421 }
2422
2423 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2424 }
2425
2426 bool
2427 AMDGPUAsmParser::isRegister()
2428 {
2429 return isRegister(getToken(), peekToken());
2430 }
2431
2432 unsigned
2433 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2434 unsigned RegNum,
2435 unsigned RegWidth,
2436 SMLoc Loc) {
2437
2438 assert(isRegularReg(RegKind));
2439
2440 unsigned AlignSize = 1;
2441 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2442 // SGPR and TTMP registers must be aligned.
2443 // Max required alignment is 4 dwords.
2444 AlignSize = std::min(RegWidth, 4u);
2445 }
2446
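// For example, s[2:3] (RegNum = 2, RegWidth = 2) is properly aligned,
// while s[1:2] (RegNum = 1) is rejected below.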
2447 if (RegNum % AlignSize != 0) {
2448 Error(Loc, "invalid register alignment");
2449 return AMDGPU::NoRegister;
2450 }
2451
2452 unsigned RegIdx = RegNum / AlignSize;
2453 int RCID = getRegClass(RegKind, RegWidth);
2454 if (RCID == -1) {
2455 Error(Loc, "invalid or unsupported register size");
2456 return AMDGPU::NoRegister;
2457 }
2458
2459 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2460 const MCRegisterClass RC = TRI->getRegClass(RCID);
2461 if (RegIdx >= RC.getNumRegs()) {
2462 Error(Loc, "register index is out of range");
2463 return AMDGPU::NoRegister;
2464 }
2465
2466 return RC.getRegister(RegIdx);
2467 }
2468
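// Parse a bracketed register index or range, e.g. [0] or [0:3]. For [XX:YY]
// this yields Num = XX and Width = YY - XX + 1; a single index gives Width = 1.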
2469 bool
2470 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2471 int64_t RegLo, RegHi;
2472 if (!skipToken(AsmToken::LBrac, "missing register index"))
2473 return false;
2474
2475 SMLoc FirstIdxLoc = getLoc();
2476 SMLoc SecondIdxLoc;
2477
2478 if (!parseExpr(RegLo))
2479 return false;
2480
2481 if (trySkipToken(AsmToken::Colon)) {
2482 SecondIdxLoc = getLoc();
2483 if (!parseExpr(RegHi))
2484 return false;
2485 } else {
2486 RegHi = RegLo;
2487 }
2488
2489 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2490 return false;
2491
2492 if (!isUInt<32>(RegLo)) {
2493 Error(FirstIdxLoc, "invalid register index");
2494 return false;
2495 }
2496
2497 if (!isUInt<32>(RegHi)) {
2498 Error(SecondIdxLoc, "invalid register index");
2499 return false;
2500 }
2501
2502 if (RegLo > RegHi) {
2503 Error(FirstIdxLoc, "first register index should not exceed second index");
2504 return false;
2505 }
2506
2507 Num = static_cast<unsigned>(RegLo);
2508 Width = (RegHi - RegLo) + 1;
2509 return true;
2510 }
2511
2512 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2513 unsigned &RegNum, unsigned &RegWidth,
2514 SmallVectorImpl<AsmToken> &Tokens) {
2515 assert(isToken(AsmToken::Identifier));
2516 unsigned Reg = getSpecialRegForName(getTokenStr());
2517 if (Reg) {
2518 RegNum = 0;
2519 RegWidth = 1;
2520 RegKind = IS_SPECIAL;
2521 Tokens.push_back(getToken());
2522 lex(); // skip register name
2523 }
2524 return Reg;
2525 }
2526
2527 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2528 unsigned &RegNum, unsigned &RegWidth,
2529 SmallVectorImpl<AsmToken> &Tokens) {
2530 assert(isToken(AsmToken::Identifier));
2531 StringRef RegName = getTokenStr();
2532 auto Loc = getLoc();
2533
2534 const RegInfo *RI = getRegularRegInfo(RegName);
2535 if (!RI) {
2536 Error(Loc, "invalid register name");
2537 return AMDGPU::NoRegister;
2538 }
2539
2540 Tokens.push_back(getToken());
2541 lex(); // skip register name
2542
2543 RegKind = RI->Kind;
2544 StringRef RegSuffix = RegName.substr(RI->Name.size());
2545 if (!RegSuffix.empty()) {
2546 // Single 32-bit register: vXX.
2547 if (!getRegNum(RegSuffix, RegNum)) {
2548 Error(Loc, "invalid register index");
2549 return AMDGPU::NoRegister;
2550 }
2551 RegWidth = 1;
2552 } else {
2553 // Range of registers: v[XX:YY]. ":YY" is optional.
2554 if (!ParseRegRange(RegNum, RegWidth))
2555 return AMDGPU::NoRegister;
2556 }
2557
2558 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2559 }
2560
2561 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2562 unsigned &RegWidth,
2563 SmallVectorImpl<AsmToken> &Tokens) {
2564 unsigned Reg = AMDGPU::NoRegister;
2565 auto ListLoc = getLoc();
2566
2567 if (!skipToken(AsmToken::LBrac,
2568 "expected a register or a list of registers")) {
2569 return AMDGPU::NoRegister;
2570 }
2571
2572 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2573
2574 auto Loc = getLoc();
2575 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2576 return AMDGPU::NoRegister;
2577 if (RegWidth != 1) {
2578 Error(Loc, "expected a single 32-bit register");
2579 return AMDGPU::NoRegister;
2580 }
2581
2582 for (; trySkipToken(AsmToken::Comma); ) {
2583 RegisterKind NextRegKind;
2584 unsigned NextReg, NextRegNum, NextRegWidth;
2585 Loc = getLoc();
2586
2587 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2588 NextRegNum, NextRegWidth,
2589 Tokens)) {
2590 return AMDGPU::NoRegister;
2591 }
2592 if (NextRegWidth != 1) {
2593 Error(Loc, "expected a single 32-bit register");
2594 return AMDGPU::NoRegister;
2595 }
2596 if (NextRegKind != RegKind) {
2597 Error(Loc, "registers in a list must be of the same kind");
2598 return AMDGPU::NoRegister;
2599 }
2600 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2601 return AMDGPU::NoRegister;
2602 }
2603
2604 if (!skipToken(AsmToken::RBrac,
2605 "expected a comma or a closing square bracket")) {
2606 return AMDGPU::NoRegister;
2607 }
2608
2609 if (isRegularReg(RegKind))
2610 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2611
2612 return Reg;
2613 }
2614
2615 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2616 unsigned &RegNum, unsigned &RegWidth,
2617 SmallVectorImpl<AsmToken> &Tokens) {
2618 auto Loc = getLoc();
2619 Reg = AMDGPU::NoRegister;
2620
2621 if (isToken(AsmToken::Identifier)) {
2622 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2623 if (Reg == AMDGPU::NoRegister)
2624 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2625 } else {
2626 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2627 }
2628
2629 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2630 if (Reg == AMDGPU::NoRegister) {
2631 assert(Parser.hasPendingError());
2632 return false;
2633 }
2634
2635 if (!subtargetHasRegister(*TRI, Reg)) {
2636 if (Reg == AMDGPU::SGPR_NULL) {
2637 Error(Loc, "'null' operand is not supported on this GPU");
2638 } else {
2639 Error(Loc, "register not available on this GPU");
2640 }
2641 return false;
2642 }
2643
2644 return true;
2645 }
2646
2647 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2648 unsigned &RegNum, unsigned &RegWidth,
2649 bool RestoreOnFailure /*=false*/) {
2650 Reg = AMDGPU::NoRegister;
2651
2652 SmallVector<AsmToken, 1> Tokens;
2653 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2654 if (RestoreOnFailure) {
2655 while (!Tokens.empty()) {
2656 getLexer().UnLex(Tokens.pop_back_val());
2657 }
2658 }
2659 return true;
2660 }
2661 return false;
2662 }
2663
2664 Optional<StringRef>
2665 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2666 switch (RegKind) {
2667 case IS_VGPR:
2668 return StringRef(".amdgcn.next_free_vgpr");
2669 case IS_SGPR:
2670 return StringRef(".amdgcn.next_free_sgpr");
2671 default:
2672 return None;
2673 }
2674 }
2675
2676 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2677 auto SymbolName = getGprCountSymbolName(RegKind);
2678 assert(SymbolName && "initializing invalid register kind");
2679 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2680 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2681 }
2682
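// Bump the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the highest
// register index used so far; e.g. a use of v[4:7] raises
// .amdgcn.next_free_vgpr to at least 8.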
2683 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2684 unsigned DwordRegIndex,
2685 unsigned RegWidth) {
2686 // Symbols are only defined for GCN targets
2687 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2688 return true;
2689
2690 auto SymbolName = getGprCountSymbolName(RegKind);
2691 if (!SymbolName)
2692 return true;
2693 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2694
2695 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2696 int64_t OldCount;
2697
2698 if (!Sym->isVariable())
2699 return !Error(getLoc(),
2700 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2701 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2702 return !Error(
2703 getLoc(),
2704 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2705
2706 if (OldCount <= NewMax)
2707 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2708
2709 return true;
2710 }
2711
2712 std::unique_ptr<AMDGPUOperand>
2713 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2714 const auto &Tok = getToken();
2715 SMLoc StartLoc = Tok.getLoc();
2716 SMLoc EndLoc = Tok.getEndLoc();
2717 RegisterKind RegKind;
2718 unsigned Reg, RegNum, RegWidth;
2719
2720 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2721 return nullptr;
2722 }
2723 if (isHsaAbiVersion3Or4(&getSTI())) {
2724 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2725 return nullptr;
2726 } else
2727 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2728 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2729 }
2730
2731 OperandMatchResultTy
2732 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2733 // TODO: add syntactic sugar for 1/(2*PI)
2734
2735 assert(!isRegister());
2736 assert(!isModifier());
2737
2738 const auto& Tok = getToken();
2739 const auto& NextTok = peekToken();
2740 bool IsReal = Tok.is(AsmToken::Real);
2741 SMLoc S = getLoc();
2742 bool Negate = false;
2743
2744 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2745 lex();
2746 IsReal = true;
2747 Negate = true;
2748 }
2749
2750 if (IsReal) {
2751 // Floating-point expressions are not supported.
2752 // Can only allow floating-point literals with an
2753 // optional sign.
2754
2755 StringRef Num = getTokenStr();
2756 lex();
2757
2758 APFloat RealVal(APFloat::IEEEdouble());
2759 auto roundMode = APFloat::rmNearestTiesToEven;
2760 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2761 return MatchOperand_ParseFail;
2762 }
2763 if (Negate)
2764 RealVal.changeSign();
2765
2766 Operands.push_back(
2767 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2768 AMDGPUOperand::ImmTyNone, true));
2769
2770 return MatchOperand_Success;
2771
2772 } else {
2773 int64_t IntVal;
2774 const MCExpr *Expr;
2775 SMLoc S = getLoc();
2776
2777 if (HasSP3AbsModifier) {
2778 // This is a workaround for handling expressions
2779 // as arguments of SP3 'abs' modifier, for example:
2780 // |1.0|
2781 // |-1|
2782 // |1+x|
2783 // This syntax is not compatible with syntax of standard
2784 // MC expressions (due to the trailing '|').
2785 SMLoc EndLoc;
2786 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2787 return MatchOperand_ParseFail;
2788 } else {
2789 if (Parser.parseExpression(Expr))
2790 return MatchOperand_ParseFail;
2791 }
2792
2793 if (Expr->evaluateAsAbsolute(IntVal)) {
2794 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2795 } else {
2796 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2797 }
2798
2799 return MatchOperand_Success;
2800 }
2801
2802 return MatchOperand_NoMatch;
2803 }
2804
2805 OperandMatchResultTy
2806 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2807 if (!isRegister())
2808 return MatchOperand_NoMatch;
2809
2810 if (auto R = parseRegister()) {
2811 assert(R->isReg());
2812 Operands.push_back(std::move(R));
2813 return MatchOperand_Success;
2814 }
2815 return MatchOperand_ParseFail;
2816 }
2817
2818 OperandMatchResultTy
2819 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2820 auto res = parseReg(Operands);
2821 if (res != MatchOperand_NoMatch) {
2822 return res;
2823 } else if (isModifier()) {
2824 return MatchOperand_NoMatch;
2825 } else {
2826 return parseImm(Operands, HasSP3AbsMod);
2827 }
2828 }
2829
2830 bool
2831 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2832 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2833 const auto &str = Token.getString();
2834 return str == "abs" || str == "neg" || str == "sext";
2835 }
2836 return false;
2837 }
2838
2839 bool
2840 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2841 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2842 }
2843
2844 bool
2845 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2846 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2847 }
2848
2849 bool
2850 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2851 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2852 }
2853
2854 // Check if this is an operand modifier or an opcode modifier
2855 // which may look like an expression but is not. We should
2856 // avoid parsing these modifiers as expressions. Currently
2857 // recognized sequences are:
2858 // |...|
2859 // abs(...)
2860 // neg(...)
2861 // sext(...)
2862 // -reg
2863 // -|...|
2864 // -abs(...)
2865 // name:...
2866 // Note that simple opcode modifiers like 'gds' may be parsed as
2867 // expressions; this is a special case. See getExpressionAsToken.
2868 //
2869 bool
2870 AMDGPUAsmParser::isModifier() {
2871
2872 AsmToken Tok = getToken();
2873 AsmToken NextToken[2];
2874 peekTokens(NextToken);
2875
2876 return isOperandModifier(Tok, NextToken[0]) ||
2877 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2878 isOpcodeModifierWithVal(Tok, NextToken[0]);
2879 }
2880
2881 // Check if the current token is an SP3 'neg' modifier.
2882 // Currently this modifier is allowed in the following context:
2883 //
2884 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2885 // 2. Before an 'abs' modifier: -abs(...)
2886 // 3. Before an SP3 'abs' modifier: -|...|
2887 //
2888 // In all other cases "-" is handled as a part
2889 // of an expression that follows the sign.
2890 //
2891 // Note: When "-" is followed by an integer literal,
2892 // this is interpreted as integer negation rather
2893 // than a floating-point NEG modifier applied to N.
2894 // Besides being counter-intuitive, such use of a floating-point
2895 // NEG modifier would have resulted in different meaning
2896 // of integer literals used with VOP1/2/C and VOP3,
2897 // for example:
2898 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2899 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2900 // Negative fp literals with preceding "-" are
2901 // handled likewise for uniformity.
2902 //
2903 bool
2904 AMDGPUAsmParser::parseSP3NegModifier() {
2905
2906 AsmToken NextToken[2];
2907 peekTokens(NextToken);
2908
2909 if (isToken(AsmToken::Minus) &&
2910 (isRegister(NextToken[0], NextToken[1]) ||
2911 NextToken[0].is(AsmToken::Pipe) ||
2912 isId(NextToken[0], "abs"))) {
2913 lex();
2914 return true;
2915 }
2916
2917 return false;
2918 }
2919
2920 OperandMatchResultTy
2921 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2922 bool AllowImm) {
2923 bool Neg, SP3Neg;
2924 bool Abs, SP3Abs;
2925 SMLoc Loc;
2926
2927 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2928 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2929 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2930 return MatchOperand_ParseFail;
2931 }
2932
2933 SP3Neg = parseSP3NegModifier();
2934
2935 Loc = getLoc();
2936 Neg = trySkipId("neg");
2937 if (Neg && SP3Neg) {
2938 Error(Loc, "expected register or immediate");
2939 return MatchOperand_ParseFail;
2940 }
2941 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2942 return MatchOperand_ParseFail;
2943
2944 Abs = trySkipId("abs");
2945 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2946 return MatchOperand_ParseFail;
2947
2948 Loc = getLoc();
2949 SP3Abs = trySkipToken(AsmToken::Pipe);
2950 if (Abs && SP3Abs) {
2951 Error(Loc, "expected register or immediate");
2952 return MatchOperand_ParseFail;
2953 }
2954
2955 OperandMatchResultTy Res;
2956 if (AllowImm) {
2957 Res = parseRegOrImm(Operands, SP3Abs);
2958 } else {
2959 Res = parseReg(Operands);
2960 }
2961 if (Res != MatchOperand_Success) {
2962 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2963 }
2964
2965 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2966 return MatchOperand_ParseFail;
2967 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2968 return MatchOperand_ParseFail;
2969 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2970 return MatchOperand_ParseFail;
2971
2972 AMDGPUOperand::Modifiers Mods;
2973 Mods.Abs = Abs || SP3Abs;
2974 Mods.Neg = Neg || SP3Neg;
2975
2976 if (Mods.hasFPModifiers()) {
2977 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2978 if (Op.isExpr()) {
2979 Error(Op.getStartLoc(), "expected an absolute expression");
2980 return MatchOperand_ParseFail;
2981 }
2982 Op.setModifiers(Mods);
2983 }
2984 return MatchOperand_Success;
2985 }
2986
2987 OperandMatchResultTy
2988 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2989 bool AllowImm) {
2990 bool Sext = trySkipId("sext");
2991 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2992 return MatchOperand_ParseFail;
2993
2994 OperandMatchResultTy Res;
2995 if (AllowImm) {
2996 Res = parseRegOrImm(Operands);
2997 } else {
2998 Res = parseReg(Operands);
2999 }
3000 if (Res != MatchOperand_Success) {
3001 return Sext? MatchOperand_ParseFail : Res;
3002 }
3003
3004 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3005 return MatchOperand_ParseFail;
3006
3007 AMDGPUOperand::Modifiers Mods;
3008 Mods.Sext = Sext;
3009
3010 if (Mods.hasIntModifiers()) {
3011 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3012 if (Op.isExpr()) {
3013 Error(Op.getStartLoc(), "expected an absolute expression");
3014 return MatchOperand_ParseFail;
3015 }
3016 Op.setModifiers(Mods);
3017 }
3018
3019 return MatchOperand_Success;
3020 }
3021
3022 OperandMatchResultTy
3023 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3024 return parseRegOrImmWithFPInputMods(Operands, false);
3025 }
3026
3027 OperandMatchResultTy
3028 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3029 return parseRegOrImmWithIntInputMods(Operands, false);
3030 }
3031
3032 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3033 auto Loc = getLoc();
3034 if (trySkipId("off")) {
3035 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3036 AMDGPUOperand::ImmTyOff, false));
3037 return MatchOperand_Success;
3038 }
3039
3040 if (!isRegister())
3041 return MatchOperand_NoMatch;
3042
3043 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3044 if (Reg) {
3045 Operands.push_back(std::move(Reg));
3046 return MatchOperand_Success;
3047 }
3048
3049 return MatchOperand_ParseFail;
3050
3051 }
3052
3053 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3054 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3055
3056 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3057 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3058 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3059 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3060 return Match_InvalidOperand;
3061
3062 if ((TSFlags & SIInstrFlags::VOP3) &&
3063 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3064 getForcedEncodingSize() != 64)
3065 return Match_PreferE32;
3066
3067 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3068 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3069 // v_mac_f32/16 allow only dst_sel == DWORD;
3070 auto OpNum =
3071 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3072 const auto &Op = Inst.getOperand(OpNum);
3073 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3074 return Match_InvalidOperand;
3075 }
3076 }
3077
3078 return Match_Success;
3079 }
3080
3081 static ArrayRef<unsigned> getAllVariants() {
3082 static const unsigned Variants[] = {
3083 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3084 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3085 };
3086
3087 return makeArrayRef(Variants);
3088 }
3089
3090 // What asm variants we should check
3091 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3092 if (getForcedEncodingSize() == 32) {
3093 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3094 return makeArrayRef(Variants);
3095 }
3096
3097 if (isForcedVOP3()) {
3098 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3099 return makeArrayRef(Variants);
3100 }
3101
3102 if (isForcedSDWA()) {
3103 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3104 AMDGPUAsmVariants::SDWA9};
3105 return makeArrayRef(Variants);
3106 }
3107
3108 if (isForcedDPP()) {
3109 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3110 return makeArrayRef(Variants);
3111 }
3112
3113 return getAllVariants();
3114 }
3115
3116 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3117 if (getForcedEncodingSize() == 32)
3118 return "e32";
3119
3120 if (isForcedVOP3())
3121 return "e64";
3122
3123 if (isForcedSDWA())
3124 return "sdwa";
3125
3126 if (isForcedDPP())
3127 return "dpp";
3128
3129 return "";
3130 }
3131
3132 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3133 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3134 const unsigned Num = Desc.getNumImplicitUses();
3135 for (unsigned i = 0; i < Num; ++i) {
3136 unsigned Reg = Desc.ImplicitUses[i];
3137 switch (Reg) {
3138 case AMDGPU::FLAT_SCR:
3139 case AMDGPU::VCC:
3140 case AMDGPU::VCC_LO:
3141 case AMDGPU::VCC_HI:
3142 case AMDGPU::M0:
3143 return Reg;
3144 default:
3145 break;
3146 }
3147 }
3148 return AMDGPU::NoRegister;
3149 }
3150
3151 // NB: This code is correct only when used to check constant
3152 // bus limitations because GFX7 supports no f16 inline constants.
3153 // Note that there are no cases when a GFX7 opcode violates
3154 // constant bus limitations due to the use of an f16 constant.
3155 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3156 unsigned OpIdx) const {
3157 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3158
3159 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3160 return false;
3161 }
3162
3163 const MCOperand &MO = Inst.getOperand(OpIdx);
3164
3165 int64_t Val = MO.getImm();
3166 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3167
3168 switch (OpSize) { // expected operand size
3169 case 8:
3170 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3171 case 4:
3172 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3173 case 2: {
3174 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3175 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3176 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3177 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3178 return AMDGPU::isInlinableIntLiteral(Val);
3179
3180 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3181 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3182 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3183 return AMDGPU::isInlinableIntLiteralV216(Val);
3184
3185 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3186 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3187 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3188 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3189
3190 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3191 }
3192 default:
3193 llvm_unreachable("invalid operand size");
3194 }
3195 }
3196
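// How many scalar (SGPR or literal) sources a single instruction may read.
// Pre-GFX10 targets allow one; GFX10+ allows two, except for the 64-bit
// shift opcodes listed below, which remain limited to one.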
3197 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3198 if (!isGFX10Plus())
3199 return 1;
3200
3201 switch (Opcode) {
3202 // 64-bit shift instructions can use only one scalar value input
3203 case AMDGPU::V_LSHLREV_B64_e64:
3204 case AMDGPU::V_LSHLREV_B64_gfx10:
3205 case AMDGPU::V_LSHRREV_B64_e64:
3206 case AMDGPU::V_LSHRREV_B64_gfx10:
3207 case AMDGPU::V_ASHRREV_I64_e64:
3208 case AMDGPU::V_ASHRREV_I64_gfx10:
3209 case AMDGPU::V_LSHL_B64_e64:
3210 case AMDGPU::V_LSHR_B64_e64:
3211 case AMDGPU::V_ASHR_I64_e64:
3212 return 1;
3213 default:
3214 return 2;
3215 }
3216 }
3217
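// An operand occupies the constant bus if it is a literal (non-inline
// immediate), an SGPR other than null, or anything else (e.g. an expression),
// which is conservatively treated as a literal.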
3218 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3219 const MCOperand &MO = Inst.getOperand(OpIdx);
3220 if (MO.isImm()) {
3221 return !isInlineConstant(Inst, OpIdx);
3222 } else if (MO.isReg()) {
3223 auto Reg = MO.getReg();
3224 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3225 auto PReg = mc2PseudoReg(Reg);
3226 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3227 } else {
3228 return true;
3229 }
3230 }
3231
3232 bool
3233 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3234 const OperandVector &Operands) {
3235 const unsigned Opcode = Inst.getOpcode();
3236 const MCInstrDesc &Desc = MII.get(Opcode);
3237 unsigned LastSGPR = AMDGPU::NoRegister;
3238 unsigned ConstantBusUseCount = 0;
3239 unsigned NumLiterals = 0;
3240 unsigned LiteralSize;
3241
3242 if (Desc.TSFlags &
3243 (SIInstrFlags::VOPC |
3244 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3245 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3246 SIInstrFlags::SDWA)) {
3247 // Check special imm operands (used by madmk, etc)
3248 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3249 ++ConstantBusUseCount;
3250 }
3251
3252 SmallDenseSet<unsigned> SGPRsUsed;
3253 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3254 if (SGPRUsed != AMDGPU::NoRegister) {
3255 SGPRsUsed.insert(SGPRUsed);
3256 ++ConstantBusUseCount;
3257 }
3258
3259 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3260 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3261 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3262
3263 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3264
3265 for (int OpIdx : OpIndices) {
3266 if (OpIdx == -1) break;
3267
3268 const MCOperand &MO = Inst.getOperand(OpIdx);
3269 if (usesConstantBus(Inst, OpIdx)) {
3270 if (MO.isReg()) {
3271 LastSGPR = mc2PseudoReg(MO.getReg());
3272 // Pairs of registers with a partial intersection like these
3273 // s0, s[0:1]
3274 // flat_scratch_lo, flat_scratch
3275 // flat_scratch_lo, flat_scratch_hi
3276 // are theoretically valid but they are disabled anyway.
3277 // Note that this code mimics SIInstrInfo::verifyInstruction
3278 if (!SGPRsUsed.count(LastSGPR)) {
3279 SGPRsUsed.insert(LastSGPR);
3280 ++ConstantBusUseCount;
3281 }
3282 } else { // Expression or a literal
3283
3284 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3285 continue; // special operand like VINTERP attr_chan
3286
3287 // An instruction may use only one literal.
3288 // This has been validated on the previous step.
3289 // See validateVOP3Literal.
3290 // This literal may be used as more than one operand.
3291 // If all these operands are of the same size,
3292 // this literal counts as one scalar value.
3293 // Otherwise it counts as 2 scalar values.
3294 // See "GFX10 Shader Programming", section 3.6.2.3.
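// For example (illustrative): reusing the same 32-bit literal in two 32-bit
// operands counts as one scalar value, while reusing it in operands of
// different sizes counts as two.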
3295
3296 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3297 if (Size < 4) Size = 4;
3298
3299 if (NumLiterals == 0) {
3300 NumLiterals = 1;
3301 LiteralSize = Size;
3302 } else if (LiteralSize != Size) {
3303 NumLiterals = 2;
3304 }
3305 }
3306 }
3307 }
3308 }
3309 ConstantBusUseCount += NumLiterals;
3310
3311 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3312 return true;
3313
3314 SMLoc LitLoc = getLitLoc(Operands);
3315 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3316 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3317 Error(Loc, "invalid operand (violates constant bus restrictions)");
3318 return false;
3319 }
3320
3321 bool
3322 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3323 const OperandVector &Operands) {
3324 const unsigned Opcode = Inst.getOpcode();
3325 const MCInstrDesc &Desc = MII.get(Opcode);
3326
3327 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3328 if (DstIdx == -1 ||
3329 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3330 return true;
3331 }
3332
3333 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3334
3335 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3336 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3337 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3338
3339 assert(DstIdx != -1);
3340 const MCOperand &Dst = Inst.getOperand(DstIdx);
3341 assert(Dst.isReg());
3342 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3343
3344 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3345
3346 for (int SrcIdx : SrcIndices) {
3347 if (SrcIdx == -1) break;
3348 const MCOperand &Src = Inst.getOperand(SrcIdx);
3349 if (Src.isReg()) {
3350 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3351 if (isRegIntersect(DstReg, SrcReg, TRI)) {
3352 Error(getRegLoc(SrcReg, Operands),
3353 "destination must be different than all sources");
3354 return false;
3355 }
3356 }
3357 }
3358
3359 return true;
3360 }
3361
3362 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3363
3364 const unsigned Opc = Inst.getOpcode();
3365 const MCInstrDesc &Desc = MII.get(Opc);
3366
3367 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3368 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3369 assert(ClampIdx != -1);
3370 return Inst.getOperand(ClampIdx).getImm() == 0;
3371 }
3372
3373 return true;
3374 }
3375
3376 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3377
3378 const unsigned Opc = Inst.getOpcode();
3379 const MCInstrDesc &Desc = MII.get(Opc);
3380
3381 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3382 return true;
3383
3384 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3385 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3386 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3387
3388 assert(VDataIdx != -1);
3389
3390 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3391 return true;
3392
3393 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3394 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3395 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3396 if (DMask == 0)
3397 DMask = 1;
3398
3399 unsigned DataSize =
3400 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3401 if (hasPackedD16()) {
3402 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3403 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3404 DataSize = (DataSize + 1) / 2;
3405 }
3406
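// Illustrative example: dmask=0x7 with tfe set and d16 clear loads 3 data
// dwords plus one dword of TFE status, so vdata must be a 4-dword tuple.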
3407 return (VDataSize / 4) == DataSize + TFESize;
3408 }
3409
3410 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3411 const unsigned Opc = Inst.getOpcode();
3412 const MCInstrDesc &Desc = MII.get(Opc);
3413
3414 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3415 return true;
3416
3417 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3418
3419 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3420 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3421 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3422 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3423 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3424 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3425
3426 assert(VAddr0Idx != -1);
3427 assert(SrsrcIdx != -1);
3428 assert(SrsrcIdx > VAddr0Idx);
3429
3430 if (DimIdx == -1)
3431 return true; // intersect_ray
3432
3433 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3434 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3435 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3436 unsigned VAddrSize =
3437 IsNSA ? SrsrcIdx - VAddr0Idx
3438 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3439 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3440
3441 unsigned AddrSize =
3442 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3443
3444 if (!IsNSA) {
3445 if (AddrSize > 8)
3446 AddrSize = 16;
3447 else if (AddrSize > 4)
3448 AddrSize = 8;
3449 }
3450
3451 return VAddrSize == AddrSize;
3452 }
3453
3454 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3455
3456 const unsigned Opc = Inst.getOpcode();
3457 const MCInstrDesc &Desc = MII.get(Opc);
3458
3459 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3460 return true;
3461 if (!Desc.mayLoad() || !Desc.mayStore())
3462 return true; // Not atomic
3463
3464 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3465 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3466
3467 // This is an incomplete check because image_atomic_cmpswap
3468 // may only use 0x3 and 0xf while other atomic operations
3469 // may use 0x1 and 0x3. However these limitations are
3470 // verified when we check that dmask matches dst size.
3471 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3472 }
3473
3474 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3475
3476 const unsigned Opc = Inst.getOpcode();
3477 const MCInstrDesc &Desc = MII.get(Opc);
3478
3479 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3480 return true;
3481
3482 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3483 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3484
3485 // GATHER4 instructions use dmask in a different fashion compared to
3486 // other MIMG instructions. The only useful DMASK values are
3487 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3488 // (red,red,red,red) etc.) The ISA document doesn't mention
3489 // this.
3490 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3491 }
3492
3493 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3494 const unsigned Opc = Inst.getOpcode();
3495 const MCInstrDesc &Desc = MII.get(Opc);
3496
3497 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3498 return true;
3499
3500 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3501 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3502 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3503
3504 if (!BaseOpcode->MSAA)
3505 return true;
3506
3507 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3508 assert(DimIdx != -1);
3509
3510 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3511 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3512
3513 return DimInfo->MSAA;
3514 }
3515
3516 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3517 {
3518 switch (Opcode) {
3519 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3520 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3521 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3522 return true;
3523 default:
3524 return false;
3525 }
3526 }
3527
3528 // movrels* opcodes should only allow VGPRs as src0.
3529 // This is specified in .td description for vop1/vop3,
3530 // but sdwa is handled differently. See isSDWAOperand.
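// E.g. (illustrative) "v_movrels_b32_sdwa v0, s0 ..." is rejected here
// because src0 must be a VGPR.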
3531 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3532 const OperandVector &Operands) {
3533
3534 const unsigned Opc = Inst.getOpcode();
3535 const MCInstrDesc &Desc = MII.get(Opc);
3536
3537 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3538 return true;
3539
3540 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3541 assert(Src0Idx != -1);
3542
3543 SMLoc ErrLoc;
3544 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3545 if (Src0.isReg()) {
3546 auto Reg = mc2PseudoReg(Src0.getReg());
3547 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3548 if (!isSGPR(Reg, TRI))
3549 return true;
3550 ErrLoc = getRegLoc(Reg, Operands);
3551 } else {
3552 ErrLoc = getConstLoc(Operands);
3553 }
3554
3555 Error(ErrLoc, "source operand must be a VGPR");
3556 return false;
3557 }
3558
3559 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3560 const OperandVector &Operands) {
3561
3562 const unsigned Opc = Inst.getOpcode();
3563
3564 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3565 return true;
3566
3567 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3568 assert(Src0Idx != -1);
3569
3570 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3571 if (!Src0.isReg())
3572 return true;
3573
3574 auto Reg = mc2PseudoReg(Src0.getReg());
3575 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3576 if (isSGPR(Reg, TRI)) {
3577 Error(getRegLoc(Reg, Operands),
3578 "source operand must be either a VGPR or an inline constant");
3579 return false;
3580 }
3581
3582 return true;
3583 }
3584
3585 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3586 switch (Inst.getOpcode()) {
3587 default:
3588 return true;
3589 case V_DIV_SCALE_F32_gfx6_gfx7:
3590 case V_DIV_SCALE_F32_vi:
3591 case V_DIV_SCALE_F32_gfx10:
3592 case V_DIV_SCALE_F64_gfx6_gfx7:
3593 case V_DIV_SCALE_F64_vi:
3594 case V_DIV_SCALE_F64_gfx10:
3595 break;
3596 }
3597
3598 // TODO: Check that src0 = src1 or src2.
3599
3600 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3601 AMDGPU::OpName::src1_modifiers,
3602 AMDGPU::OpName::src2_modifiers}) {
3603 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3604 .getImm() &
3605 SISrcMods::ABS) {
3606 return false;
3607 }
3608 }
3609
3610 return true;
3611 }
3612
3613 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3614
3615 const unsigned Opc = Inst.getOpcode();
3616 const MCInstrDesc &Desc = MII.get(Opc);
3617
3618 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3619 return true;
3620
3621 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3622 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3623 if (isCI() || isSI())
3624 return false;
3625 }
3626
3627 return true;
3628 }
3629
3630 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3631 const unsigned Opc = Inst.getOpcode();
3632 const MCInstrDesc &Desc = MII.get(Opc);
3633
3634 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3635 return true;
3636
3637 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3638 if (DimIdx < 0)
3639 return true;
3640
3641 long Imm = Inst.getOperand(DimIdx).getImm();
3642 if (Imm < 0 || Imm >= 8)
3643 return false;
3644
3645 return true;
3646 }
3647
3648 static bool IsRevOpcode(const unsigned Opcode)
3649 {
3650 switch (Opcode) {
3651 case AMDGPU::V_SUBREV_F32_e32:
3652 case AMDGPU::V_SUBREV_F32_e64:
3653 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3654 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3655 case AMDGPU::V_SUBREV_F32_e32_vi:
3656 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3657 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3658 case AMDGPU::V_SUBREV_F32_e64_vi:
3659
3660 case AMDGPU::V_SUBREV_CO_U32_e32:
3661 case AMDGPU::V_SUBREV_CO_U32_e64:
3662 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3663 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3664
3665 case AMDGPU::V_SUBBREV_U32_e32:
3666 case AMDGPU::V_SUBBREV_U32_e64:
3667 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3668 case AMDGPU::V_SUBBREV_U32_e32_vi:
3669 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3670 case AMDGPU::V_SUBBREV_U32_e64_vi:
3671
3672 case AMDGPU::V_SUBREV_U32_e32:
3673 case AMDGPU::V_SUBREV_U32_e64:
3674 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3675 case AMDGPU::V_SUBREV_U32_e32_vi:
3676 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3677 case AMDGPU::V_SUBREV_U32_e64_vi:
3678
3679 case AMDGPU::V_SUBREV_F16_e32:
3680 case AMDGPU::V_SUBREV_F16_e64:
3681 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3682 case AMDGPU::V_SUBREV_F16_e32_vi:
3683 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3684 case AMDGPU::V_SUBREV_F16_e64_vi:
3685
3686 case AMDGPU::V_SUBREV_U16_e32:
3687 case AMDGPU::V_SUBREV_U16_e64:
3688 case AMDGPU::V_SUBREV_U16_e32_vi:
3689 case AMDGPU::V_SUBREV_U16_e64_vi:
3690
3691 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3692 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3693 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3694
3695 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3696 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3697
3698 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3699 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3700
3701 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3702 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3703
3704 case AMDGPU::V_LSHRREV_B32_e32:
3705 case AMDGPU::V_LSHRREV_B32_e64:
3706 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3707 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3708 case AMDGPU::V_LSHRREV_B32_e32_vi:
3709 case AMDGPU::V_LSHRREV_B32_e64_vi:
3710 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3711 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3712
3713 case AMDGPU::V_ASHRREV_I32_e32:
3714 case AMDGPU::V_ASHRREV_I32_e64:
3715 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3716 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3717 case AMDGPU::V_ASHRREV_I32_e32_vi:
3718 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3719 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3720 case AMDGPU::V_ASHRREV_I32_e64_vi:
3721
3722 case AMDGPU::V_LSHLREV_B32_e32:
3723 case AMDGPU::V_LSHLREV_B32_e64:
3724 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3725 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3726 case AMDGPU::V_LSHLREV_B32_e32_vi:
3727 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3728 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3729 case AMDGPU::V_LSHLREV_B32_e64_vi:
3730
3731 case AMDGPU::V_LSHLREV_B16_e32:
3732 case AMDGPU::V_LSHLREV_B16_e64:
3733 case AMDGPU::V_LSHLREV_B16_e32_vi:
3734 case AMDGPU::V_LSHLREV_B16_e64_vi:
3735 case AMDGPU::V_LSHLREV_B16_gfx10:
3736
3737 case AMDGPU::V_LSHRREV_B16_e32:
3738 case AMDGPU::V_LSHRREV_B16_e64:
3739 case AMDGPU::V_LSHRREV_B16_e32_vi:
3740 case AMDGPU::V_LSHRREV_B16_e64_vi:
3741 case AMDGPU::V_LSHRREV_B16_gfx10:
3742
3743 case AMDGPU::V_ASHRREV_I16_e32:
3744 case AMDGPU::V_ASHRREV_I16_e64:
3745 case AMDGPU::V_ASHRREV_I16_e32_vi:
3746 case AMDGPU::V_ASHRREV_I16_e64_vi:
3747 case AMDGPU::V_ASHRREV_I16_gfx10:
3748
3749 case AMDGPU::V_LSHLREV_B64_e64:
3750 case AMDGPU::V_LSHLREV_B64_gfx10:
3751 case AMDGPU::V_LSHLREV_B64_vi:
3752
3753 case AMDGPU::V_LSHRREV_B64_e64:
3754 case AMDGPU::V_LSHRREV_B64_gfx10:
3755 case AMDGPU::V_LSHRREV_B64_vi:
3756
3757 case AMDGPU::V_ASHRREV_I64_e64:
3758 case AMDGPU::V_ASHRREV_I64_gfx10:
3759 case AMDGPU::V_ASHRREV_I64_vi:
3760
3761 case AMDGPU::V_PK_LSHLREV_B16:
3762 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3763 case AMDGPU::V_PK_LSHLREV_B16_vi:
3764
3765 case AMDGPU::V_PK_LSHRREV_B16:
3766 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3767 case AMDGPU::V_PK_LSHRREV_B16_vi:
3768 case AMDGPU::V_PK_ASHRREV_I16:
3769 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3770 case AMDGPU::V_PK_ASHRREV_I16_vi:
3771 return true;
3772 default:
3773 return false;
3774 }
3775 }
3776
3777 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3778
3779 using namespace SIInstrFlags;
3780 const unsigned Opcode = Inst.getOpcode();
3781 const MCInstrDesc &Desc = MII.get(Opcode);
3782
3783 // lds_direct register is defined so that it can be used
3784 // with 9-bit operands only. Ignore encodings which do not accept these.
3785 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3786 if ((Desc.TSFlags & Enc) == 0)
3787 return None;
3788
3789 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3790 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3791 if (SrcIdx == -1)
3792 break;
3793 const auto &Src = Inst.getOperand(SrcIdx);
3794 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3795
3796 if (isGFX90A())
3797 return StringRef("lds_direct is not supported on this GPU");
3798
3799 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3800 return StringRef("lds_direct cannot be used with this instruction");
3801
3802 if (SrcName != OpName::src0)
3803 return StringRef("lds_direct may be used as src0 only");
3804 }
3805 }
3806
3807 return None;
3808 }
3809
3810 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3811 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3812 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3813 if (Op.isFlatOffset())
3814 return Op.getStartLoc();
3815 }
3816 return getLoc();
3817 }
3818
3819 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3820 const OperandVector &Operands) {
3821 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3822 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3823 return true;
3824
3825 auto Opcode = Inst.getOpcode();
3826 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3827 assert(OpNum != -1);
3828
3829 const auto &Op = Inst.getOperand(OpNum);
3830 if (!hasFlatOffsets() && Op.getImm() != 0) {
3831 Error(getFlatOffsetLoc(Operands),
3832 "flat offset modifier is not supported on this GPU");
3833 return false;
3834 }
3835
3836 // For FLAT segment the offset must be positive;
3837 // MSB is ignored and forced to zero.
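// Global and scratch segment instructions take a signed offset; plain flat
// instructions take an unsigned one. Both are checked against the target's
// flat-offset bit width below.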
3838 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3839 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3840 if (!isIntN(OffsetSize, Op.getImm())) {
3841 Error(getFlatOffsetLoc(Operands),
3842 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3843 return false;
3844 }
3845 } else {
3846 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3847 if (!isUIntN(OffsetSize, Op.getImm())) {
3848 Error(getFlatOffsetLoc(Operands),
3849 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3850 return false;
3851 }
3852 }
3853
3854 return true;
3855 }
3856
3857 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3858 // Start with second operand because SMEM Offset cannot be dst or src0.
3859 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3860 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3861 if (Op.isSMEMOffset())
3862 return Op.getStartLoc();
3863 }
3864 return getLoc();
3865 }
3866
3867 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3868 const OperandVector &Operands) {
3869 if (isCI() || isSI())
3870 return true;
3871
3872 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3873 if ((TSFlags & SIInstrFlags::SMRD) == 0)
3874 return true;
3875
3876 auto Opcode = Inst.getOpcode();
3877 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3878 if (OpNum == -1)
3879 return true;
3880
3881 const auto &Op = Inst.getOperand(OpNum);
3882 if (!Op.isImm())
3883 return true;
3884
3885 uint64_t Offset = Op.getImm();
3886 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3887 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3888 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3889 return true;
3890
3891 Error(getSMEMOffsetLoc(Operands),
3892 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3893 "expected a 21-bit signed offset");
3894
3895 return false;
3896 }
3897
3898 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3899 unsigned Opcode = Inst.getOpcode();
3900 const MCInstrDesc &Desc = MII.get(Opcode);
3901 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3902 return true;
3903
3904 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3905 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3906
3907 const int OpIndices[] = { Src0Idx, Src1Idx };
3908
3909 unsigned NumExprs = 0;
3910 unsigned NumLiterals = 0;
3911 uint32_t LiteralValue;
3912
3913 for (int OpIdx : OpIndices) {
3914 if (OpIdx == -1) break;
3915
3916 const MCOperand &MO = Inst.getOperand(OpIdx);
3917 // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3918 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3919 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3920 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3921 if (NumLiterals == 0 || LiteralValue != Value) {
3922 LiteralValue = Value;
3923 ++NumLiterals;
3924 }
3925 } else if (MO.isExpr()) {
3926 ++NumExprs;
3927 }
3928 }
3929 }
3930
3931 return NumLiterals + NumExprs <= 1;
3932 }
3933
3934 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3935 const unsigned Opc = Inst.getOpcode();
3936 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3937 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3938 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3939 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3940
3941 if (OpSel & ~3)
3942 return false;
3943 }
3944 return true;
3945 }
3946
3947 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3948 const OperandVector &Operands) {
3949 const unsigned Opc = Inst.getOpcode();
3950 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3951 if (DppCtrlIdx < 0)
3952 return true;
3953 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3954
3955 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3956 // DPP64 is supported for row_newbcast only.
3957 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3958 if (Src0Idx >= 0 &&
3959 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3960 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3961 Error(S, "64 bit dpp only supports row_newbcast");
3962 return false;
3963 }
3964 }
3965
3966 return true;
3967 }
3968
3969 // Check if VCC register matches wavefront size
3970 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3971 auto FB = getFeatureBits();
3972 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3973 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3974 }
3975
3976 // VOP3 literal is only allowed in GFX10+ and only one can be used
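// E.g. (illustrative) a GFX10 VOP3 instruction may repeat one literal value
// across several source operands, but two distinct literal values, or any
// literal on a target without FeatureVOP3Literal, are rejected.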
3977 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3978 const OperandVector &Operands) {
3979 unsigned Opcode = Inst.getOpcode();
3980 const MCInstrDesc &Desc = MII.get(Opcode);
3981 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3982 return true;
3983
3984 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3985 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3986 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3987
3988 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3989
3990 unsigned NumExprs = 0;
3991 unsigned NumLiterals = 0;
3992 uint32_t LiteralValue;
3993
3994 for (int OpIdx : OpIndices) {
3995 if (OpIdx == -1) break;
3996
3997 const MCOperand &MO = Inst.getOperand(OpIdx);
3998 if (!MO.isImm() && !MO.isExpr())
3999 continue;
4000 if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4001 continue;
4002
4003 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4004 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4005 Error(getConstLoc(Operands),
4006 "inline constants are not allowed for this operand");
4007 return false;
4008 }
4009
4010 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4011 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4012 if (NumLiterals == 0 || LiteralValue != Value) {
4013 LiteralValue = Value;
4014 ++NumLiterals;
4015 }
4016 } else if (MO.isExpr()) {
4017 ++NumExprs;
4018 }
4019 }
4020 NumLiterals += NumExprs;
4021
4022 if (!NumLiterals)
4023 return true;
4024
4025 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4026 Error(getLitLoc(Operands), "literal operands are not supported");
4027 return false;
4028 }
4029
4030 if (NumLiterals > 1) {
4031 Error(getLitLoc(Operands), "only one literal operand is allowed");
4032 return false;
4033 }
4034
4035 return true;
4036 }
4037
4038 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4039 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4040 const MCRegisterInfo *MRI) {
4041 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4042 if (OpIdx < 0)
4043 return -1;
4044
4045 const MCOperand &Op = Inst.getOperand(OpIdx);
4046 if (!Op.isReg())
4047 return -1;
4048
4049 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4050 auto Reg = Sub ? Sub : Op.getReg();
4051 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4052 return AGRP32.contains(Reg) ? 1 : 0;
4053 }
4054
4055 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4056 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4057 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4058 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4059 SIInstrFlags::DS)) == 0)
4060 return true;
4061
4062 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4063 : AMDGPU::OpName::vdata;
4064
4065 const MCRegisterInfo *MRI = getMRI();
4066 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4067 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4068
4069 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4070 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4071 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4072 return false;
4073 }
4074
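// On gfx90a the data and dst operands must agree (all AGPR or all VGPR);
// on other targets AGPRs are not accepted for these memory operands at all.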
4075 auto FB = getFeatureBits();
4076 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4077 if (DataAreg < 0 || DstAreg < 0)
4078 return true;
4079 return DstAreg == DataAreg;
4080 }
4081
4082 return DstAreg < 1 && DataAreg < 1;
4083 }
4084
4085 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4086 auto FB = getFeatureBits();
4087 if (!FB[AMDGPU::FeatureGFX90AInsts])
4088 return true;
4089
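// gfx90a requires 64-bit alignment for multi-register operands: the first
// VGPR or AGPR of any register tuple must have an even register number.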
4090 const MCRegisterInfo *MRI = getMRI();
4091 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4092 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4093 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4094 const MCOperand &Op = Inst.getOperand(I);
4095 if (!Op.isReg())
4096 continue;
4097
4098 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4099 if (!Sub)
4100 continue;
4101
4102 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4103 return false;
4104 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4105 return false;
4106 }
4107
4108 return true;
4109 }
4110
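// Validate the cache policy (glc/dlc/scc) bits against the instruction:
// SMRD accepts only glc and dlc, scc is rejected on gfx90a, non-MIMG
// atomics with a return value must set glc, and non-returning atomics
// must not.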
4111 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4112 const OperandVector &Operands,
4113 const SMLoc &IDLoc) {
4114 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4115 AMDGPU::OpName::cpol);
4116 if (CPolPos == -1)
4117 return true;
4118
4119 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4120
4121 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4122 if ((TSFlags & (SIInstrFlags::SMRD)) &&
4123 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4124 Error(IDLoc, "invalid cache policy for SMRD instruction");
4125 return false;
4126 }
4127
4128 if (isGFX90A() && (CPol & CPol::SCC)) {
4129 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4130 StringRef CStr(S.getPointer());
4131 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4132 Error(S, "scc is not supported on this GPU");
4133 return false;
4134 }
4135
4136 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4137 return true;
4138
4139 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4140 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4141 Error(IDLoc, "instruction must use glc");
4142 return false;
4143 }
4144 } else {
4145 if (CPol & CPol::GLC) {
4146 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4147 StringRef CStr(S.getPointer());
4148 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4149 Error(S, "instruction must not use glc");
4150 return false;
4151 }
4152 }
4153
4154 return true;
4155 }
4156
4157 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4158 const SMLoc &IDLoc,
4159 const OperandVector &Operands) {
4160 if (auto ErrMsg = validateLdsDirect(Inst)) {
4161 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4162 return false;
4163 }
4164 if (!validateSOPLiteral(Inst)) {
4165 Error(getLitLoc(Operands),
4166 "only one literal operand is allowed");
4167 return false;
4168 }
4169 if (!validateVOP3Literal(Inst, Operands)) {
4170 return false;
4171 }
4172 if (!validateConstantBusLimitations(Inst, Operands)) {
4173 return false;
4174 }
4175 if (!validateEarlyClobberLimitations(Inst, Operands)) {
4176 return false;
4177 }
4178 if (!validateIntClampSupported(Inst)) {
4179 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4180 "integer clamping is not supported on this GPU");
4181 return false;
4182 }
4183 if (!validateOpSel(Inst)) {
4184 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4185 "invalid op_sel operand");
4186 return false;
4187 }
4188 if (!validateDPP(Inst, Operands)) {
4189 return false;
4190 }
4191 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4192 if (!validateMIMGD16(Inst)) {
4193 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4194 "d16 modifier is not supported on this GPU");
4195 return false;
4196 }
4197 if (!validateMIMGDim(Inst)) {
4198 Error(IDLoc, "dim modifier is required on this GPU");
4199 return false;
4200 }
4201 if (!validateMIMGMSAA(Inst)) {
4202 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4203 "invalid dim; must be MSAA type");
4204 return false;
4205 }
4206 if (!validateMIMGDataSize(Inst)) {
4207 Error(IDLoc,
4208 "image data size does not match dmask and tfe");
4209 return false;
4210 }
4211 if (!validateMIMGAddrSize(Inst)) {
4212 Error(IDLoc,
4213 "image address size does not match dim and a16");
4214 return false;
4215 }
4216 if (!validateMIMGAtomicDMask(Inst)) {
4217 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4218 "invalid atomic image dmask");
4219 return false;
4220 }
4221 if (!validateMIMGGatherDMask(Inst)) {
4222 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4223 "invalid image_gather dmask: only one bit must be set");
4224 return false;
4225 }
4226 if (!validateMovrels(Inst, Operands)) {
4227 return false;
4228 }
4229 if (!validateFlatOffset(Inst, Operands)) {
4230 return false;
4231 }
4232 if (!validateSMEMOffset(Inst, Operands)) {
4233 return false;
4234 }
4235 if (!validateMAIAccWrite(Inst, Operands)) {
4236 return false;
4237 }
4238 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4239 return false;
4240 }
4241
4242 if (!validateAGPRLdSt(Inst)) {
4243 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4244 ? "invalid register class: data and dst should be all VGPR or AGPR"
4245 : "invalid register class: agpr loads and stores not supported on this GPU"
4246 );
4247 return false;
4248 }
4249 if (!validateVGPRAlign(Inst)) {
4250 Error(IDLoc,
4251 "invalid register class: vgpr tuples must be 64 bit aligned");
4252 return false;
4253 }
4254
4255 if (!validateDivScale(Inst)) {
4256 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4257 return false;
4258 }
4259 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4260 return false;
4261 }
4262
4263 return true;
4264 }
4265
4266 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4267 const FeatureBitset &FBS,
4268 unsigned VariantID = 0);
4269
4270 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4271 const FeatureBitset &AvailableFeatures,
4272 unsigned VariantID);
4273
4274 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4275 const FeatureBitset &FBS) {
4276 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4277 }
4278
4279 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4280 const FeatureBitset &FBS,
4281 ArrayRef<unsigned> Variants) {
4282 for (auto Variant : Variants) {
4283 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4284 return true;
4285 }
4286
4287 return false;
4288 }
4289
4290 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4291 const SMLoc &IDLoc) {
4292 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4293
4294 // Check if requested instruction variant is supported.
4295 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4296 return false;
4297
4298 // This instruction is not supported.
4299 // Clear any other pending errors because they are no longer relevant.
4300 getParser().clearPendingErrors();
4301
4302 // Requested instruction variant is not supported.
4303 // Check if any other variants are supported.
4304 StringRef VariantName = getMatchedVariantName();
4305 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4306 return Error(IDLoc,
4307 Twine(VariantName,
4308 " variant of this instruction is not supported"));
4309 }
4310
4311 // Finally check if this instruction is supported on any other GPU.
4312 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4313 return Error(IDLoc, "instruction not supported on this GPU");
4314 }
4315
4316 // Instruction not supported on any GPU. Probably a typo.
4317 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4318 return Error(IDLoc, "invalid instruction" + Suggestion);
4319 }
4320
4321 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4322 OperandVector &Operands,
4323 MCStreamer &Out,
4324 uint64_t &ErrorInfo,
4325 bool MatchingInlineAsm) {
4326 MCInst Inst;
4327 unsigned Result = Match_Success;
4328 for (auto Variant : getMatchedVariants()) {
4329 uint64_t EI;
4330 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4331 Variant);
4332 // We order match statuses from least to most specific and keep the most
4333 // specific status as the result:
4334 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4335 if ((R == Match_Success) ||
4336 (R == Match_PreferE32) ||
4337 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4338 (R == Match_InvalidOperand && Result != Match_MissingFeature
4339 && Result != Match_PreferE32) ||
4340 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4341 && Result != Match_MissingFeature
4342 && Result != Match_PreferE32)) {
4343 Result = R;
4344 ErrorInfo = EI;
4345 }
4346 if (R == Match_Success)
4347 break;
4348 }
4349
4350 if (Result == Match_Success) {
4351 if (!validateInstruction(Inst, IDLoc, Operands)) {
4352 return true;
4353 }
4354 Inst.setLoc(IDLoc);
4355 Out.emitInstruction(Inst, getSTI());
4356 return false;
4357 }
4358
4359 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4360 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4361 return true;
4362 }
4363
4364 switch (Result) {
4365 default: break;
4366 case Match_MissingFeature:
4367 // It has been verified that the specified instruction
4368 // mnemonic is valid. A match was found but it requires
4369 // features which are not supported on this GPU.
4370 return Error(IDLoc, "operands are not valid for this GPU or mode");
4371
4372 case Match_InvalidOperand: {
4373 SMLoc ErrorLoc = IDLoc;
4374 if (ErrorInfo != ~0ULL) {
4375 if (ErrorInfo >= Operands.size()) {
4376 return Error(IDLoc, "too few operands for instruction");
4377 }
4378 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4379 if (ErrorLoc == SMLoc())
4380 ErrorLoc = IDLoc;
4381 }
4382 return Error(ErrorLoc, "invalid operand for instruction");
4383 }
4384
4385 case Match_PreferE32:
4386 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4387 "should be encoded as e32");
4388 case Match_MnemonicFail:
4389 llvm_unreachable("Invalid instructions should have been handled already");
4390 }
4391 llvm_unreachable("Implement any new match types added!");
4392 }
4393
4394 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4395 int64_t Tmp = -1;
4396 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4397 return true;
4398 }
4399 if (getParser().parseAbsoluteExpression(Tmp)) {
4400 return true;
4401 }
4402 Ret = static_cast<uint32_t>(Tmp);
4403 return false;
4404 }
4405
4406 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4407 uint32_t &Minor) {
4408 if (ParseAsAbsoluteExpression(Major))
4409 return TokError("invalid major version");
4410
4411 if (!trySkipToken(AsmToken::Comma))
4412 return TokError("minor version number required, comma expected");
4413
4414 if (ParseAsAbsoluteExpression(Minor))
4415 return TokError("invalid minor version");
4416
4417 return false;
4418 }
4419
4420 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4421 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4422 return TokError("directive only supported for amdgcn architecture");
4423
4424 std::string TargetIDDirective;
4425 SMLoc TargetStart = getTok().getLoc();
4426 if (getParser().parseEscapedString(TargetIDDirective))
4427 return true;
4428
4429 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4430 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4431 return getParser().Error(TargetRange.Start,
4432 (Twine(".amdgcn_target directive's target id ") +
4433 Twine(TargetIDDirective) +
4434 Twine(" does not match the specified target id ") +
4435 Twine(getTargetStreamer().getTargetID()->toString())).str());
4436
4437 return false;
4438 }
4439
4440 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4441 return Error(Range.Start, "value out of range", Range);
4442 }
4443
4444 bool AMDGPUAsmParser::calculateGPRBlocks(
4445 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4446 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4447 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4448 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4449 // TODO(scott.linder): These calculations are duplicated from
4450 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4451 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4452
4453 unsigned NumVGPRs = NextFreeVGPR;
4454 unsigned NumSGPRs = NextFreeSGPR;
4455
4456 if (Version.Major >= 10)
4457 NumSGPRs = 0;
4458 else {
4459 unsigned MaxAddressableNumSGPRs =
4460 IsaInfo::getAddressableNumSGPRs(&getSTI());
4461
4462 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4463 NumSGPRs > MaxAddressableNumSGPRs)
4464 return OutOfRangeError(SGPRRange);
4465
4466 NumSGPRs +=
4467 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4468
4469 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4470 NumSGPRs > MaxAddressableNumSGPRs)
4471 return OutOfRangeError(SGPRRange);
4472
4473 if (Features.test(FeatureSGPRInitBug))
4474 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4475 }
4476
4477 VGPRBlocks =
4478 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4479 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4480
4481 return false;
4482 }
4483
4484 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4485 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4486 return TokError("directive only supported for amdgcn architecture");
4487
4488 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4489 return TokError("directive only supported for amdhsa OS");
4490
4491 StringRef KernelName;
4492 if (getParser().parseIdentifier(KernelName))
4493 return true;
4494
4495 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4496
4497 StringSet<> Seen;
4498
4499 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4500
4501 SMRange VGPRRange;
4502 uint64_t NextFreeVGPR = 0;
4503 uint64_t AccumOffset = 0;
4504 SMRange SGPRRange;
4505 uint64_t NextFreeSGPR = 0;
4506 unsigned UserSGPRCount = 0;
4507 bool ReserveVCC = true;
4508 bool ReserveFlatScr = true;
4509 Optional<bool> EnableWavefrontSize32;
4510
4511 while (true) {
4512 while (trySkipToken(AsmToken::EndOfStatement));
4513
4514 StringRef ID;
4515 SMRange IDRange = getTok().getLocRange();
4516 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4517 return true;
4518
4519 if (ID == ".end_amdhsa_kernel")
4520 break;
4521
4522 if (Seen.find(ID) != Seen.end())
4523 return TokError(".amdhsa_ directives cannot be repeated");
4524 Seen.insert(ID);
4525
4526 SMLoc ValStart = getLoc();
4527 int64_t IVal;
4528 if (getParser().parseAbsoluteExpression(IVal))
4529 return true;
4530 SMLoc ValEnd = getLoc();
4531 SMRange ValRange = SMRange(ValStart, ValEnd);
4532
4533 if (IVal < 0)
4534 return OutOfRangeError(ValRange);
4535
4536 uint64_t Val = IVal;
4537
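// Helper macro: range-check VALUE against the bit width of ENTRY
// (ENTRY##_WIDTH) and store it into the corresponding bits of FIELD.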
4538 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4539 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4540 return OutOfRangeError(RANGE); \
4541 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4542
4543 if (ID == ".amdhsa_group_segment_fixed_size") {
4544 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4545 return OutOfRangeError(ValRange);
4546 KD.group_segment_fixed_size = Val;
4547 } else if (ID == ".amdhsa_private_segment_fixed_size") {
4548 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4549 return OutOfRangeError(ValRange);
4550 KD.private_segment_fixed_size = Val;
4551 } else if (ID == ".amdhsa_kernarg_size") {
4552 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4553 return OutOfRangeError(ValRange);
4554 KD.kernarg_size = Val;
4555 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4556 if (hasArchitectedFlatScratch())
4557 return Error(IDRange.Start,
4558 "directive is not supported with architected flat scratch",
4559 IDRange);
4560 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4561 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4562 Val, ValRange);
4563 if (Val)
4564 UserSGPRCount += 4;
4565 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4566 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4567 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4568 ValRange);
4569 if (Val)
4570 UserSGPRCount += 2;
4571 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4572 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4573 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4574 ValRange);
4575 if (Val)
4576 UserSGPRCount += 2;
4577 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4578 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4579 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4580 Val, ValRange);
4581 if (Val)
4582 UserSGPRCount += 2;
4583 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4584 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4585 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4586 ValRange);
4587 if (Val)
4588 UserSGPRCount += 2;
4589 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4590 if (hasArchitectedFlatScratch())
4591 return Error(IDRange.Start,
4592 "directive is not supported with architected flat scratch",
4593 IDRange);
4594 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4595 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4596 ValRange);
4597 if (Val)
4598 UserSGPRCount += 2;
4599 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4600 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4601 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4602 Val, ValRange);
4603 if (Val)
4604 UserSGPRCount += 1;
4605 } else if (ID == ".amdhsa_wavefront_size32") {
4606 if (IVersion.Major < 10)
4607 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4608 EnableWavefrontSize32 = Val;
4609 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4610 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4611 Val, ValRange);
4612 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4613 if (hasArchitectedFlatScratch())
4614 return Error(IDRange.Start,
4615 "directive is not supported with architected flat scratch",
4616 IDRange);
4617 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4618 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4619 } else if (ID == ".amdhsa_enable_private_segment") {
4620 if (!hasArchitectedFlatScratch())
4621 return Error(
4622 IDRange.Start,
4623 "directive is not supported without architected flat scratch",
4624 IDRange);
4625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4626 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4627 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4628 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4629 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4630 ValRange);
4631 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4632 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4633 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4634 ValRange);
4635 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4636 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4637 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4638 ValRange);
4639 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4640 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4641 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4642 ValRange);
4643 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4644 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4645 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4646 ValRange);
4647 } else if (ID == ".amdhsa_next_free_vgpr") {
4648 VGPRRange = ValRange;
4649 NextFreeVGPR = Val;
4650 } else if (ID == ".amdhsa_next_free_sgpr") {
4651 SGPRRange = ValRange;
4652 NextFreeSGPR = Val;
4653 } else if (ID == ".amdhsa_accum_offset") {
4654 if (!isGFX90A())
4655 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4656 AccumOffset = Val;
4657 } else if (ID == ".amdhsa_reserve_vcc") {
4658 if (!isUInt<1>(Val))
4659 return OutOfRangeError(ValRange);
4660 ReserveVCC = Val;
4661 } else if (ID == ".amdhsa_reserve_flat_scratch") {
4662 if (IVersion.Major < 7)
4663 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4664 if (hasArchitectedFlatScratch())
4665 return Error(IDRange.Start,
4666 "directive is not supported with architected flat scratch",
4667 IDRange);
4668 if (!isUInt<1>(Val))
4669 return OutOfRangeError(ValRange);
4670 ReserveFlatScr = Val;
4671 } else if (ID == ".amdhsa_reserve_xnack_mask") {
4672 if (IVersion.Major < 8)
4673 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4674 if (!isUInt<1>(Val))
4675 return OutOfRangeError(ValRange);
4676 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4677 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4678 IDRange);
4679 } else if (ID == ".amdhsa_float_round_mode_32") {
4680 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4681 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4682 } else if (ID == ".amdhsa_float_round_mode_16_64") {
4683 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4684 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4685 } else if (ID == ".amdhsa_float_denorm_mode_32") {
4686 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4687 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4688 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4689 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4690 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4691 ValRange);
4692 } else if (ID == ".amdhsa_dx10_clamp") {
4693 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4694 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4695 } else if (ID == ".amdhsa_ieee_mode") {
4696 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4697 Val, ValRange);
4698 } else if (ID == ".amdhsa_fp16_overflow") {
4699 if (IVersion.Major < 9)
4700 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4701 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4702 ValRange);
4703 } else if (ID == ".amdhsa_tg_split") {
4704 if (!isGFX90A())
4705 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4706 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4707 ValRange);
4708 } else if (ID == ".amdhsa_workgroup_processor_mode") {
4709 if (IVersion.Major < 10)
4710 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4711 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4712 ValRange);
4713 } else if (ID == ".amdhsa_memory_ordered") {
4714 if (IVersion.Major < 10)
4715 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4716 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4717 ValRange);
4718 } else if (ID == ".amdhsa_forward_progress") {
4719 if (IVersion.Major < 10)
4720 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4722 ValRange);
4723 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4724 PARSE_BITS_ENTRY(
4725 KD.compute_pgm_rsrc2,
4726 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4727 ValRange);
4728 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4729 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4730 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4731 Val, ValRange);
4732 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4733 PARSE_BITS_ENTRY(
4734 KD.compute_pgm_rsrc2,
4735 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4736 ValRange);
4737 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4738 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4739 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4740 Val, ValRange);
4741 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4742 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4743 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4744 Val, ValRange);
4745 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4747 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4748 Val, ValRange);
4749 } else if (ID == ".amdhsa_exception_int_div_zero") {
4750 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4751 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4752 Val, ValRange);
4753 } else {
4754 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4755 }
4756
4757 #undef PARSE_BITS_ENTRY
4758 }
4759
4760 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4761 return TokError(".amdhsa_next_free_vgpr directive is required");
4762
4763 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4764 return TokError(".amdhsa_next_free_sgpr directive is required");
4765
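// Fold the raw next-free VGPR/SGPR counts into the granulated block counts
// that the kernel descriptor actually encodes.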
4766 unsigned VGPRBlocks;
4767 unsigned SGPRBlocks;
4768 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4769 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4770 EnableWavefrontSize32, NextFreeVGPR,
4771 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4772 SGPRBlocks))
4773 return true;
4774
4775 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4776 VGPRBlocks))
4777 return OutOfRangeError(VGPRRange);
4778 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4779 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4780
4781 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4782 SGPRBlocks))
4783 return OutOfRangeError(SGPRRange);
4784 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4785 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4786 SGPRBlocks);
4787
4788 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4789 return TokError("too many user SGPRs enabled");
4790 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4791 UserSGPRCount);
4792
4793 if (isGFX90A()) {
4794 if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4795 return TokError(".amdhsa_accum_offset directive is required");
4796 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4797 return TokError("accum_offset should be in range [4..256] in "
4798 "increments of 4");
4799 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4800 return TokError("accum_offset exceeds total VGPR allocation");
4801 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4802 (AccumOffset / 4 - 1));
4803 }
4804
4805 getTargetStreamer().EmitAmdhsaKernelDescriptor(
4806 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4807 ReserveFlatScr);
4808 return false;
4809 }
4810
4811 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4812 uint32_t Major;
4813 uint32_t Minor;
4814
4815 if (ParseDirectiveMajorMinor(Major, Minor))
4816 return true;
4817
4818 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4819 return false;
4820 }
4821
4822 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4823 uint32_t Major;
4824 uint32_t Minor;
4825 uint32_t Stepping;
4826 StringRef VendorName;
4827 StringRef ArchName;
4828
4829 // If this directive has no arguments, then use the ISA version for the
4830 // targeted GPU.
4831 if (isToken(AsmToken::EndOfStatement)) {
4832 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4833 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4834 ISA.Stepping,
4835 "AMD", "AMDGPU");
4836 return false;
4837 }
4838
4839 if (ParseDirectiveMajorMinor(Major, Minor))
4840 return true;
4841
4842 if (!trySkipToken(AsmToken::Comma))
4843 return TokError("stepping version number required, comma expected");
4844
4845 if (ParseAsAbsoluteExpression(Stepping))
4846 return TokError("invalid stepping version");
4847
4848 if (!trySkipToken(AsmToken::Comma))
4849 return TokError("vendor name required, comma expected");
4850
4851 if (!parseString(VendorName, "invalid vendor name"))
4852 return true;
4853
4854 if (!trySkipToken(AsmToken::Comma))
4855 return TokError("arch name required, comma expected");
4856
4857 if (!parseString(ArchName, "invalid arch name"))
4858 return true;
4859
4860 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4861 VendorName, ArchName);
4862 return false;
4863 }
4864
4865 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4866 amd_kernel_code_t &Header) {
4867 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4868 // assembly for backwards compatibility.
4869 if (ID == "max_scratch_backing_memory_byte_size") {
4870 Parser.eatToEndOfStatement();
4871 return false;
4872 }
4873
4874 SmallString<40> ErrStr;
4875 raw_svector_ostream Err(ErrStr);
4876 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4877 return TokError(Err.str());
4878 }
4879 Lex();
4880
4881 if (ID == "enable_wavefront_size32") {
4882 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4883 if (!isGFX10Plus())
4884 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4885 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4886 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4887 } else {
4888 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4889 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4890 }
4891 }
4892
4893 if (ID == "wavefront_size") {
4894 if (Header.wavefront_size == 5) {
4895 if (!isGFX10Plus())
4896 return TokError("wavefront_size=5 is only allowed on GFX10+");
4897 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4898 return TokError("wavefront_size=5 requires +WavefrontSize32");
4899 } else if (Header.wavefront_size == 6) {
4900 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4901 return TokError("wavefront_size=6 requires +WavefrontSize64");
4902 }
4903 }
4904
4905 if (ID == "enable_wgp_mode") {
4906 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4907 !isGFX10Plus())
4908 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4909 }
4910
4911 if (ID == "enable_mem_ordered") {
4912 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4913 !isGFX10Plus())
4914 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4915 }
4916
4917 if (ID == "enable_fwd_progress") {
4918 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4919 !isGFX10Plus())
4920 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4921 }
4922
4923 return false;
4924 }
4925
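// Parse a legacy .amd_kernel_code_t block, e.g.:
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_sgpr_kernarg_segment_ptr = 1
//   .end_amd_kernel_code_t
// The field names follow the amd_kernel_code_t structure; the fields shown
// here are illustrative only.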
4926 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4927 amd_kernel_code_t Header;
4928 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4929
4930 while (true) {
4931 // Lex EndOfStatement. This is in a while loop, because lexing a comment
4932 // will set the current token to EndOfStatement.
4933 while(trySkipToken(AsmToken::EndOfStatement));
4934
4935 StringRef ID;
4936 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4937 return true;
4938
4939 if (ID == ".end_amd_kernel_code_t")
4940 break;
4941
4942 if (ParseAMDKernelCodeTValue(ID, Header))
4943 return true;
4944 }
4945
4946 getTargetStreamer().EmitAMDKernelCodeT(Header);
4947
4948 return false;
4949 }
4950
4951 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4952 StringRef KernelName;
4953 if (!parseId(KernelName, "expected symbol name"))
4954 return true;
4955
4956 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4957 ELF::STT_AMDGPU_HSA_KERNEL);
4958
4959 KernelScope.initialize(getContext());
4960 return false;
4961 }
4962
4963 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4964 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4965 return Error(getLoc(),
4966 ".amd_amdgpu_isa directive is not available on non-amdgcn "
4967 "architectures");
4968 }
4969
4970 auto TargetIDDirective = getLexer().getTok().getStringContents();
4971 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4972 return Error(getParser().getTok().getLoc(), "target id must match options");
4973
4974 getTargetStreamer().EmitISAVersion();
4975 Lex();
4976
4977 return false;
4978 }
4979
4980 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4981 const char *AssemblerDirectiveBegin;
4982 const char *AssemblerDirectiveEnd;
4983 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4984 isHsaAbiVersion3Or4(&getSTI())
4985 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4986 HSAMD::V3::AssemblerDirectiveEnd)
4987 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4988 HSAMD::AssemblerDirectiveEnd);
4989
4990 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4991 return Error(getLoc(),
4992 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4993 "not available on non-amdhsa OSes")).str());
4994 }
4995
4996 std::string HSAMetadataString;
4997 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4998 HSAMetadataString))
4999 return true;
5000
5001 if (isHsaAbiVersion3Or4(&getSTI())) {
5002 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5003 return Error(getLoc(), "invalid HSA metadata");
5004 } else {
5005 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5006 return Error(getLoc(), "invalid HSA metadata");
5007 }
5008
5009 return false;
5010 }
5011
5012 /// Common code to parse out a block of text (typically YAML) between start and
5013 /// end directives.
5014 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5015 const char *AssemblerDirectiveEnd,
5016 std::string &CollectString) {
5017
5018 raw_string_ostream CollectStream(CollectString);
5019
5020 getLexer().setSkipSpace(false);
5021
5022 bool FoundEnd = false;
5023 while (!isToken(AsmToken::Eof)) {
5024 while (isToken(AsmToken::Space)) {
5025 CollectStream << getTokenStr();
5026 Lex();
5027 }
5028
5029 if (trySkipId(AssemblerDirectiveEnd)) {
5030 FoundEnd = true;
5031 break;
5032 }
5033
5034 CollectStream << Parser.parseStringToEndOfStatement()
5035 << getContext().getAsmInfo()->getSeparatorString();
5036
5037 Parser.eatToEndOfStatement();
5038 }
5039
5040 getLexer().setSkipSpace(true);
5041
5042 if (isToken(AsmToken::Eof) && !FoundEnd) {
5043 return TokError(Twine("expected directive ") +
5044 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5045 }
5046
5047 CollectStream.flush();
5048 return false;
5049 }
5050
5051 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5052 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5053 std::string String;
5054 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5055 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5056 return true;
5057
5058 auto PALMetadata = getTargetStreamer().getPALMetadata();
5059 if (!PALMetadata->setFromString(String))
5060 return Error(getLoc(), "invalid PAL metadata");
5061 return false;
5062 }
5063
5064 /// Parse the assembler directive for old linear-format PAL metadata.
5065 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5066 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5067 return Error(getLoc(),
5068 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5069 "not available on non-amdpal OSes")).str());
5070 }
5071
5072 auto PALMetadata = getTargetStreamer().getPALMetadata();
5073 PALMetadata->setLegacy();
5074 for (;;) {
5075 uint32_t Key, Value;
5076 if (ParseAsAbsoluteExpression(Key)) {
5077 return TokError(Twine("invalid value in ") +
5078 Twine(PALMD::AssemblerDirective));
5079 }
5080 if (!trySkipToken(AsmToken::Comma)) {
5081 return TokError(Twine("expected an even number of values in ") +
5082 Twine(PALMD::AssemblerDirective));
5083 }
5084 if (ParseAsAbsoluteExpression(Value)) {
5085 return TokError(Twine("invalid value in ") +
5086 Twine(PALMD::AssemblerDirective));
5087 }
5088 PALMetadata->setRegister(Key, Value);
5089 if (!trySkipToken(AsmToken::Comma))
5090 break;
5091 }
5092 return false;
5093 }
5094
5095 /// ParseDirectiveAMDGPULDS
5096 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
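/// e.g. ".amdgpu_lds lds_buf, 4096, 16" reserves 4096 bytes of LDS for the
/// (hypothetical) symbol lds_buf with 16-byte alignment; the alignment
/// defaults to 4 when omitted.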
5097 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5098 if (getParser().checkForValidSection())
5099 return true;
5100
5101 StringRef Name;
5102 SMLoc NameLoc = getLoc();
5103 if (getParser().parseIdentifier(Name))
5104 return TokError("expected identifier in directive");
5105
5106 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5107 if (parseToken(AsmToken::Comma, "expected ','"))
5108 return true;
5109
5110 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5111
5112 int64_t Size;
5113 SMLoc SizeLoc = getLoc();
5114 if (getParser().parseAbsoluteExpression(Size))
5115 return true;
5116 if (Size < 0)
5117 return Error(SizeLoc, "size must be non-negative");
5118 if (Size > LocalMemorySize)
5119 return Error(SizeLoc, "size is too large");
5120
5121 int64_t Alignment = 4;
5122 if (trySkipToken(AsmToken::Comma)) {
5123 SMLoc AlignLoc = getLoc();
5124 if (getParser().parseAbsoluteExpression(Alignment))
5125 return true;
5126 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5127 return Error(AlignLoc, "alignment must be a power of two");
5128
5129 // Alignment larger than the size of LDS is possible in theory, as long
5130 // as the linker manages to place the symbol at address 0, but we do want
5131 // to make sure the alignment fits nicely into a 32-bit integer.
5132 if (Alignment >= 1u << 31)
5133 return Error(AlignLoc, "alignment is too large");
5134 }
5135
5136 if (parseToken(AsmToken::EndOfStatement,
5137 "unexpected token in '.amdgpu_lds' directive"))
5138 return true;
5139
5140 Symbol->redefineIfPossible();
5141 if (!Symbol->isUndefined())
5142 return Error(NameLoc, "invalid symbol redefinition");
5143
5144 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5145 return false;
5146 }
5147
5148 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5149 StringRef IDVal = DirectiveID.getString();
5150
5151 if (isHsaAbiVersion3Or4(&getSTI())) {
5152 if (IDVal == ".amdhsa_kernel")
5153 return ParseDirectiveAMDHSAKernel();
5154
5155 // TODO: Restructure/combine with PAL metadata directive.
5156 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5157 return ParseDirectiveHSAMetadata();
5158 } else {
5159 if (IDVal == ".hsa_code_object_version")
5160 return ParseDirectiveHSACodeObjectVersion();
5161
5162 if (IDVal == ".hsa_code_object_isa")
5163 return ParseDirectiveHSACodeObjectISA();
5164
5165 if (IDVal == ".amd_kernel_code_t")
5166 return ParseDirectiveAMDKernelCodeT();
5167
5168 if (IDVal == ".amdgpu_hsa_kernel")
5169 return ParseDirectiveAMDGPUHsaKernel();
5170
5171 if (IDVal == ".amd_amdgpu_isa")
5172 return ParseDirectiveISAVersion();
5173
5174 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5175 return ParseDirectiveHSAMetadata();
5176 }
5177
5178 if (IDVal == ".amdgcn_target")
5179 return ParseDirectiveAMDGCNTarget();
5180
5181 if (IDVal == ".amdgpu_lds")
5182 return ParseDirectiveAMDGPULDS();
5183
5184 if (IDVal == PALMD::AssemblerDirectiveBegin)
5185 return ParseDirectivePALMetadataBegin();
5186
5187 if (IDVal == PALMD::AssemblerDirective)
5188 return ParseDirectivePALMetadata();
5189
5190 return true;
5191 }
5192
5193 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5194 unsigned RegNo) {
5195
5196 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5197 R.isValid(); ++R) {
5198 if (*R == RegNo)
5199 return isGFX9Plus();
5200 }
5201
5202 // GFX10 has 2 more SGPRs 104 and 105.
5203 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5204 R.isValid(); ++R) {
5205 if (*R == RegNo)
5206 return hasSGPR104_SGPR105();
5207 }
5208
5209 switch (RegNo) {
5210 case AMDGPU::SRC_SHARED_BASE:
5211 case AMDGPU::SRC_SHARED_LIMIT:
5212 case AMDGPU::SRC_PRIVATE_BASE:
5213 case AMDGPU::SRC_PRIVATE_LIMIT:
5214 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5215 return isGFX9Plus();
5216 case AMDGPU::TBA:
5217 case AMDGPU::TBA_LO:
5218 case AMDGPU::TBA_HI:
5219 case AMDGPU::TMA:
5220 case AMDGPU::TMA_LO:
5221 case AMDGPU::TMA_HI:
5222 return !isGFX9Plus();
5223 case AMDGPU::XNACK_MASK:
5224 case AMDGPU::XNACK_MASK_LO:
5225 case AMDGPU::XNACK_MASK_HI:
5226 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5227 case AMDGPU::SGPR_NULL:
5228 return isGFX10Plus();
5229 default:
5230 break;
5231 }
5232
5233 if (isCI())
5234 return true;
5235
5236 if (isSI() || isGFX10Plus()) {
5237 // No flat_scr on SI.
5238 // On GFX10 flat scratch is not a valid register operand and can only be
5239 // accessed with s_setreg/s_getreg.
5240 switch (RegNo) {
5241 case AMDGPU::FLAT_SCR:
5242 case AMDGPU::FLAT_SCR_LO:
5243 case AMDGPU::FLAT_SCR_HI:
5244 return false;
5245 default:
5246 return true;
5247 }
5248 }
5249
5250 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5251 // SI/CI have.
5252 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5253 R.isValid(); ++R) {
5254 if (*R == RegNo)
5255 return hasSGPR102_SGPR103();
5256 }
5257
5258 return true;
5259 }
5260
5261 OperandMatchResultTy
5262 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5263 OperandMode Mode) {
5264 // Try to parse with a custom parser
5265 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5266
5267 // If we successfully parsed the operand or if there was an error parsing,
5268 // we are done.
5269 //
5270 // If we are parsing after we reach EndOfStatement then this means we
5271 // are appending default values to the Operands list. This is only done
5272 // by a custom parser, so we shouldn't continue on to the generic parsing.
5273 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5274 isToken(AsmToken::EndOfStatement))
5275 return ResTy;
5276
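// In NSA (non-sequential address) mode a MIMG address may be written as a
// bracketed list of individual VGPRs, e.g. "[v4, v9, v2]"; parse each
// register and, for multi-register lists, wrap them in explicit "[" and "]"
// token operands.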
5277 SMLoc RBraceLoc;
5278 SMLoc LBraceLoc = getLoc();
5279 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5280 unsigned Prefix = Operands.size();
5281
5282 for (;;) {
5283 auto Loc = getLoc();
5284 ResTy = parseReg(Operands);
5285 if (ResTy == MatchOperand_NoMatch)
5286 Error(Loc, "expected a register");
5287 if (ResTy != MatchOperand_Success)
5288 return MatchOperand_ParseFail;
5289
5290 RBraceLoc = getLoc();
5291 if (trySkipToken(AsmToken::RBrac))
5292 break;
5293
5294 if (!skipToken(AsmToken::Comma,
5295 "expected a comma or a closing square bracket")) {
5296 return MatchOperand_ParseFail;
5297 }
5298 }
5299
5300 if (Operands.size() - Prefix > 1) {
5301 Operands.insert(Operands.begin() + Prefix,
5302 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5303 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5304 }
5305
5306 return MatchOperand_Success;
5307 }
5308
5309 return parseRegOrImm(Operands);
5310 }
5311
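// Strip a forced-encoding suffix (_e64, _e32, _dpp or _sdwa) from the
// mnemonic, record the requested encoding, and return the bare mnemonic,
// e.g. "v_add_f32_e64" -> "v_add_f32".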
5312 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5313 // Clear any forced encodings from the previous instruction.
5314 setForcedEncodingSize(0);
5315 setForcedDPP(false);
5316 setForcedSDWA(false);
5317
5318 if (Name.endswith("_e64")) {
5319 setForcedEncodingSize(64);
5320 return Name.substr(0, Name.size() - 4);
5321 } else if (Name.endswith("_e32")) {
5322 setForcedEncodingSize(32);
5323 return Name.substr(0, Name.size() - 4);
5324 } else if (Name.endswith("_dpp")) {
5325 setForcedDPP(true);
5326 return Name.substr(0, Name.size() - 4);
5327 } else if (Name.endswith("_sdwa")) {
5328 setForcedSDWA(true);
5329 return Name.substr(0, Name.size() - 5);
5330 }
5331 return Name;
5332 }
5333
5334 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5335 StringRef Name,
5336 SMLoc NameLoc, OperandVector &Operands) {
5337 // Add the instruction mnemonic
5338 Name = parseMnemonicSuffix(Name);
5339 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5340
5341 bool IsMIMG = Name.startswith("image_");
5342
5343 while (!trySkipToken(AsmToken::EndOfStatement)) {
5344 OperandMode Mode = OperandMode_Default;
5345 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5346 Mode = OperandMode_NSA;
5347 CPolSeen = 0;
5348 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5349
5350 if (Res != MatchOperand_Success) {
5351 checkUnsupportedInstruction(Name, NameLoc);
5352 if (!Parser.hasPendingError()) {
5353 // FIXME: use real operand location rather than the current location.
5354 StringRef Msg =
5355 (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5356 "not a valid operand.";
5357 Error(getLoc(), Msg);
5358 }
5359 while (!trySkipToken(AsmToken::EndOfStatement)) {
5360 lex();
5361 }
5362 return true;
5363 }
5364
5365 // Eat the comma or space if there is one.
5366 trySkipToken(AsmToken::Comma);
5367 }
5368
5369 return false;
5370 }
5371
5372 //===----------------------------------------------------------------------===//
5373 // Utility functions
5374 //===----------------------------------------------------------------------===//
5375
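// Parse an integer operand written as "<Prefix>:<expr>", e.g. "offset:16".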
5376 OperandMatchResultTy
5377 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5378
5379 if (!trySkipId(Prefix, AsmToken::Colon))
5380 return MatchOperand_NoMatch;
5381
5382 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5383 }
5384
5385 OperandMatchResultTy
5386 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5387 AMDGPUOperand::ImmTy ImmTy,
5388 bool (*ConvertResult)(int64_t&)) {
5389 SMLoc S = getLoc();
5390 int64_t Value = 0;
5391
5392 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5393 if (Res != MatchOperand_Success)
5394 return Res;
5395
5396 if (ConvertResult && !ConvertResult(Value)) {
5397 Error(S, "invalid " + StringRef(Prefix) + " value.");
5398 }
5399
5400 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5401 return MatchOperand_Success;
5402 }
5403
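// Parse a bit-array operand written as "<Prefix>:[b0,b1,...]", e.g.
// "op_sel:[0,1]"; each element must be 0 or 1 and is packed, LSB first,
// into a single immediate.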
5404 OperandMatchResultTy
5405 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5406 OperandVector &Operands,
5407 AMDGPUOperand::ImmTy ImmTy,
5408 bool (*ConvertResult)(int64_t&)) {
5409 SMLoc S = getLoc();
5410 if (!trySkipId(Prefix, AsmToken::Colon))
5411 return MatchOperand_NoMatch;
5412
5413 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5414 return MatchOperand_ParseFail;
5415
5416 unsigned Val = 0;
5417 const unsigned MaxSize = 4;
5418
5419 // FIXME: How to verify the number of elements matches the number of src
5420 // operands?
5421 for (int I = 0; ; ++I) {
5422 int64_t Op;
5423 SMLoc Loc = getLoc();
5424 if (!parseExpr(Op))
5425 return MatchOperand_ParseFail;
5426
5427 if (Op != 0 && Op != 1) {
5428 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5429 return MatchOperand_ParseFail;
5430 }
5431
5432 Val |= (Op << I);
5433
5434 if (trySkipToken(AsmToken::RBrac))
5435 break;
5436
5437 if (I + 1 == MaxSize) {
5438 Error(getLoc(), "expected a closing square bracket");
5439 return MatchOperand_ParseFail;
5440 }
5441
5442 if (!skipToken(AsmToken::Comma, "expected a comma"))
5443 return MatchOperand_ParseFail;
5444 }
5445
5446 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5447 return MatchOperand_Success;
5448 }
5449
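// Parse a named flag that may be negated with a "no" prefix, e.g. "gds" vs
// "nogds", and add it as a 1/0 immediate operand.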
5450 OperandMatchResultTy
5451 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5452 AMDGPUOperand::ImmTy ImmTy) {
5453 int64_t Bit;
5454 SMLoc S = getLoc();
5455
5456 if (trySkipId(Name)) {
5457 Bit = 1;
5458 } else if (trySkipId("no", Name)) {
5459 Bit = 0;
5460 } else {
5461 return MatchOperand_NoMatch;
5462 }
5463
5464 if (Name == "r128" && !hasMIMG_R128()) {
5465 Error(S, "r128 modifier is not supported on this GPU");
5466 return MatchOperand_ParseFail;
5467 }
5468 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5469 Error(S, "a16 modifier is not supported on this GPU");
5470 return MatchOperand_ParseFail;
5471 }
5472
5473 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5474 ImmTy = AMDGPUOperand::ImmTyR128A16;
5475
5476 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5477 return MatchOperand_Success;
5478 }
5479
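// Parse cache-policy modifiers (glc/slc/dlc/scc and their "no..." forms).
// Successive modifiers are merged into a single CPol immediate; CPolSeen
// rejects a modifier that is specified twice on one instruction.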
5480 OperandMatchResultTy
5481 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5482 unsigned CPolOn = 0;
5483 unsigned CPolOff = 0;
5484 SMLoc S = getLoc();
5485
5486 if (trySkipId("glc"))
5487 CPolOn = AMDGPU::CPol::GLC;
5488 else if (trySkipId("noglc"))
5489 CPolOff = AMDGPU::CPol::GLC;
5490 else if (trySkipId("slc"))
5491 CPolOn = AMDGPU::CPol::SLC;
5492 else if (trySkipId("noslc"))
5493 CPolOff = AMDGPU::CPol::SLC;
5494 else if (trySkipId("dlc"))
5495 CPolOn = AMDGPU::CPol::DLC;
5496 else if (trySkipId("nodlc"))
5497 CPolOff = AMDGPU::CPol::DLC;
5498 else if (trySkipId("scc"))
5499 CPolOn = AMDGPU::CPol::SCC;
5500 else if (trySkipId("noscc"))
5501 CPolOff = AMDGPU::CPol::SCC;
5502 else
5503 return MatchOperand_NoMatch;
5504
5505 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5506 Error(S, "dlc modifier is not supported on this GPU");
5507 return MatchOperand_ParseFail;
5508 }
5509
5510 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5511 Error(S, "scc modifier is not supported on this GPU");
5512 return MatchOperand_ParseFail;
5513 }
5514
5515 if (CPolSeen & (CPolOn | CPolOff)) {
5516 Error(S, "duplicate cache policy modifier");
5517 return MatchOperand_ParseFail;
5518 }
5519
5520 CPolSeen |= (CPolOn | CPolOff);
5521
5522 for (unsigned I = 1; I != Operands.size(); ++I) {
5523 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5524 if (Op.isCPol()) {
5525 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5526 return MatchOperand_Success;
5527 }
5528 }
5529
5530 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5531 AMDGPUOperand::ImmTyCPol));
5532
5533 return MatchOperand_Success;
5534 }
5535
5536 static void addOptionalImmOperand(
5537 MCInst& Inst, const OperandVector& Operands,
5538 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5539 AMDGPUOperand::ImmTy ImmT,
5540 int64_t Default = 0) {
5541 auto i = OptionalIdx.find(ImmT);
5542 if (i != OptionalIdx.end()) {
5543 unsigned Idx = i->second;
5544 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5545 } else {
5546 Inst.addOperand(MCOperand::createImm(Default));
5547 }
5548 }
5549
5550 OperandMatchResultTy
5551 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5552 StringRef &Value,
5553 SMLoc &StringLoc) {
5554 if (!trySkipId(Prefix, AsmToken::Colon))
5555 return MatchOperand_NoMatch;
5556
5557 StringLoc = getLoc();
5558 return parseId(Value, "expected an identifier") ? MatchOperand_Success
5559 : MatchOperand_ParseFail;
5560 }
5561
5562 //===----------------------------------------------------------------------===//
5563 // MTBUF format
5564 //===----------------------------------------------------------------------===//
5565
5566 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5567 int64_t MaxVal,
5568 int64_t &Fmt) {
5569 int64_t Val;
5570 SMLoc Loc = getLoc();
5571
5572 auto Res = parseIntWithPrefix(Pref, Val);
5573 if (Res == MatchOperand_ParseFail)
5574 return false;
5575 if (Res == MatchOperand_NoMatch)
5576 return true;
5577
5578 if (Val < 0 || Val > MaxVal) {
5579 Error(Loc, Twine("out of range ", StringRef(Pref)));
5580 return false;
5581 }
5582
5583 Fmt = Val;
5584 return true;
5585 }
5586
5587 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5588 // values to live in a joint format operand in the MCInst encoding.
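// e.g. "dfmt:4, nfmt:7" or "nfmt:7, dfmt:4"; either field may be omitted and
// then takes its default value.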
5589 OperandMatchResultTy
5590 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5591 using namespace llvm::AMDGPU::MTBUFFormat;
5592
5593 int64_t Dfmt = DFMT_UNDEF;
5594 int64_t Nfmt = NFMT_UNDEF;
5595
5596 // dfmt and nfmt can appear in either order, and each is optional.
5597 for (int I = 0; I < 2; ++I) {
5598 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5599 return MatchOperand_ParseFail;
5600
5601 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5602 return MatchOperand_ParseFail;
5603 }
5604 // Skip optional comma between dfmt/nfmt
5605 // but guard against 2 commas following each other.
5606 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5607 !peekToken().is(AsmToken::Comma)) {
5608 trySkipToken(AsmToken::Comma);
5609 }
5610 }
5611
5612 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5613 return MatchOperand_NoMatch;
5614
5615 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5616 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5617
5618 Format = encodeDfmtNfmt(Dfmt, Nfmt);
5619 return MatchOperand_Success;
5620 }
5621
5622 OperandMatchResultTy
5623 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5624 using namespace llvm::AMDGPU::MTBUFFormat;
5625
5626 int64_t Fmt = UFMT_UNDEF;
5627
5628 if (!tryParseFmt("format", UFMT_MAX, Fmt))
5629 return MatchOperand_ParseFail;
5630
5631 if (Fmt == UFMT_UNDEF)
5632 return MatchOperand_NoMatch;
5633
5634 Format = Fmt;
5635 return MatchOperand_Success;
5636 }
5637
5638 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5639 int64_t &Nfmt,
5640 StringRef FormatStr,
5641 SMLoc Loc) {
5642 using namespace llvm::AMDGPU::MTBUFFormat;
5643 int64_t Format;
5644
5645 Format = getDfmt(FormatStr);
5646 if (Format != DFMT_UNDEF) {
5647 Dfmt = Format;
5648 return true;
5649 }
5650
5651 Format = getNfmt(FormatStr, getSTI());
5652 if (Format != NFMT_UNDEF) {
5653 Nfmt = Format;
5654 return true;
5655 }
5656
5657 Error(Loc, "unsupported format");
5658 return false;
5659 }
5660
5661 OperandMatchResultTy
5662 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5663 SMLoc FormatLoc,
5664 int64_t &Format) {
5665 using namespace llvm::AMDGPU::MTBUFFormat;
5666
5667 int64_t Dfmt = DFMT_UNDEF;
5668 int64_t Nfmt = NFMT_UNDEF;
5669 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5670 return MatchOperand_ParseFail;
5671
5672 if (trySkipToken(AsmToken::Comma)) {
5673 StringRef Str;
5674 SMLoc Loc = getLoc();
5675 if (!parseId(Str, "expected a format string") ||
5676 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5677 return MatchOperand_ParseFail;
5678 }
5679 if (Dfmt == DFMT_UNDEF) {
5680 Error(Loc, "duplicate numeric format");
5681 return MatchOperand_ParseFail;
5682 } else if (Nfmt == NFMT_UNDEF) {
5683 Error(Loc, "duplicate data format");
5684 return MatchOperand_ParseFail;
5685 }
5686 }
5687
5688 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5689 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5690
5691 if (isGFX10Plus()) {
5692 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5693 if (Ufmt == UFMT_UNDEF) {
5694 Error(FormatLoc, "unsupported format");
5695 return MatchOperand_ParseFail;
5696 }
5697 Format = Ufmt;
5698 } else {
5699 Format = encodeDfmtNfmt(Dfmt, Nfmt);
5700 }
5701
5702 return MatchOperand_Success;
5703 }
5704
5705 OperandMatchResultTy
5706 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5707 SMLoc Loc,
5708 int64_t &Format) {
5709 using namespace llvm::AMDGPU::MTBUFFormat;
5710
5711 auto Id = getUnifiedFormat(FormatStr);
5712 if (Id == UFMT_UNDEF)
5713 return MatchOperand_NoMatch;
5714
5715 if (!isGFX10Plus()) {
5716 Error(Loc, "unified format is not supported on this GPU");
5717 return MatchOperand_ParseFail;
5718 }
5719
5720 Format = Id;
5721 return MatchOperand_Success;
5722 }
5723
5724 OperandMatchResultTy
5725 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5726 using namespace llvm::AMDGPU::MTBUFFormat;
5727 SMLoc Loc = getLoc();
5728
5729 if (!parseExpr(Format))
5730 return MatchOperand_ParseFail;
5731 if (!isValidFormatEncoding(Format, getSTI())) {
5732 Error(Loc, "out of range format");
5733 return MatchOperand_ParseFail;
5734 }
5735
5736 return MatchOperand_Success;
5737 }
5738
5739 OperandMatchResultTy
5740 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5741 using namespace llvm::AMDGPU::MTBUFFormat;
5742
5743 if (!trySkipId("format", AsmToken::Colon))
5744 return MatchOperand_NoMatch;
5745
5746 if (trySkipToken(AsmToken::LBrac)) {
5747 StringRef FormatStr;
5748 SMLoc Loc = getLoc();
5749 if (!parseId(FormatStr, "expected a format string"))
5750 return MatchOperand_ParseFail;
5751
5752 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5753 if (Res == MatchOperand_NoMatch)
5754 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5755 if (Res != MatchOperand_Success)
5756 return Res;
5757
5758 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5759 return MatchOperand_ParseFail;
5760
5761 return MatchOperand_Success;
5762 }
5763
5764 return parseNumericFormat(Format);
5765 }
5766
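// Parse the MTBUF format operand. The format may appear either before the
// soffset operand (legacy dfmt/nfmt or unified "format:" syntax) or after it
// as a symbolic "format:[...]" or numeric "format:" value.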
5767 OperandMatchResultTy
5768 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5769 using namespace llvm::AMDGPU::MTBUFFormat;
5770
5771 int64_t Format = getDefaultFormatEncoding(getSTI());
5772 OperandMatchResultTy Res;
5773 SMLoc Loc = getLoc();
5774
5775 // Parse legacy format syntax.
5776 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5777 if (Res == MatchOperand_ParseFail)
5778 return Res;
5779
5780 bool FormatFound = (Res == MatchOperand_Success);
5781
5782 Operands.push_back(
5783 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5784
5785 if (FormatFound)
5786 trySkipToken(AsmToken::Comma);
5787
5788 if (isToken(AsmToken::EndOfStatement)) {
5789 // We are expecting an soffset operand,
5790 // but let the matcher handle the error.
5791 return MatchOperand_Success;
5792 }
5793
5794 // Parse soffset.
5795 Res = parseRegOrImm(Operands);
5796 if (Res != MatchOperand_Success)
5797 return Res;
5798
5799 trySkipToken(AsmToken::Comma);
5800
5801 if (!FormatFound) {
5802 Res = parseSymbolicOrNumericFormat(Format);
5803 if (Res == MatchOperand_ParseFail)
5804 return Res;
5805 if (Res == MatchOperand_Success) {
5806 auto Size = Operands.size();
5807 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5808 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5809 Op.setImm(Format);
5810 }
5811 return MatchOperand_Success;
5812 }
5813
5814 if (isId("format") && peekToken().is(AsmToken::Colon)) {
5815 Error(getLoc(), "duplicate format");
5816 return MatchOperand_ParseFail;
5817 }
5818 return MatchOperand_Success;
5819 }
5820
5821 //===----------------------------------------------------------------------===//
5822 // ds
5823 //===----------------------------------------------------------------------===//
5824
5825 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5826 const OperandVector &Operands) {
5827 OptionalImmIndexMap OptionalIdx;
5828
5829 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5830 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5831
5832 // Add the register arguments
5833 if (Op.isReg()) {
5834 Op.addRegOperands(Inst, 1);
5835 continue;
5836 }
5837
5838 // Handle optional arguments
5839 OptionalIdx[Op.getImmTy()] = i;
5840 }
5841
5842 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5843 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5844 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5845
5846 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5847 }
5848
5849 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5850 bool IsGdsHardcoded) {
5851 OptionalImmIndexMap OptionalIdx;
5852
5853 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5854 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5855
5856 // Add the register arguments
5857 if (Op.isReg()) {
5858 Op.addRegOperands(Inst, 1);
5859 continue;
5860 }
5861
5862 if (Op.isToken() && Op.getToken() == "gds") {
5863 IsGdsHardcoded = true;
5864 continue;
5865 }
5866
5867 // Handle optional arguments
5868 OptionalIdx[Op.getImmTy()] = i;
5869 }
5870
5871 AMDGPUOperand::ImmTy OffsetType =
5872 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5873 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5874 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5875 AMDGPUOperand::ImmTyOffset;
5876
5877 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5878
5879 if (!IsGdsHardcoded) {
5880 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5881 }
5882 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5883 }
5884
5885 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5886 OptionalImmIndexMap OptionalIdx;
5887
5888 unsigned OperandIdx[4];
5889 unsigned EnMask = 0;
5890 int SrcIdx = 0;
5891
5892 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5893 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5894
5895 // Add the register arguments
5896 if (Op.isReg()) {
5897 assert(SrcIdx < 4);
5898 OperandIdx[SrcIdx] = Inst.size();
5899 Op.addRegOperands(Inst, 1);
5900 ++SrcIdx;
5901 continue;
5902 }
5903
5904 if (Op.isOff()) {
5905 assert(SrcIdx < 4);
5906 OperandIdx[SrcIdx] = Inst.size();
5907 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5908 ++SrcIdx;
5909 continue;
5910 }
5911
5912 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5913 Op.addImmOperands(Inst, 1);
5914 continue;
5915 }
5916
5917 if (Op.isToken() && Op.getToken() == "done")
5918 continue;
5919
5920 // Handle optional arguments
5921 OptionalIdx[Op.getImmTy()] = i;
5922 }
5923
5924 assert(SrcIdx == 4);
5925
5926 bool Compr = false;
5927 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5928 Compr = true;
5929 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5930 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5931 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5932 }
5933
5934 for (auto i = 0; i < SrcIdx; ++i) {
5935 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5936 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5937 }
5938 }
5939
5940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5942
5943 Inst.addOperand(MCOperand::createImm(EnMask));
5944 }
5945
5946 //===----------------------------------------------------------------------===//
5947 // s_waitcnt
5948 //===----------------------------------------------------------------------===//
5949
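// Merge one counter value into the combined s_waitcnt immediate. Returns
// true on failure, i.e. when CntVal does not fit in the counter's bit field;
// with Saturate set the field is clamped to its maximum instead.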
5950 static bool
5951 encodeCnt(
5952 const AMDGPU::IsaVersion ISA,
5953 int64_t &IntVal,
5954 int64_t CntVal,
5955 bool Saturate,
5956 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5957 unsigned (*decode)(const IsaVersion &Version, unsigned))
5958 {
5959 bool Failed = false;
5960
5961 IntVal = encode(ISA, IntVal, CntVal);
5962 if (CntVal != decode(ISA, IntVal)) {
5963 if (Saturate) {
5964 IntVal = encode(ISA, IntVal, -1);
5965 } else {
5966 Failed = true;
5967 }
5968 }
5969 return Failed;
5970 }
5971
5972 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5973
5974 SMLoc CntLoc = getLoc();
5975 StringRef CntName = getTokenStr();
5976
5977 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5978 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5979 return false;
5980
5981 int64_t CntVal;
5982 SMLoc ValLoc = getLoc();
5983 if (!parseExpr(CntVal))
5984 return false;
5985
5986 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5987
5988 bool Failed = true;
5989 bool Sat = CntName.endswith("_sat");
5990
5991 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5992 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5993 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5994 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5995 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5996 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5997 } else {
5998 Error(CntLoc, "invalid counter name " + CntName);
5999 return false;
6000 }
6001
6002 if (Failed) {
6003 Error(ValLoc, "too large value for " + CntName);
6004 return false;
6005 }
6006
6007 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6008 return false;
6009
6010 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6011 if (isToken(AsmToken::EndOfStatement)) {
6012 Error(getLoc(), "expected a counter name");
6013 return false;
6014 }
6015 }
6016
6017 return true;
6018 }
6019
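// Parse the s_waitcnt operand, either as a list of counter terms such as
// "vmcnt(0) & lgkmcnt(0)" or as a plain integer expression.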
6020 OperandMatchResultTy
6021 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6022 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6023 int64_t Waitcnt = getWaitcntBitMask(ISA);
6024 SMLoc S = getLoc();
6025
6026 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6027 while (!isToken(AsmToken::EndOfStatement)) {
6028 if (!parseCnt(Waitcnt))
6029 return MatchOperand_ParseFail;
6030 }
6031 } else {
6032 if (!parseExpr(Waitcnt))
6033 return MatchOperand_ParseFail;
6034 }
6035
6036 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6037 return MatchOperand_Success;
6038 }
6039
6040 bool
6041 AMDGPUOperand::isSWaitCnt() const {
6042 return isImm();
6043 }
6044
6045 //===----------------------------------------------------------------------===//
6046 // hwreg
6047 //===----------------------------------------------------------------------===//
6048
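// Parse the body of a hwreg(...) operand:
//   hwreg(<name or id>[, <bit offset>, <bit width>])
// e.g. "hwreg(HW_REG_TRAPSTS, 0, 16)".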
6049 bool
6050 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6051 OperandInfoTy &Offset,
6052 OperandInfoTy &Width) {
6053 using namespace llvm::AMDGPU::Hwreg;
6054
6055 // The register may be specified by name or using a numeric code
6056 HwReg.Loc = getLoc();
6057 if (isToken(AsmToken::Identifier) &&
6058 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6059 HwReg.IsSymbolic = true;
6060 lex(); // skip register name
6061 } else if (!parseExpr(HwReg.Id, "a register name")) {
6062 return false;
6063 }
6064
6065 if (trySkipToken(AsmToken::RParen))
6066 return true;
6067
6068 // parse optional params
6069 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6070 return false;
6071
6072 Offset.Loc = getLoc();
6073 if (!parseExpr(Offset.Id))
6074 return false;
6075
6076 if (!skipToken(AsmToken::Comma, "expected a comma"))
6077 return false;
6078
6079 Width.Loc = getLoc();
6080 return parseExpr(Width.Id) &&
6081 skipToken(AsmToken::RParen, "expected a closing parenthesis");
6082 }
6083
6084 bool
6085 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6086 const OperandInfoTy &Offset,
6087 const OperandInfoTy &Width) {
6088
6089 using namespace llvm::AMDGPU::Hwreg;
6090
6091 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6092 Error(HwReg.Loc,
6093 "specified hardware register is not supported on this GPU");
6094 return false;
6095 }
6096 if (!isValidHwreg(HwReg.Id)) {
6097 Error(HwReg.Loc,
6098 "invalid code of hardware register: only 6-bit values are legal");
6099 return false;
6100 }
6101 if (!isValidHwregOffset(Offset.Id)) {
6102 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6103 return false;
6104 }
6105 if (!isValidHwregWidth(Width.Id)) {
6106 Error(Width.Loc,
6107 "invalid bitfield width: only values from 1 to 32 are legal");
6108 return false;
6109 }
6110 return true;
6111 }
6112
6113 OperandMatchResultTy
6114 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6115 using namespace llvm::AMDGPU::Hwreg;
6116
6117 int64_t ImmVal = 0;
6118 SMLoc Loc = getLoc();
6119
6120 if (trySkipId("hwreg", AsmToken::LParen)) {
6121 OperandInfoTy HwReg(ID_UNKNOWN_);
6122 OperandInfoTy Offset(OFFSET_DEFAULT_);
6123 OperandInfoTy Width(WIDTH_DEFAULT_);
6124 if (parseHwregBody(HwReg, Offset, Width) &&
6125 validateHwreg(HwReg, Offset, Width)) {
6126 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6127 } else {
6128 return MatchOperand_ParseFail;
6129 }
6130 } else if (parseExpr(ImmVal, "a hwreg macro")) {
6131 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6132 Error(Loc, "invalid immediate: only 16-bit values are legal");
6133 return MatchOperand_ParseFail;
6134 }
6135 } else {
6136 return MatchOperand_ParseFail;
6137 }
6138
6139 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6140 return MatchOperand_Success;
6141 }
6142
6143 bool AMDGPUOperand::isHwreg() const {
6144 return isImmTy(ImmTyHwreg);
6145 }
6146
6147 //===----------------------------------------------------------------------===//
6148 // sendmsg
6149 //===----------------------------------------------------------------------===//
6150
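// Parse the body of a sendmsg(...) operand:
//   sendmsg(<msg name or id>[, <operation>[, <stream id>]])
// e.g. "sendmsg(MSG_GS, GS_OP_EMIT, 0)".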
6151 bool
6152 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6153 OperandInfoTy &Op,
6154 OperandInfoTy &Stream) {
6155 using namespace llvm::AMDGPU::SendMsg;
6156
6157 Msg.Loc = getLoc();
6158 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6159 Msg.IsSymbolic = true;
6160 lex(); // skip message name
6161 } else if (!parseExpr(Msg.Id, "a message name")) {
6162 return false;
6163 }
6164
6165 if (trySkipToken(AsmToken::Comma)) {
6166 Op.IsDefined = true;
6167 Op.Loc = getLoc();
6168 if (isToken(AsmToken::Identifier) &&
6169 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6170 lex(); // skip operation name
6171 } else if (!parseExpr(Op.Id, "an operation name")) {
6172 return false;
6173 }
6174
6175 if (trySkipToken(AsmToken::Comma)) {
6176 Stream.IsDefined = true;
6177 Stream.Loc = getLoc();
6178 if (!parseExpr(Stream.Id))
6179 return false;
6180 }
6181 }
6182
6183 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6184 }
6185
6186 bool
6187 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6188 const OperandInfoTy &Op,
6189 const OperandInfoTy &Stream) {
6190 using namespace llvm::AMDGPU::SendMsg;
6191
6192 // Validation strictness depends on whether the message is specified
6193 // in a symbolic or in a numeric form. In the latter case
6194 // only the encoding possibility is checked.
6195 bool Strict = Msg.IsSymbolic;
6196
6197 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6198 Error(Msg.Loc, "invalid message id");
6199 return false;
6200 }
6201 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6202 if (Op.IsDefined) {
6203 Error(Op.Loc, "message does not support operations");
6204 } else {
6205 Error(Msg.Loc, "missing message operation");
6206 }
6207 return false;
6208 }
6209 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6210 Error(Op.Loc, "invalid operation id");
6211 return false;
6212 }
6213 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6214 Error(Stream.Loc, "message operation does not support streams");
6215 return false;
6216 }
6217 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6218 Error(Stream.Loc, "invalid message stream id");
6219 return false;
6220 }
6221 return true;
6222 }
6223
6224 OperandMatchResultTy
6225 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6226 using namespace llvm::AMDGPU::SendMsg;
6227
6228 int64_t ImmVal = 0;
6229 SMLoc Loc = getLoc();
6230
6231 if (trySkipId("sendmsg", AsmToken::LParen)) {
6232 OperandInfoTy Msg(ID_UNKNOWN_);
6233 OperandInfoTy Op(OP_NONE_);
6234 OperandInfoTy Stream(STREAM_ID_NONE_);
6235 if (parseSendMsgBody(Msg, Op, Stream) &&
6236 validateSendMsg(Msg, Op, Stream)) {
6237 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6238 } else {
6239 return MatchOperand_ParseFail;
6240 }
6241 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6242 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6243 Error(Loc, "invalid immediate: only 16-bit values are legal");
6244 return MatchOperand_ParseFail;
6245 }
6246 } else {
6247 return MatchOperand_ParseFail;
6248 }
6249
6250 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6251 return MatchOperand_Success;
6252 }
6253
6254 bool AMDGPUOperand::isSendMsg() const {
6255 return isImmTy(ImmTySendMsg);
6256 }
6257
6258 //===----------------------------------------------------------------------===//
6259 // v_interp
6260 //===----------------------------------------------------------------------===//
6261
6262 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6263 StringRef Str;
6264 SMLoc S = getLoc();
6265
6266 if (!parseId(Str))
6267 return MatchOperand_NoMatch;
6268
6269 int Slot = StringSwitch<int>(Str)
6270 .Case("p10", 0)
6271 .Case("p20", 1)
6272 .Case("p0", 2)
6273 .Default(-1);
6274
6275 if (Slot == -1) {
6276 Error(S, "invalid interpolation slot");
6277 return MatchOperand_ParseFail;
6278 }
6279
6280 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6281 AMDGPUOperand::ImmTyInterpSlot));
6282 return MatchOperand_Success;
6283 }
6284
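// Parse an interpolation attribute of the form "attr<N>.<chan>", e.g.
// "attr0.x", producing separate attribute and channel immediates.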
6285 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6286 StringRef Str;
6287 SMLoc S = getLoc();
6288
6289 if (!parseId(Str))
6290 return MatchOperand_NoMatch;
6291
6292 if (!Str.startswith("attr")) {
6293 Error(S, "invalid interpolation attribute");
6294 return MatchOperand_ParseFail;
6295 }
6296
6297 StringRef Chan = Str.take_back(2);
6298 int AttrChan = StringSwitch<int>(Chan)
6299 .Case(".x", 0)
6300 .Case(".y", 1)
6301 .Case(".z", 2)
6302 .Case(".w", 3)
6303 .Default(-1);
6304 if (AttrChan == -1) {
6305 Error(S, "invalid or missing interpolation attribute channel");
6306 return MatchOperand_ParseFail;
6307 }
6308
6309 Str = Str.drop_back(2).drop_front(4);
6310
6311 uint8_t Attr;
6312 if (Str.getAsInteger(10, Attr)) {
6313 Error(S, "invalid or missing interpolation attribute number");
6314 return MatchOperand_ParseFail;
6315 }
6316
6317 if (Attr > 63) {
6318 Error(S, "out of bounds interpolation attribute number");
6319 return MatchOperand_ParseFail;
6320 }
6321
6322 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6323
6324 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6325 AMDGPUOperand::ImmTyInterpAttr));
6326 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6327 AMDGPUOperand::ImmTyAttrChan));
6328 return MatchOperand_Success;
6329 }
6330
6331 //===----------------------------------------------------------------------===//
6332 // exp
6333 //===----------------------------------------------------------------------===//
6334
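// Parse an export target operand, e.g. "mrt0", "pos0" or "param1", and
// reject targets that the current GPU does not support.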
6335 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6336 using namespace llvm::AMDGPU::Exp;
6337
6338 StringRef Str;
6339 SMLoc S = getLoc();
6340
6341 if (!parseId(Str))
6342 return MatchOperand_NoMatch;
6343
6344 unsigned Id = getTgtId(Str);
6345 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6346 Error(S, (Id == ET_INVALID) ?
6347 "invalid exp target" :
6348 "exp target is not supported on this GPU");
6349 return MatchOperand_ParseFail;
6350 }
6351
6352 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6353 AMDGPUOperand::ImmTyExpTgt));
6354 return MatchOperand_Success;
6355 }
6356
6357 //===----------------------------------------------------------------------===//
6358 // parser helpers
6359 //===----------------------------------------------------------------------===//
6360
6361 bool
6362 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6363 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6364 }
6365
6366 bool
6367 AMDGPUAsmParser::isId(const StringRef Id) const {
6368 return isId(getToken(), Id);
6369 }
6370
6371 bool
6372 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6373 return getTokenKind() == Kind;
6374 }
6375
6376 bool
6377 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6378 if (isId(Id)) {
6379 lex();
6380 return true;
6381 }
6382 return false;
6383 }
6384
6385 bool
6386 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6387 if (isToken(AsmToken::Identifier)) {
6388 StringRef Tok = getTokenStr();
6389 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6390 lex();
6391 return true;
6392 }
6393 }
6394 return false;
6395 }
6396
6397 bool
6398 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6399 if (isId(Id) && peekToken().is(Kind)) {
6400 lex();
6401 lex();
6402 return true;
6403 }
6404 return false;
6405 }
6406
6407 bool
6408 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6409 if (isToken(Kind)) {
6410 lex();
6411 return true;
6412 }
6413 return false;
6414 }
6415
6416 bool
6417 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6418 const StringRef ErrMsg) {
6419 if (!trySkipToken(Kind)) {
6420 Error(getLoc(), ErrMsg);
6421 return false;
6422 }
6423 return true;
6424 }
6425
6426 bool
6427 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6428 SMLoc S = getLoc();
6429
6430 const MCExpr *Expr;
6431 if (Parser.parseExpression(Expr))
6432 return false;
6433
6434 if (Expr->evaluateAsAbsolute(Imm))
6435 return true;
6436
6437 if (Expected.empty()) {
6438 Error(S, "expected absolute expression");
6439 } else {
6440 Error(S, Twine("expected ", Expected) +
6441 Twine(" or an absolute expression"));
6442 }
6443 return false;
6444 }
6445
6446 bool
6447 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6448 SMLoc S = getLoc();
6449
6450 const MCExpr *Expr;
6451 if (Parser.parseExpression(Expr))
6452 return false;
6453
6454 int64_t IntVal;
6455 if (Expr->evaluateAsAbsolute(IntVal)) {
6456 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6457 } else {
6458 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6459 }
6460 return true;
6461 }
6462
6463 bool
6464 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6465 if (isToken(AsmToken::String)) {
6466 Val = getToken().getStringContents();
6467 lex();
6468 return true;
6469 } else {
6470 Error(getLoc(), ErrMsg);
6471 return false;
6472 }
6473 }
6474
6475 bool
6476 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6477 if (isToken(AsmToken::Identifier)) {
6478 Val = getTokenStr();
6479 lex();
6480 return true;
6481 } else {
6482 if (!ErrMsg.empty())
6483 Error(getLoc(), ErrMsg);
6484 return false;
6485 }
6486 }
6487
6488 AsmToken
6489 AMDGPUAsmParser::getToken() const {
6490 return Parser.getTok();
6491 }
6492
6493 AsmToken
6494 AMDGPUAsmParser::peekToken() {
6495 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6496 }
6497
6498 void
6499 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6500 auto TokCount = getLexer().peekTokens(Tokens);
6501
6502 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6503 Tokens[Idx] = AsmToken(AsmToken::Error, "");
6504 }
6505
6506 AsmToken::TokenKind
6507 AMDGPUAsmParser::getTokenKind() const {
6508 return getLexer().getKind();
6509 }
6510
6511 SMLoc
6512 AMDGPUAsmParser::getLoc() const {
6513 return getToken().getLoc();
6514 }
6515
6516 StringRef
6517 AMDGPUAsmParser::getTokenStr() const {
6518 return getToken().getString();
6519 }
6520
6521 void
6522 AMDGPUAsmParser::lex() {
6523 Parser.Lex();
6524 }
6525
6526 SMLoc
6527 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6528 const OperandVector &Operands) const {
6529 for (unsigned i = Operands.size() - 1; i > 0; --i) {
6530 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6531 if (Test(Op))
6532 return Op.getStartLoc();
6533 }
6534 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6535 }
6536
6537 SMLoc
6538 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6539 const OperandVector &Operands) const {
6540 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6541 return getOperandLoc(Test, Operands);
6542 }
6543
6544 SMLoc
6545 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6546 const OperandVector &Operands) const {
6547 auto Test = [=](const AMDGPUOperand& Op) {
6548 return Op.isRegKind() && Op.getReg() == Reg;
6549 };
6550 return getOperandLoc(Test, Operands);
6551 }
6552
6553 SMLoc
6554 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6555 auto Test = [](const AMDGPUOperand& Op) {
6556 return Op.IsImmKindLiteral() || Op.isExpr();
6557 };
6558 return getOperandLoc(Test, Operands);
6559 }
6560
6561 SMLoc
6562 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6563 auto Test = [](const AMDGPUOperand& Op) {
6564 return Op.isImmKindConst();
6565 };
6566 return getOperandLoc(Test, Operands);
6567 }
6568
6569 //===----------------------------------------------------------------------===//
6570 // swizzle
6571 //===----------------------------------------------------------------------===//
6572
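// Swizzle helpers. In BITMASK_PERM mode the source lane for each thread is
// effectively computed (within a group of 32 lanes) as:
//   dst_lane = ((lane & AndMask) | OrMask) ^ XorMask
// The broadcast/swap/reverse shorthands below are all encoded by choosing
// suitable AndMask/OrMask/XorMask values.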
6573 LLVM_READNONE
6574 static unsigned
6575 encodeBitmaskPerm(const unsigned AndMask,
6576 const unsigned OrMask,
6577 const unsigned XorMask) {
6578 using namespace llvm::AMDGPU::Swizzle;
6579
6580 return BITMASK_PERM_ENC |
6581 (AndMask << BITMASK_AND_SHIFT) |
6582 (OrMask << BITMASK_OR_SHIFT) |
6583 (XorMask << BITMASK_XOR_SHIFT);
6584 }
6585
6586 bool
6587 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6588 const unsigned MinVal,
6589 const unsigned MaxVal,
6590 const StringRef ErrMsg,
6591 SMLoc &Loc) {
6592 if (!skipToken(AsmToken::Comma, "expected a comma")) {
6593 return false;
6594 }
6595 Loc = getLoc();
6596 if (!parseExpr(Op)) {
6597 return false;
6598 }
6599 if (Op < MinVal || Op > MaxVal) {
6600 Error(Loc, ErrMsg);
6601 return false;
6602 }
6603
6604 return true;
6605 }
6606
6607 bool
6608 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6609 const unsigned MinVal,
6610 const unsigned MaxVal,
6611 const StringRef ErrMsg) {
6612 SMLoc Loc;
6613 for (unsigned i = 0; i < OpNum; ++i) {
6614 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6615 return false;
6616 }
6617
6618 return true;
6619 }
6620
6621 bool
6622 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6623 using namespace llvm::AMDGPU::Swizzle;
6624
6625 int64_t Lane[LANE_NUM];
6626 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6627 "expected a 2-bit lane id")) {
6628 Imm = QUAD_PERM_ENC;
6629 for (unsigned I = 0; I < LANE_NUM; ++I) {
6630 Imm |= Lane[I] << (LANE_SHIFT * I);
6631 }
6632 return true;
6633 }
6634 return false;
6635 }
6636
6637 bool
6638 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6639 using namespace llvm::AMDGPU::Swizzle;
6640
6641 SMLoc Loc;
6642 int64_t GroupSize;
6643 int64_t LaneIdx;
6644
6645 if (!parseSwizzleOperand(GroupSize,
6646 2, 32,
6647 "group size must be in the interval [2,32]",
6648 Loc)) {
6649 return false;
6650 }
6651 if (!isPowerOf2_64(GroupSize)) {
6652 Error(Loc, "group size must be a power of two");
6653 return false;
6654 }
6655 if (parseSwizzleOperand(LaneIdx,
6656 0, GroupSize - 1,
6657 "lane id must be in the interval [0,group size - 1]",
6658 Loc)) {
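    // Broadcast: AndMask keeps the bits that select the group and clears the
    // low log2(GroupSize) bits, then OrMask = LaneIdx makes every lane in the
    // group read from the group's LaneIdx-th lane.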
6659 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6660 return true;
6661 }
6662 return false;
6663 }
6664
6665 bool
6666 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6667 using namespace llvm::AMDGPU::Swizzle;
6668
6669 SMLoc Loc;
6670 int64_t GroupSize;
6671
6672 if (!parseSwizzleOperand(GroupSize,
6673 2, 32,
6674 "group size must be in the interval [2,32]",
6675 Loc)) {
6676 return false;
6677 }
6678 if (!isPowerOf2_64(GroupSize)) {
6679 Error(Loc, "group size must be a power of two");
6680 return false;
6681 }
6682
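  // Reverse: XOR with (GroupSize - 1) flips the low log2(GroupSize) bits of
  // the lane id, reversing the lane order within each group.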
6683 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6684 return true;
6685 }
6686
6687 bool
6688 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6689 using namespace llvm::AMDGPU::Swizzle;
6690
6691 SMLoc Loc;
6692 int64_t GroupSize;
6693
6694 if (!parseSwizzleOperand(GroupSize,
6695 1, 16,
6696 "group size must be in the interval [1,16]",
6697 Loc)) {
6698 return false;
6699 }
6700 if (!isPowerOf2_64(GroupSize)) {
6701 Error(Loc, "group size must be a power of two");
6702 return false;
6703 }
6704
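  // Swap: XOR with GroupSize flips the bit that selects between two adjacent
  // groups, swapping each group of GroupSize lanes with its neighbour.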
6705 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6706 return true;
6707 }
6708
6709 bool
6710 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6711 using namespace llvm::AMDGPU::Swizzle;
6712
6713 if (!skipToken(AsmToken::Comma, "expected a comma")) {
6714 return false;
6715 }
6716
6717 StringRef Ctl;
6718 SMLoc StrLoc = getLoc();
6719 if (!parseString(Ctl)) {
6720 return false;
6721 }
6722 if (Ctl.size() != BITMASK_WIDTH) {
6723 Error(StrLoc, "expected a 5-character mask");
6724 return false;
6725 }
6726
6727 unsigned AndMask = 0;
6728 unsigned OrMask = 0;
6729 unsigned XorMask = 0;
6730
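  // Each mask character controls one bit of the lane id; the leftmost
  // character controls the most significant bit. '0' forces the bit to 0,
  // '1' forces it to 1, 'p' preserves it, and 'i' inverts it.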
6731 for (size_t i = 0; i < Ctl.size(); ++i) {
6732 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6733 switch(Ctl[i]) {
6734 default:
6735 Error(StrLoc, "invalid mask");
6736 return false;
6737 case '0':
6738 break;
6739 case '1':
6740 OrMask |= Mask;
6741 break;
6742 case 'p':
6743 AndMask |= Mask;
6744 break;
6745 case 'i':
6746 AndMask |= Mask;
6747 XorMask |= Mask;
6748 break;
6749 }
6750 }
6751
6752 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6753 return true;
6754 }
6755
6756 bool
6757 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6758
6759 SMLoc OffsetLoc = getLoc();
6760
6761 if (!parseExpr(Imm, "a swizzle macro")) {
6762 return false;
6763 }
6764 if (!isUInt<16>(Imm)) {
6765 Error(OffsetLoc, "expected a 16-bit offset");
6766 return false;
6767 }
6768 return true;
6769 }
6770
6771 bool
6772 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6773 using namespace llvm::AMDGPU::Swizzle;
6774
6775 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6776
6777 SMLoc ModeLoc = getLoc();
6778 bool Ok = false;
6779
6780 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6781 Ok = parseSwizzleQuadPerm(Imm);
6782 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6783 Ok = parseSwizzleBitmaskPerm(Imm);
6784 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6785 Ok = parseSwizzleBroadcast(Imm);
6786 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6787 Ok = parseSwizzleSwap(Imm);
6788 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6789 Ok = parseSwizzleReverse(Imm);
6790 } else {
6791 Error(ModeLoc, "expected a swizzle mode");
6792 }
6793
6794 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6795 }
6796
6797 return false;
6798 }
6799
6800 OperandMatchResultTy
6801 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6802 SMLoc S = getLoc();
6803 int64_t Imm = 0;
6804
6805 if (trySkipId("offset")) {
6806
6807 bool Ok = false;
6808 if (skipToken(AsmToken::Colon, "expected a colon")) {
6809 if (trySkipId("swizzle")) {
6810 Ok = parseSwizzleMacro(Imm);
6811 } else {
6812 Ok = parseSwizzleOffset(Imm);
6813 }
6814 }
6815
6816 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6817
6818 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6819 } else {
6820 // Swizzle "offset" operand is optional.
6821 // If it is omitted, try parsing other optional operands.
6822 return parseOptionalOpr(Operands);
6823 }
6824 }
6825
6826 bool
6827 AMDGPUOperand::isSwizzle() const {
6828 return isImmTy(ImmTySwizzle);
6829 }
6830
6831 //===----------------------------------------------------------------------===//
6832 // VGPR Index Mode
6833 //===----------------------------------------------------------------------===//
6834
6835 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6836
6837 using namespace llvm::AMDGPU::VGPRIndexMode;
6838
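  // gpr_idx(...) takes a comma-separated list of index modes; each mode name
  // sets one bit of the 4-bit mode mask, and an empty list "()" means OFF.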
6839 if (trySkipToken(AsmToken::RParen)) {
6840 return OFF;
6841 }
6842
6843 int64_t Imm = 0;
6844
6845 while (true) {
6846 unsigned Mode = 0;
6847 SMLoc S = getLoc();
6848
6849 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6850 if (trySkipId(IdSymbolic[ModeId])) {
6851 Mode = 1 << ModeId;
6852 break;
6853 }
6854 }
6855
6856 if (Mode == 0) {
6857 Error(S, (Imm == 0)?
6858 "expected a VGPR index mode or a closing parenthesis" :
6859 "expected a VGPR index mode");
6860 return UNDEF;
6861 }
6862
6863 if (Imm & Mode) {
6864 Error(S, "duplicate VGPR index mode");
6865 return UNDEF;
6866 }
6867 Imm |= Mode;
6868
6869 if (trySkipToken(AsmToken::RParen))
6870 break;
6871 if (!skipToken(AsmToken::Comma,
6872 "expected a comma or a closing parenthesis"))
6873 return UNDEF;
6874 }
6875
6876 return Imm;
6877 }
6878
6879 OperandMatchResultTy
6880 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6881
6882 using namespace llvm::AMDGPU::VGPRIndexMode;
6883
6884 int64_t Imm = 0;
6885 SMLoc S = getLoc();
6886
6887 if (trySkipId("gpr_idx", AsmToken::LParen)) {
6888 Imm = parseGPRIdxMacro();
6889 if (Imm == UNDEF)
6890 return MatchOperand_ParseFail;
6891 } else {
6892 if (getParser().parseAbsoluteExpression(Imm))
6893 return MatchOperand_ParseFail;
6894 if (Imm < 0 || !isUInt<4>(Imm)) {
6895 Error(S, "invalid immediate: only 4-bit values are legal");
6896 return MatchOperand_ParseFail;
6897 }
6898 }
6899
6900 Operands.push_back(
6901 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6902 return MatchOperand_Success;
6903 }
6904
6905 bool AMDGPUOperand::isGPRIdxMode() const {
6906 return isImmTy(ImmTyGprIdxMode);
6907 }
6908
6909 //===----------------------------------------------------------------------===//
6910 // sopp branch targets
6911 //===----------------------------------------------------------------------===//
6912
6913 OperandMatchResultTy
6914 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6915
6916 // Make sure we are not parsing something
6917 // that looks like a label or an expression but is not.
6918 // This will improve error messages.
6919 if (isRegister() || isModifier())
6920 return MatchOperand_NoMatch;
6921
6922 if (!parseExpr(Operands))
6923 return MatchOperand_ParseFail;
6924
6925 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6926 assert(Opr.isImm() || Opr.isExpr());
6927 SMLoc Loc = Opr.getStartLoc();
6928
6929 // Currently we do not support arbitrary expressions as branch targets.
6930 // Only labels and absolute expressions are accepted.
6931 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6932 Error(Loc, "expected an absolute expression or a label");
6933 } else if (Opr.isImm() && !Opr.isS16Imm()) {
6934 Error(Loc, "expected a 16-bit signed jump offset");
6935 }
6936
6937 return MatchOperand_Success;
6938 }
6939
6940 //===----------------------------------------------------------------------===//
6941 // Boolean holding registers
6942 //===----------------------------------------------------------------------===//
6943
6944 OperandMatchResultTy
6945 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6946 return parseReg(Operands);
6947 }
6948
6949 //===----------------------------------------------------------------------===//
6950 // mubuf
6951 //===----------------------------------------------------------------------===//
6952
6953 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6954 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6955 }
6956
6957 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6958 const OperandVector &Operands,
6959 bool IsAtomic,
6960 bool IsLds) {
6961 bool IsLdsOpcode = IsLds;
6962 bool HasLdsModifier = false;
6963 OptionalImmIndexMap OptionalIdx;
6964 unsigned FirstOperandIdx = 1;
6965 bool IsAtomicReturn = false;
6966
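  // Buffer atomics return the pre-op value only when GLC is set. If GLC is
  // absent, switch to the non-returning opcode variant (when one exists) and
  // then re-derive IsAtomicReturn from the selected opcode's flags.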
6967 if (IsAtomic) {
6968 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6969 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6970 if (!Op.isCPol())
6971 continue;
6972 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6973 break;
6974 }
6975
6976 if (!IsAtomicReturn) {
6977 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6978 if (NewOpc != -1)
6979 Inst.setOpcode(NewOpc);
6980 }
6981
6982 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
6983 SIInstrFlags::IsAtomicRet;
6984 }
6985
6986 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6987 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6988
6989 // Add the register arguments
6990 if (Op.isReg()) {
6991 Op.addRegOperands(Inst, 1);
6992 // Insert a tied src for atomic return dst.
6993 // This cannot be postponed as subsequent calls to
6994 // addImmOperands rely on correct number of MC operands.
6995 if (IsAtomicReturn && i == FirstOperandIdx)
6996 Op.addRegOperands(Inst, 1);
6997 continue;
6998 }
6999
7000 // Handle the case where soffset is an immediate
7001 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7002 Op.addImmOperands(Inst, 1);
7003 continue;
7004 }
7005
7006 HasLdsModifier |= Op.isLDS();
7007
7008 // Handle tokens like 'offen' which are sometimes hard-coded into the
7009 // asm string. There are no MCInst operands for these.
7010 if (Op.isToken()) {
7011 continue;
7012 }
7013 assert(Op.isImm());
7014
7015 // Handle optional arguments
7016 OptionalIdx[Op.getImmTy()] = i;
7017 }
7018
7019 // This is a workaround for an llvm quirk which may result in an
7020 // incorrect instruction selection. Lds and non-lds versions of
7021 // MUBUF instructions are identical except that lds versions
7022 // have a mandatory 'lds' modifier. However, this modifier follows
7023 // the optional modifiers, and the llvm asm matcher regards it as
7024 // optional too. As a result, an lds version of an opcode may be
7025 // selected even if it has no 'lds' modifier.
7026 if (IsLdsOpcode && !HasLdsModifier) {
7027 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7028 if (NoLdsOpcode != -1) { // Got lds version - correct it.
7029 Inst.setOpcode(NoLdsOpcode);
7030 IsLdsOpcode = false;
7031 }
7032 }
7033
7034 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7035 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7036
7037 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7038 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7039 }
7040 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7041 }
7042
7043 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7044 OptionalImmIndexMap OptionalIdx;
7045
7046 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7047 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7048
7049 // Add the register arguments
7050 if (Op.isReg()) {
7051 Op.addRegOperands(Inst, 1);
7052 continue;
7053 }
7054
7055 // Handle the case where soffset is an immediate
7056 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7057 Op.addImmOperands(Inst, 1);
7058 continue;
7059 }
7060
7061 // Handle tokens like 'offen' which are sometimes hard-coded into the
7062 // asm string. There are no MCInst operands for these.
7063 if (Op.isToken()) {
7064 continue;
7065 }
7066 assert(Op.isImm());
7067
7068 // Handle optional arguments
7069 OptionalIdx[Op.getImmTy()] = i;
7070 }
7071
7072 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7073 AMDGPUOperand::ImmTyOffset);
7074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7077 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7078 }
7079
7080 //===----------------------------------------------------------------------===//
7081 // mimg
7082 //===----------------------------------------------------------------------===//
7083
7084 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7085 bool IsAtomic) {
7086 unsigned I = 1;
7087 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7088 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7089 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7090 }
7091
7092 if (IsAtomic) {
7093 // Add src, same as dst
7094 assert(Desc.getNumDefs() == 1);
7095 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7096 }
7097
7098 OptionalImmIndexMap OptionalIdx;
7099
7100 for (unsigned E = Operands.size(); I != E; ++I) {
7101 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7102
7103 // Add the register arguments
7104 if (Op.isReg()) {
7105 Op.addRegOperands(Inst, 1);
7106 } else if (Op.isImmModifier()) {
7107 OptionalIdx[Op.getImmTy()] = I;
7108 } else if (!Op.isToken()) {
7109 llvm_unreachable("unexpected operand type");
7110 }
7111 }
7112
7113 bool IsGFX10Plus = isGFX10Plus();
7114
7115 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7116 if (IsGFX10Plus)
7117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7119 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7121 if (IsGFX10Plus)
7122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7123 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7125 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7126 if (!IsGFX10Plus)
7127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7129 }
7130
7131 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7132 cvtMIMG(Inst, Operands, true);
7133 }
7134
7135 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7136 OptionalImmIndexMap OptionalIdx;
7137 bool IsAtomicReturn = false;
7138
7139 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7140 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7141 if (!Op.isCPol())
7142 continue;
7143 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7144 break;
7145 }
7146
7147 if (!IsAtomicReturn) {
7148 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7149 if (NewOpc != -1)
7150 Inst.setOpcode(NewOpc);
7151 }
7152
7153 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7154 SIInstrFlags::IsAtomicRet;
7155
7156 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7157 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7158
7159 // Add the register arguments
7160 if (Op.isReg()) {
7161 Op.addRegOperands(Inst, 1);
7162 if (IsAtomicReturn && i == 1)
7163 Op.addRegOperands(Inst, 1);
7164 continue;
7165 }
7166
7167 // Handle the case where soffset is an immediate
7168 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7169 Op.addImmOperands(Inst, 1);
7170 continue;
7171 }
7172
7173 // Handle tokens like 'offen' which are sometimes hard-coded into the
7174 // asm string. There are no MCInst operands for these.
7175 if (Op.isToken()) {
7176 continue;
7177 }
7178 assert(Op.isImm());
7179
7180 // Handle optional arguments
7181 OptionalIdx[Op.getImmTy()] = i;
7182 }
7183
7184 if ((int)Inst.getNumOperands() <=
7185 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7187 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7188 }
7189
7190 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7191 const OperandVector &Operands) {
7192 for (unsigned I = 1; I < Operands.size(); ++I) {
7193 auto &Operand = (AMDGPUOperand &)*Operands[I];
7194 if (Operand.isReg())
7195 Operand.addRegOperands(Inst, 1);
7196 }
7197
7198 Inst.addOperand(MCOperand::createImm(1)); // a16
7199 }
7200
7201 //===----------------------------------------------------------------------===//
7202 // smrd
7203 //===----------------------------------------------------------------------===//
7204
7205 bool AMDGPUOperand::isSMRDOffset8() const {
7206 return isImm() && isUInt<8>(getImm());
7207 }
7208
7209 bool AMDGPUOperand::isSMEMOffset() const {
7210 return isImm(); // Offset range is checked later by validator.
7211 }
7212
7213 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7214 // 32-bit literals are only supported on CI, and we only want to use them
7215 // when the offset does not fit in 8 bits.
7216 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7217 }
7218
7219 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7220 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7221 }
7222
7223 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7224 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7225 }
7226
7227 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7228 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7229 }
7230
7231 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7232 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7233 }
7234
7235 //===----------------------------------------------------------------------===//
7236 // vop3
7237 //===----------------------------------------------------------------------===//
7238
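// The omod hardware field encodes 0 = none, 1 = *2, 2 = *4, 3 = /2, so the
// "mul:N" / "div:N" values parsed from the source are remapped below.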
7239 static bool ConvertOmodMul(int64_t &Mul) {
7240 if (Mul != 1 && Mul != 2 && Mul != 4)
7241 return false;
7242
7243 Mul >>= 1;
7244 return true;
7245 }
7246
7247 static bool ConvertOmodDiv(int64_t &Div) {
7248 if (Div == 1) {
7249 Div = 0;
7250 return true;
7251 }
7252
7253 if (Div == 2) {
7254 Div = 3;
7255 return true;
7256 }
7257
7258 return false;
7259 }
7260
7261 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7262 // This is intentional and ensures compatibility with sp3.
7263 // See bug 35397 for details.
7264 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7265 if (BoundCtrl == 0 || BoundCtrl == 1) {
7266 BoundCtrl = 1;
7267 return true;
7268 }
7269 return false;
7270 }
7271
7272 // Note: the order in this table matches the order of operands in AsmString.
7273 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7274 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
7275 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
7276 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
7277 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7278 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7279 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
7280 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
7281 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
7282 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7283 {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
7284 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
7285 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
7286 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
7287 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
7288 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
7289 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7290 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
7291 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
7292 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
7293 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
7294 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
7295 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
7296 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
7297 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
7298 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7299 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7300 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7301 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
7302 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7303 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7304 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7305 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7306 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7307 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7308 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7309 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7310 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7311 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7312 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7313 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7314 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7315 };
7316
7317 void AMDGPUAsmParser::onBeginOfFile() {
7318 if (!getParser().getStreamer().getTargetStreamer() ||
7319 getSTI().getTargetTriple().getArch() == Triple::r600)
7320 return;
7321
7322 if (!getTargetStreamer().getTargetID())
7323 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7324
7325 if (isHsaAbiVersion3Or4(&getSTI()))
7326 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7327 }
7328
7329 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7330
7331 OperandMatchResultTy res = parseOptionalOpr(Operands);
7332
7333 // This is a hack to enable hardcoded mandatory operands which follow
7334 // optional operands.
7335 //
7336 // Current design assumes that all operands after the first optional operand
7337 // are also optional. However, the implementation of some instructions violates
7338 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7339 //
7340 // To alleviate this problem, we have to (implicitly) parse extra operands
7341 // to make sure autogenerated parser of custom operands never hit hardcoded
7342 // mandatory operands.
7343
7344 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7345 if (res != MatchOperand_Success ||
7346 isToken(AsmToken::EndOfStatement))
7347 break;
7348
7349 trySkipToken(AsmToken::Comma);
7350 res = parseOptionalOpr(Operands);
7351 }
7352
7353 return res;
7354 }
7355
7356 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7357 OperandMatchResultTy res;
7358 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7359 // try to parse any optional operand here
7360 if (Op.IsBit) {
7361 res = parseNamedBit(Op.Name, Operands, Op.Type);
7362 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7363 res = parseOModOperand(Operands);
7364 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7365 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7366 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7367 res = parseSDWASel(Operands, Op.Name, Op.Type);
7368 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7369 res = parseSDWADstUnused(Operands);
7370 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7371 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7372 Op.Type == AMDGPUOperand::ImmTyNegLo ||
7373 Op.Type == AMDGPUOperand::ImmTyNegHi) {
7374 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7375 Op.ConvertResult);
7376 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7377 res = parseDim(Operands);
7378 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7379 res = parseCPol(Operands);
7380 } else {
7381 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7382 }
7383 if (res != MatchOperand_NoMatch) {
7384 return res;
7385 }
7386 }
7387 return MatchOperand_NoMatch;
7388 }
7389
7390 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7391 StringRef Name = getTokenStr();
7392 if (Name == "mul") {
7393 return parseIntWithPrefix("mul", Operands,
7394 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7395 }
7396
7397 if (Name == "div") {
7398 return parseIntWithPrefix("div", Operands,
7399 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7400 }
7401
7402 return MatchOperand_NoMatch;
7403 }
7404
7405 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7406 cvtVOP3P(Inst, Operands);
7407
7408 int Opc = Inst.getOpcode();
7409
7410 int SrcNum;
7411 const int Ops[] = { AMDGPU::OpName::src0,
7412 AMDGPU::OpName::src1,
7413 AMDGPU::OpName::src2 };
7414 for (SrcNum = 0;
7415 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7416 ++SrcNum);
7417 assert(SrcNum > 0);
7418
7419 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7420 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7421
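  // For VOP3 op_sel the bit after the last source operand (bit SrcNum) selects
  // the high/low half of the destination; it is encoded as DST_OP_SEL in
  // src0_modifiers.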
7422 if ((OpSel & (1 << SrcNum)) != 0) {
7423 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7424 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7425 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7426 }
7427 }
7428
7429 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7430 // 1. This operand is input modifiers
7431 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7432 // 2. This is not last operand
7433 && Desc.NumOperands > (OpNum + 1)
7434 // 3. Next operand is register class
7435 && Desc.OpInfo[OpNum + 1].RegClass != -1
7436 // 4. Next register is not tied to any other operand
7437 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7438 }
7439
7440 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7441 {
7442 OptionalImmIndexMap OptionalIdx;
7443 unsigned Opc = Inst.getOpcode();
7444
7445 unsigned I = 1;
7446 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7447 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7448 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7449 }
7450
7451 for (unsigned E = Operands.size(); I != E; ++I) {
7452 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7453 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7454 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7455 } else if (Op.isInterpSlot() ||
7456 Op.isInterpAttr() ||
7457 Op.isAttrChan()) {
7458 Inst.addOperand(MCOperand::createImm(Op.getImm()));
7459 } else if (Op.isImmModifier()) {
7460 OptionalIdx[Op.getImmTy()] = I;
7461 } else {
7462 llvm_unreachable("unhandled operand type");
7463 }
7464 }
7465
7466 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7468 }
7469
7470 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7472 }
7473
7474 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7475 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7476 }
7477 }
7478
7479 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7480 OptionalImmIndexMap &OptionalIdx) {
7481 unsigned Opc = Inst.getOpcode();
7482
7483 unsigned I = 1;
7484 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7485 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7486 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7487 }
7488
7489 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7490 // This instruction has src modifiers
7491 for (unsigned E = Operands.size(); I != E; ++I) {
7492 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7493 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7494 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7495 } else if (Op.isImmModifier()) {
7496 OptionalIdx[Op.getImmTy()] = I;
7497 } else if (Op.isRegOrImm()) {
7498 Op.addRegOrImmOperands(Inst, 1);
7499 } else {
7500 llvm_unreachable("unhandled operand type");
7501 }
7502 }
7503 } else {
7504 // No src modifiers
7505 for (unsigned E = Operands.size(); I != E; ++I) {
7506 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7507 if (Op.isMod()) {
7508 OptionalIdx[Op.getImmTy()] = I;
7509 } else {
7510 Op.addRegOrImmOperands(Inst, 1);
7511 }
7512 }
7513 }
7514
7515 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7517 }
7518
7519 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7520 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7521 }
7522
7523 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7524 // they have a src2 register operand that is tied to the dst operand.
7525 // Modifiers are not allowed for this operand in the assembler, so
7526 // src2_modifiers must be 0.
7527 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7528 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7529 Opc == AMDGPU::V_MAC_F32_e64_vi ||
7530 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7531 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7532 Opc == AMDGPU::V_MAC_F16_e64_vi ||
7533 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7534 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7535 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7536 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7537 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7538 auto it = Inst.begin();
7539 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7540 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7541 ++it;
7542 // Copy the operand to ensure it's not invalidated when Inst grows.
7543 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7544 }
7545 }
7546
7547 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7548 OptionalImmIndexMap OptionalIdx;
7549 cvtVOP3(Inst, Operands, OptionalIdx);
7550 }
7551
7552 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7553 OptionalImmIndexMap &OptIdx) {
7554 const int Opc = Inst.getOpcode();
7555 const MCInstrDesc &Desc = MII.get(Opc);
7556
7557 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7558
7559 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7560 assert(!IsPacked);
7561 Inst.addOperand(Inst.getOperand(0));
7562 }
7563
7564 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7565 // instruction, and then figure out where to actually put the modifiers
7566
7567 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7568 if (OpSelIdx != -1) {
7569 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7570 }
7571
7572 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7573 if (OpSelHiIdx != -1) {
7574 int DefaultVal = IsPacked ? -1 : 0;
7575 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7576 DefaultVal);
7577 }
7578
7579 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7580 if (NegLoIdx != -1) {
7581 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7582 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7583 }
7584
7585 const int Ops[] = { AMDGPU::OpName::src0,
7586 AMDGPU::OpName::src1,
7587 AMDGPU::OpName::src2 };
7588 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7589 AMDGPU::OpName::src1_modifiers,
7590 AMDGPU::OpName::src2_modifiers };
7591
7592 unsigned OpSel = 0;
7593 unsigned OpSelHi = 0;
7594 unsigned NegLo = 0;
7595 unsigned NegHi = 0;
7596
7597 if (OpSelIdx != -1)
7598 OpSel = Inst.getOperand(OpSelIdx).getImm();
7599
7600 if (OpSelHiIdx != -1)
7601 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7602
7603 if (NegLoIdx != -1) {
7604 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7605 NegLo = Inst.getOperand(NegLoIdx).getImm();
7606 NegHi = Inst.getOperand(NegHiIdx).getImm();
7607 }
7608
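  // Fold the parsed op_sel/op_sel_hi/neg_lo/neg_hi arrays into the per-source
  // modifier operands, one bit per source, since the hardware encodes them there.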
7609 for (int J = 0; J < 3; ++J) {
7610 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7611 if (OpIdx == -1)
7612 break;
7613
7614 uint32_t ModVal = 0;
7615
7616 if ((OpSel & (1 << J)) != 0)
7617 ModVal |= SISrcMods::OP_SEL_0;
7618
7619 if ((OpSelHi & (1 << J)) != 0)
7620 ModVal |= SISrcMods::OP_SEL_1;
7621
7622 if ((NegLo & (1 << J)) != 0)
7623 ModVal |= SISrcMods::NEG;
7624
7625 if ((NegHi & (1 << J)) != 0)
7626 ModVal |= SISrcMods::NEG_HI;
7627
7628 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7629
7630 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7631 }
7632 }
7633
7634 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7635 OptionalImmIndexMap OptIdx;
7636 cvtVOP3(Inst, Operands, OptIdx);
7637 cvtVOP3P(Inst, Operands, OptIdx);
7638 }
7639
7640 //===----------------------------------------------------------------------===//
7641 // dpp
7642 //===----------------------------------------------------------------------===//
7643
7644 bool AMDGPUOperand::isDPP8() const {
7645 return isImmTy(ImmTyDPP8);
7646 }
7647
7648 bool AMDGPUOperand::isDPPCtrl() const {
7649 using namespace AMDGPU::DPP;
7650
7651 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7652 if (result) {
7653 int64_t Imm = getImm();
7654 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7655 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7656 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7657 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7658 (Imm == DppCtrl::WAVE_SHL1) ||
7659 (Imm == DppCtrl::WAVE_ROL1) ||
7660 (Imm == DppCtrl::WAVE_SHR1) ||
7661 (Imm == DppCtrl::WAVE_ROR1) ||
7662 (Imm == DppCtrl::ROW_MIRROR) ||
7663 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7664 (Imm == DppCtrl::BCAST15) ||
7665 (Imm == DppCtrl::BCAST31) ||
7666 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7667 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7668 }
7669 return false;
7670 }
7671
7672 //===----------------------------------------------------------------------===//
7673 // mAI
7674 //===----------------------------------------------------------------------===//
7675
7676 bool AMDGPUOperand::isBLGP() const {
7677 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7678 }
7679
7680 bool AMDGPUOperand::isCBSZ() const {
7681 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7682 }
7683
7684 bool AMDGPUOperand::isABID() const {
7685 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7686 }
7687
7688 bool AMDGPUOperand::isS16Imm() const {
7689 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7690 }
7691
7692 bool AMDGPUOperand::isU16Imm() const {
7693 return isImm() && isUInt<16>(getImm());
7694 }
7695
7696 //===----------------------------------------------------------------------===//
7697 // dim
7698 //===----------------------------------------------------------------------===//
7699
7700 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7701 // We want to allow "dim:1D" etc.,
7702 // but the initial 1 is tokenized as an integer.
7703 std::string Token;
7704 if (isToken(AsmToken::Integer)) {
7705 SMLoc Loc = getToken().getEndLoc();
7706 Token = std::string(getTokenStr());
7707 lex();
7708 if (getLoc() != Loc)
7709 return false;
7710 }
7711
7712 StringRef Suffix;
7713 if (!parseId(Suffix))
7714 return false;
7715 Token += Suffix;
7716
7717 StringRef DimId = Token;
7718 if (DimId.startswith("SQ_RSRC_IMG_"))
7719 DimId = DimId.drop_front(12);
7720
7721 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7722 if (!DimInfo)
7723 return false;
7724
7725 Encoding = DimInfo->Encoding;
7726 return true;
7727 }
7728
7729 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7730 if (!isGFX10Plus())
7731 return MatchOperand_NoMatch;
7732
7733 SMLoc S = getLoc();
7734
7735 if (!trySkipId("dim", AsmToken::Colon))
7736 return MatchOperand_NoMatch;
7737
7738 unsigned Encoding;
7739 SMLoc Loc = getLoc();
7740 if (!parseDimId(Encoding)) {
7741 Error(Loc, "invalid dim value");
7742 return MatchOperand_ParseFail;
7743 }
7744
7745 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7746 AMDGPUOperand::ImmTyDim));
7747 return MatchOperand_Success;
7748 }
7749
7750 //===----------------------------------------------------------------------===//
7751 // dpp
7752 //===----------------------------------------------------------------------===//
7753
7754 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7755 SMLoc S = getLoc();
7756
7757 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7758 return MatchOperand_NoMatch;
7759
7760 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7761
7762 int64_t Sels[8];
7763
7764 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7765 return MatchOperand_ParseFail;
7766
7767 for (size_t i = 0; i < 8; ++i) {
7768 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7769 return MatchOperand_ParseFail;
7770
7771 SMLoc Loc = getLoc();
7772 if (getParser().parseAbsoluteExpression(Sels[i]))
7773 return MatchOperand_ParseFail;
7774 if (0 > Sels[i] || 7 < Sels[i]) {
7775 Error(Loc, "expected a 3-bit value");
7776 return MatchOperand_ParseFail;
7777 }
7778 }
7779
7780 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7781 return MatchOperand_ParseFail;
7782
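  // Pack the eight 3-bit lane selectors into a single immediate,
  // selector i occupying bits [3*i+2 : 3*i].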
7783 unsigned DPP8 = 0;
7784 for (size_t i = 0; i < 8; ++i)
7785 DPP8 |= (Sels[i] << (i * 3));
7786
7787 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7788 return MatchOperand_Success;
7789 }
7790
7791 bool
7792 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7793 const OperandVector &Operands) {
7794 if (Ctrl == "row_newbcast")
7795 return isGFX90A();
7796
7797 if (Ctrl == "row_share" ||
7798 Ctrl == "row_xmask")
7799 return isGFX10Plus();
7800
7801 if (Ctrl == "wave_shl" ||
7802 Ctrl == "wave_shr" ||
7803 Ctrl == "wave_rol" ||
7804 Ctrl == "wave_ror" ||
7805 Ctrl == "row_bcast")
7806 return isVI() || isGFX9();
7807
7808 return Ctrl == "row_mirror" ||
7809 Ctrl == "row_half_mirror" ||
7810 Ctrl == "quad_perm" ||
7811 Ctrl == "row_shl" ||
7812 Ctrl == "row_shr" ||
7813 Ctrl == "row_ror";
7814 }
7815
7816 int64_t
7817 AMDGPUAsmParser::parseDPPCtrlPerm() {
7818 // quad_perm:[%d,%d,%d,%d]
7819
7820 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7821 return -1;
7822
7823 int64_t Val = 0;
7824 for (int i = 0; i < 4; ++i) {
7825 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7826 return -1;
7827
7828 int64_t Temp;
7829 SMLoc Loc = getLoc();
7830 if (getParser().parseAbsoluteExpression(Temp))
7831 return -1;
7832 if (Temp < 0 || Temp > 3) {
7833 Error(Loc, "expected a 2-bit value");
7834 return -1;
7835 }
7836
7837 Val += (Temp << i * 2);
7838 }
7839
7840 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7841 return -1;
7842
7843 return Val;
7844 }
7845
7846 int64_t
7847 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7848 using namespace AMDGPU::DPP;
7849
7850 // sel:%d
7851
7852 int64_t Val;
7853 SMLoc Loc = getLoc();
7854
7855 if (getParser().parseAbsoluteExpression(Val))
7856 return -1;
7857
7858 struct DppCtrlCheck {
7859 int64_t Ctrl;
7860 int Lo;
7861 int Hi;
7862 };
7863
7864 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7865 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
7866 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
7867 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
7868 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
7869 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
7870 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
7871 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
7872 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7873 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7874 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7875 .Default({-1, 0, 0});
7876
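  // A single-value control (Lo == Hi) encodes as the fixed enum value; ranged
  // controls OR the parsed value onto the base encoding (e.g. row_shl:N ->
  // ROW_SHL0 | N). row_bcast is special-cased: only 15 and 31 are valid.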
7877 bool Valid;
7878 if (Check.Ctrl == -1) {
7879 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7880 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7881 } else {
7882 Valid = Check.Lo <= Val && Val <= Check.Hi;
7883 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7884 }
7885
7886 if (!Valid) {
7887 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7888 return -1;
7889 }
7890
7891 return Val;
7892 }
7893
7894 OperandMatchResultTy
7895 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7896 using namespace AMDGPU::DPP;
7897
7898 if (!isToken(AsmToken::Identifier) ||
7899 !isSupportedDPPCtrl(getTokenStr(), Operands))
7900 return MatchOperand_NoMatch;
7901
7902 SMLoc S = getLoc();
7903 int64_t Val = -1;
7904 StringRef Ctrl;
7905
7906 parseId(Ctrl);
7907
7908 if (Ctrl == "row_mirror") {
7909 Val = DppCtrl::ROW_MIRROR;
7910 } else if (Ctrl == "row_half_mirror") {
7911 Val = DppCtrl::ROW_HALF_MIRROR;
7912 } else {
7913 if (skipToken(AsmToken::Colon, "expected a colon")) {
7914 if (Ctrl == "quad_perm") {
7915 Val = parseDPPCtrlPerm();
7916 } else {
7917 Val = parseDPPCtrlSel(Ctrl);
7918 }
7919 }
7920 }
7921
7922 if (Val == -1)
7923 return MatchOperand_ParseFail;
7924
7925 Operands.push_back(
7926 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7927 return MatchOperand_Success;
7928 }
7929
7930 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7931 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7932 }
7933
7934 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7935 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7936 }
7937
7938 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7939 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7940 }
7941
7942 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7943 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7944 }
7945
7946 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7947 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7948 }
7949
7950 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7951 OptionalImmIndexMap OptionalIdx;
7952
7953 unsigned I = 1;
7954 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7955 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7956 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7957 }
7958
7959 int Fi = 0;
7960 for (unsigned E = Operands.size(); I != E; ++I) {
7961 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7962 MCOI::TIED_TO);
7963 if (TiedTo != -1) {
7964 assert((unsigned)TiedTo < Inst.getNumOperands());
7965 // handle tied old or src2 for MAC instructions
7966 Inst.addOperand(Inst.getOperand(TiedTo));
7967 }
7968 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7969 // Add the register arguments
7970 if (Op.isReg() && validateVccOperand(Op.getReg())) {
7971 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7972 // Skip it.
7973 continue;
7974 }
7975
7976 if (IsDPP8) {
7977 if (Op.isDPP8()) {
7978 Op.addImmOperands(Inst, 1);
7979 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7980 Op.addRegWithFPInputModsOperands(Inst, 2);
7981 } else if (Op.isFI()) {
7982 Fi = Op.getImm();
7983 } else if (Op.isReg()) {
7984 Op.addRegOperands(Inst, 1);
7985 } else {
7986 llvm_unreachable("Invalid operand type");
7987 }
7988 } else {
7989 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7990 Op.addRegWithFPInputModsOperands(Inst, 2);
7991 } else if (Op.isDPPCtrl()) {
7992 Op.addImmOperands(Inst, 1);
7993 } else if (Op.isImm()) {
7994 // Handle optional arguments
7995 OptionalIdx[Op.getImmTy()] = I;
7996 } else {
7997 llvm_unreachable("Invalid operand type");
7998 }
7999 }
8000 }
8001
8002 if (IsDPP8) {
8003 using namespace llvm::AMDGPU::DPP;
8004 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8005 } else {
8006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8008 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8009 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8010 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8011 }
8012 }
8013 }
8014
8015 //===----------------------------------------------------------------------===//
8016 // sdwa
8017 //===----------------------------------------------------------------------===//
8018
8019 OperandMatchResultTy
8020 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8021 AMDGPUOperand::ImmTy Type) {
8022 using namespace llvm::AMDGPU::SDWA;
8023
8024 SMLoc S = getLoc();
8025 StringRef Value;
8026 OperandMatchResultTy res;
8027
8028 SMLoc StringLoc;
8029 res = parseStringWithPrefix(Prefix, Value, StringLoc);
8030 if (res != MatchOperand_Success) {
8031 return res;
8032 }
8033
8034 int64_t Int;
8035 Int = StringSwitch<int64_t>(Value)
8036 .Case("BYTE_0", SdwaSel::BYTE_0)
8037 .Case("BYTE_1", SdwaSel::BYTE_1)
8038 .Case("BYTE_2", SdwaSel::BYTE_2)
8039 .Case("BYTE_3", SdwaSel::BYTE_3)
8040 .Case("WORD_0", SdwaSel::WORD_0)
8041 .Case("WORD_1", SdwaSel::WORD_1)
8042 .Case("DWORD", SdwaSel::DWORD)
8043 .Default(0xffffffff);
8044
8045 if (Int == 0xffffffff) {
8046 Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8047 return MatchOperand_ParseFail;
8048 }
8049
8050 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8051 return MatchOperand_Success;
8052 }
8053
8054 OperandMatchResultTy
8055 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8056 using namespace llvm::AMDGPU::SDWA;
8057
8058 SMLoc S = getLoc();
8059 StringRef Value;
8060 OperandMatchResultTy res;
8061
8062 SMLoc StringLoc;
8063 res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8064 if (res != MatchOperand_Success) {
8065 return res;
8066 }
8067
8068 int64_t Int;
8069 Int = StringSwitch<int64_t>(Value)
8070 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8071 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8072 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8073 .Default(0xffffffff);
8074
8075 if (Int == 0xffffffff) {
8076 Error(StringLoc, "invalid dst_unused value");
8077 return MatchOperand_ParseFail;
8078 }
8079
8080 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8081 return MatchOperand_Success;
8082 }
8083
8084 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8085 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8086 }
8087
8088 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8089 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8090 }
8091
8092 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8093 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8094 }
8095
8096 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8097 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8098 }
8099
8100 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8101 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8102 }
8103
8104 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8105 uint64_t BasicInstType,
8106 bool SkipDstVcc,
8107 bool SkipSrcVcc) {
8108 using namespace llvm::AMDGPU::SDWA;
8109
8110 OptionalImmIndexMap OptionalIdx;
8111 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8112 bool SkippedVcc = false;
8113
8114 unsigned I = 1;
8115 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8116 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8117 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8118 }
8119
8120 for (unsigned E = Operands.size(); I != E; ++I) {
8121 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8122 if (SkipVcc && !SkippedVcc && Op.isReg() &&
8123 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8124 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8125 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8126 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8127 // Skip VCC only if we didn't skip it on previous iteration.
8128 // Note that src0 and src1 occupy 2 slots each because of modifiers.
8129 if (BasicInstType == SIInstrFlags::VOP2 &&
8130 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8131 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8132 SkippedVcc = true;
8133 continue;
8134 } else if (BasicInstType == SIInstrFlags::VOPC &&
8135 Inst.getNumOperands() == 0) {
8136 SkippedVcc = true;
8137 continue;
8138 }
8139 }
8140 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8141 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8142 } else if (Op.isImm()) {
8143 // Handle optional arguments
8144 OptionalIdx[Op.getImmTy()] = I;
8145 } else {
8146 llvm_unreachable("Invalid operand type");
8147 }
8148 SkippedVcc = false;
8149 }
8150
8151 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8152 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8153 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8154 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8155 switch (BasicInstType) {
8156 case SIInstrFlags::VOP1:
8157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8158 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8160 }
8161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8162 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8164 break;
8165
8166 case SIInstrFlags::VOP2:
8167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8168 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8169 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8170 }
8171 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8172 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8173 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8174 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8175 break;
8176
8177 case SIInstrFlags::VOPC:
8178 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8179 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8180 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8181 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8182 break;
8183
8184 default:
8185 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8186 }
8187 }
8188
8189 // special case v_mac_{f16, f32}:
8190 // it has src2 register operand that is tied to dst operand
8191 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8192 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8193 auto it = Inst.begin();
8194 std::advance(
8195 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8196 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8197 }
8198 }
8199
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

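// Default operands used when the optional MAI modifiers blgp, cbsz and abid
// are not written explicitly; each defaults to an immediate 0.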
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

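// Pull in the TableGen-generated matcher pieces: the register matcher, the
// instruction matcher implementation, and the mnemonic spell checker/checker
// tables produced in AMDGPUGenAsmMatcher.inc.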
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function must be defined after the auto-generated include so that the
// MatchClassKind enum is available.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" are parsed as immediate operands in ParseOperand(),
  // but MatchInstructionImpl() expects a token there and fails to validate
  // the operand. This method checks whether we were given an immediate
  // operand where the matcher expects the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

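// Parses the optional trailing immediate of the endpgm instruction. If no
// expression follows the mnemonic the operand defaults to 0; an explicit
// value must fit in 16 bits.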
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }