//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }
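
    // For example (illustrative): a source written as "-|v0|" sets both Abs
    // and Neg, so getModifiersOperand() yields SISrcMods::ABS | SISrcMods::NEG,
    // while "sext(v0)" yields SISrcMods::SEXT; FP and integer modifiers are
    // mutually exclusive, as asserted above.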

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyByteSel,
    ImmTyBitOp3,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
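  // For instance (illustrative): the constant K in v_fmaak_f32 is a mandatory
  // literal (ImmKindTyMandatoryLiteral), whereas an explicit literal such as
  // 0x3f000000 written for a regular VALU source is ImmKindTyLiteral.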
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

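  // Note (descriptive): the helpers below add the *_modifiers immediate
  // (abs/neg or sext bits) to the MCInst immediately before the source
  // register or immediate operand it applies to.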
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

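  // For example (illustrative): usesRegister(IS_SGPR, /*DwordRegIndex=*/4,
  // /*RegWidth=*/64) corresponds to s[4:5] and records SGPR index 5
  // (4 + ceil(64/32) - 1) as the highest one referenced so far.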
"AMDGPUGenAsmMatcher.inc" 1330 1331 /// } 1332 1333 private: 1334 void createConstantSymbol(StringRef Id, int64_t Val); 1335 1336 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1337 bool OutOfRangeError(SMRange Range); 1338 /// Calculate VGPR/SGPR blocks required for given target, reserved 1339 /// registers, and user-specified NextFreeXGPR values. 1340 /// 1341 /// \param Features [in] Target features, used for bug corrections. 1342 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1343 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1344 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1345 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1346 /// descriptor field, if valid. 1347 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1348 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1349 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1350 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1351 /// \param VGPRBlocks [out] Result VGPR block count. 1352 /// \param SGPRBlocks [out] Result SGPR block count. 1353 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed, 1354 const MCExpr *FlatScrUsed, bool XNACKUsed, 1355 std::optional<bool> EnableWavefrontSize32, 1356 const MCExpr *NextFreeVGPR, SMRange VGPRRange, 1357 const MCExpr *NextFreeSGPR, SMRange SGPRRange, 1358 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks); 1359 bool ParseDirectiveAMDGCNTarget(); 1360 bool ParseDirectiveAMDHSACodeObjectVersion(); 1361 bool ParseDirectiveAMDHSAKernel(); 1362 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header); 1363 bool ParseDirectiveAMDKernelCodeT(); 1364 // TODO: Possibly make subtargetHasRegister const. 1365 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg); 1366 bool ParseDirectiveAMDGPUHsaKernel(); 1367 1368 bool ParseDirectiveISAVersion(); 1369 bool ParseDirectiveHSAMetadata(); 1370 bool ParseDirectivePALMetadataBegin(); 1371 bool ParseDirectivePALMetadata(); 1372 bool ParseDirectiveAMDGPULDS(); 1373 1374 /// Common code to parse out a block of text (typically YAML) between start and 1375 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10;
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+, set wave32 as the default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         int64_t &IntVal);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         AMDGPUOperand::ImmTy Type);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Val) : Val(Val) {}
  };

  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;
    StringLiteral Desc;
    unsigned Width;
    bool IsDefined = false;

    StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
                      int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(Loc, "invalid " + Desc + ": " + Err);
      return false;
    }

    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, "not supported on this GPU");
      if (!isUIntN(Width, Val))
        return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };

  ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
  bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
                                      const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, int OpName);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateDS(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
                              const unsigned CPol);
validateTFE(const MCInst &Inst, const OperandVector &Operands); 1799 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1800 unsigned getConstantBusLimit(unsigned Opcode) const; 1801 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1802 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1803 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1804 1805 bool isSupportedMnemo(StringRef Mnemo, 1806 const FeatureBitset &FBS); 1807 bool isSupportedMnemo(StringRef Mnemo, 1808 const FeatureBitset &FBS, 1809 ArrayRef<unsigned> Variants); 1810 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1811 1812 bool isId(const StringRef Id) const; 1813 bool isId(const AsmToken &Token, const StringRef Id) const; 1814 bool isToken(const AsmToken::TokenKind Kind) const; 1815 StringRef getId() const; 1816 bool trySkipId(const StringRef Id); 1817 bool trySkipId(const StringRef Pref, const StringRef Id); 1818 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1819 bool trySkipToken(const AsmToken::TokenKind Kind); 1820 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1821 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1822 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1823 1824 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1825 AsmToken::TokenKind getTokenKind() const; 1826 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1827 bool parseExpr(OperandVector &Operands); 1828 StringRef getTokenStr() const; 1829 AsmToken peekToken(bool ShouldSkipSpace = true); 1830 AsmToken getToken() const; 1831 SMLoc getLoc() const; 1832 void lex(); 1833 1834 public: 1835 void onBeginOfFile() override; 1836 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; 1837 1838 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1839 1840 ParseStatus parseExpTgt(OperandVector &Operands); 1841 ParseStatus parseSendMsg(OperandVector &Operands); 1842 ParseStatus parseInterpSlot(OperandVector &Operands); 1843 ParseStatus parseInterpAttr(OperandVector &Operands); 1844 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1845 ParseStatus parseBoolReg(OperandVector &Operands); 1846 1847 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal, 1848 const unsigned MaxVal, const Twine &ErrMsg, 1849 SMLoc &Loc); 1850 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1851 const unsigned MinVal, 1852 const unsigned MaxVal, 1853 const StringRef ErrMsg); 1854 ParseStatus parseSwizzle(OperandVector &Operands); 1855 bool parseSwizzleOffset(int64_t &Imm); 1856 bool parseSwizzleMacro(int64_t &Imm); 1857 bool parseSwizzleQuadPerm(int64_t &Imm); 1858 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1859 bool parseSwizzleBroadcast(int64_t &Imm); 1860 bool parseSwizzleSwap(int64_t &Imm); 1861 bool parseSwizzleReverse(int64_t &Imm); 1862 bool parseSwizzleFFT(int64_t &Imm); 1863 bool parseSwizzleRotate(int64_t &Imm); 1864 1865 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1866 int64_t parseGPRIdxMacro(); 1867 1868 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1869 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1870 1871 ParseStatus parseOModSI(OperandVector &Operands); 1872 1873 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1874 OptionalImmIndexMap &OptionalIdx); 1875 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 
1876 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1877 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1878 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); 1879 1880 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1881 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1882 OptionalImmIndexMap &OptionalIdx); 1883 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1884 OptionalImmIndexMap &OptionalIdx); 1885 1886 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1887 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1888 1889 bool parseDimId(unsigned &Encoding); 1890 ParseStatus parseDim(OperandVector &Operands); 1891 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1892 ParseStatus parseDPP8(OperandVector &Operands); 1893 ParseStatus parseDPPCtrl(OperandVector &Operands); 1894 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1895 int64_t parseDPPCtrlSel(StringRef Ctrl); 1896 int64_t parseDPPCtrlPerm(); 1897 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1898 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1899 cvtDPP(Inst, Operands, true); 1900 } 1901 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1902 bool IsDPP8 = false); 1903 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1904 cvtVOP3DPP(Inst, Operands, true); 1905 } 1906 1907 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1908 AMDGPUOperand::ImmTy Type); 1909 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1910 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1911 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1912 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1913 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1914 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1915 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1916 uint64_t BasicInstType, 1917 bool SkipDstVcc = false, 1918 bool SkipSrcVcc = false); 1919 1920 ParseStatus parseEndpgm(OperandVector &Operands); 1921 1922 ParseStatus parseVOPD(OperandVector &Operands); 1923 }; 1924 1925 } // end anonymous namespace 1926 1927 // May be called with integer type with equivalent bitwidth. 1928 static const fltSemantics *getFltSemantics(unsigned Size) { 1929 switch (Size) { 1930 case 4: 1931 return &APFloat::IEEEsingle(); 1932 case 8: 1933 return &APFloat::IEEEdouble(); 1934 case 2: 1935 return &APFloat::IEEEhalf(); 1936 default: 1937 llvm_unreachable("unsupported fp type"); 1938 } 1939 } 1940 1941 static const fltSemantics *getFltSemantics(MVT VT) { 1942 return getFltSemantics(VT.getSizeInBits() / 8); 1943 } 1944 1945 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1946 switch (OperandType) { 1947 // When floating-point immediate is used as operand of type i16, the 32-bit 1948 // representation of the constant truncated to the 16 LSBs should be used. 
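  // (Illustrative example: an fp literal such as 0.1 is first converted using
  // fp32 semantics, giving 0x3DCCCCCD; per the rule above, only its low 16
  // bits, 0xCCCD, are what the i16 operand ultimately sees.)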
1949 case AMDGPU::OPERAND_REG_IMM_INT16: 1950 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1951 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1952 case AMDGPU::OPERAND_REG_IMM_INT32: 1953 case AMDGPU::OPERAND_REG_IMM_FP32: 1954 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1955 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1956 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1957 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1958 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1959 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1960 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1961 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1962 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1963 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1964 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1965 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1966 case AMDGPU::OPERAND_KIMM32: 1967 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1968 return &APFloat::IEEEsingle(); 1969 case AMDGPU::OPERAND_REG_IMM_INT64: 1970 case AMDGPU::OPERAND_REG_IMM_FP64: 1971 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1972 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1973 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1974 return &APFloat::IEEEdouble(); 1975 case AMDGPU::OPERAND_REG_IMM_FP16: 1976 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1977 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1978 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1979 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1980 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1981 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1982 case AMDGPU::OPERAND_KIMM16: 1983 return &APFloat::IEEEhalf(); 1984 case AMDGPU::OPERAND_REG_IMM_BF16: 1985 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: 1986 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 1987 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 1988 case AMDGPU::OPERAND_REG_INLINE_AC_BF16: 1989 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: 1990 case AMDGPU::OPERAND_REG_IMM_V2BF16: 1991 return &APFloat::BFloat(); 1992 default: 1993 llvm_unreachable("unsupported fp type"); 1994 } 1995 } 1996 1997 //===----------------------------------------------------------------------===// 1998 // Operand 1999 //===----------------------------------------------------------------------===// 2000 2001 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 2002 bool Lost; 2003 2004 // Convert literal to single precision 2005 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 2006 APFloat::rmNearestTiesToEven, 2007 &Lost); 2008 // We allow precision lost but not overflow or underflow 2009 if (Status != APFloat::opOK && 2010 Lost && 2011 ((Status & APFloat::opOverflow) != 0 || 2012 (Status & APFloat::opUnderflow) != 0)) { 2013 return false; 2014 } 2015 2016 return true; 2017 } 2018 2019 static bool isSafeTruncation(int64_t Val, unsigned Size) { 2020 return isUIntN(Size, Val) || isIntN(Size, Val); 2021 } 2022 2023 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 2024 if (VT.getScalarType() == MVT::i16) 2025 return isInlinableLiteral32(Val, HasInv2Pi); 2026 2027 if (VT.getScalarType() == MVT::f16) 2028 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi); 2029 2030 assert(VT.getScalarType() == MVT::bf16); 2031 2032 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi); 2033 } 2034 2035 bool AMDGPUOperand::isInlinableImm(MVT type) const { 2036 2037 // This is a hack to enable named inline values like 2038 // shared_base with both 32-bit and 64-bit operands. 2039 // Note that these values are defined as 2040 // 32-bit operands only. 
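  // (Illustrative, assumed usage: e.g. "s_mov_b64 s[0:1], src_shared_base"
  // is accepted here even though the named value is defined as a 32-bit
  // operand.)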
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      bool Lost = false;
      switch (type.getScalarType().SimpleTy) {
      default:
        llvm_unreachable("unknown 16-bit type");
      case MVT::bf16:
        FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::f16:
        FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
                          &Lost);
        break;
      case MVT::i16:
        FPLiteral.convert(APFloatBase::IEEEsingle(),
                          APFloat::rmNearestTiesToEven, &Lost);
        break;
      }
      // We need to use the 32-bit representation here because when a
      // floating-point inline constant is used as an i16 operand, its 32-bit
      // representation will be used. We will need the 32-bit value to check
      // if it is an FP inline constant.
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return isInlineableLiteralOp16(ImmVal, type,
                                     AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
        static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
        static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same
      // semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for
    // FP types.
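    // (In other words, an integer literal paired with a 64-bit operand only
    // has to fit in 32 bits at this point: the encoded literal is a single
    // 32-bit dword that is then extended as the FIXME above describes.)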
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 32 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  //
  // For i16x2 operands, we assume that the specified literal is encoded as a
  // single-precision float. This is pretty odd, but it matches SP3 and what
  // happens in hardware.
  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                                            : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
}

template <bool IsFake16>
bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
                             : AMDGPU::VGPR_16_Lo128RegClassID);
}

template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
  return isRegClass(IsFake16 ?
AMDGPU::VGPR_32RegClassID 2191 : AMDGPU::VGPR_16RegClassID); 2192 } 2193 2194 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2195 if (AsmParser->isVI()) 2196 return isVReg32(); 2197 if (AsmParser->isGFX9Plus()) 2198 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2199 return false; 2200 } 2201 2202 bool AMDGPUOperand::isSDWAFP16Operand() const { 2203 return isSDWAOperand(MVT::f16); 2204 } 2205 2206 bool AMDGPUOperand::isSDWAFP32Operand() const { 2207 return isSDWAOperand(MVT::f32); 2208 } 2209 2210 bool AMDGPUOperand::isSDWAInt16Operand() const { 2211 return isSDWAOperand(MVT::i16); 2212 } 2213 2214 bool AMDGPUOperand::isSDWAInt32Operand() const { 2215 return isSDWAOperand(MVT::i32); 2216 } 2217 2218 bool AMDGPUOperand::isBoolReg() const { 2219 auto FB = AsmParser->getFeatureBits(); 2220 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || 2221 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); 2222 } 2223 2224 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2225 { 2226 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2227 assert(Size == 2 || Size == 4 || Size == 8); 2228 2229 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2230 2231 if (Imm.Mods.Abs) { 2232 Val &= ~FpSignMask; 2233 } 2234 if (Imm.Mods.Neg) { 2235 Val ^= FpSignMask; 2236 } 2237 2238 return Val; 2239 } 2240 2241 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2242 if (isExpr()) { 2243 Inst.addOperand(MCOperand::createExpr(Expr)); 2244 return; 2245 } 2246 2247 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2248 Inst.getNumOperands())) { 2249 addLiteralImmOperand(Inst, Imm.Val, 2250 ApplyModifiers & 2251 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2252 } else { 2253 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2254 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2255 setImmKindNone(); 2256 } 2257 } 2258 2259 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2260 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2261 auto OpNum = Inst.getNumOperands(); 2262 // Check that this operand accepts literals 2263 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2264 2265 if (ApplyModifiers) { 2266 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2267 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2268 Val = applyInputFPModifiers(Val, Size); 2269 } 2270 2271 APInt Literal(64, Val); 2272 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2273 2274 if (Imm.IsFPImm) { // We got fp literal token 2275 switch (OpTy) { 2276 case AMDGPU::OPERAND_REG_IMM_INT64: 2277 case AMDGPU::OPERAND_REG_IMM_FP64: 2278 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2279 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2280 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2281 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2282 AsmParser->hasInv2PiInlineImm())) { 2283 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2284 setImmKindConst(); 2285 return; 2286 } 2287 2288 // Non-inlineable 2289 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2290 // For fp operands we check if low 32 bits are zeros 2291 if (Literal.getLoBits(32) != 0) { 2292 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2293 "Can't encode literal as exact 64-bit floating-point operand. 
" 2294 "Low 32-bits will be set to zero"); 2295 Val &= 0xffffffff00000000u; 2296 } 2297 2298 Inst.addOperand(MCOperand::createImm(Val)); 2299 setImmKindLiteral(); 2300 return; 2301 } 2302 2303 // We don't allow fp literals in 64-bit integer instructions. It is 2304 // unclear how we should encode them. This case should be checked earlier 2305 // in predicate methods (isLiteralImm()) 2306 llvm_unreachable("fp literal in 64-bit integer instruction."); 2307 2308 case AMDGPU::OPERAND_REG_IMM_BF16: 2309 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: 2310 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 2311 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2312 case AMDGPU::OPERAND_REG_INLINE_AC_BF16: 2313 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: 2314 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2315 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { 2316 // This is the 1/(2*pi) which is going to be truncated to bf16 with the 2317 // loss of precision. The constant represents ideomatic fp32 value of 2318 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16 2319 // bits. Prevent rounding below. 2320 Inst.addOperand(MCOperand::createImm(0x3e22)); 2321 setImmKindLiteral(); 2322 return; 2323 } 2324 [[fallthrough]]; 2325 2326 case AMDGPU::OPERAND_REG_IMM_INT32: 2327 case AMDGPU::OPERAND_REG_IMM_FP32: 2328 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2329 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2330 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2331 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2332 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2333 case AMDGPU::OPERAND_REG_IMM_INT16: 2334 case AMDGPU::OPERAND_REG_IMM_FP16: 2335 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2336 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2337 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2338 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2339 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2340 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2341 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2342 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2343 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2344 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2345 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2346 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2347 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2348 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2349 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2350 case AMDGPU::OPERAND_KIMM32: 2351 case AMDGPU::OPERAND_KIMM16: 2352 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 2353 bool lost; 2354 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2355 // Convert literal to single precision 2356 FPLiteral.convert(*getOpFltSemantics(OpTy), 2357 APFloat::rmNearestTiesToEven, &lost); 2358 // We allow precision lost but not overflow or underflow. This should be 2359 // checked earlier in isLiteralImm() 2360 2361 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2362 Inst.addOperand(MCOperand::createImm(ImmVal)); 2363 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2364 setImmKindMandatoryLiteral(); 2365 } else { 2366 setImmKindLiteral(); 2367 } 2368 return; 2369 } 2370 default: 2371 llvm_unreachable("invalid operand size"); 2372 } 2373 2374 return; 2375 } 2376 2377 // We got int literal token. 2378 // Only sign extend inline immediates. 
2379 switch (OpTy) { 2380 case AMDGPU::OPERAND_REG_IMM_INT32: 2381 case AMDGPU::OPERAND_REG_IMM_FP32: 2382 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2383 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2384 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2385 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2386 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2387 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2388 case AMDGPU::OPERAND_REG_IMM_V2BF16: 2389 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2390 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2391 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2392 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2393 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2394 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2395 if (isSafeTruncation(Val, 32) && 2396 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2397 AsmParser->hasInv2PiInlineImm())) { 2398 Inst.addOperand(MCOperand::createImm(Val)); 2399 setImmKindConst(); 2400 return; 2401 } 2402 2403 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2404 setImmKindLiteral(); 2405 return; 2406 2407 case AMDGPU::OPERAND_REG_IMM_INT64: 2408 case AMDGPU::OPERAND_REG_IMM_FP64: 2409 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2410 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2411 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2412 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2413 Inst.addOperand(MCOperand::createImm(Val)); 2414 setImmKindConst(); 2415 return; 2416 } 2417 2418 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32 2419 : Lo_32(Val); 2420 2421 Inst.addOperand(MCOperand::createImm(Val)); 2422 setImmKindLiteral(); 2423 return; 2424 2425 case AMDGPU::OPERAND_REG_IMM_INT16: 2426 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2427 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2428 if (isSafeTruncation(Val, 16) && 2429 AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) { 2430 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2431 setImmKindConst(); 2432 return; 2433 } 2434 2435 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2436 setImmKindLiteral(); 2437 return; 2438 2439 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2440 case AMDGPU::OPERAND_REG_IMM_FP16: 2441 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2442 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2443 if (isSafeTruncation(Val, 16) && 2444 AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), 2445 AsmParser->hasInv2PiInlineImm())) { 2446 Inst.addOperand(MCOperand::createImm(Val)); 2447 setImmKindConst(); 2448 return; 2449 } 2450 2451 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2452 setImmKindLiteral(); 2453 return; 2454 2455 case AMDGPU::OPERAND_REG_IMM_BF16: 2456 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: 2457 case AMDGPU::OPERAND_REG_INLINE_C_BF16: 2458 case AMDGPU::OPERAND_REG_INLINE_AC_BF16: 2459 if (isSafeTruncation(Val, 16) && 2460 AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), 2461 AsmParser->hasInv2PiInlineImm())) { 2462 Inst.addOperand(MCOperand::createImm(Val)); 2463 setImmKindConst(); 2464 return; 2465 } 2466 2467 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2468 setImmKindLiteral(); 2469 return; 2470 2471 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2472 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: { 2473 assert(isSafeTruncation(Val, 16)); 2474 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))); 2475 Inst.addOperand(MCOperand::createImm(Val)); 2476 return; 2477 } 2478 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2479 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2480 assert(isSafeTruncation(Val, 16)); 2481 
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), 2482 AsmParser->hasInv2PiInlineImm())); 2483 2484 Inst.addOperand(MCOperand::createImm(Val)); 2485 return; 2486 } 2487 2488 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: 2489 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: { 2490 assert(isSafeTruncation(Val, 16)); 2491 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), 2492 AsmParser->hasInv2PiInlineImm())); 2493 2494 Inst.addOperand(MCOperand::createImm(Val)); 2495 return; 2496 } 2497 2498 case AMDGPU::OPERAND_KIMM32: 2499 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2500 setImmKindMandatoryLiteral(); 2501 return; 2502 case AMDGPU::OPERAND_KIMM16: 2503 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2504 setImmKindMandatoryLiteral(); 2505 return; 2506 default: 2507 llvm_unreachable("invalid operand size"); 2508 } 2509 } 2510 2511 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2512 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2513 } 2514 2515 bool AMDGPUOperand::isInlineValue() const { 2516 return isRegKind() && ::isInlineValue(getReg()); 2517 } 2518 2519 //===----------------------------------------------------------------------===// 2520 // AsmParser 2521 //===----------------------------------------------------------------------===// 2522 2523 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) { 2524 // TODO: make those pre-defined variables read-only. 2525 // Currently there is none suitable machinery in the core llvm-mc for this. 2526 // MCSymbol::isRedefinable is intended for another purpose, and 2527 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 2528 MCContext &Ctx = getContext(); 2529 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id); 2530 Sym->setVariableValue(MCConstantExpr::create(Val, Ctx)); 2531 } 2532 2533 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2534 if (Is == IS_VGPR) { 2535 switch (RegWidth) { 2536 default: return -1; 2537 case 32: 2538 return AMDGPU::VGPR_32RegClassID; 2539 case 64: 2540 return AMDGPU::VReg_64RegClassID; 2541 case 96: 2542 return AMDGPU::VReg_96RegClassID; 2543 case 128: 2544 return AMDGPU::VReg_128RegClassID; 2545 case 160: 2546 return AMDGPU::VReg_160RegClassID; 2547 case 192: 2548 return AMDGPU::VReg_192RegClassID; 2549 case 224: 2550 return AMDGPU::VReg_224RegClassID; 2551 case 256: 2552 return AMDGPU::VReg_256RegClassID; 2553 case 288: 2554 return AMDGPU::VReg_288RegClassID; 2555 case 320: 2556 return AMDGPU::VReg_320RegClassID; 2557 case 352: 2558 return AMDGPU::VReg_352RegClassID; 2559 case 384: 2560 return AMDGPU::VReg_384RegClassID; 2561 case 512: 2562 return AMDGPU::VReg_512RegClassID; 2563 case 1024: 2564 return AMDGPU::VReg_1024RegClassID; 2565 } 2566 } else if (Is == IS_TTMP) { 2567 switch (RegWidth) { 2568 default: return -1; 2569 case 32: 2570 return AMDGPU::TTMP_32RegClassID; 2571 case 64: 2572 return AMDGPU::TTMP_64RegClassID; 2573 case 128: 2574 return AMDGPU::TTMP_128RegClassID; 2575 case 256: 2576 return AMDGPU::TTMP_256RegClassID; 2577 case 512: 2578 return AMDGPU::TTMP_512RegClassID; 2579 } 2580 } else if (Is == IS_SGPR) { 2581 switch (RegWidth) { 2582 default: return -1; 2583 case 32: 2584 return AMDGPU::SGPR_32RegClassID; 2585 case 64: 2586 return AMDGPU::SGPR_64RegClassID; 2587 case 96: 2588 return AMDGPU::SGPR_96RegClassID; 2589 case 128: 2590 return AMDGPU::SGPR_128RegClassID; 2591 case 160: 2592 return 
AMDGPU::SGPR_160RegClassID; 2593 case 192: 2594 return AMDGPU::SGPR_192RegClassID; 2595 case 224: 2596 return AMDGPU::SGPR_224RegClassID; 2597 case 256: 2598 return AMDGPU::SGPR_256RegClassID; 2599 case 288: 2600 return AMDGPU::SGPR_288RegClassID; 2601 case 320: 2602 return AMDGPU::SGPR_320RegClassID; 2603 case 352: 2604 return AMDGPU::SGPR_352RegClassID; 2605 case 384: 2606 return AMDGPU::SGPR_384RegClassID; 2607 case 512: 2608 return AMDGPU::SGPR_512RegClassID; 2609 } 2610 } else if (Is == IS_AGPR) { 2611 switch (RegWidth) { 2612 default: return -1; 2613 case 32: 2614 return AMDGPU::AGPR_32RegClassID; 2615 case 64: 2616 return AMDGPU::AReg_64RegClassID; 2617 case 96: 2618 return AMDGPU::AReg_96RegClassID; 2619 case 128: 2620 return AMDGPU::AReg_128RegClassID; 2621 case 160: 2622 return AMDGPU::AReg_160RegClassID; 2623 case 192: 2624 return AMDGPU::AReg_192RegClassID; 2625 case 224: 2626 return AMDGPU::AReg_224RegClassID; 2627 case 256: 2628 return AMDGPU::AReg_256RegClassID; 2629 case 288: 2630 return AMDGPU::AReg_288RegClassID; 2631 case 320: 2632 return AMDGPU::AReg_320RegClassID; 2633 case 352: 2634 return AMDGPU::AReg_352RegClassID; 2635 case 384: 2636 return AMDGPU::AReg_384RegClassID; 2637 case 512: 2638 return AMDGPU::AReg_512RegClassID; 2639 case 1024: 2640 return AMDGPU::AReg_1024RegClassID; 2641 } 2642 } 2643 return -1; 2644 } 2645 2646 static MCRegister getSpecialRegForName(StringRef RegName) { 2647 return StringSwitch<unsigned>(RegName) 2648 .Case("exec", AMDGPU::EXEC) 2649 .Case("vcc", AMDGPU::VCC) 2650 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2651 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2652 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2653 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2654 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2655 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2656 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2657 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2658 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2659 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2660 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2661 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2662 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2663 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2664 .Case("m0", AMDGPU::M0) 2665 .Case("vccz", AMDGPU::SRC_VCCZ) 2666 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2667 .Case("execz", AMDGPU::SRC_EXECZ) 2668 .Case("src_execz", AMDGPU::SRC_EXECZ) 2669 .Case("scc", AMDGPU::SRC_SCC) 2670 .Case("src_scc", AMDGPU::SRC_SCC) 2671 .Case("tba", AMDGPU::TBA) 2672 .Case("tma", AMDGPU::TMA) 2673 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2674 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2675 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2676 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2677 .Case("vcc_lo", AMDGPU::VCC_LO) 2678 .Case("vcc_hi", AMDGPU::VCC_HI) 2679 .Case("exec_lo", AMDGPU::EXEC_LO) 2680 .Case("exec_hi", AMDGPU::EXEC_HI) 2681 .Case("tma_lo", AMDGPU::TMA_LO) 2682 .Case("tma_hi", AMDGPU::TMA_HI) 2683 .Case("tba_lo", AMDGPU::TBA_LO) 2684 .Case("tba_hi", AMDGPU::TBA_HI) 2685 .Case("pc", AMDGPU::PC_REG) 2686 .Case("null", AMDGPU::SGPR_NULL) 2687 .Default(AMDGPU::NoRegister); 2688 } 2689 2690 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2691 SMLoc &EndLoc, bool RestoreOnFailure) { 2692 auto R = parseRegister(); 2693 if (!R) return true; 2694 assert(R->isReg()); 2695 RegNo = R->getReg(); 2696 StartLoc = R->getStartLoc(); 2697 EndLoc = R->getEndLoc(); 2698 
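  // NB: following the MCTargetAsmParser convention, returning false here
  // reports that the register was parsed successfully.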
return false; 2699 } 2700 2701 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, 2702 SMLoc &EndLoc) { 2703 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2704 } 2705 2706 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 2707 SMLoc &EndLoc) { 2708 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2709 bool PendingErrors = getParser().hasPendingError(); 2710 getParser().clearPendingErrors(); 2711 if (PendingErrors) 2712 return ParseStatus::Failure; 2713 if (Result) 2714 return ParseStatus::NoMatch; 2715 return ParseStatus::Success; 2716 } 2717 2718 bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth, 2719 RegisterKind RegKind, 2720 MCRegister Reg1, SMLoc Loc) { 2721 switch (RegKind) { 2722 case IS_SPECIAL: 2723 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2724 Reg = AMDGPU::EXEC; 2725 RegWidth = 64; 2726 return true; 2727 } 2728 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2729 Reg = AMDGPU::FLAT_SCR; 2730 RegWidth = 64; 2731 return true; 2732 } 2733 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2734 Reg = AMDGPU::XNACK_MASK; 2735 RegWidth = 64; 2736 return true; 2737 } 2738 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2739 Reg = AMDGPU::VCC; 2740 RegWidth = 64; 2741 return true; 2742 } 2743 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2744 Reg = AMDGPU::TBA; 2745 RegWidth = 64; 2746 return true; 2747 } 2748 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2749 Reg = AMDGPU::TMA; 2750 RegWidth = 64; 2751 return true; 2752 } 2753 Error(Loc, "register does not fit in the list"); 2754 return false; 2755 case IS_VGPR: 2756 case IS_SGPR: 2757 case IS_AGPR: 2758 case IS_TTMP: 2759 if (Reg1 != Reg + RegWidth / 32) { 2760 Error(Loc, "registers in a list must have consecutive indices"); 2761 return false; 2762 } 2763 RegWidth += 32; 2764 return true; 2765 default: 2766 llvm_unreachable("unexpected register kind"); 2767 } 2768 } 2769 2770 struct RegInfo { 2771 StringLiteral Name; 2772 RegisterKind Kind; 2773 }; 2774 2775 static constexpr RegInfo RegularRegisters[] = { 2776 {{"v"}, IS_VGPR}, 2777 {{"s"}, IS_SGPR}, 2778 {{"ttmp"}, IS_TTMP}, 2779 {{"acc"}, IS_AGPR}, 2780 {{"a"}, IS_AGPR}, 2781 }; 2782 2783 static bool isRegularReg(RegisterKind Kind) { 2784 return Kind == IS_VGPR || 2785 Kind == IS_SGPR || 2786 Kind == IS_TTMP || 2787 Kind == IS_AGPR; 2788 } 2789 2790 static const RegInfo* getRegularRegInfo(StringRef Str) { 2791 for (const RegInfo &Reg : RegularRegisters) 2792 if (Str.starts_with(Reg.Name)) 2793 return &Reg; 2794 return nullptr; 2795 } 2796 2797 static bool getRegNum(StringRef Str, unsigned& Num) { 2798 return !Str.getAsInteger(10, Num); 2799 } 2800 2801 bool 2802 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2803 const AsmToken &NextToken) const { 2804 2805 // A list of consecutive registers: [s0,s1,s2,s3] 2806 if (Token.is(AsmToken::LBrac)) 2807 return true; 2808 2809 if (!Token.is(AsmToken::Identifier)) 2810 return false; 2811 2812 // A single register like s0 or a range of registers like s[0:1] 2813 2814 StringRef Str = Token.getString(); 2815 const RegInfo *Reg = getRegularRegInfo(Str); 2816 if (Reg) { 2817 StringRef RegName = Reg->Name; 2818 StringRef RegSuffix = Str.substr(RegName.size()); 2819 if (!RegSuffix.empty()) { 2820 RegSuffix.consume_back(".l"); 2821 RegSuffix.consume_back(".h"); 2822 unsigned Num; 2823 // A single register with an index: rXX 2824 if 
(getRegNum(RegSuffix, Num)) 2825 return true; 2826 } else { 2827 // A range of registers: r[XX:YY]. 2828 if (NextToken.is(AsmToken::LBrac)) 2829 return true; 2830 } 2831 } 2832 2833 return getSpecialRegForName(Str).isValid(); 2834 } 2835 2836 bool 2837 AMDGPUAsmParser::isRegister() 2838 { 2839 return isRegister(getToken(), peekToken()); 2840 } 2841 2842 MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, 2843 unsigned SubReg, unsigned RegWidth, 2844 SMLoc Loc) { 2845 assert(isRegularReg(RegKind)); 2846 2847 unsigned AlignSize = 1; 2848 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2849 // SGPR and TTMP registers must be aligned. 2850 // Max required alignment is 4 dwords. 2851 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); 2852 } 2853 2854 if (RegNum % AlignSize != 0) { 2855 Error(Loc, "invalid register alignment"); 2856 return MCRegister(); 2857 } 2858 2859 unsigned RegIdx = RegNum / AlignSize; 2860 int RCID = getRegClass(RegKind, RegWidth); 2861 if (RCID == -1) { 2862 Error(Loc, "invalid or unsupported register size"); 2863 return MCRegister(); 2864 } 2865 2866 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2867 const MCRegisterClass RC = TRI->getRegClass(RCID); 2868 if (RegIdx >= RC.getNumRegs()) { 2869 Error(Loc, "register index is out of range"); 2870 return MCRegister(); 2871 } 2872 2873 MCRegister Reg = RC.getRegister(RegIdx); 2874 2875 if (SubReg) { 2876 Reg = TRI->getSubReg(Reg, SubReg); 2877 2878 // Currently all regular registers have their .l and .h subregisters, so 2879 // we should never need to generate an error here. 2880 assert(Reg && "Invalid subregister!"); 2881 } 2882 2883 return Reg; 2884 } 2885 2886 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2887 int64_t RegLo, RegHi; 2888 if (!skipToken(AsmToken::LBrac, "missing register index")) 2889 return false; 2890 2891 SMLoc FirstIdxLoc = getLoc(); 2892 SMLoc SecondIdxLoc; 2893 2894 if (!parseExpr(RegLo)) 2895 return false; 2896 2897 if (trySkipToken(AsmToken::Colon)) { 2898 SecondIdxLoc = getLoc(); 2899 if (!parseExpr(RegHi)) 2900 return false; 2901 } else { 2902 RegHi = RegLo; 2903 } 2904 2905 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2906 return false; 2907 2908 if (!isUInt<32>(RegLo)) { 2909 Error(FirstIdxLoc, "invalid register index"); 2910 return false; 2911 } 2912 2913 if (!isUInt<32>(RegHi)) { 2914 Error(SecondIdxLoc, "invalid register index"); 2915 return false; 2916 } 2917 2918 if (RegLo > RegHi) { 2919 Error(FirstIdxLoc, "first register index should not exceed second index"); 2920 return false; 2921 } 2922 2923 Num = static_cast<unsigned>(RegLo); 2924 RegWidth = 32 * ((RegHi - RegLo) + 1); 2925 return true; 2926 } 2927 2928 MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2929 unsigned &RegNum, 2930 unsigned &RegWidth, 2931 SmallVectorImpl<AsmToken> &Tokens) { 2932 assert(isToken(AsmToken::Identifier)); 2933 MCRegister Reg = getSpecialRegForName(getTokenStr()); 2934 if (Reg) { 2935 RegNum = 0; 2936 RegWidth = 32; 2937 RegKind = IS_SPECIAL; 2938 Tokens.push_back(getToken()); 2939 lex(); // skip register name 2940 } 2941 return Reg; 2942 } 2943 2944 MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2945 unsigned &RegNum, 2946 unsigned &RegWidth, 2947 SmallVectorImpl<AsmToken> &Tokens) { 2948 assert(isToken(AsmToken::Identifier)); 2949 StringRef RegName = getTokenStr(); 2950 auto Loc = getLoc(); 2951 2952 const RegInfo *RI = getRegularRegInfo(RegName); 2953 if (!RI) { 2954 
Error(Loc, "invalid register name"); 2955 return MCRegister(); 2956 } 2957 2958 Tokens.push_back(getToken()); 2959 lex(); // skip register name 2960 2961 RegKind = RI->Kind; 2962 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2963 unsigned SubReg = NoSubRegister; 2964 if (!RegSuffix.empty()) { 2965 if (RegSuffix.consume_back(".l")) 2966 SubReg = AMDGPU::lo16; 2967 else if (RegSuffix.consume_back(".h")) 2968 SubReg = AMDGPU::hi16; 2969 2970 // Single 32-bit register: vXX. 2971 if (!getRegNum(RegSuffix, RegNum)) { 2972 Error(Loc, "invalid register index"); 2973 return MCRegister(); 2974 } 2975 RegWidth = 32; 2976 } else { 2977 // Range of registers: v[XX:YY]. ":YY" is optional. 2978 if (!ParseRegRange(RegNum, RegWidth)) 2979 return MCRegister(); 2980 } 2981 2982 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); 2983 } 2984 2985 MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, 2986 unsigned &RegNum, unsigned &RegWidth, 2987 SmallVectorImpl<AsmToken> &Tokens) { 2988 MCRegister Reg; 2989 auto ListLoc = getLoc(); 2990 2991 if (!skipToken(AsmToken::LBrac, 2992 "expected a register or a list of registers")) { 2993 return MCRegister(); 2994 } 2995 2996 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2997 2998 auto Loc = getLoc(); 2999 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 3000 return MCRegister(); 3001 if (RegWidth != 32) { 3002 Error(Loc, "expected a single 32-bit register"); 3003 return MCRegister(); 3004 } 3005 3006 for (; trySkipToken(AsmToken::Comma); ) { 3007 RegisterKind NextRegKind; 3008 MCRegister NextReg; 3009 unsigned NextRegNum, NextRegWidth; 3010 Loc = getLoc(); 3011 3012 if (!ParseAMDGPURegister(NextRegKind, NextReg, 3013 NextRegNum, NextRegWidth, 3014 Tokens)) { 3015 return MCRegister(); 3016 } 3017 if (NextRegWidth != 32) { 3018 Error(Loc, "expected a single 32-bit register"); 3019 return MCRegister(); 3020 } 3021 if (NextRegKind != RegKind) { 3022 Error(Loc, "registers in a list must be of the same kind"); 3023 return MCRegister(); 3024 } 3025 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 3026 return MCRegister(); 3027 } 3028 3029 if (!skipToken(AsmToken::RBrac, 3030 "expected a comma or a closing square bracket")) { 3031 return MCRegister(); 3032 } 3033 3034 if (isRegularReg(RegKind)) 3035 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); 3036 3037 return Reg; 3038 } 3039 3040 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 3041 MCRegister &Reg, unsigned &RegNum, 3042 unsigned &RegWidth, 3043 SmallVectorImpl<AsmToken> &Tokens) { 3044 auto Loc = getLoc(); 3045 Reg = MCRegister(); 3046 3047 if (isToken(AsmToken::Identifier)) { 3048 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 3049 if (!Reg) 3050 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 3051 } else { 3052 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 3053 } 3054 3055 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3056 if (!Reg) { 3057 assert(Parser.hasPendingError()); 3058 return false; 3059 } 3060 3061 if (!subtargetHasRegister(*TRI, Reg)) { 3062 if (Reg == AMDGPU::SGPR_NULL) { 3063 Error(Loc, "'null' operand is not supported on this GPU"); 3064 } else { 3065 Error(Loc, Twine(AMDGPUInstPrinter::getRegisterName(Reg)) + 3066 " register not available on this GPU"); 3067 } 3068 return false; 3069 } 3070 3071 return true; 3072 } 3073 3074 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 3075 MCRegister &Reg, unsigned &RegNum, 3076 unsigned &RegWidth, 
3077 bool RestoreOnFailure /*=false*/) { 3078 Reg = MCRegister(); 3079 3080 SmallVector<AsmToken, 1> Tokens; 3081 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 3082 if (RestoreOnFailure) { 3083 while (!Tokens.empty()) { 3084 getLexer().UnLex(Tokens.pop_back_val()); 3085 } 3086 } 3087 return true; 3088 } 3089 return false; 3090 } 3091 3092 std::optional<StringRef> 3093 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 3094 switch (RegKind) { 3095 case IS_VGPR: 3096 return StringRef(".amdgcn.next_free_vgpr"); 3097 case IS_SGPR: 3098 return StringRef(".amdgcn.next_free_sgpr"); 3099 default: 3100 return std::nullopt; 3101 } 3102 } 3103 3104 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 3105 auto SymbolName = getGprCountSymbolName(RegKind); 3106 assert(SymbolName && "initializing invalid register kind"); 3107 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 3108 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 3109 } 3110 3111 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 3112 unsigned DwordRegIndex, 3113 unsigned RegWidth) { 3114 // Symbols are only defined for GCN targets 3115 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 3116 return true; 3117 3118 auto SymbolName = getGprCountSymbolName(RegKind); 3119 if (!SymbolName) 3120 return true; 3121 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 3122 3123 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 3124 int64_t OldCount; 3125 3126 if (!Sym->isVariable()) 3127 return !Error(getLoc(), 3128 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 3129 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 3130 return !Error( 3131 getLoc(), 3132 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 3133 3134 if (OldCount <= NewMax) 3135 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 3136 3137 return true; 3138 } 3139 3140 std::unique_ptr<AMDGPUOperand> 3141 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 3142 const auto &Tok = getToken(); 3143 SMLoc StartLoc = Tok.getLoc(); 3144 SMLoc EndLoc = Tok.getEndLoc(); 3145 RegisterKind RegKind; 3146 MCRegister Reg; 3147 unsigned RegNum, RegWidth; 3148 3149 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 3150 return nullptr; 3151 } 3152 if (isHsaAbi(getSTI())) { 3153 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 3154 return nullptr; 3155 } else 3156 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 3157 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 3158 } 3159 3160 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 3161 bool HasSP3AbsModifier, bool HasLit) { 3162 // TODO: add syntactic sugar for 1/(2*PI) 3163 3164 if (isRegister()) 3165 return ParseStatus::NoMatch; 3166 assert(!isModifier()); 3167 3168 if (!HasLit) { 3169 HasLit = trySkipId("lit"); 3170 if (HasLit) { 3171 if (!skipToken(AsmToken::LParen, "expected left paren after lit")) 3172 return ParseStatus::Failure; 3173 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); 3174 if (S.isSuccess() && 3175 !skipToken(AsmToken::RParen, "expected closing parentheses")) 3176 return ParseStatus::Failure; 3177 return S; 3178 } 3179 } 3180 3181 const auto& Tok = getToken(); 3182 const auto& NextTok = peekToken(); 3183 bool IsReal = Tok.is(AsmToken::Real); 3184 SMLoc S = getLoc(); 3185 bool Negate = false; 3186 3187 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 3188 lex(); 3189 
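    // Consume the leading '-' and treat the following real literal as negated.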
IsReal = true; 3190 Negate = true; 3191 } 3192 3193 AMDGPUOperand::Modifiers Mods; 3194 Mods.Lit = HasLit; 3195 3196 if (IsReal) { 3197 // Floating-point expressions are not supported. 3198 // Can only allow floating-point literals with an 3199 // optional sign. 3200 3201 StringRef Num = getTokenStr(); 3202 lex(); 3203 3204 APFloat RealVal(APFloat::IEEEdouble()); 3205 auto roundMode = APFloat::rmNearestTiesToEven; 3206 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 3207 return ParseStatus::Failure; 3208 if (Negate) 3209 RealVal.changeSign(); 3210 3211 Operands.push_back( 3212 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 3213 AMDGPUOperand::ImmTyNone, true)); 3214 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3215 Op.setModifiers(Mods); 3216 3217 return ParseStatus::Success; 3218 3219 } else { 3220 int64_t IntVal; 3221 const MCExpr *Expr; 3222 SMLoc S = getLoc(); 3223 3224 if (HasSP3AbsModifier) { 3225 // This is a workaround for handling expressions 3226 // as arguments of SP3 'abs' modifier, for example: 3227 // |1.0| 3228 // |-1| 3229 // |1+x| 3230 // This syntax is not compatible with syntax of standard 3231 // MC expressions (due to the trailing '|'). 3232 SMLoc EndLoc; 3233 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 3234 return ParseStatus::Failure; 3235 } else { 3236 if (Parser.parseExpression(Expr)) 3237 return ParseStatus::Failure; 3238 } 3239 3240 if (Expr->evaluateAsAbsolute(IntVal)) { 3241 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 3242 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3243 Op.setModifiers(Mods); 3244 } else { 3245 if (HasLit) 3246 return ParseStatus::NoMatch; 3247 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3248 } 3249 3250 return ParseStatus::Success; 3251 } 3252 3253 return ParseStatus::NoMatch; 3254 } 3255 3256 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3257 if (!isRegister()) 3258 return ParseStatus::NoMatch; 3259 3260 if (auto R = parseRegister()) { 3261 assert(R->isReg()); 3262 Operands.push_back(std::move(R)); 3263 return ParseStatus::Success; 3264 } 3265 return ParseStatus::Failure; 3266 } 3267 3268 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 3269 bool HasSP3AbsMod, bool HasLit) { 3270 ParseStatus Res = parseReg(Operands); 3271 if (!Res.isNoMatch()) 3272 return Res; 3273 if (isModifier()) 3274 return ParseStatus::NoMatch; 3275 return parseImm(Operands, HasSP3AbsMod, HasLit); 3276 } 3277 3278 bool 3279 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3280 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3281 const auto &str = Token.getString(); 3282 return str == "abs" || str == "neg" || str == "sext"; 3283 } 3284 return false; 3285 } 3286 3287 bool 3288 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3289 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3290 } 3291 3292 bool 3293 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3294 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3295 } 3296 3297 bool 3298 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3299 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3300 } 3301 3302 // Check if this is an operand modifier or an 
opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would result in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  bool Lit;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg)
    return Error(Loc, "expected register or immediate");
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return ParseStatus::Failure;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return ParseStatus::Failure;

  Lit = trySkipId("lit");
  if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
    return ParseStatus::Failure;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs)
    return Error(Loc, "expected register or immediate");

  ParseStatus Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs, Lit);
  } else {
    Res = parseReg(Operands);
  }
  if (!Res.isSuccess())
    return (SP3Neg || Neg || SP3Abs || Abs || Lit) ?
ParseStatus::Failure : Res; 3408 3409 if (Lit && !Operands.back()->isImm()) 3410 Error(Loc, "expected immediate with lit modifier"); 3411 3412 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3413 return ParseStatus::Failure; 3414 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3415 return ParseStatus::Failure; 3416 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3417 return ParseStatus::Failure; 3418 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3419 return ParseStatus::Failure; 3420 3421 AMDGPUOperand::Modifiers Mods; 3422 Mods.Abs = Abs || SP3Abs; 3423 Mods.Neg = Neg || SP3Neg; 3424 Mods.Lit = Lit; 3425 3426 if (Mods.hasFPModifiers() || Lit) { 3427 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3428 if (Op.isExpr()) 3429 return Error(Op.getStartLoc(), "expected an absolute expression"); 3430 Op.setModifiers(Mods); 3431 } 3432 return ParseStatus::Success; 3433 } 3434 3435 ParseStatus 3436 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3437 bool AllowImm) { 3438 bool Sext = trySkipId("sext"); 3439 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3440 return ParseStatus::Failure; 3441 3442 ParseStatus Res; 3443 if (AllowImm) { 3444 Res = parseRegOrImm(Operands); 3445 } else { 3446 Res = parseReg(Operands); 3447 } 3448 if (!Res.isSuccess()) 3449 return Sext ? ParseStatus::Failure : Res; 3450 3451 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3452 return ParseStatus::Failure; 3453 3454 AMDGPUOperand::Modifiers Mods; 3455 Mods.Sext = Sext; 3456 3457 if (Mods.hasIntModifiers()) { 3458 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3459 if (Op.isExpr()) 3460 return Error(Op.getStartLoc(), "expected an absolute expression"); 3461 Op.setModifiers(Mods); 3462 } 3463 3464 return ParseStatus::Success; 3465 } 3466 3467 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3468 return parseRegOrImmWithFPInputMods(Operands, false); 3469 } 3470 3471 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3472 return parseRegOrImmWithIntInputMods(Operands, false); 3473 } 3474 3475 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3476 auto Loc = getLoc(); 3477 if (trySkipId("off")) { 3478 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3479 AMDGPUOperand::ImmTyOff, false)); 3480 return ParseStatus::Success; 3481 } 3482 3483 if (!isRegister()) 3484 return ParseStatus::NoMatch; 3485 3486 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3487 if (Reg) { 3488 Operands.push_back(std::move(Reg)); 3489 return ParseStatus::Success; 3490 } 3491 3492 return ParseStatus::Failure; 3493 } 3494 3495 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3496 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3497 3498 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3499 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3500 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3501 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3502 return Match_InvalidOperand; 3503 3504 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3505 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3506 // v_mac_f32/16 allow only dst_sel == DWORD; 3507 auto OpNum = 3508 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3509 const auto &Op = 
Inst.getOperand(OpNum); 3510 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3511 return Match_InvalidOperand; 3512 } 3513 } 3514 3515 return Match_Success; 3516 } 3517 3518 static ArrayRef<unsigned> getAllVariants() { 3519 static const unsigned Variants[] = { 3520 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3521 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3522 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3523 }; 3524 3525 return ArrayRef(Variants); 3526 } 3527 3528 // What asm variants we should check 3529 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3530 if (isForcedDPP() && isForcedVOP3()) { 3531 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3532 return ArrayRef(Variants); 3533 } 3534 if (getForcedEncodingSize() == 32) { 3535 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3536 return ArrayRef(Variants); 3537 } 3538 3539 if (isForcedVOP3()) { 3540 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3541 return ArrayRef(Variants); 3542 } 3543 3544 if (isForcedSDWA()) { 3545 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3546 AMDGPUAsmVariants::SDWA9}; 3547 return ArrayRef(Variants); 3548 } 3549 3550 if (isForcedDPP()) { 3551 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3552 return ArrayRef(Variants); 3553 } 3554 3555 return getAllVariants(); 3556 } 3557 3558 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3559 if (isForcedDPP() && isForcedVOP3()) 3560 return "e64_dpp"; 3561 3562 if (getForcedEncodingSize() == 32) 3563 return "e32"; 3564 3565 if (isForcedVOP3()) 3566 return "e64"; 3567 3568 if (isForcedSDWA()) 3569 return "sdwa"; 3570 3571 if (isForcedDPP()) 3572 return "dpp"; 3573 3574 return ""; 3575 } 3576 3577 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3578 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3579 for (MCPhysReg Reg : Desc.implicit_uses()) { 3580 switch (Reg) { 3581 case AMDGPU::FLAT_SCR: 3582 case AMDGPU::VCC: 3583 case AMDGPU::VCC_LO: 3584 case AMDGPU::VCC_HI: 3585 case AMDGPU::M0: 3586 return Reg; 3587 default: 3588 break; 3589 } 3590 } 3591 return AMDGPU::NoRegister; 3592 } 3593 3594 // NB: This code is correct only when used to check constant 3595 // bus limitations because GFX7 support no f16 inline constants. 3596 // Note that there are no cases when a GFX7 opcode violates 3597 // constant bus limitations due to the use of an f16 constant. 
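//
// As a rough illustration of the distinction this helper draws (the examples
// below are an assumption added for clarity, not an exhaustive list; the
// exact inline-constant set depends on the operand type and target):
//   v_add_f32_e32 v0, 0.5, v1   // 0.5 encodes as an inline constant
//   v_add_f32_e32 v0, 0.3, v1   // 0.3 needs a 32-bit literal
// Only the literal form is counted against the constant bus by the checks
// below.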
3598 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3599 unsigned OpIdx) const { 3600 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3601 3602 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3603 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3604 return false; 3605 } 3606 3607 const MCOperand &MO = Inst.getOperand(OpIdx); 3608 3609 int64_t Val = MO.getImm(); 3610 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3611 3612 switch (OpSize) { // expected operand size 3613 case 8: 3614 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3615 case 4: 3616 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3617 case 2: { 3618 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3619 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3620 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3621 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3622 return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm()); 3623 3624 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3625 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3626 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3627 return AMDGPU::isInlinableLiteralV2I16(Val); 3628 3629 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3630 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3631 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3632 return AMDGPU::isInlinableLiteralV2F16(Val); 3633 3634 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 || 3635 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 || 3636 OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16) 3637 return AMDGPU::isInlinableLiteralV2BF16(Val); 3638 3639 if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 || 3640 OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 || 3641 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 || 3642 OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED) 3643 return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm()); 3644 3645 if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 || 3646 OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 || 3647 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 || 3648 OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED) 3649 return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm()); 3650 3651 llvm_unreachable("invalid operand type"); 3652 } 3653 default: 3654 llvm_unreachable("invalid operand size"); 3655 } 3656 } 3657 3658 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3659 if (!isGFX10Plus()) 3660 return 1; 3661 3662 switch (Opcode) { 3663 // 64-bit shift instructions can use only one scalar value input 3664 case AMDGPU::V_LSHLREV_B64_e64: 3665 case AMDGPU::V_LSHLREV_B64_gfx10: 3666 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3667 case AMDGPU::V_LSHLREV_B64_e32_gfx12: 3668 case AMDGPU::V_LSHLREV_B64_e64_gfx12: 3669 case AMDGPU::V_LSHRREV_B64_e64: 3670 case AMDGPU::V_LSHRREV_B64_gfx10: 3671 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3672 case AMDGPU::V_LSHRREV_B64_e64_gfx12: 3673 case AMDGPU::V_ASHRREV_I64_e64: 3674 case AMDGPU::V_ASHRREV_I64_gfx10: 3675 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3676 case AMDGPU::V_ASHRREV_I64_e64_gfx12: 3677 case AMDGPU::V_LSHL_B64_e64: 3678 case AMDGPU::V_LSHR_B64_e64: 3679 case AMDGPU::V_ASHR_I64_e64: 3680 return 1; 3681 default: 3682 return 2; 3683 } 3684 } 3685 3686 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3687 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3688 3689 // Get regular operand indices in the same order as specified 3690 // in the instruction (but append 
mandatory literals to the end). 3691 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3692 bool AddMandatoryLiterals = false) { 3693 3694 int16_t ImmIdx = 3695 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3696 3697 if (isVOPD(Opcode)) { 3698 int16_t ImmDeferredIdx = 3699 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred) 3700 : -1; 3701 3702 return {getNamedOperandIdx(Opcode, OpName::src0X), 3703 getNamedOperandIdx(Opcode, OpName::vsrc1X), 3704 getNamedOperandIdx(Opcode, OpName::src0Y), 3705 getNamedOperandIdx(Opcode, OpName::vsrc1Y), 3706 ImmDeferredIdx, 3707 ImmIdx}; 3708 } 3709 3710 return {getNamedOperandIdx(Opcode, OpName::src0), 3711 getNamedOperandIdx(Opcode, OpName::src1), 3712 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx}; 3713 } 3714 3715 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3716 const MCOperand &MO = Inst.getOperand(OpIdx); 3717 if (MO.isImm()) 3718 return !isInlineConstant(Inst, OpIdx); 3719 if (MO.isReg()) { 3720 auto Reg = MO.getReg(); 3721 if (!Reg) 3722 return false; 3723 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3724 auto PReg = mc2PseudoReg(Reg); 3725 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3726 } 3727 return true; 3728 } 3729 3730 // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`: 3731 // Writelane is special in that it can use SGPR and M0 (which would normally 3732 // count as using the constant bus twice - but in this case it is allowed since 3733 // the lane selector doesn't count as a use of the constant bus). However, it is 3734 // still required to abide by the 1 SGPR rule. 3735 static bool checkWriteLane(const MCInst &Inst) { 3736 const unsigned Opcode = Inst.getOpcode(); 3737 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi) 3738 return false; 3739 const MCOperand &LaneSelOp = Inst.getOperand(2); 3740 if (!LaneSelOp.isReg()) 3741 return false; 3742 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg()); 3743 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11; 3744 } 3745 3746 bool AMDGPUAsmParser::validateConstantBusLimitations( 3747 const MCInst &Inst, const OperandVector &Operands) { 3748 const unsigned Opcode = Inst.getOpcode(); 3749 const MCInstrDesc &Desc = MII.get(Opcode); 3750 MCRegister LastSGPR; 3751 unsigned ConstantBusUseCount = 0; 3752 unsigned NumLiterals = 0; 3753 unsigned LiteralSize; 3754 3755 if (!(Desc.TSFlags & 3756 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3757 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && 3758 !isVOPD(Opcode)) 3759 return true; 3760 3761 if (checkWriteLane(Inst)) 3762 return true; 3763 3764 // Check special imm operands (used by madmk, etc) 3765 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { 3766 ++NumLiterals; 3767 LiteralSize = 4; 3768 } 3769 3770 SmallDenseSet<unsigned> SGPRsUsed; 3771 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3772 if (SGPRUsed != AMDGPU::NoRegister) { 3773 SGPRsUsed.insert(SGPRUsed); 3774 ++ConstantBusUseCount; 3775 } 3776 3777 OperandIndices OpIndices = getSrcOperandIndices(Opcode); 3778 3779 for (int OpIdx : OpIndices) { 3780 if (OpIdx == -1) 3781 continue; 3782 3783 const MCOperand &MO = Inst.getOperand(OpIdx); 3784 if (usesConstantBus(Inst, OpIdx)) { 3785 if (MO.isReg()) { 3786 LastSGPR = mc2PseudoReg(MO.getReg()); 3787 // Pairs of registers with a partial intersections like these 3788 // s0, s[0:1] 3789 // flat_scratch_lo, flat_scratch 3790 // flat_scratch_lo, 
flat_scratch_hi 3791 // are theoretically valid but they are disabled anyway. 3792 // Note that this code mimics SIInstrInfo::verifyInstruction 3793 if (SGPRsUsed.insert(LastSGPR).second) { 3794 ++ConstantBusUseCount; 3795 } 3796 } else { // Expression or a literal 3797 3798 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3799 continue; // special operand like VINTERP attr_chan 3800 3801 // An instruction may use only one literal. 3802 // This has been validated on the previous step. 3803 // See validateVOPLiteral. 3804 // This literal may be used as more than one operand. 3805 // If all these operands are of the same size, 3806 // this literal counts as one scalar value. 3807 // Otherwise it counts as 2 scalar values. 3808 // See "GFX10 Shader Programming", section 3.6.2.3. 3809 3810 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3811 if (Size < 4) 3812 Size = 4; 3813 3814 if (NumLiterals == 0) { 3815 NumLiterals = 1; 3816 LiteralSize = Size; 3817 } else if (LiteralSize != Size) { 3818 NumLiterals = 2; 3819 } 3820 } 3821 } 3822 } 3823 ConstantBusUseCount += NumLiterals; 3824 3825 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3826 return true; 3827 3828 SMLoc LitLoc = getLitLoc(Operands); 3829 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3830 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3831 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3832 return false; 3833 } 3834 3835 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3836 const MCInst &Inst, const OperandVector &Operands) { 3837 3838 const unsigned Opcode = Inst.getOpcode(); 3839 if (!isVOPD(Opcode)) 3840 return true; 3841 3842 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3843 3844 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3845 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3846 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3847 ? Opr.getReg() 3848 : MCRegister(); 3849 }; 3850 3851 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. 
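// Illustrative pairing (assumed GFX12 VOPD syntax, shown only as a sketch):
//   v_dual_mov_b32 v2, v4 :: v_dual_mov_b32 v5, v9
// SkipSrc below relaxes the per-bank source checks for exactly this dual-mov
// pairing; the destination constraint (one even and one odd vdst) is still
// enforced by getInvalidCompOperandIndex.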
3852 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; 3853 3854 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3855 auto InvalidCompOprIdx = 3856 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); 3857 if (!InvalidCompOprIdx) 3858 return true; 3859 3860 auto CompOprIdx = *InvalidCompOprIdx; 3861 auto ParsedIdx = 3862 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3863 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3864 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3865 3866 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3867 if (CompOprIdx == VOPD::Component::DST) { 3868 Error(Loc, "one dst register must be even and the other odd"); 3869 } else { 3870 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3871 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3872 " operands must use different VGPR banks"); 3873 } 3874 3875 return false; 3876 } 3877 3878 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3879 3880 const unsigned Opc = Inst.getOpcode(); 3881 const MCInstrDesc &Desc = MII.get(Opc); 3882 3883 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3884 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3885 assert(ClampIdx != -1); 3886 return Inst.getOperand(ClampIdx).getImm() == 0; 3887 } 3888 3889 return true; 3890 } 3891 3892 constexpr uint64_t MIMGFlags = 3893 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; 3894 3895 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3896 const SMLoc &IDLoc) { 3897 3898 const unsigned Opc = Inst.getOpcode(); 3899 const MCInstrDesc &Desc = MII.get(Opc); 3900 3901 if ((Desc.TSFlags & MIMGFlags) == 0) 3902 return true; 3903 3904 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3905 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3906 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3907 3908 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample 3909 return true; 3910 3911 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3912 return true; 3913 3914 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3915 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3916 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3917 if (DMask == 0) 3918 DMask = 1; 3919 3920 bool IsPackedD16 = false; 3921 unsigned DataSize = 3922 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask); 3923 if (hasPackedD16()) { 3924 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3925 IsPackedD16 = D16Idx >= 0; 3926 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3927 DataSize = (DataSize + 1) / 2; 3928 } 3929 3930 if ((VDataSize / 4) == DataSize + TFESize) 3931 return true; 3932 3933 StringRef Modifiers; 3934 if (isGFX90A()) 3935 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3936 else 3937 Modifiers = IsPackedD16 ? 
"dmask, d16 and tfe" : "dmask and tfe"; 3938 3939 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3940 return false; 3941 } 3942 3943 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 3944 const SMLoc &IDLoc) { 3945 const unsigned Opc = Inst.getOpcode(); 3946 const MCInstrDesc &Desc = MII.get(Opc); 3947 3948 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) 3949 return true; 3950 3951 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3952 3953 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3954 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3955 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3956 int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc 3957 : AMDGPU::OpName::rsrc; 3958 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); 3959 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3960 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3961 3962 assert(VAddr0Idx != -1); 3963 assert(SrsrcIdx != -1); 3964 assert(SrsrcIdx > VAddr0Idx); 3965 3966 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3967 if (BaseOpcode->BVH) { 3968 if (IsA16 == BaseOpcode->A16) 3969 return true; 3970 Error(IDLoc, "image address size does not match a16"); 3971 return false; 3972 } 3973 3974 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3975 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3976 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3977 unsigned ActualAddrSize = 3978 IsNSA ? SrsrcIdx - VAddr0Idx 3979 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3980 3981 unsigned ExpectedAddrSize = 3982 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3983 3984 if (IsNSA) { 3985 if (hasPartialNSAEncoding() && 3986 ExpectedAddrSize > 3987 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { 3988 int VAddrLastIdx = SrsrcIdx - 1; 3989 unsigned VAddrLastSize = 3990 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 3991 3992 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 3993 } 3994 } else { 3995 if (ExpectedAddrSize > 12) 3996 ExpectedAddrSize = 16; 3997 3998 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3999 // This provides backward compatibility for assembly created 4000 // before 160b/192b/224b types were directly supported. 4001 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 4002 return true; 4003 } 4004 4005 if (ActualAddrSize == ExpectedAddrSize) 4006 return true; 4007 4008 Error(IDLoc, "image address size does not match dim and a16"); 4009 return false; 4010 } 4011 4012 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 4013 4014 const unsigned Opc = Inst.getOpcode(); 4015 const MCInstrDesc &Desc = MII.get(Opc); 4016 4017 if ((Desc.TSFlags & MIMGFlags) == 0) 4018 return true; 4019 if (!Desc.mayLoad() || !Desc.mayStore()) 4020 return true; // Not atomic 4021 4022 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 4023 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 4024 4025 // This is an incomplete check because image_atomic_cmpswap 4026 // may only use 0x3 and 0xf while other atomic operations 4027 // may use 0x1 and 0x3. However these limitations are 4028 // verified when we check that dmask matches dst size. 
4029 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 4030 } 4031 4032 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 4033 4034 const unsigned Opc = Inst.getOpcode(); 4035 const MCInstrDesc &Desc = MII.get(Opc); 4036 4037 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 4038 return true; 4039 4040 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 4041 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 4042 4043 // GATHER4 instructions use dmask in a different fashion compared to 4044 // other MIMG instructions. The only useful DMASK values are 4045 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 4046 // (red,red,red,red) etc.) The ISA document doesn't mention 4047 // this. 4048 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 4049 } 4050 4051 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst, 4052 const OperandVector &Operands) { 4053 if (!isGFX10Plus()) 4054 return true; 4055 4056 const unsigned Opc = Inst.getOpcode(); 4057 const MCInstrDesc &Desc = MII.get(Opc); 4058 4059 if ((Desc.TSFlags & MIMGFlags) == 0) 4060 return true; 4061 4062 // image_bvh_intersect_ray instructions do not have dim 4063 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) 4064 return true; 4065 4066 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4067 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4068 if (Op.isDim()) 4069 return true; 4070 } 4071 return false; 4072 } 4073 4074 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 4075 const unsigned Opc = Inst.getOpcode(); 4076 const MCInstrDesc &Desc = MII.get(Opc); 4077 4078 if ((Desc.TSFlags & MIMGFlags) == 0) 4079 return true; 4080 4081 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 4082 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 4083 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 4084 4085 if (!BaseOpcode->MSAA) 4086 return true; 4087 4088 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 4089 assert(DimIdx != -1); 4090 4091 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 4092 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 4093 4094 return DimInfo->MSAA; 4095 } 4096 4097 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 4098 { 4099 switch (Opcode) { 4100 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 4101 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 4102 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 4103 return true; 4104 default: 4105 return false; 4106 } 4107 } 4108 4109 // movrels* opcodes should only allow VGPRS as src0. 4110 // This is specified in .td description for vop1/vop3, 4111 // but sdwa is handled differently. See isSDWAOperand. 
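// For example (illustrative GFX10 SDWA syntax, modifiers abbreviated):
//   v_movrels_b32_sdwa v0, v1 dst_sel:DWORD src0_sel:DWORD  // OK: src0 is a VGPR
//   v_movrels_b32_sdwa v0, s1 dst_sel:DWORD src0_sel:DWORD  // rejected below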
4112 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 4113 const OperandVector &Operands) { 4114 4115 const unsigned Opc = Inst.getOpcode(); 4116 const MCInstrDesc &Desc = MII.get(Opc); 4117 4118 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 4119 return true; 4120 4121 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4122 assert(Src0Idx != -1); 4123 4124 SMLoc ErrLoc; 4125 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 4126 if (Src0.isReg()) { 4127 auto Reg = mc2PseudoReg(Src0.getReg()); 4128 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4129 if (!isSGPR(Reg, TRI)) 4130 return true; 4131 ErrLoc = getRegLoc(Reg, Operands); 4132 } else { 4133 ErrLoc = getConstLoc(Operands); 4134 } 4135 4136 Error(ErrLoc, "source operand must be a VGPR"); 4137 return false; 4138 } 4139 4140 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 4141 const OperandVector &Operands) { 4142 4143 const unsigned Opc = Inst.getOpcode(); 4144 4145 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 4146 return true; 4147 4148 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4149 assert(Src0Idx != -1); 4150 4151 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 4152 if (!Src0.isReg()) 4153 return true; 4154 4155 auto Reg = mc2PseudoReg(Src0.getReg()); 4156 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4157 if (!isGFX90A() && isSGPR(Reg, TRI)) { 4158 Error(getRegLoc(Reg, Operands), 4159 "source operand must be either a VGPR or an inline constant"); 4160 return false; 4161 } 4162 4163 return true; 4164 } 4165 4166 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 4167 const OperandVector &Operands) { 4168 unsigned Opcode = Inst.getOpcode(); 4169 const MCInstrDesc &Desc = MII.get(Opcode); 4170 4171 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 4172 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 4173 return true; 4174 4175 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 4176 if (Src2Idx == -1) 4177 return true; 4178 4179 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 4180 Error(getConstLoc(Operands), 4181 "inline constants are not allowed for this operand"); 4182 return false; 4183 } 4184 4185 return true; 4186 } 4187 4188 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 4189 const OperandVector &Operands) { 4190 const unsigned Opc = Inst.getOpcode(); 4191 const MCInstrDesc &Desc = MII.get(Opc); 4192 4193 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 4194 return true; 4195 4196 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4197 if (BlgpIdx != -1) { 4198 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) { 4199 int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz); 4200 4201 unsigned CBSZ = Inst.getOperand(CbszIdx).getImm(); 4202 unsigned BLGP = Inst.getOperand(BlgpIdx).getImm(); 4203 4204 // Validate the correct register size was used for the floating point 4205 // format operands 4206 4207 bool Success = true; 4208 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) { 4209 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4210 Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()), 4211 Operands), 4212 "wrong register tuple size for cbsz value " + Twine(CBSZ)); 4213 Success = false; 4214 } 4215 4216 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) { 4217 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4218 
Error(getRegLoc(mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()), 4219 Operands), 4220 "wrong register tuple size for blgp value " + Twine(BLGP)); 4221 Success = false; 4222 } 4223 4224 return Success; 4225 } 4226 } 4227 4228 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 4229 if (Src2Idx == -1) 4230 return true; 4231 4232 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 4233 if (!Src2.isReg()) 4234 return true; 4235 4236 MCRegister Src2Reg = Src2.getReg(); 4237 MCRegister DstReg = Inst.getOperand(0).getReg(); 4238 if (Src2Reg == DstReg) 4239 return true; 4240 4241 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4242 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 4243 return true; 4244 4245 if (TRI->regsOverlap(Src2Reg, DstReg)) { 4246 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 4247 "source 2 operand must not partially overlap with dst"); 4248 return false; 4249 } 4250 4251 return true; 4252 } 4253 4254 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 4255 switch (Inst.getOpcode()) { 4256 default: 4257 return true; 4258 case V_DIV_SCALE_F32_gfx6_gfx7: 4259 case V_DIV_SCALE_F32_vi: 4260 case V_DIV_SCALE_F32_gfx10: 4261 case V_DIV_SCALE_F64_gfx6_gfx7: 4262 case V_DIV_SCALE_F64_vi: 4263 case V_DIV_SCALE_F64_gfx10: 4264 break; 4265 } 4266 4267 // TODO: Check that src0 = src1 or src2. 4268 4269 for (auto Name : {AMDGPU::OpName::src0_modifiers, 4270 AMDGPU::OpName::src2_modifiers, 4271 AMDGPU::OpName::src2_modifiers}) { 4272 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 4273 .getImm() & 4274 SISrcMods::ABS) { 4275 return false; 4276 } 4277 } 4278 4279 return true; 4280 } 4281 4282 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 4283 4284 const unsigned Opc = Inst.getOpcode(); 4285 const MCInstrDesc &Desc = MII.get(Opc); 4286 4287 if ((Desc.TSFlags & MIMGFlags) == 0) 4288 return true; 4289 4290 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 4291 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 4292 if (isCI() || isSI()) 4293 return false; 4294 } 4295 4296 return true; 4297 } 4298 4299 static bool IsRevOpcode(const unsigned Opcode) 4300 { 4301 switch (Opcode) { 4302 case AMDGPU::V_SUBREV_F32_e32: 4303 case AMDGPU::V_SUBREV_F32_e64: 4304 case AMDGPU::V_SUBREV_F32_e32_gfx10: 4305 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 4306 case AMDGPU::V_SUBREV_F32_e32_vi: 4307 case AMDGPU::V_SUBREV_F32_e64_gfx10: 4308 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 4309 case AMDGPU::V_SUBREV_F32_e64_vi: 4310 4311 case AMDGPU::V_SUBREV_CO_U32_e32: 4312 case AMDGPU::V_SUBREV_CO_U32_e64: 4313 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 4314 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 4315 4316 case AMDGPU::V_SUBBREV_U32_e32: 4317 case AMDGPU::V_SUBBREV_U32_e64: 4318 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 4319 case AMDGPU::V_SUBBREV_U32_e32_vi: 4320 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 4321 case AMDGPU::V_SUBBREV_U32_e64_vi: 4322 4323 case AMDGPU::V_SUBREV_U32_e32: 4324 case AMDGPU::V_SUBREV_U32_e64: 4325 case AMDGPU::V_SUBREV_U32_e32_gfx9: 4326 case AMDGPU::V_SUBREV_U32_e32_vi: 4327 case AMDGPU::V_SUBREV_U32_e64_gfx9: 4328 case AMDGPU::V_SUBREV_U32_e64_vi: 4329 4330 case AMDGPU::V_SUBREV_F16_e32: 4331 case AMDGPU::V_SUBREV_F16_e64: 4332 case AMDGPU::V_SUBREV_F16_e32_gfx10: 4333 case AMDGPU::V_SUBREV_F16_e32_vi: 4334 case AMDGPU::V_SUBREV_F16_e64_gfx10: 4335 case AMDGPU::V_SUBREV_F16_e64_vi: 4336 4337 case AMDGPU::V_SUBREV_U16_e32: 4338 case AMDGPU::V_SUBREV_U16_e64: 4339 
case AMDGPU::V_SUBREV_U16_e32_vi: 4340 case AMDGPU::V_SUBREV_U16_e64_vi: 4341 4342 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 4343 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 4344 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 4345 4346 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 4347 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 4348 4349 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 4350 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 4351 4352 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 4353 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 4354 4355 case AMDGPU::V_LSHRREV_B32_e32: 4356 case AMDGPU::V_LSHRREV_B32_e64: 4357 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 4358 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 4359 case AMDGPU::V_LSHRREV_B32_e32_vi: 4360 case AMDGPU::V_LSHRREV_B32_e64_vi: 4361 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 4362 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 4363 4364 case AMDGPU::V_ASHRREV_I32_e32: 4365 case AMDGPU::V_ASHRREV_I32_e64: 4366 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 4367 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 4368 case AMDGPU::V_ASHRREV_I32_e32_vi: 4369 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 4370 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 4371 case AMDGPU::V_ASHRREV_I32_e64_vi: 4372 4373 case AMDGPU::V_LSHLREV_B32_e32: 4374 case AMDGPU::V_LSHLREV_B32_e64: 4375 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4376 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4377 case AMDGPU::V_LSHLREV_B32_e32_vi: 4378 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4379 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4380 case AMDGPU::V_LSHLREV_B32_e64_vi: 4381 4382 case AMDGPU::V_LSHLREV_B16_e32: 4383 case AMDGPU::V_LSHLREV_B16_e64: 4384 case AMDGPU::V_LSHLREV_B16_e32_vi: 4385 case AMDGPU::V_LSHLREV_B16_e64_vi: 4386 case AMDGPU::V_LSHLREV_B16_gfx10: 4387 4388 case AMDGPU::V_LSHRREV_B16_e32: 4389 case AMDGPU::V_LSHRREV_B16_e64: 4390 case AMDGPU::V_LSHRREV_B16_e32_vi: 4391 case AMDGPU::V_LSHRREV_B16_e64_vi: 4392 case AMDGPU::V_LSHRREV_B16_gfx10: 4393 4394 case AMDGPU::V_ASHRREV_I16_e32: 4395 case AMDGPU::V_ASHRREV_I16_e64: 4396 case AMDGPU::V_ASHRREV_I16_e32_vi: 4397 case AMDGPU::V_ASHRREV_I16_e64_vi: 4398 case AMDGPU::V_ASHRREV_I16_gfx10: 4399 4400 case AMDGPU::V_LSHLREV_B64_e64: 4401 case AMDGPU::V_LSHLREV_B64_gfx10: 4402 case AMDGPU::V_LSHLREV_B64_vi: 4403 4404 case AMDGPU::V_LSHRREV_B64_e64: 4405 case AMDGPU::V_LSHRREV_B64_gfx10: 4406 case AMDGPU::V_LSHRREV_B64_vi: 4407 4408 case AMDGPU::V_ASHRREV_I64_e64: 4409 case AMDGPU::V_ASHRREV_I64_gfx10: 4410 case AMDGPU::V_ASHRREV_I64_vi: 4411 4412 case AMDGPU::V_PK_LSHLREV_B16: 4413 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4414 case AMDGPU::V_PK_LSHLREV_B16_vi: 4415 4416 case AMDGPU::V_PK_LSHRREV_B16: 4417 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4418 case AMDGPU::V_PK_LSHRREV_B16_vi: 4419 case AMDGPU::V_PK_ASHRREV_I16: 4420 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4421 case AMDGPU::V_PK_ASHRREV_I16_vi: 4422 return true; 4423 default: 4424 return false; 4425 } 4426 } 4427 4428 std::optional<StringRef> 4429 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4430 4431 using namespace SIInstrFlags; 4432 const unsigned Opcode = Inst.getOpcode(); 4433 const MCInstrDesc &Desc = MII.get(Opcode); 4434 4435 // lds_direct register is defined so that it can be used 4436 // with 9-bit operands only. Ignore encodings which do not accept these. 
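// For example (illustrative; availability of lds_direct depends on the target):
//   v_mov_b32 v0, lds_direct        // accepted where lds_direct is supported
//   v_add_f32 v0, v1, lds_direct    // rejected: lds_direct may be used as src0 only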
4437 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4438 if ((Desc.TSFlags & Enc) == 0) 4439 return std::nullopt; 4440 4441 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4442 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4443 if (SrcIdx == -1) 4444 break; 4445 const auto &Src = Inst.getOperand(SrcIdx); 4446 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4447 4448 if (isGFX90A() || isGFX11Plus()) 4449 return StringRef("lds_direct is not supported on this GPU"); 4450 4451 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4452 return StringRef("lds_direct cannot be used with this instruction"); 4453 4454 if (SrcName != OpName::src0) 4455 return StringRef("lds_direct may be used as src0 only"); 4456 } 4457 } 4458 4459 return std::nullopt; 4460 } 4461 4462 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4463 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4464 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4465 if (Op.isFlatOffset()) 4466 return Op.getStartLoc(); 4467 } 4468 return getLoc(); 4469 } 4470 4471 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, 4472 const OperandVector &Operands) { 4473 auto Opcode = Inst.getOpcode(); 4474 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4475 if (OpNum == -1) 4476 return true; 4477 4478 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4479 if ((TSFlags & SIInstrFlags::FLAT)) 4480 return validateFlatOffset(Inst, Operands); 4481 4482 if ((TSFlags & SIInstrFlags::SMRD)) 4483 return validateSMEMOffset(Inst, Operands); 4484 4485 const auto &Op = Inst.getOperand(OpNum); 4486 if (isGFX12Plus() && 4487 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4488 const unsigned OffsetSize = 24; 4489 if (!isIntN(OffsetSize, Op.getImm())) { 4490 Error(getFlatOffsetLoc(Operands), 4491 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4492 return false; 4493 } 4494 } else { 4495 const unsigned OffsetSize = 16; 4496 if (!isUIntN(OffsetSize, Op.getImm())) { 4497 Error(getFlatOffsetLoc(Operands), 4498 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4499 return false; 4500 } 4501 } 4502 return true; 4503 } 4504 4505 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4506 const OperandVector &Operands) { 4507 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4508 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4509 return true; 4510 4511 auto Opcode = Inst.getOpcode(); 4512 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4513 assert(OpNum != -1); 4514 4515 const auto &Op = Inst.getOperand(OpNum); 4516 if (!hasFlatOffsets() && Op.getImm() != 0) { 4517 Error(getFlatOffsetLoc(Operands), 4518 "flat offset modifier is not supported on this GPU"); 4519 return false; 4520 } 4521 4522 // For pre-GFX12 FLAT instructions the offset must be positive; 4523 // MSB is ignored and forced to zero. 4524 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4525 bool AllowNegative = 4526 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || 4527 isGFX12Plus(); 4528 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4529 Error(getFlatOffsetLoc(Operands), 4530 Twine("expected a ") + 4531 (AllowNegative ? 
Twine(OffsetSize) + "-bit signed offset" 4532 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4533 return false; 4534 } 4535 4536 return true; 4537 } 4538 4539 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4540 // Start with second operand because SMEM Offset cannot be dst or src0. 4541 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4542 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4543 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4544 return Op.getStartLoc(); 4545 } 4546 return getLoc(); 4547 } 4548 4549 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4550 const OperandVector &Operands) { 4551 if (isCI() || isSI()) 4552 return true; 4553 4554 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4555 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4556 return true; 4557 4558 auto Opcode = Inst.getOpcode(); 4559 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4560 if (OpNum == -1) 4561 return true; 4562 4563 const auto &Op = Inst.getOperand(OpNum); 4564 if (!Op.isImm()) 4565 return true; 4566 4567 uint64_t Offset = Op.getImm(); 4568 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4569 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4570 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4571 return true; 4572 4573 Error(getSMEMOffsetLoc(Operands), 4574 isGFX12Plus() ? "expected a 24-bit signed offset" 4575 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" 4576 : "expected a 21-bit signed offset"); 4577 4578 return false; 4579 } 4580 4581 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4582 unsigned Opcode = Inst.getOpcode(); 4583 const MCInstrDesc &Desc = MII.get(Opcode); 4584 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4585 return true; 4586 4587 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4588 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4589 4590 const int OpIndices[] = { Src0Idx, Src1Idx }; 4591 4592 unsigned NumExprs = 0; 4593 unsigned NumLiterals = 0; 4594 uint32_t LiteralValue; 4595 4596 for (int OpIdx : OpIndices) { 4597 if (OpIdx == -1) break; 4598 4599 const MCOperand &MO = Inst.getOperand(OpIdx); 4600 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4601 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4602 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4603 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4604 if (NumLiterals == 0 || LiteralValue != Value) { 4605 LiteralValue = Value; 4606 ++NumLiterals; 4607 } 4608 } else if (MO.isExpr()) { 4609 ++NumExprs; 4610 } 4611 } 4612 } 4613 4614 return NumLiterals + NumExprs <= 1; 4615 } 4616 4617 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4618 const unsigned Opc = Inst.getOpcode(); 4619 if (isPermlane16(Opc)) { 4620 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4621 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4622 4623 if (OpSel & ~3) 4624 return false; 4625 } 4626 4627 uint64_t TSFlags = MII.get(Opc).TSFlags; 4628 4629 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4630 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4631 if (OpSelIdx != -1) { 4632 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4633 return false; 4634 } 4635 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4636 if (OpSelHiIdx != -1) { 4637 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4638 return 
false; 4639 } 4640 } 4641 4642 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 4643 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4644 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4645 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4646 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4647 if (OpSel & 3) 4648 return false; 4649 } 4650 4651 return true; 4652 } 4653 4654 bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) { 4655 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); 4656 4657 const unsigned Opc = Inst.getOpcode(); 4658 uint64_t TSFlags = MII.get(Opc).TSFlags; 4659 4660 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) 4661 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) 4662 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) 4663 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand. 4664 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && 4665 !(TSFlags & SIInstrFlags::IsSWMMAC)) 4666 return true; 4667 4668 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName); 4669 if (NegIdx == -1) 4670 return true; 4671 4672 unsigned Neg = Inst.getOperand(NegIdx).getImm(); 4673 4674 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed 4675 // on some src operands but not allowed on other. 4676 // It is convenient that such instructions don't have src_modifiers operand 4677 // for src operands that don't allow neg because they also don't allow opsel. 4678 4679 int SrcMods[3] = {AMDGPU::OpName::src0_modifiers, 4680 AMDGPU::OpName::src1_modifiers, 4681 AMDGPU::OpName::src2_modifiers}; 4682 4683 for (unsigned i = 0; i < 3; ++i) { 4684 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) { 4685 if (Neg & (1 << i)) 4686 return false; 4687 } 4688 } 4689 4690 return true; 4691 } 4692 4693 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4694 const OperandVector &Operands) { 4695 const unsigned Opc = Inst.getOpcode(); 4696 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4697 if (DppCtrlIdx >= 0) { 4698 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4699 4700 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && 4701 AMDGPU::isDPALU_DPP(MII.get(Opc))) { 4702 // DP ALU DPP is supported for row_newbcast only on GFX9* 4703 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4704 Error(S, "DP ALU dpp only supports row_newbcast"); 4705 return false; 4706 } 4707 } 4708 4709 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8); 4710 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; 4711 4712 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) { 4713 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4714 if (Src1Idx >= 0) { 4715 const MCOperand &Src1 = Inst.getOperand(Src1Idx); 4716 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4717 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) { 4718 auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()); 4719 SMLoc S = getRegLoc(Reg, Operands); 4720 Error(S, "invalid operand for instruction"); 4721 return false; 4722 } 4723 if (Src1.isImm()) { 4724 Error(getInstLoc(Operands), 4725 "src1 immediate operand invalid for instruction"); 4726 return false; 4727 } 4728 } 4729 } 4730 4731 return true; 4732 } 4733 4734 // Check if VCC register matches wavefront size 4735 bool AMDGPUAsmParser::validateVccOperand(MCRegister 
Reg) const { 4736 auto FB = getFeatureBits(); 4737 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4738 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4739 } 4740 4741 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4742 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4743 const OperandVector &Operands) { 4744 unsigned Opcode = Inst.getOpcode(); 4745 const MCInstrDesc &Desc = MII.get(Opcode); 4746 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4747 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4748 !HasMandatoryLiteral && !isVOPD(Opcode)) 4749 return true; 4750 4751 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4752 4753 unsigned NumExprs = 0; 4754 unsigned NumLiterals = 0; 4755 uint32_t LiteralValue; 4756 4757 for (int OpIdx : OpIndices) { 4758 if (OpIdx == -1) 4759 continue; 4760 4761 const MCOperand &MO = Inst.getOperand(OpIdx); 4762 if (!MO.isImm() && !MO.isExpr()) 4763 continue; 4764 if (!isSISrcOperand(Desc, OpIdx)) 4765 continue; 4766 4767 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4768 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 4769 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && 4770 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; 4771 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); 4772 4773 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { 4774 Error(getLitLoc(Operands), "invalid operand for instruction"); 4775 return false; 4776 } 4777 4778 if (IsFP64 && IsValid32Op) 4779 Value = Hi_32(Value); 4780 4781 if (NumLiterals == 0 || LiteralValue != Value) { 4782 LiteralValue = Value; 4783 ++NumLiterals; 4784 } 4785 } else if (MO.isExpr()) { 4786 ++NumExprs; 4787 } 4788 } 4789 NumLiterals += NumExprs; 4790 4791 if (!NumLiterals) 4792 return true; 4793 4794 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4795 Error(getLitLoc(Operands), "literal operands are not supported"); 4796 return false; 4797 } 4798 4799 if (NumLiterals > 1) { 4800 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4801 return false; 4802 } 4803 4804 return true; 4805 } 4806 4807 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4808 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4809 const MCRegisterInfo *MRI) { 4810 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4811 if (OpIdx < 0) 4812 return -1; 4813 4814 const MCOperand &Op = Inst.getOperand(OpIdx); 4815 if (!Op.isReg()) 4816 return -1; 4817 4818 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4819 auto Reg = Sub ? Sub : Op.getReg(); 4820 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4821 return AGPR32.contains(Reg) ? 1 : 0; 4822 } 4823 4824 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4825 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4826 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4827 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4828 SIInstrFlags::DS)) == 0) 4829 return true; 4830 4831 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4832 : AMDGPU::OpName::vdata; 4833 4834 const MCRegisterInfo *MRI = getMRI(); 4835 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4836 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4837 4838 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4839 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4840 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4841 return false; 4842 } 4843 4844 auto FB = getFeatureBits(); 4845 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4846 if (DataAreg < 0 || DstAreg < 0) 4847 return true; 4848 return DstAreg == DataAreg; 4849 } 4850 4851 return DstAreg < 1 && DataAreg < 1; 4852 } 4853 4854 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4855 auto FB = getFeatureBits(); 4856 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4857 return true; 4858 4859 const MCRegisterInfo *MRI = getMRI(); 4860 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4861 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4862 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4863 const MCOperand &Op = Inst.getOperand(I); 4864 if (!Op.isReg()) 4865 continue; 4866 4867 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4868 if (!Sub) 4869 continue; 4870 4871 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4872 return false; 4873 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4874 return false; 4875 } 4876 4877 return true; 4878 } 4879 4880 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4881 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4882 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4883 if (Op.isBLGP()) 4884 return Op.getStartLoc(); 4885 } 4886 return SMLoc(); 4887 } 4888 4889 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4890 const OperandVector &Operands) { 4891 unsigned Opc = Inst.getOpcode(); 4892 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4893 if (BlgpIdx == -1) 4894 return true; 4895 SMLoc BLGPLoc = getBLGPLoc(Operands); 4896 if (!BLGPLoc.isValid()) 4897 return true; 4898 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); 4899 auto FB = getFeatureBits(); 4900 bool UsesNeg = false; 4901 if (FB[AMDGPU::FeatureGFX940Insts]) { 4902 switch (Opc) { 4903 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4904 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4905 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4906 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4907 UsesNeg = true; 4908 } 4909 } 4910 4911 if (IsNeg == UsesNeg) 4912 return true; 4913 4914 Error(BLGPLoc, 4915 UsesNeg ? 
"invalid modifier: blgp is not supported" 4916 : "invalid modifier: neg is not supported"); 4917 4918 return false; 4919 } 4920 4921 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4922 const OperandVector &Operands) { 4923 if (!isGFX11Plus()) 4924 return true; 4925 4926 unsigned Opc = Inst.getOpcode(); 4927 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4928 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4929 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4930 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4931 return true; 4932 4933 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4934 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4935 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4936 if (Reg == AMDGPU::SGPR_NULL) 4937 return true; 4938 4939 SMLoc RegLoc = getRegLoc(Reg, Operands); 4940 Error(RegLoc, "src0 must be null"); 4941 return false; 4942 } 4943 4944 bool AMDGPUAsmParser::validateDS(const MCInst &Inst, 4945 const OperandVector &Operands) { 4946 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4947 if ((TSFlags & SIInstrFlags::DS) == 0) 4948 return true; 4949 if (TSFlags & SIInstrFlags::GWS) 4950 return validateGWS(Inst, Operands); 4951 // Only validate GDS for non-GWS instructions. 4952 if (hasGDS()) 4953 return true; 4954 int GDSIdx = 4955 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds); 4956 if (GDSIdx < 0) 4957 return true; 4958 unsigned GDS = Inst.getOperand(GDSIdx).getImm(); 4959 if (GDS) { 4960 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); 4961 Error(S, "gds modifier is not supported on this GPU"); 4962 return false; 4963 } 4964 return true; 4965 } 4966 4967 // gfx90a has an undocumented limitation: 4968 // DS_GWS opcodes must use even aligned registers. 4969 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4970 const OperandVector &Operands) { 4971 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4972 return true; 4973 4974 int Opc = Inst.getOpcode(); 4975 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4976 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4977 return true; 4978 4979 const MCRegisterInfo *MRI = getMRI(); 4980 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4981 int Data0Pos = 4982 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4983 assert(Data0Pos != -1); 4984 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4985 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4986 if (RegIdx & 1) { 4987 SMLoc RegLoc = getRegLoc(Reg, Operands); 4988 Error(RegLoc, "vgpr must be even aligned"); 4989 return false; 4990 } 4991 4992 return true; 4993 } 4994 4995 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4996 const OperandVector &Operands, 4997 const SMLoc &IDLoc) { 4998 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4999 AMDGPU::OpName::cpol); 5000 if (CPolPos == -1) 5001 return true; 5002 5003 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 5004 5005 if (isGFX12Plus()) 5006 return validateTHAndScopeBits(Inst, Operands, CPol); 5007 5008 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 5009 if (TSFlags & SIInstrFlags::SMRD) { 5010 if (CPol && (isSI() || isCI())) { 5011 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5012 Error(S, "cache policy is not supported for SMRD instructions"); 5013 return false; 5014 } 5015 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 5016 Error(IDLoc, "invalid cache policy for SMEM instruction"); 5017 return false; 5018 } 5019 } 5020 5021 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 5022 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | 5023 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 5024 SIInstrFlags::FLAT; 5025 if (!(TSFlags & AllowSCCModifier)) { 5026 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5027 StringRef CStr(S.getPointer()); 5028 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 5029 Error(S, 5030 "scc modifier is not supported for this instruction on this GPU"); 5031 return false; 5032 } 5033 } 5034 5035 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 5036 return true; 5037 5038 if (TSFlags & SIInstrFlags::IsAtomicRet) { 5039 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 5040 Error(IDLoc, isGFX940() ? "instruction must use sc0" 5041 : "instruction must use glc"); 5042 return false; 5043 } 5044 } else { 5045 if (CPol & CPol::GLC) { 5046 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5047 StringRef CStr(S.getPointer()); 5048 S = SMLoc::getFromPointer( 5049 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 5050 Error(S, isGFX940() ? 
"instruction must not use sc0" 5051 : "instruction must not use glc"); 5052 return false; 5053 } 5054 } 5055 5056 return true; 5057 } 5058 5059 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, 5060 const OperandVector &Operands, 5061 const unsigned CPol) { 5062 const unsigned TH = CPol & AMDGPU::CPol::TH; 5063 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; 5064 5065 const unsigned Opcode = Inst.getOpcode(); 5066 const MCInstrDesc &TID = MII.get(Opcode); 5067 5068 auto PrintError = [&](StringRef Msg) { 5069 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 5070 Error(S, Msg); 5071 return false; 5072 }; 5073 5074 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && 5075 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && 5076 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) 5077 return PrintError("instruction must use th:TH_ATOMIC_RETURN"); 5078 5079 if (TH == 0) 5080 return true; 5081 5082 if ((TID.TSFlags & SIInstrFlags::SMRD) && 5083 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || 5084 (TH == AMDGPU::CPol::TH_NT_HT))) 5085 return PrintError("invalid th value for SMEM instruction"); 5086 5087 if (TH == AMDGPU::CPol::TH_BYPASS) { 5088 if ((Scope != AMDGPU::CPol::SCOPE_SYS && 5089 CPol & AMDGPU::CPol::TH_REAL_BYPASS) || 5090 (Scope == AMDGPU::CPol::SCOPE_SYS && 5091 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) 5092 return PrintError("scope and th combination is not valid"); 5093 } 5094 5095 bool IsStore = TID.mayStore(); 5096 bool IsAtomic = 5097 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); 5098 5099 if (IsAtomic) { 5100 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) 5101 return PrintError("invalid th value for atomic instructions"); 5102 } else if (IsStore) { 5103 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) 5104 return PrintError("invalid th value for store instructions"); 5105 } else { 5106 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) 5107 return PrintError("invalid th value for load instructions"); 5108 } 5109 5110 return true; 5111 } 5112 5113 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 5114 const OperandVector &Operands) { 5115 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5116 if (Desc.mayStore() && 5117 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 5118 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 5119 if (Loc != getInstLoc(Operands)) { 5120 Error(Loc, "TFE modifier has no meaning for store instructions"); 5121 return false; 5122 } 5123 } 5124 5125 return true; 5126 } 5127 5128 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 5129 const SMLoc &IDLoc, 5130 const OperandVector &Operands) { 5131 if (auto ErrMsg = validateLdsDirect(Inst)) { 5132 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 5133 return false; 5134 } 5135 if (!validateSOPLiteral(Inst)) { 5136 Error(getLitLoc(Operands), 5137 "only one unique literal operand is allowed"); 5138 return false; 5139 } 5140 if (!validateVOPLiteral(Inst, Operands)) { 5141 return false; 5142 } 5143 if (!validateConstantBusLimitations(Inst, Operands)) { 5144 return false; 5145 } 5146 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 5147 return false; 5148 } 5149 if (!validateIntClampSupported(Inst)) { 5150 Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands), 5151 "integer clamping is not supported on this GPU"); 5152 return false; 5153 } 5154 if (!validateOpSel(Inst)) { 5155 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 5156 "invalid op_sel operand"); 5157 return false; 5158 } 5159 if (!validateNeg(Inst, 
AMDGPU::OpName::neg_lo)) { 5160 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands), 5161 "invalid neg_lo operand"); 5162 return false; 5163 } 5164 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) { 5165 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands), 5166 "invalid neg_hi operand"); 5167 return false; 5168 } 5169 if (!validateDPP(Inst, Operands)) { 5170 return false; 5171 } 5172 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 5173 if (!validateMIMGD16(Inst)) { 5174 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 5175 "d16 modifier is not supported on this GPU"); 5176 return false; 5177 } 5178 if (!validateMIMGDim(Inst, Operands)) { 5179 Error(IDLoc, "missing dim operand"); 5180 return false; 5181 } 5182 if (!validateMIMGMSAA(Inst)) { 5183 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 5184 "invalid dim; must be MSAA type"); 5185 return false; 5186 } 5187 if (!validateMIMGDataSize(Inst, IDLoc)) { 5188 return false; 5189 } 5190 if (!validateMIMGAddrSize(Inst, IDLoc)) 5191 return false; 5192 if (!validateMIMGAtomicDMask(Inst)) { 5193 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 5194 "invalid atomic image dmask"); 5195 return false; 5196 } 5197 if (!validateMIMGGatherDMask(Inst)) { 5198 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 5199 "invalid image_gather dmask: only one bit must be set"); 5200 return false; 5201 } 5202 if (!validateMovrels(Inst, Operands)) { 5203 return false; 5204 } 5205 if (!validateOffset(Inst, Operands)) { 5206 return false; 5207 } 5208 if (!validateMAIAccWrite(Inst, Operands)) { 5209 return false; 5210 } 5211 if (!validateMAISrc2(Inst, Operands)) { 5212 return false; 5213 } 5214 if (!validateMFMA(Inst, Operands)) { 5215 return false; 5216 } 5217 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 5218 return false; 5219 } 5220 5221 if (!validateAGPRLdSt(Inst)) { 5222 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 5223 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 5224 : "invalid register class: agpr loads and stores not supported on this GPU" 5225 ); 5226 return false; 5227 } 5228 if (!validateVGPRAlign(Inst)) { 5229 Error(IDLoc, 5230 "invalid register class: vgpr tuples must be 64 bit aligned"); 5231 return false; 5232 } 5233 if (!validateDS(Inst, Operands)) { 5234 return false; 5235 } 5236 5237 if (!validateBLGP(Inst, Operands)) { 5238 return false; 5239 } 5240 5241 if (!validateDivScale(Inst)) { 5242 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 5243 return false; 5244 } 5245 if (!validateWaitCnt(Inst, Operands)) { 5246 return false; 5247 } 5248 if (!validateTFE(Inst, Operands)) { 5249 return false; 5250 } 5251 5252 return true; 5253 } 5254 5255 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 5256 const FeatureBitset &FBS, 5257 unsigned VariantID = 0); 5258 5259 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 5260 const FeatureBitset &AvailableFeatures, 5261 unsigned VariantID); 5262 5263 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5264 const FeatureBitset &FBS) { 5265 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 5266 } 5267 5268 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 5269 const FeatureBitset &FBS, 5270 ArrayRef<unsigned> Variants) { 5271 for (auto Variant : Variants) { 5272 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 5273 return true; 5274 } 5275 5276 return false; 5277 } 5278 5279 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 5280 const SMLoc &IDLoc) { 5281 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 5282 5283 // Check if requested instruction variant is supported. 5284 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 5285 return false; 5286 5287 // This instruction is not supported. 5288 // Clear any other pending errors because they are no longer relevant. 5289 getParser().clearPendingErrors(); 5290 5291 // Requested instruction variant is not supported. 5292 // Check if any other variants are supported. 5293 StringRef VariantName = getMatchedVariantName(); 5294 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 5295 return Error(IDLoc, 5296 Twine(VariantName, 5297 " variant of this instruction is not supported")); 5298 } 5299 5300 // Check if this instruction may be used with a different wavesize. 5301 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 5302 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 5303 5304 FeatureBitset FeaturesWS32 = getFeatureBits(); 5305 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 5306 .flip(AMDGPU::FeatureWavefrontSize32); 5307 FeatureBitset AvailableFeaturesWS32 = 5308 ComputeAvailableFeatures(FeaturesWS32); 5309 5310 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 5311 return Error(IDLoc, "instruction requires wavesize=32"); 5312 } 5313 5314 // Finally check if this instruction is supported on any other GPU. 5315 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 5316 return Error(IDLoc, "instruction not supported on this GPU"); 5317 } 5318 5319 // Instruction not supported on any GPU. Probably a typo. 
5320 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 5321 return Error(IDLoc, "invalid instruction" + Suggestion); 5322 } 5323 5324 static bool isInvalidVOPDY(const OperandVector &Operands, 5325 uint64_t InvalidOprIdx) { 5326 assert(InvalidOprIdx < Operands.size()); 5327 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 5328 if (Op.isToken() && InvalidOprIdx > 1) { 5329 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 5330 return PrevOp.isToken() && PrevOp.getToken() == "::"; 5331 } 5332 return false; 5333 } 5334 5335 bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 5336 OperandVector &Operands, 5337 MCStreamer &Out, 5338 uint64_t &ErrorInfo, 5339 bool MatchingInlineAsm) { 5340 MCInst Inst; 5341 unsigned Result = Match_Success; 5342 for (auto Variant : getMatchedVariants()) { 5343 uint64_t EI; 5344 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 5345 Variant); 5346 // We order match statuses from least to most specific and use the most 5347 // specific status as the result: 5348 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature 5349 if (R == Match_Success || R == Match_MissingFeature || 5350 (R == Match_InvalidOperand && Result != Match_MissingFeature) || 5351 (R == Match_MnemonicFail && Result != Match_InvalidOperand && 5352 Result != Match_MissingFeature)) { 5353 Result = R; 5354 ErrorInfo = EI; 5355 } 5356 if (R == Match_Success) 5357 break; 5358 } 5359 5360 if (Result == Match_Success) { 5361 if (!validateInstruction(Inst, IDLoc, Operands)) { 5362 return true; 5363 } 5364 Inst.setLoc(IDLoc); 5365 Out.emitInstruction(Inst, getSTI()); 5366 return false; 5367 } 5368 5369 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5370 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 5371 return true; 5372 } 5373 5374 switch (Result) { 5375 default: break; 5376 case Match_MissingFeature: 5377 // It has been verified that the specified instruction 5378 // mnemonic is valid. A match was found but it requires 5379 // features which are not supported on this GPU.
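    // (Illustrative example only: a DPP or wave64-specific encoding selected
    // for a subtarget that lacks the corresponding feature bit ends up in this
    // case, after the mnemonic itself has already been accepted above.)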
5380 return Error(IDLoc, "operands are not valid for this GPU or mode"); 5381 5382 case Match_InvalidOperand: { 5383 SMLoc ErrorLoc = IDLoc; 5384 if (ErrorInfo != ~0ULL) { 5385 if (ErrorInfo >= Operands.size()) { 5386 return Error(IDLoc, "too few operands for instruction"); 5387 } 5388 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 5389 if (ErrorLoc == SMLoc()) 5390 ErrorLoc = IDLoc; 5391 5392 if (isInvalidVOPDY(Operands, ErrorInfo)) 5393 return Error(ErrorLoc, "invalid VOPDY instruction"); 5394 } 5395 return Error(ErrorLoc, "invalid operand for instruction"); 5396 } 5397 5398 case Match_MnemonicFail: 5399 llvm_unreachable("Invalid instructions should have been handled already"); 5400 } 5401 llvm_unreachable("Implement any new match types added!"); 5402 } 5403 5404 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 5405 int64_t Tmp = -1; 5406 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 5407 return true; 5408 } 5409 if (getParser().parseAbsoluteExpression(Tmp)) { 5410 return true; 5411 } 5412 Ret = static_cast<uint32_t>(Tmp); 5413 return false; 5414 } 5415 5416 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 5417 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5418 return TokError("directive only supported for amdgcn architecture"); 5419 5420 std::string TargetIDDirective; 5421 SMLoc TargetStart = getTok().getLoc(); 5422 if (getParser().parseEscapedString(TargetIDDirective)) 5423 return true; 5424 5425 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 5426 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5427 return getParser().Error(TargetRange.Start, 5428 (Twine(".amdgcn_target directive's target id ") + 5429 Twine(TargetIDDirective) + 5430 Twine(" does not match the specified target id ") + 5431 Twine(getTargetStreamer().getTargetID()->toString())).str()); 5432 5433 return false; 5434 } 5435 5436 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 5437 return Error(Range.Start, "value out of range", Range); 5438 } 5439 5440 bool AMDGPUAsmParser::calculateGPRBlocks( 5441 const FeatureBitset &Features, const MCExpr *VCCUsed, 5442 const MCExpr *FlatScrUsed, bool XNACKUsed, 5443 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR, 5444 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange, 5445 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) { 5446 // TODO(scott.linder): These calculations are duplicated from 5447 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
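  // Worked example with illustrative numbers: for 41 VGPRs and an encoding
  // granule of 8, the logic below yields alignTo(max(1, 41), 8) / 8 - 1 =
  // 48 / 8 - 1 = 5, which is the value ultimately written to the
  // GRANULATED_WORKITEM_VGPR_COUNT field of compute_pgm_rsrc1.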
5448 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 5449 MCContext &Ctx = getContext(); 5450 5451 const MCExpr *NumSGPRs = NextFreeSGPR; 5452 int64_t EvaluatedSGPRs; 5453 5454 if (Version.Major >= 10) 5455 NumSGPRs = MCConstantExpr::create(0, Ctx); 5456 else { 5457 unsigned MaxAddressableNumSGPRs = 5458 IsaInfo::getAddressableNumSGPRs(&getSTI()); 5459 5460 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 && 5461 !Features.test(FeatureSGPRInitBug) && 5462 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) 5463 return OutOfRangeError(SGPRRange); 5464 5465 const MCExpr *ExtraSGPRs = 5466 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx); 5467 NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx); 5468 5469 if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && 5470 (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 5471 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) 5472 return OutOfRangeError(SGPRRange); 5473 5474 if (Features.test(FeatureSGPRInitBug)) 5475 NumSGPRs = 5476 MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx); 5477 } 5478 5479 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks: 5480 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1 5481 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR, 5482 unsigned Granule) -> const MCExpr * { 5483 const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx); 5484 const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx); 5485 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx); 5486 const MCExpr *AlignToGPR = 5487 AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx); 5488 const MCExpr *DivGPR = 5489 MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx); 5490 const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx); 5491 return SubGPR; 5492 }; 5493 5494 VGPRBlocks = GetNumGPRBlocks( 5495 NextFreeVGPR, 5496 IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32)); 5497 SGPRBlocks = 5498 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI())); 5499 5500 return false; 5501 } 5502 5503 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 5504 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5505 return TokError("directive only supported for amdgcn architecture"); 5506 5507 if (!isHsaAbi(getSTI())) 5508 return TokError("directive only supported for amdhsa OS"); 5509 5510 StringRef KernelName; 5511 if (getParser().parseIdentifier(KernelName)) 5512 return true; 5513 5514 AMDGPU::MCKernelDescriptor KD = 5515 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( 5516 &getSTI(), getContext()); 5517 5518 StringSet<> Seen; 5519 5520 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 5521 5522 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext()); 5523 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext()); 5524 5525 SMRange VGPRRange; 5526 const MCExpr *NextFreeVGPR = ZeroExpr; 5527 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext()); 5528 uint64_t SharedVGPRCount = 0; 5529 uint64_t PreloadLength = 0; 5530 uint64_t PreloadOffset = 0; 5531 SMRange SGPRRange; 5532 const MCExpr *NextFreeSGPR = ZeroExpr; 5533 5534 // Count the number of user SGPRs implied from the enabled feature bits. 5535 unsigned ImpliedUserSGPRCount = 0; 5536 5537 // Track if the asm explicitly contains the directive for the user SGPR 5538 // count. 
5539 std::optional<unsigned> ExplicitUserSGPRCount; 5540 const MCExpr *ReserveVCC = OneExpr; 5541 const MCExpr *ReserveFlatScr = OneExpr; 5542 std::optional<bool> EnableWavefrontSize32; 5543 5544 while (true) { 5545 while (trySkipToken(AsmToken::EndOfStatement)); 5546 5547 StringRef ID; 5548 SMRange IDRange = getTok().getLocRange(); 5549 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 5550 return true; 5551 5552 if (ID == ".end_amdhsa_kernel") 5553 break; 5554 5555 if (!Seen.insert(ID).second) 5556 return TokError(".amdhsa_ directives cannot be repeated"); 5557 5558 SMLoc ValStart = getLoc(); 5559 const MCExpr *ExprVal; 5560 if (getParser().parseExpression(ExprVal)) 5561 return true; 5562 SMLoc ValEnd = getLoc(); 5563 SMRange ValRange = SMRange(ValStart, ValEnd); 5564 5565 int64_t IVal = 0; 5566 uint64_t Val = IVal; 5567 bool EvaluatableExpr; 5568 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) { 5569 if (IVal < 0) 5570 return OutOfRangeError(ValRange); 5571 Val = IVal; 5572 } 5573 5574 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5575 if (!isUInt<ENTRY##_WIDTH>(Val)) \ 5576 return OutOfRangeError(RANGE); \ 5577 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ 5578 getContext()); 5579 5580 // Some fields use the parsed value immediately which requires the expression to 5581 // be solvable. 5582 #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ 5583 if (!(RESOLVED)) \ 5584 return Error(IDRange.Start, "directive should have resolvable expression", \ 5585 IDRange); 5586 5587 if (ID == ".amdhsa_group_segment_fixed_size") { 5588 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * 5589 CHAR_BIT>(Val)) 5590 return OutOfRangeError(ValRange); 5591 KD.group_segment_fixed_size = ExprVal; 5592 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5593 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * 5594 CHAR_BIT>(Val)) 5595 return OutOfRangeError(ValRange); 5596 KD.private_segment_fixed_size = ExprVal; 5597 } else if (ID == ".amdhsa_kernarg_size") { 5598 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val)) 5599 return OutOfRangeError(ValRange); 5600 KD.kernarg_size = ExprVal; 5601 } else if (ID == ".amdhsa_user_sgpr_count") { 5602 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5603 ExplicitUserSGPRCount = Val; 5604 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5605 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5606 if (hasArchitectedFlatScratch()) 5607 return Error(IDRange.Start, 5608 "directive is not supported with architected flat scratch", 5609 IDRange); 5610 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5611 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5612 ExprVal, ValRange); 5613 if (Val) 5614 ImpliedUserSGPRCount += 4; 5615 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { 5616 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5617 if (!hasKernargPreload()) 5618 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5619 5620 if (Val > getMaxNumUserSGPRs()) 5621 return OutOfRangeError(ValRange); 5622 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, 5623 ValRange); 5624 if (Val) { 5625 ImpliedUserSGPRCount += Val; 5626 PreloadLength = Val; 5627 } 5628 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { 5629 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5630 if (!hasKernargPreload()) 5631 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5632 5633 if (Val >= 1024) 5634 return 
OutOfRangeError(ValRange); 5635 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, 5636 ValRange); 5637 if (Val) 5638 PreloadOffset = Val; 5639 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5640 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5641 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5642 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, 5643 ValRange); 5644 if (Val) 5645 ImpliedUserSGPRCount += 2; 5646 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5647 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5648 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5649 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, 5650 ValRange); 5651 if (Val) 5652 ImpliedUserSGPRCount += 2; 5653 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5654 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5655 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5656 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5657 ExprVal, ValRange); 5658 if (Val) 5659 ImpliedUserSGPRCount += 2; 5660 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5661 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5662 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5663 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, 5664 ValRange); 5665 if (Val) 5666 ImpliedUserSGPRCount += 2; 5667 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5668 if (hasArchitectedFlatScratch()) 5669 return Error(IDRange.Start, 5670 "directive is not supported with architected flat scratch", 5671 IDRange); 5672 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5673 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5674 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5675 ExprVal, ValRange); 5676 if (Val) 5677 ImpliedUserSGPRCount += 2; 5678 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5679 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5680 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5681 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5682 ExprVal, ValRange); 5683 if (Val) 5684 ImpliedUserSGPRCount += 1; 5685 } else if (ID == ".amdhsa_wavefront_size32") { 5686 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5687 if (IVersion.Major < 10) 5688 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5689 EnableWavefrontSize32 = Val; 5690 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5691 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, 5692 ValRange); 5693 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5694 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5695 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, 5696 ValRange); 5697 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5698 if (hasArchitectedFlatScratch()) 5699 return Error(IDRange.Start, 5700 "directive is not supported with architected flat scratch", 5701 IDRange); 5702 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5703 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, 5704 ValRange); 5705 } else if (ID == ".amdhsa_enable_private_segment") { 5706 if (!hasArchitectedFlatScratch()) 5707 return Error( 5708 IDRange.Start, 5709 "directive is not supported without architected flat scratch", 5710 IDRange); 5711 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5712 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, 5713 ValRange); 5714 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5715 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5716 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, 5717 ValRange); 5718 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5719 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5720 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, 
ExprVal, 5721 ValRange); 5722 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5723 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5724 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, 5725 ValRange); 5726 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5728 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, 5729 ValRange); 5730 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5732 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, 5733 ValRange); 5734 } else if (ID == ".amdhsa_next_free_vgpr") { 5735 VGPRRange = ValRange; 5736 NextFreeVGPR = ExprVal; 5737 } else if (ID == ".amdhsa_next_free_sgpr") { 5738 SGPRRange = ValRange; 5739 NextFreeSGPR = ExprVal; 5740 } else if (ID == ".amdhsa_accum_offset") { 5741 if (!isGFX90A()) 5742 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5743 AccumOffset = ExprVal; 5744 } else if (ID == ".amdhsa_reserve_vcc") { 5745 if (EvaluatableExpr && !isUInt<1>(Val)) 5746 return OutOfRangeError(ValRange); 5747 ReserveVCC = ExprVal; 5748 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5749 if (IVersion.Major < 7) 5750 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5751 if (hasArchitectedFlatScratch()) 5752 return Error(IDRange.Start, 5753 "directive is not supported with architected flat scratch", 5754 IDRange); 5755 if (EvaluatableExpr && !isUInt<1>(Val)) 5756 return OutOfRangeError(ValRange); 5757 ReserveFlatScr = ExprVal; 5758 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5759 if (IVersion.Major < 8) 5760 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5761 if (!isUInt<1>(Val)) 5762 return OutOfRangeError(ValRange); 5763 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5764 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5765 IDRange); 5766 } else if (ID == ".amdhsa_float_round_mode_32") { 5767 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5768 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, 5769 ValRange); 5770 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5771 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5772 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, 5773 ValRange); 5774 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5775 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5776 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, 5777 ValRange); 5778 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5779 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5780 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, 5781 ValRange); 5782 } else if (ID == ".amdhsa_dx10_clamp") { 5783 if (IVersion.Major >= 12) 5784 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5785 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5786 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, 5787 ValRange); 5788 } else if (ID == ".amdhsa_ieee_mode") { 5789 if (IVersion.Major >= 12) 5790 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5791 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5792 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, 5793 ValRange); 5794 } else if (ID == ".amdhsa_fp16_overflow") { 5795 if (IVersion.Major < 9) 5796 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5797 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5798 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, 5799 ValRange); 5800 } else if (ID == ".amdhsa_tg_split") { 5801 if (!isGFX90A()) 5802 return Error(IDRange.Start, "directive requires 
gfx90a+", IDRange); 5803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, 5804 ExprVal, ValRange); 5805 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5806 if (IVersion.Major < 10) 5807 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5808 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5809 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, 5810 ValRange); 5811 } else if (ID == ".amdhsa_memory_ordered") { 5812 if (IVersion.Major < 10) 5813 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5814 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5815 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, 5816 ValRange); 5817 } else if (ID == ".amdhsa_forward_progress") { 5818 if (IVersion.Major < 10) 5819 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5821 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, 5822 ValRange); 5823 } else if (ID == ".amdhsa_shared_vgpr_count") { 5824 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); 5825 if (IVersion.Major < 10 || IVersion.Major >= 12) 5826 return Error(IDRange.Start, "directive requires gfx10 or gfx11", 5827 IDRange); 5828 SharedVGPRCount = Val; 5829 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5830 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, 5831 ValRange); 5832 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5833 PARSE_BITS_ENTRY( 5834 KD.compute_pgm_rsrc2, 5835 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 5836 ExprVal, ValRange); 5837 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5838 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5839 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5840 ExprVal, ValRange); 5841 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5842 PARSE_BITS_ENTRY( 5843 KD.compute_pgm_rsrc2, 5844 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 5845 ExprVal, ValRange); 5846 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5847 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5848 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5849 ExprVal, ValRange); 5850 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5852 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5853 ExprVal, ValRange); 5854 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5855 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5857 ExprVal, ValRange); 5858 } else if (ID == ".amdhsa_exception_int_div_zero") { 5859 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5860 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5861 ExprVal, ValRange); 5862 } else if (ID == ".amdhsa_round_robin_scheduling") { 5863 if (IVersion.Major < 12) 5864 return Error(IDRange.Start, "directive requires gfx12+", IDRange); 5865 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5866 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, 5867 ValRange); 5868 } else { 5869 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5870 } 5871 5872 #undef PARSE_BITS_ENTRY 5873 } 5874 5875 if (!Seen.contains(".amdhsa_next_free_vgpr")) 5876 return TokError(".amdhsa_next_free_vgpr directive is required"); 5877 5878 if (!Seen.contains(".amdhsa_next_free_sgpr")) 5879 return TokError(".amdhsa_next_free_sgpr directive is required"); 5880 5881 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount); 5882 5883 // Consider the case where the total number of UserSGPRs with trailing 5884 // 
allocated preload SGPRs is greater than the number of explicitly 5885 // referenced SGPRs. 5886 if (PreloadLength) { 5887 MCContext &Ctx = getContext(); 5888 NextFreeSGPR = AMDGPUMCExpr::createMax( 5889 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx); 5890 } 5891 5892 const MCExpr *VGPRBlocks; 5893 const MCExpr *SGPRBlocks; 5894 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5895 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5896 EnableWavefrontSize32, NextFreeVGPR, 5897 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5898 SGPRBlocks)) 5899 return true; 5900 5901 int64_t EvaluatedVGPRBlocks; 5902 bool VGPRBlocksEvaluatable = 5903 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks); 5904 if (VGPRBlocksEvaluatable && 5905 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5906 static_cast<uint64_t>(EvaluatedVGPRBlocks))) { 5907 return OutOfRangeError(VGPRRange); 5908 } 5909 AMDGPU::MCKernelDescriptor::bits_set( 5910 KD.compute_pgm_rsrc1, VGPRBlocks, 5911 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, 5912 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext()); 5913 5914 int64_t EvaluatedSGPRBlocks; 5915 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) && 5916 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5917 static_cast<uint64_t>(EvaluatedSGPRBlocks))) 5918 return OutOfRangeError(SGPRRange); 5919 AMDGPU::MCKernelDescriptor::bits_set( 5920 KD.compute_pgm_rsrc1, SGPRBlocks, 5921 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, 5922 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext()); 5923 5924 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5925 return TokError("amdgpu_user_sgpr_count smaller than implied by " 5926 "enabled user SGPRs"); 5927 5928 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5929 return TokError("too many user SGPRs enabled"); 5930 AMDGPU::MCKernelDescriptor::bits_set( 5931 KD.compute_pgm_rsrc2, MCConstantExpr::create(UserSGPRCount, getContext()), 5932 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, 5933 COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, getContext()); 5934 5935 int64_t IVal = 0; 5936 if (!KD.kernarg_size->evaluateAsAbsolute(IVal)) 5937 return TokError("Kernarg size should be resolvable"); 5938 uint64_t kernarg_size = IVal; 5939 if (PreloadLength && kernarg_size && 5940 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) 5941 return TokError("Kernarg preload length + offset is larger than the " 5942 "kernarg segment size"); 5943 5944 if (isGFX90A()) { 5945 if (!Seen.contains(".amdhsa_accum_offset")) 5946 return TokError(".amdhsa_accum_offset directive is required"); 5947 int64_t EvaluatedAccum; 5948 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum); 5949 uint64_t UEvaluatedAccum = EvaluatedAccum; 5950 if (AccumEvaluatable && 5951 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3))) 5952 return TokError("accum_offset should be in range [4..256] in " 5953 "increments of 4"); 5954 5955 int64_t EvaluatedNumVGPR; 5956 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) && 5957 AccumEvaluatable && 5958 UEvaluatedAccum > 5959 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4)) 5960 return TokError("accum_offset exceeds total VGPR allocation"); 5961 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub( 5962 MCBinaryExpr::createDiv( 5963 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()), 5964 MCConstantExpr::create(1, getContext()),
getContext()); 5965 MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum, 5966 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, 5967 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5968 getContext()); 5969 } 5970 5971 if (IVersion.Major >= 10 && IVersion.Major < 12) { 5972 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5973 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { 5974 return TokError("shared_vgpr_count directive not valid on " 5975 "wavefront size 32"); 5976 } 5977 5978 if (VGPRBlocksEvaluatable && 5979 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) > 5980 63)) { 5981 return TokError("shared_vgpr_count*2 + " 5982 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5983 "exceed 63"); 5984 } 5985 } 5986 5987 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD, 5988 NextFreeVGPR, NextFreeSGPR, 5989 ReserveVCC, ReserveFlatScr); 5990 return false; 5991 } 5992 5993 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { 5994 uint32_t Version; 5995 if (ParseAsAbsoluteExpression(Version)) 5996 return true; 5997 5998 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version); 5999 return false; 6000 } 6001 6002 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 6003 AMDGPUMCKernelCodeT &C) { 6004 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 6005 // assembly for backwards compatibility. 6006 if (ID == "max_scratch_backing_memory_byte_size") { 6007 Parser.eatToEndOfStatement(); 6008 return false; 6009 } 6010 6011 SmallString<40> ErrStr; 6012 raw_svector_ostream Err(ErrStr); 6013 if (!C.ParseKernelCodeT(ID, getParser(), Err)) { 6014 return TokError(Err.str()); 6015 } 6016 Lex(); 6017 6018 if (ID == "enable_wavefront_size32") { 6019 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 6020 if (!isGFX10Plus()) 6021 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 6022 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 6023 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 6024 } else { 6025 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 6026 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 6027 } 6028 } 6029 6030 if (ID == "wavefront_size") { 6031 if (C.wavefront_size == 5) { 6032 if (!isGFX10Plus()) 6033 return TokError("wavefront_size=5 is only allowed on GFX10+"); 6034 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 6035 return TokError("wavefront_size=5 requires +WavefrontSize32"); 6036 } else if (C.wavefront_size == 6) { 6037 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 6038 return TokError("wavefront_size=6 requires +WavefrontSize64"); 6039 } 6040 } 6041 6042 return false; 6043 } 6044 6045 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 6046 AMDGPUMCKernelCodeT KernelCode; 6047 KernelCode.initDefault(&getSTI(), getContext()); 6048 6049 while (true) { 6050 // Lex EndOfStatement. This is in a while loop, because lexing a comment 6051 // will set the current token to EndOfStatement.
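    // A minimal sketch of the block this loop consumes (field spellings as
    // handled by ParseAMDKernelCodeTValue above; the values are illustrative
    // only):
    //   .amd_kernel_code_t
    //       wavefront_size = 6
    //       enable_wavefront_size32 = 0
    //   .end_amd_kernel_code_t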
6052 while(trySkipToken(AsmToken::EndOfStatement)); 6053 6054 StringRef ID; 6055 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 6056 return true; 6057 6058 if (ID == ".end_amd_kernel_code_t") 6059 break; 6060 6061 if (ParseAMDKernelCodeTValue(ID, KernelCode)) 6062 return true; 6063 } 6064 6065 KernelCode.validate(&getSTI(), getContext()); 6066 getTargetStreamer().EmitAMDKernelCodeT(KernelCode); 6067 6068 return false; 6069 } 6070 6071 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 6072 StringRef KernelName; 6073 if (!parseId(KernelName, "expected symbol name")) 6074 return true; 6075 6076 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 6077 ELF::STT_AMDGPU_HSA_KERNEL); 6078 6079 KernelScope.initialize(getContext()); 6080 return false; 6081 } 6082 6083 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 6084 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 6085 return Error(getLoc(), 6086 ".amd_amdgpu_isa directive is not available on non-amdgcn " 6087 "architectures"); 6088 } 6089 6090 auto TargetIDDirective = getLexer().getTok().getStringContents(); 6091 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 6092 return Error(getParser().getTok().getLoc(), "target id must match options"); 6093 6094 getTargetStreamer().EmitISAVersion(); 6095 Lex(); 6096 6097 return false; 6098 } 6099 6100 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 6101 assert(isHsaAbi(getSTI())); 6102 6103 std::string HSAMetadataString; 6104 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin, 6105 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString)) 6106 return true; 6107 6108 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 6109 return Error(getLoc(), "invalid HSA metadata"); 6110 6111 return false; 6112 } 6113 6114 /// Common code to parse out a block of text (typically YAML) between start and 6115 /// end directives. 6116 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 6117 const char *AssemblerDirectiveEnd, 6118 std::string &CollectString) { 6119 6120 raw_string_ostream CollectStream(CollectString); 6121 6122 getLexer().setSkipSpace(false); 6123 6124 bool FoundEnd = false; 6125 while (!isToken(AsmToken::Eof)) { 6126 while (isToken(AsmToken::Space)) { 6127 CollectStream << getTokenStr(); 6128 Lex(); 6129 } 6130 6131 if (trySkipId(AssemblerDirectiveEnd)) { 6132 FoundEnd = true; 6133 break; 6134 } 6135 6136 CollectStream << Parser.parseStringToEndOfStatement() 6137 << getContext().getAsmInfo()->getSeparatorString(); 6138 6139 Parser.eatToEndOfStatement(); 6140 } 6141 6142 getLexer().setSkipSpace(true); 6143 6144 if (isToken(AsmToken::Eof) && !FoundEnd) { 6145 return TokError(Twine("expected directive ") + 6146 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 6147 } 6148 6149 return false; 6150 } 6151 6152 /// Parse the assembler directive for new MsgPack-format PAL metadata. 6153 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 6154 std::string String; 6155 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 6156 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 6157 return true; 6158 6159 auto *PALMetadata = getTargetStreamer().getPALMetadata(); 6160 if (!PALMetadata->setFromString(String)) 6161 return Error(getLoc(), "invalid PAL metadata"); 6162 return false; 6163 } 6164 6165 /// Parse the assembler directive for old linear-format PAL metadata. 
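/// A minimal illustrative form (the directive spelling is whatever
/// PALMD::AssemblerDirective expands to; the register/value pairs are
/// target-defined and shown here with made-up numbers):
///   <pal_metadata_directive> 0x2c0a, 0x0, 0x2c0b, 0x42
/// i.e. a comma-separated list of alternating register keys and values.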
6166 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 6167 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 6168 return Error(getLoc(), 6169 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 6170 "not available on non-amdpal OSes")).str()); 6171 } 6172 6173 auto *PALMetadata = getTargetStreamer().getPALMetadata(); 6174 PALMetadata->setLegacy(); 6175 for (;;) { 6176 uint32_t Key, Value; 6177 if (ParseAsAbsoluteExpression(Key)) { 6178 return TokError(Twine("invalid value in ") + 6179 Twine(PALMD::AssemblerDirective)); 6180 } 6181 if (!trySkipToken(AsmToken::Comma)) { 6182 return TokError(Twine("expected an even number of values in ") + 6183 Twine(PALMD::AssemblerDirective)); 6184 } 6185 if (ParseAsAbsoluteExpression(Value)) { 6186 return TokError(Twine("invalid value in ") + 6187 Twine(PALMD::AssemblerDirective)); 6188 } 6189 PALMetadata->setRegister(Key, Value); 6190 if (!trySkipToken(AsmToken::Comma)) 6191 break; 6192 } 6193 return false; 6194 } 6195 6196 /// ParseDirectiveAMDGPULDS 6197 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 6198 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 6199 if (getParser().checkForValidSection()) 6200 return true; 6201 6202 StringRef Name; 6203 SMLoc NameLoc = getLoc(); 6204 if (getParser().parseIdentifier(Name)) 6205 return TokError("expected identifier in directive"); 6206 6207 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 6208 if (getParser().parseComma()) 6209 return true; 6210 6211 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 6212 6213 int64_t Size; 6214 SMLoc SizeLoc = getLoc(); 6215 if (getParser().parseAbsoluteExpression(Size)) 6216 return true; 6217 if (Size < 0) 6218 return Error(SizeLoc, "size must be non-negative"); 6219 if (Size > LocalMemorySize) 6220 return Error(SizeLoc, "size is too large"); 6221 6222 int64_t Alignment = 4; 6223 if (trySkipToken(AsmToken::Comma)) { 6224 SMLoc AlignLoc = getLoc(); 6225 if (getParser().parseAbsoluteExpression(Alignment)) 6226 return true; 6227 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 6228 return Error(AlignLoc, "alignment must be a power of two"); 6229 6230 // Alignment larger than the size of LDS is possible in theory, as long 6231 // as the linker manages to place the symbol at address 0, but we do want 6232 // to make sure the alignment fits nicely into a 32-bit integer. 6233 if (Alignment >= 1u << 31) 6234 return Error(AlignLoc, "alignment is too large"); 6235 } 6236 6237 if (parseEOL()) 6238 return true; 6239 6240 Symbol->redefineIfPossible(); 6241 if (!Symbol->isUndefined()) 6242 return Error(NameLoc, "invalid symbol redefinition"); 6243 6244 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 6245 return false; 6246 } 6247 6248 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 6249 StringRef IDVal = DirectiveID.getString(); 6250 6251 if (isHsaAbi(getSTI())) { 6252 if (IDVal == ".amdhsa_kernel") 6253 return ParseDirectiveAMDHSAKernel(); 6254 6255 if (IDVal == ".amdhsa_code_object_version") 6256 return ParseDirectiveAMDHSACodeObjectVersion(); 6257 6258 // TODO: Restructure/combine with PAL metadata directive.
6259 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 6260 return ParseDirectiveHSAMetadata(); 6261 } else { 6262 if (IDVal == ".amd_kernel_code_t") 6263 return ParseDirectiveAMDKernelCodeT(); 6264 6265 if (IDVal == ".amdgpu_hsa_kernel") 6266 return ParseDirectiveAMDGPUHsaKernel(); 6267 6268 if (IDVal == ".amd_amdgpu_isa") 6269 return ParseDirectiveISAVersion(); 6270 6271 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { 6272 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) + 6273 Twine(" directive is " 6274 "not available on non-amdhsa OSes")) 6275 .str()); 6276 } 6277 } 6278 6279 if (IDVal == ".amdgcn_target") 6280 return ParseDirectiveAMDGCNTarget(); 6281 6282 if (IDVal == ".amdgpu_lds") 6283 return ParseDirectiveAMDGPULDS(); 6284 6285 if (IDVal == PALMD::AssemblerDirectiveBegin) 6286 return ParseDirectivePALMetadataBegin(); 6287 6288 if (IDVal == PALMD::AssemblerDirective) 6289 return ParseDirectivePALMetadata(); 6290 6291 return true; 6292 } 6293 6294 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 6295 MCRegister Reg) { 6296 if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg)) 6297 return isGFX9Plus(); 6298 6299 // GFX10+ has 2 more SGPRs 104 and 105. 6300 if (MRI.regsOverlap(SGPR104_SGPR105, Reg)) 6301 return hasSGPR104_SGPR105(); 6302 6303 switch (Reg.id()) { 6304 case SRC_SHARED_BASE_LO: 6305 case SRC_SHARED_BASE: 6306 case SRC_SHARED_LIMIT_LO: 6307 case SRC_SHARED_LIMIT: 6308 case SRC_PRIVATE_BASE_LO: 6309 case SRC_PRIVATE_BASE: 6310 case SRC_PRIVATE_LIMIT_LO: 6311 case SRC_PRIVATE_LIMIT: 6312 return isGFX9Plus(); 6313 case SRC_POPS_EXITING_WAVE_ID: 6314 return isGFX9Plus() && !isGFX11Plus(); 6315 case TBA: 6316 case TBA_LO: 6317 case TBA_HI: 6318 case TMA: 6319 case TMA_LO: 6320 case TMA_HI: 6321 return !isGFX9Plus(); 6322 case XNACK_MASK: 6323 case XNACK_MASK_LO: 6324 case XNACK_MASK_HI: 6325 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 6326 case SGPR_NULL: 6327 return isGFX10Plus(); 6328 case SRC_EXECZ: 6329 case SRC_VCCZ: 6330 return !isGFX11Plus(); 6331 default: 6332 break; 6333 } 6334 6335 if (isCI()) 6336 return true; 6337 6338 if (isSI() || isGFX10Plus()) { 6339 // No flat_scr on SI. 6340 // On GFX10Plus flat scratch is not a valid register operand and can only be 6341 // accessed with s_setreg/s_getreg. 6342 switch (Reg.id()) { 6343 case FLAT_SCR: 6344 case FLAT_SCR_LO: 6345 case FLAT_SCR_HI: 6346 return false; 6347 default: 6348 return true; 6349 } 6350 } 6351 6352 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 6353 // SI/CI have. 6354 if (MRI.regsOverlap(SGPR102_SGPR103, Reg)) 6355 return hasSGPR102_SGPR103(); 6356 6357 return true; 6358 } 6359 6360 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, 6361 StringRef Mnemonic, 6362 OperandMode Mode) { 6363 ParseStatus Res = parseVOPD(Operands); 6364 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6365 return Res; 6366 6367 // Try to parse with a custom parser 6368 Res = MatchOperandParserImpl(Operands, Mnemonic); 6369 6370 // If we successfully parsed the operand or if there as an error parsing, 6371 // we are done. 6372 // 6373 // If we are parsing after we reach EndOfStatement then this means we 6374 // are appending default values to the Operands list. This is only done 6375 // by custom parser, so we shouldn't continue on to the generic parsing. 
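  // (Context for the bracket parsing below, illustrative only: in
  // OperandMode_NSA a GFX10+ MIMG address list such as "[v4, v5, v6]" is
  // parsed as individual registers wrapped in explicit "[" and "]" tokens.)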
6376 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6377 return Res; 6378 6379 SMLoc RBraceLoc; 6380 SMLoc LBraceLoc = getLoc(); 6381 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 6382 unsigned Prefix = Operands.size(); 6383 6384 for (;;) { 6385 auto Loc = getLoc(); 6386 Res = parseReg(Operands); 6387 if (Res.isNoMatch()) 6388 Error(Loc, "expected a register"); 6389 if (!Res.isSuccess()) 6390 return ParseStatus::Failure; 6391 6392 RBraceLoc = getLoc(); 6393 if (trySkipToken(AsmToken::RBrac)) 6394 break; 6395 6396 if (!skipToken(AsmToken::Comma, 6397 "expected a comma or a closing square bracket")) 6398 return ParseStatus::Failure; 6399 } 6400 6401 if (Operands.size() - Prefix > 1) { 6402 Operands.insert(Operands.begin() + Prefix, 6403 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 6404 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 6405 } 6406 6407 return ParseStatus::Success; 6408 } 6409 6410 return parseRegOrImm(Operands); 6411 } 6412 6413 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 6414 // Clear any forced encodings from the previous instruction. 6415 setForcedEncodingSize(0); 6416 setForcedDPP(false); 6417 setForcedSDWA(false); 6418 6419 if (Name.ends_with("_e64_dpp")) { 6420 setForcedDPP(true); 6421 setForcedEncodingSize(64); 6422 return Name.substr(0, Name.size() - 8); 6423 } 6424 if (Name.ends_with("_e64")) { 6425 setForcedEncodingSize(64); 6426 return Name.substr(0, Name.size() - 4); 6427 } 6428 if (Name.ends_with("_e32")) { 6429 setForcedEncodingSize(32); 6430 return Name.substr(0, Name.size() - 4); 6431 } 6432 if (Name.ends_with("_dpp")) { 6433 setForcedDPP(true); 6434 return Name.substr(0, Name.size() - 4); 6435 } 6436 if (Name.ends_with("_sdwa")) { 6437 setForcedSDWA(true); 6438 return Name.substr(0, Name.size() - 5); 6439 } 6440 return Name; 6441 } 6442 6443 static void applyMnemonicAliases(StringRef &Mnemonic, 6444 const FeatureBitset &Features, 6445 unsigned VariantID); 6446 6447 bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info, 6448 StringRef Name, SMLoc NameLoc, 6449 OperandVector &Operands) { 6450 // Add the instruction mnemonic 6451 Name = parseMnemonicSuffix(Name); 6452 6453 // If the target architecture uses MnemonicAlias, call it here to parse 6454 // operands correctly. 6455 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 6456 6457 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 6458 6459 bool IsMIMG = Name.starts_with("image_"); 6460 6461 while (!trySkipToken(AsmToken::EndOfStatement)) { 6462 OperandMode Mode = OperandMode_Default; 6463 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 6464 Mode = OperandMode_NSA; 6465 ParseStatus Res = parseOperand(Operands, Name, Mode); 6466 6467 if (!Res.isSuccess()) { 6468 checkUnsupportedInstruction(Name, NameLoc); 6469 if (!Parser.hasPendingError()) { 6470 // FIXME: use real operand location rather than the current location. 6471 StringRef Msg = Res.isFailure() ? "failed parsing operand." 6472 : "not a valid operand."; 6473 Error(getLoc(), Msg); 6474 } 6475 while (!trySkipToken(AsmToken::EndOfStatement)) { 6476 lex(); 6477 } 6478 return true; 6479 } 6480 6481 // Eat the comma or space if there is one. 
6482 trySkipToken(AsmToken::Comma); 6483 } 6484 6485 return false; 6486 } 6487 6488 //===----------------------------------------------------------------------===// 6489 // Utility functions 6490 //===----------------------------------------------------------------------===// 6491 6492 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 6493 OperandVector &Operands) { 6494 SMLoc S = getLoc(); 6495 if (!trySkipId(Name)) 6496 return ParseStatus::NoMatch; 6497 6498 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 6499 return ParseStatus::Success; 6500 } 6501 6502 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 6503 int64_t &IntVal) { 6504 6505 if (!trySkipId(Prefix, AsmToken::Colon)) 6506 return ParseStatus::NoMatch; 6507 6508 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure; 6509 } 6510 6511 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 6512 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6513 std::function<bool(int64_t &)> ConvertResult) { 6514 SMLoc S = getLoc(); 6515 int64_t Value = 0; 6516 6517 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 6518 if (!Res.isSuccess()) 6519 return Res; 6520 6521 if (ConvertResult && !ConvertResult(Value)) { 6522 Error(S, "invalid " + StringRef(Prefix) + " value."); 6523 } 6524 6525 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 6526 return ParseStatus::Success; 6527 } 6528 6529 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 6530 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6531 bool (*ConvertResult)(int64_t &)) { 6532 SMLoc S = getLoc(); 6533 if (!trySkipId(Prefix, AsmToken::Colon)) 6534 return ParseStatus::NoMatch; 6535 6536 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 6537 return ParseStatus::Failure; 6538 6539 unsigned Val = 0; 6540 const unsigned MaxSize = 4; 6541 6542 // FIXME: How to verify the number of elements matches the number of src 6543 // operands? 
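  // Illustrative syntax handled by the loop below, assuming an op_sel-style
  // operand that uses this parser: "op_sel:[0,1,0]" -- at most MaxSize
  // elements, each of which must be 0 or 1.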
6544 for (int I = 0; ; ++I) { 6545 int64_t Op; 6546 SMLoc Loc = getLoc(); 6547 if (!parseExpr(Op)) 6548 return ParseStatus::Failure; 6549 6550 if (Op != 0 && Op != 1) 6551 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 6552 6553 Val |= (Op << I); 6554 6555 if (trySkipToken(AsmToken::RBrac)) 6556 break; 6557 6558 if (I + 1 == MaxSize) 6559 return Error(getLoc(), "expected a closing square bracket"); 6560 6561 if (!skipToken(AsmToken::Comma, "expected a comma")) 6562 return ParseStatus::Failure; 6563 } 6564 6565 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 6566 return ParseStatus::Success; 6567 } 6568 6569 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 6570 OperandVector &Operands, 6571 AMDGPUOperand::ImmTy ImmTy) { 6572 int64_t Bit; 6573 SMLoc S = getLoc(); 6574 6575 if (trySkipId(Name)) { 6576 Bit = 1; 6577 } else if (trySkipId("no", Name)) { 6578 Bit = 0; 6579 } else { 6580 return ParseStatus::NoMatch; 6581 } 6582 6583 if (Name == "r128" && !hasMIMG_R128()) 6584 return Error(S, "r128 modifier is not supported on this GPU"); 6585 if (Name == "a16" && !hasA16()) 6586 return Error(S, "a16 modifier is not supported on this GPU"); 6587 6588 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6589 ImmTy = AMDGPUOperand::ImmTyR128A16; 6590 6591 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6592 return ParseStatus::Success; 6593 } 6594 6595 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6596 bool &Disabling) const { 6597 Disabling = Id.consume_front("no"); 6598 6599 if (isGFX940() && !Mnemo.starts_with("s_")) { 6600 return StringSwitch<unsigned>(Id) 6601 .Case("nt", AMDGPU::CPol::NT) 6602 .Case("sc0", AMDGPU::CPol::SC0) 6603 .Case("sc1", AMDGPU::CPol::SC1) 6604 .Default(0); 6605 } 6606 6607 return StringSwitch<unsigned>(Id) 6608 .Case("dlc", AMDGPU::CPol::DLC) 6609 .Case("glc", AMDGPU::CPol::GLC) 6610 .Case("scc", AMDGPU::CPol::SCC) 6611 .Case("slc", AMDGPU::CPol::SLC) 6612 .Default(0); 6613 } 6614 6615 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6616 if (isGFX12Plus()) { 6617 SMLoc StringLoc = getLoc(); 6618 6619 int64_t CPolVal = 0; 6620 ParseStatus ResTH = ParseStatus::NoMatch; 6621 ParseStatus ResScope = ParseStatus::NoMatch; 6622 6623 for (;;) { 6624 if (ResTH.isNoMatch()) { 6625 int64_t TH; 6626 ResTH = parseTH(Operands, TH); 6627 if (ResTH.isFailure()) 6628 return ResTH; 6629 if (ResTH.isSuccess()) { 6630 CPolVal |= TH; 6631 continue; 6632 } 6633 } 6634 6635 if (ResScope.isNoMatch()) { 6636 int64_t Scope; 6637 ResScope = parseScope(Operands, Scope); 6638 if (ResScope.isFailure()) 6639 return ResScope; 6640 if (ResScope.isSuccess()) { 6641 CPolVal |= Scope; 6642 continue; 6643 } 6644 } 6645 6646 break; 6647 } 6648 6649 if (ResTH.isNoMatch() && ResScope.isNoMatch()) 6650 return ParseStatus::NoMatch; 6651 6652 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, 6653 AMDGPUOperand::ImmTyCPol)); 6654 return ParseStatus::Success; 6655 } 6656 6657 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6658 SMLoc OpLoc = getLoc(); 6659 unsigned Enabled = 0, Seen = 0; 6660 for (;;) { 6661 SMLoc S = getLoc(); 6662 bool Disabling; 6663 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6664 if (!CPol) 6665 break; 6666 6667 lex(); 6668 6669 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 6670 return Error(S, "dlc modifier is not supported on this GPU"); 6671 6672 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 6673 return Error(S, "scc modifier is not supported 
on this GPU"); 6674 6675 if (Seen & CPol) 6676 return Error(S, "duplicate cache policy modifier"); 6677 6678 if (!Disabling) 6679 Enabled |= CPol; 6680 6681 Seen |= CPol; 6682 } 6683 6684 if (!Seen) 6685 return ParseStatus::NoMatch; 6686 6687 Operands.push_back( 6688 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6689 return ParseStatus::Success; 6690 } 6691 6692 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, 6693 int64_t &Scope) { 6694 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE, 6695 CPol::SCOPE_DEV, CPol::SCOPE_SYS}; 6696 6697 ParseStatus Res = parseStringOrIntWithPrefix( 6698 Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"}, 6699 Scope); 6700 6701 if (Res.isSuccess()) 6702 Scope = Scopes[Scope]; 6703 6704 return Res; 6705 } 6706 6707 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { 6708 TH = AMDGPU::CPol::TH_RT; // default 6709 6710 StringRef Value; 6711 SMLoc StringLoc; 6712 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc); 6713 if (!Res.isSuccess()) 6714 return Res; 6715 6716 if (Value == "TH_DEFAULT") 6717 TH = AMDGPU::CPol::TH_RT; 6718 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || 6719 Value == "TH_LOAD_NT_WB") { 6720 return Error(StringLoc, "invalid th value"); 6721 } else if (Value.consume_front("TH_ATOMIC_")) { 6722 TH = AMDGPU::CPol::TH_TYPE_ATOMIC; 6723 } else if (Value.consume_front("TH_LOAD_")) { 6724 TH = AMDGPU::CPol::TH_TYPE_LOAD; 6725 } else if (Value.consume_front("TH_STORE_")) { 6726 TH = AMDGPU::CPol::TH_TYPE_STORE; 6727 } else { 6728 return Error(StringLoc, "invalid th value"); 6729 } 6730 6731 if (Value == "BYPASS") 6732 TH |= AMDGPU::CPol::TH_REAL_BYPASS; 6733 6734 if (TH != 0) { 6735 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) 6736 TH |= StringSwitch<int64_t>(Value) 6737 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6738 .Case("RT", AMDGPU::CPol::TH_RT) 6739 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6740 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT) 6741 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT | 6742 AMDGPU::CPol::TH_ATOMIC_RETURN) 6743 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE) 6744 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE | 6745 AMDGPU::CPol::TH_ATOMIC_NT) 6746 .Default(0xffffffff); 6747 else 6748 TH |= StringSwitch<int64_t>(Value) 6749 .Case("RT", AMDGPU::CPol::TH_RT) 6750 .Case("NT", AMDGPU::CPol::TH_NT) 6751 .Case("HT", AMDGPU::CPol::TH_HT) 6752 .Case("LU", AMDGPU::CPol::TH_LU) 6753 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB) 6754 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT) 6755 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT) 6756 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT) 6757 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB) 6758 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS) 6759 .Default(0xffffffff); 6760 } 6761 6762 if (TH == 0xffffffff) 6763 return Error(StringLoc, "invalid th value"); 6764 6765 return ParseStatus::Success; 6766 } 6767 6768 static void addOptionalImmOperand( 6769 MCInst& Inst, const OperandVector& Operands, 6770 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 6771 AMDGPUOperand::ImmTy ImmT, 6772 int64_t Default = 0) { 6773 auto i = OptionalIdx.find(ImmT); 6774 if (i != OptionalIdx.end()) { 6775 unsigned Idx = i->second; 6776 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 6777 } else { 6778 Inst.addOperand(MCOperand::createImm(Default)); 6779 } 6780 } 6781 6782 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6783 StringRef &Value, 6784 SMLoc &StringLoc) { 6785 if 
(!trySkipId(Prefix, AsmToken::Colon)) 6786 return ParseStatus::NoMatch; 6787 6788 StringLoc = getLoc(); 6789 return parseId(Value, "expected an identifier") ? ParseStatus::Success 6790 : ParseStatus::Failure; 6791 } 6792 6793 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( 6794 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, 6795 int64_t &IntVal) { 6796 if (!trySkipId(Name, AsmToken::Colon)) 6797 return ParseStatus::NoMatch; 6798 6799 SMLoc StringLoc = getLoc(); 6800 6801 StringRef Value; 6802 if (isToken(AsmToken::Identifier)) { 6803 Value = getTokenStr(); 6804 lex(); 6805 6806 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal) 6807 if (Value == Ids[IntVal]) 6808 break; 6809 } else if (!parseExpr(IntVal)) 6810 return ParseStatus::Failure; 6811 6812 if (IntVal < 0 || IntVal >= (int64_t)Ids.size()) 6813 return Error(StringLoc, "invalid " + Twine(Name) + " value"); 6814 6815 return ParseStatus::Success; 6816 } 6817 6818 ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( 6819 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, 6820 AMDGPUOperand::ImmTy Type) { 6821 SMLoc S = getLoc(); 6822 int64_t IntVal; 6823 6824 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal); 6825 if (Res.isSuccess()) 6826 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type)); 6827 6828 return Res; 6829 } 6830 6831 //===----------------------------------------------------------------------===// 6832 // MTBUF format 6833 //===----------------------------------------------------------------------===// 6834 6835 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6836 int64_t MaxVal, 6837 int64_t &Fmt) { 6838 int64_t Val; 6839 SMLoc Loc = getLoc(); 6840 6841 auto Res = parseIntWithPrefix(Pref, Val); 6842 if (Res.isFailure()) 6843 return false; 6844 if (Res.isNoMatch()) 6845 return true; 6846 6847 if (Val < 0 || Val > MaxVal) { 6848 Error(Loc, Twine("out of range ", StringRef(Pref))); 6849 return false; 6850 } 6851 6852 Fmt = Val; 6853 return true; 6854 } 6855 6856 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, 6857 AMDGPUOperand::ImmTy ImmTy) { 6858 const char *Pref = "index_key"; 6859 int64_t ImmVal = 0; 6860 SMLoc Loc = getLoc(); 6861 auto Res = parseIntWithPrefix(Pref, ImmVal); 6862 if (!Res.isSuccess()) 6863 return Res; 6864 6865 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) 6866 return Error(Loc, Twine("out of range ", StringRef(Pref))); 6867 6868 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) 6869 return Error(Loc, Twine("out of range ", StringRef(Pref))); 6870 6871 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy)); 6872 return ParseStatus::Success; 6873 } 6874 6875 ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { 6876 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit); 6877 } 6878 6879 ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { 6880 return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit); 6881 } 6882 6883 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6884 // values to live in a joint format operand in the MCInst encoding. 6885 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6886 using namespace llvm::AMDGPU::MTBUFFormat; 6887 6888 int64_t Dfmt = DFMT_UNDEF; 6889 int64_t Nfmt = NFMT_UNDEF; 6890 6891 // dfmt and nfmt can appear in either order, and each is optional. 
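  // e.g. (illustrative): "dfmt:1, nfmt:2", "nfmt:2 dfmt:1", or just "dfmt:1";
  // whichever half is omitted falls back to its default below.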
6892 for (int I = 0; I < 2; ++I) { 6893 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6894 return ParseStatus::Failure; 6895 6896 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 6897 return ParseStatus::Failure; 6898 6899 // Skip optional comma between dfmt/nfmt 6900 // but guard against 2 commas following each other. 6901 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6902 !peekToken().is(AsmToken::Comma)) { 6903 trySkipToken(AsmToken::Comma); 6904 } 6905 } 6906 6907 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6908 return ParseStatus::NoMatch; 6909 6910 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6911 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6912 6913 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6914 return ParseStatus::Success; 6915 } 6916 6917 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6918 using namespace llvm::AMDGPU::MTBUFFormat; 6919 6920 int64_t Fmt = UFMT_UNDEF; 6921 6922 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6923 return ParseStatus::Failure; 6924 6925 if (Fmt == UFMT_UNDEF) 6926 return ParseStatus::NoMatch; 6927 6928 Format = Fmt; 6929 return ParseStatus::Success; 6930 } 6931 6932 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6933 int64_t &Nfmt, 6934 StringRef FormatStr, 6935 SMLoc Loc) { 6936 using namespace llvm::AMDGPU::MTBUFFormat; 6937 int64_t Format; 6938 6939 Format = getDfmt(FormatStr); 6940 if (Format != DFMT_UNDEF) { 6941 Dfmt = Format; 6942 return true; 6943 } 6944 6945 Format = getNfmt(FormatStr, getSTI()); 6946 if (Format != NFMT_UNDEF) { 6947 Nfmt = Format; 6948 return true; 6949 } 6950 6951 Error(Loc, "unsupported format"); 6952 return false; 6953 } 6954 6955 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6956 SMLoc FormatLoc, 6957 int64_t &Format) { 6958 using namespace llvm::AMDGPU::MTBUFFormat; 6959 6960 int64_t Dfmt = DFMT_UNDEF; 6961 int64_t Nfmt = NFMT_UNDEF; 6962 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6963 return ParseStatus::Failure; 6964 6965 if (trySkipToken(AsmToken::Comma)) { 6966 StringRef Str; 6967 SMLoc Loc = getLoc(); 6968 if (!parseId(Str, "expected a format string") || 6969 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 6970 return ParseStatus::Failure; 6971 if (Dfmt == DFMT_UNDEF) 6972 return Error(Loc, "duplicate numeric format"); 6973 if (Nfmt == NFMT_UNDEF) 6974 return Error(Loc, "duplicate data format"); 6975 } 6976 6977 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6978 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6979 6980 if (isGFX10Plus()) { 6981 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6982 if (Ufmt == UFMT_UNDEF) 6983 return Error(FormatLoc, "unsupported format"); 6984 Format = Ufmt; 6985 } else { 6986 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6987 } 6988 6989 return ParseStatus::Success; 6990 } 6991 6992 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6993 SMLoc Loc, 6994 int64_t &Format) { 6995 using namespace llvm::AMDGPU::MTBUFFormat; 6996 6997 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6998 if (Id == UFMT_UNDEF) 6999 return ParseStatus::NoMatch; 7000 7001 if (!isGFX10Plus()) 7002 return Error(Loc, "unified format is not supported on this GPU"); 7003 7004 Format = Id; 7005 return ParseStatus::Success; 7006 } 7007 7008 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 7009 using namespace llvm::AMDGPU::MTBUFFormat; 7010 SMLoc Loc = getLoc(); 7011 7012 if (!parseExpr(Format)) 7013 return ParseStatus::Failure; 7014 if (!isValidFormatEncoding(Format, getSTI())) 7015 return Error(Loc, "out of range format"); 7016 7017 return ParseStatus::Success; 7018 } 7019 7020 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 7021 using namespace llvm::AMDGPU::MTBUFFormat; 7022 7023 if (!trySkipId("format", AsmToken::Colon)) 7024 return ParseStatus::NoMatch; 7025 7026 if (trySkipToken(AsmToken::LBrac)) { 7027 StringRef FormatStr; 7028 SMLoc Loc = getLoc(); 7029 if (!parseId(FormatStr, "expected a format string")) 7030 return ParseStatus::Failure; 7031 7032 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 7033 if (Res.isNoMatch()) 7034 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 7035 if (!Res.isSuccess()) 7036 return Res; 7037 7038 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7039 return ParseStatus::Failure; 7040 7041 return ParseStatus::Success; 7042 } 7043 7044 return parseNumericFormat(Format); 7045 } 7046 7047 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 7048 using namespace llvm::AMDGPU::MTBUFFormat; 7049 7050 int64_t Format = getDefaultFormatEncoding(getSTI()); 7051 ParseStatus Res; 7052 SMLoc Loc = getLoc(); 7053 7054 // Parse legacy format syntax. 7055 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 7056 if (Res.isFailure()) 7057 return Res; 7058 7059 bool FormatFound = Res.isSuccess(); 7060 7061 Operands.push_back( 7062 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 7063 7064 if (FormatFound) 7065 trySkipToken(AsmToken::Comma); 7066 7067 if (isToken(AsmToken::EndOfStatement)) { 7068 // We are expecting an soffset operand, 7069 // but let matcher handle the error. 7070 return ParseStatus::Success; 7071 } 7072 7073 // Parse soffset. 
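// (Any register or immediate expression is accepted at this point; if no
// format was matched above, a trailing "format:..." specifier may still
// follow soffset and is patched back into the FORMAT operand below.)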
7074 Res = parseRegOrImm(Operands); 7075 if (!Res.isSuccess()) 7076 return Res; 7077 7078 trySkipToken(AsmToken::Comma); 7079 7080 if (!FormatFound) { 7081 Res = parseSymbolicOrNumericFormat(Format); 7082 if (Res.isFailure()) 7083 return Res; 7084 if (Res.isSuccess()) { 7085 auto Size = Operands.size(); 7086 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 7087 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 7088 Op.setImm(Format); 7089 } 7090 return ParseStatus::Success; 7091 } 7092 7093 if (isId("format") && peekToken().is(AsmToken::Colon)) 7094 return Error(getLoc(), "duplicate format"); 7095 return ParseStatus::Success; 7096 } 7097 7098 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 7099 ParseStatus Res = 7100 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 7101 if (Res.isNoMatch()) { 7102 Res = parseIntWithPrefix("inst_offset", Operands, 7103 AMDGPUOperand::ImmTyInstOffset); 7104 } 7105 return Res; 7106 } 7107 7108 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 7109 ParseStatus Res = 7110 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 7111 if (Res.isNoMatch()) 7112 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 7113 return Res; 7114 } 7115 7116 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 7117 ParseStatus Res = 7118 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 7119 if (Res.isNoMatch()) { 7120 Res = 7121 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 7122 } 7123 return Res; 7124 } 7125 7126 //===----------------------------------------------------------------------===// 7127 // Exp 7128 //===----------------------------------------------------------------------===// 7129 7130 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 7131 OptionalImmIndexMap OptionalIdx; 7132 7133 unsigned OperandIdx[4]; 7134 unsigned EnMask = 0; 7135 int SrcIdx = 0; 7136 7137 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7138 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7139 7140 // Add the register arguments 7141 if (Op.isReg()) { 7142 assert(SrcIdx < 4); 7143 OperandIdx[SrcIdx] = Inst.size(); 7144 Op.addRegOperands(Inst, 1); 7145 ++SrcIdx; 7146 continue; 7147 } 7148 7149 if (Op.isOff()) { 7150 assert(SrcIdx < 4); 7151 OperandIdx[SrcIdx] = Inst.size(); 7152 Inst.addOperand(MCOperand::createReg(MCRegister())); 7153 ++SrcIdx; 7154 continue; 7155 } 7156 7157 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 7158 Op.addImmOperands(Inst, 1); 7159 continue; 7160 } 7161 7162 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 7163 continue; 7164 7165 // Handle optional arguments 7166 OptionalIdx[Op.getImmTy()] = i; 7167 } 7168 7169 assert(SrcIdx == 4); 7170 7171 bool Compr = false; 7172 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 7173 Compr = true; 7174 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 7175 Inst.getOperand(OperandIdx[2]).setReg(MCRegister()); 7176 Inst.getOperand(OperandIdx[3]).setReg(MCRegister()); 7177 } 7178 7179 for (auto i = 0; i < SrcIdx; ++i) { 7180 if (Inst.getOperand(OperandIdx[i]).getReg()) { 7181 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 7182 } 7183 } 7184 7185 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 7186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 7187 7188 Inst.addOperand(MCOperand::createImm(EnMask)); 7189 } 7190 7191 //===----------------------------------------------------------------------===// 7192 // s_waitcnt 7193 //===----------------------------------------------------------------------===// 7194 7195 static bool 7196 encodeCnt( 7197 const AMDGPU::IsaVersion ISA, 7198 int64_t &IntVal, 7199 int64_t CntVal, 7200 bool Saturate, 7201 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 7202 unsigned (*decode)(const IsaVersion &Version, unsigned)) 7203 { 7204 bool Failed = false; 7205 7206 IntVal = encode(ISA, IntVal, CntVal); 7207 if (CntVal != decode(ISA, IntVal)) { 7208 if (Saturate) { 7209 IntVal = encode(ISA, IntVal, -1); 7210 } else { 7211 Failed = true; 7212 } 7213 } 7214 return Failed; 7215 } 7216 7217 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 7218 7219 SMLoc CntLoc = getLoc(); 7220 StringRef CntName = getTokenStr(); 7221 7222 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7223 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7224 return false; 7225 7226 int64_t CntVal; 7227 SMLoc ValLoc = getLoc(); 7228 if (!parseExpr(CntVal)) 7229 return false; 7230 7231 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 7232 7233 bool Failed = true; 7234 bool Sat = CntName.ends_with("_sat"); 7235 7236 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 7237 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 7238 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 7239 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 7240 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 7241 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 7242 } else { 7243 Error(CntLoc, "invalid counter name " + CntName); 7244 return false; 7245 } 7246 7247 if (Failed) { 7248 Error(ValLoc, "too large value for " + CntName); 7249 return false; 7250 } 7251 7252 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 7253 return false; 7254 7255 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7256 if (isToken(AsmToken::EndOfStatement)) { 7257 Error(getLoc(), "expected a counter name"); 7258 return false; 7259 } 7260 } 7261 7262 return true; 7263 } 7264 7265 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 7266 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 7267 int64_t Waitcnt = getWaitcntBitMask(ISA); 7268 SMLoc S = getLoc(); 7269 7270 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7271 while (!isToken(AsmToken::EndOfStatement)) { 7272 if (!parseCnt(Waitcnt)) 7273 return ParseStatus::Failure; 7274 } 7275 } else { 7276 if (!parseExpr(Waitcnt)) 7277 return ParseStatus::Failure; 7278 } 7279 7280 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 7281 return ParseStatus::Success; 7282 } 7283 7284 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 7285 SMLoc FieldLoc = getLoc(); 7286 StringRef FieldName = getTokenStr(); 7287 if (!skipToken(AsmToken::Identifier, "expected a field name") || 7288 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7289 return false; 7290 7291 SMLoc ValueLoc = getLoc(); 7292 StringRef ValueName = getTokenStr(); 7293 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 7294 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 7295 return false; 7296 7297 unsigned Shift; 7298 if (FieldName == "instid0") { 7299 Shift = 0; 7300 } else if (FieldName == "instskip") { 7301 Shift = 4; 7302 } else if (FieldName == "instid1") { 7303 Shift = 7; 7304 } else { 7305 Error(FieldLoc, "invalid field name " + FieldName); 7306 return false; 7307 } 7308 7309 int Value; 7310 if (Shift == 4) { 7311 // Parse values for instskip. 7312 Value = StringSwitch<int>(ValueName) 7313 .Case("SAME", 0) 7314 .Case("NEXT", 1) 7315 .Case("SKIP_1", 2) 7316 .Case("SKIP_2", 3) 7317 .Case("SKIP_3", 4) 7318 .Case("SKIP_4", 5) 7319 .Default(-1); 7320 } else { 7321 // Parse values for instid0 and instid1. 7322 Value = StringSwitch<int>(ValueName) 7323 .Case("NO_DEP", 0) 7324 .Case("VALU_DEP_1", 1) 7325 .Case("VALU_DEP_2", 2) 7326 .Case("VALU_DEP_3", 3) 7327 .Case("VALU_DEP_4", 4) 7328 .Case("TRANS32_DEP_1", 5) 7329 .Case("TRANS32_DEP_2", 6) 7330 .Case("TRANS32_DEP_3", 7) 7331 .Case("FMA_ACCUM_CYCLE_1", 8) 7332 .Case("SALU_CYCLE_1", 9) 7333 .Case("SALU_CYCLE_2", 10) 7334 .Case("SALU_CYCLE_3", 11) 7335 .Default(-1); 7336 } 7337 if (Value < 0) { 7338 Error(ValueLoc, "invalid value name " + ValueName); 7339 return false; 7340 } 7341 7342 Delay |= Value << Shift; 7343 return true; 7344 } 7345 7346 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 7347 int64_t Delay = 0; 7348 SMLoc S = getLoc(); 7349 7350 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7351 do { 7352 if (!parseDelay(Delay)) 7353 return ParseStatus::Failure; 7354 } while (trySkipToken(AsmToken::Pipe)); 7355 } else { 7356 if (!parseExpr(Delay)) 7357 return ParseStatus::Failure; 7358 } 7359 7360 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 7361 return ParseStatus::Success; 7362 } 7363 7364 bool 7365 AMDGPUOperand::isSWaitCnt() const { 7366 return isImm(); 7367 } 7368 7369 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 7370 7371 //===----------------------------------------------------------------------===// 7372 // DepCtr 7373 //===----------------------------------------------------------------------===// 7374 7375 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 7376 StringRef DepCtrName) { 7377 switch (ErrorId) { 7378 case OPR_ID_UNKNOWN: 7379 Error(Loc, Twine("invalid counter name ", DepCtrName)); 7380 return; 7381 case OPR_ID_UNSUPPORTED: 7382 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 7383 return; 7384 case OPR_ID_DUPLICATE: 7385 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 7386 return; 7387 case OPR_VAL_INVALID: 7388 Error(Loc, Twine("invalid value for ", DepCtrName)); 7389 return; 7390 default: 7391 assert(false); 7392 } 7393 } 7394 7395 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 7396 7397 using namespace llvm::AMDGPU::DepCtr; 7398 7399 SMLoc DepCtrLoc = getLoc(); 7400 StringRef DepCtrName = getTokenStr(); 7401 7402 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 7403 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 7404 return false; 7405 7406 int64_t ExprVal; 7407 if (!parseExpr(ExprVal)) 7408 return false; 7409 7410 unsigned PrevOprMask = UsedOprMask; 7411 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 7412 7413 if (CntVal < 0) { 7414 depCtrError(DepCtrLoc, CntVal, DepCtrName); 7415 return false; 7416 } 7417 7418 if (!skipToken(AsmToken::RParen, "expected 
a closing parenthesis")) 7419 return false; 7420 7421 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 7422 if (isToken(AsmToken::EndOfStatement)) { 7423 Error(getLoc(), "expected a counter name"); 7424 return false; 7425 } 7426 } 7427 7428 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 7429 DepCtr = (DepCtr & ~CntValMask) | CntVal; 7430 return true; 7431 } 7432 7433 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 7434 using namespace llvm::AMDGPU::DepCtr; 7435 7436 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 7437 SMLoc Loc = getLoc(); 7438 7439 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7440 unsigned UsedOprMask = 0; 7441 while (!isToken(AsmToken::EndOfStatement)) { 7442 if (!parseDepCtr(DepCtr, UsedOprMask)) 7443 return ParseStatus::Failure; 7444 } 7445 } else { 7446 if (!parseExpr(DepCtr)) 7447 return ParseStatus::Failure; 7448 } 7449 7450 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 7451 return ParseStatus::Success; 7452 } 7453 7454 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 7455 7456 //===----------------------------------------------------------------------===// 7457 // hwreg 7458 //===----------------------------------------------------------------------===// 7459 7460 ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg, 7461 OperandInfoTy &Offset, 7462 OperandInfoTy &Width) { 7463 using namespace llvm::AMDGPU::Hwreg; 7464 7465 if (!trySkipId("hwreg", AsmToken::LParen)) 7466 return ParseStatus::NoMatch; 7467 7468 // The register may be specified by name or using a numeric code 7469 HwReg.Loc = getLoc(); 7470 if (isToken(AsmToken::Identifier) && 7471 (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7472 HwReg.IsSymbolic = true; 7473 lex(); // skip register name 7474 } else if (!parseExpr(HwReg.Val, "a register name")) { 7475 return ParseStatus::Failure; 7476 } 7477 7478 if (trySkipToken(AsmToken::RParen)) 7479 return ParseStatus::Success; 7480 7481 // parse optional params 7482 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 7483 return ParseStatus::Failure; 7484 7485 Offset.Loc = getLoc(); 7486 if (!parseExpr(Offset.Val)) 7487 return ParseStatus::Failure; 7488 7489 if (!skipToken(AsmToken::Comma, "expected a comma")) 7490 return ParseStatus::Failure; 7491 7492 Width.Loc = getLoc(); 7493 if (!parseExpr(Width.Val) || 7494 !skipToken(AsmToken::RParen, "expected a closing parenthesis")) 7495 return ParseStatus::Failure; 7496 7497 return ParseStatus::Success; 7498 } 7499 7500 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 7501 using namespace llvm::AMDGPU::Hwreg; 7502 7503 int64_t ImmVal = 0; 7504 SMLoc Loc = getLoc(); 7505 7506 StructuredOpField HwReg("id", "hardware register", HwregId::Width, 7507 HwregId::Default); 7508 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width, 7509 HwregOffset::Default); 7510 struct : StructuredOpField { 7511 using StructuredOpField::StructuredOpField; 7512 bool validate(AMDGPUAsmParser &Parser) const override { 7513 if (!isUIntN(Width, Val - 1)) 7514 return Error(Parser, "only values from 1 to 32 are legal"); 7515 return true; 7516 } 7517 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default); 7518 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width}); 7519 7520 if (Res.isNoMatch()) 7521 Res = parseHwregFunc(HwReg, Offset, Width); 7522 7523 if (Res.isSuccess()) { 7524 if (!validateStructuredOpFields({&HwReg, 
&Offset, &Width})) 7525 return ParseStatus::Failure; 7526 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val); 7527 } 7528 7529 if (Res.isNoMatch() && 7530 parseExpr(ImmVal, "a hwreg macro, structured immediate")) 7531 Res = ParseStatus::Success; 7532 7533 if (!Res.isSuccess()) 7534 return ParseStatus::Failure; 7535 7536 if (!isUInt<16>(ImmVal)) 7537 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7538 Operands.push_back( 7539 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 7540 return ParseStatus::Success; 7541 } 7542 7543 bool AMDGPUOperand::isHwreg() const { 7544 return isImmTy(ImmTyHwreg); 7545 } 7546 7547 //===----------------------------------------------------------------------===// 7548 // sendmsg 7549 //===----------------------------------------------------------------------===// 7550 7551 bool 7552 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 7553 OperandInfoTy &Op, 7554 OperandInfoTy &Stream) { 7555 using namespace llvm::AMDGPU::SendMsg; 7556 7557 Msg.Loc = getLoc(); 7558 if (isToken(AsmToken::Identifier) && 7559 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7560 Msg.IsSymbolic = true; 7561 lex(); // skip message name 7562 } else if (!parseExpr(Msg.Val, "a message name")) { 7563 return false; 7564 } 7565 7566 if (trySkipToken(AsmToken::Comma)) { 7567 Op.IsDefined = true; 7568 Op.Loc = getLoc(); 7569 if (isToken(AsmToken::Identifier) && 7570 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) != 7571 OPR_ID_UNKNOWN) { 7572 lex(); // skip operation name 7573 } else if (!parseExpr(Op.Val, "an operation name")) { 7574 return false; 7575 } 7576 7577 if (trySkipToken(AsmToken::Comma)) { 7578 Stream.IsDefined = true; 7579 Stream.Loc = getLoc(); 7580 if (!parseExpr(Stream.Val)) 7581 return false; 7582 } 7583 } 7584 7585 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7586 } 7587 7588 bool 7589 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 7590 const OperandInfoTy &Op, 7591 const OperandInfoTy &Stream) { 7592 using namespace llvm::AMDGPU::SendMsg; 7593 7594 // Validation strictness depends on whether message is specified 7595 // in a symbolic or in a numeric form. In the latter case 7596 // only encoding possibility is checked.
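// E.g. a symbolic form such as sendmsg(MSG_INTERRUPT) is validated against
// the target's message table, while a raw immediate like 0x1 only has to be
// encodable (illustrative examples).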
7597 bool Strict = Msg.IsSymbolic; 7598 7599 if (Strict) { 7600 if (Msg.Val == OPR_ID_UNSUPPORTED) { 7601 Error(Msg.Loc, "specified message id is not supported on this GPU"); 7602 return false; 7603 } 7604 } else { 7605 if (!isValidMsgId(Msg.Val, getSTI())) { 7606 Error(Msg.Loc, "invalid message id"); 7607 return false; 7608 } 7609 } 7610 if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) { 7611 if (Op.IsDefined) { 7612 Error(Op.Loc, "message does not support operations"); 7613 } else { 7614 Error(Msg.Loc, "missing message operation"); 7615 } 7616 return false; 7617 } 7618 if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) { 7619 if (Op.Val == OPR_ID_UNSUPPORTED) 7620 Error(Op.Loc, "specified operation id is not supported on this GPU"); 7621 else 7622 Error(Op.Loc, "invalid operation id"); 7623 return false; 7624 } 7625 if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) && 7626 Stream.IsDefined) { 7627 Error(Stream.Loc, "message operation does not support streams"); 7628 return false; 7629 } 7630 if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) { 7631 Error(Stream.Loc, "invalid message stream id"); 7632 return false; 7633 } 7634 return true; 7635 } 7636 7637 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 7638 using namespace llvm::AMDGPU::SendMsg; 7639 7640 int64_t ImmVal = 0; 7641 SMLoc Loc = getLoc(); 7642 7643 if (trySkipId("sendmsg", AsmToken::LParen)) { 7644 OperandInfoTy Msg(OPR_ID_UNKNOWN); 7645 OperandInfoTy Op(OP_NONE_); 7646 OperandInfoTy Stream(STREAM_ID_NONE_); 7647 if (parseSendMsgBody(Msg, Op, Stream) && 7648 validateSendMsg(Msg, Op, Stream)) { 7649 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val); 7650 } else { 7651 return ParseStatus::Failure; 7652 } 7653 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 7654 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7655 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7656 } else { 7657 return ParseStatus::Failure; 7658 } 7659 7660 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 7661 return ParseStatus::Success; 7662 } 7663 7664 bool AMDGPUOperand::isSendMsg() const { 7665 return isImmTy(ImmTySendMsg); 7666 } 7667 7668 //===----------------------------------------------------------------------===// 7669 // v_interp 7670 //===----------------------------------------------------------------------===// 7671 7672 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7673 StringRef Str; 7674 SMLoc S = getLoc(); 7675 7676 if (!parseId(Str)) 7677 return ParseStatus::NoMatch; 7678 7679 int Slot = StringSwitch<int>(Str) 7680 .Case("p10", 0) 7681 .Case("p20", 1) 7682 .Case("p0", 2) 7683 .Default(-1); 7684 7685 if (Slot == -1) 7686 return Error(S, "invalid interpolation slot"); 7687 7688 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 7689 AMDGPUOperand::ImmTyInterpSlot)); 7690 return ParseStatus::Success; 7691 } 7692 7693 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 7694 StringRef Str; 7695 SMLoc S = getLoc(); 7696 7697 if (!parseId(Str)) 7698 return ParseStatus::NoMatch; 7699 7700 if (!Str.starts_with("attr")) 7701 return Error(S, "invalid interpolation attribute"); 7702 7703 StringRef Chan = Str.take_back(2); 7704 int AttrChan = StringSwitch<int>(Chan) 7705 .Case(".x", 0) 7706 .Case(".y", 1) 7707 .Case(".z", 2) 7708 .Case(".w", 3) 7709 .Default(-1); 7710 if (AttrChan == -1) 7711 return Error(S, "invalid or missing interpolation attribute channel"); 7712 7713 
Str = Str.drop_back(2).drop_front(4); 7714 7715 uint8_t Attr; 7716 if (Str.getAsInteger(10, Attr)) 7717 return Error(S, "invalid or missing interpolation attribute number"); 7718 7719 if (Attr > 32) 7720 return Error(S, "out of bounds interpolation attribute number"); 7721 7722 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 7723 7724 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 7725 AMDGPUOperand::ImmTyInterpAttr)); 7726 Operands.push_back(AMDGPUOperand::CreateImm( 7727 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 7728 return ParseStatus::Success; 7729 } 7730 7731 //===----------------------------------------------------------------------===// 7732 // exp 7733 //===----------------------------------------------------------------------===// 7734 7735 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 7736 using namespace llvm::AMDGPU::Exp; 7737 7738 StringRef Str; 7739 SMLoc S = getLoc(); 7740 7741 if (!parseId(Str)) 7742 return ParseStatus::NoMatch; 7743 7744 unsigned Id = getTgtId(Str); 7745 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 7746 return Error(S, (Id == ET_INVALID) 7747 ? "invalid exp target" 7748 : "exp target is not supported on this GPU"); 7749 7750 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 7751 AMDGPUOperand::ImmTyExpTgt)); 7752 return ParseStatus::Success; 7753 } 7754 7755 //===----------------------------------------------------------------------===// 7756 // parser helpers 7757 //===----------------------------------------------------------------------===// 7758 7759 bool 7760 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 7761 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 7762 } 7763 7764 bool 7765 AMDGPUAsmParser::isId(const StringRef Id) const { 7766 return isId(getToken(), Id); 7767 } 7768 7769 bool 7770 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7771 return getTokenKind() == Kind; 7772 } 7773 7774 StringRef AMDGPUAsmParser::getId() const { 7775 return isToken(AsmToken::Identifier) ? 
getTokenStr() : StringRef(); 7776 } 7777 7778 bool 7779 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7780 if (isId(Id)) { 7781 lex(); 7782 return true; 7783 } 7784 return false; 7785 } 7786 7787 bool 7788 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7789 if (isToken(AsmToken::Identifier)) { 7790 StringRef Tok = getTokenStr(); 7791 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { 7792 lex(); 7793 return true; 7794 } 7795 } 7796 return false; 7797 } 7798 7799 bool 7800 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7801 if (isId(Id) && peekToken().is(Kind)) { 7802 lex(); 7803 lex(); 7804 return true; 7805 } 7806 return false; 7807 } 7808 7809 bool 7810 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7811 if (isToken(Kind)) { 7812 lex(); 7813 return true; 7814 } 7815 return false; 7816 } 7817 7818 bool 7819 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7820 const StringRef ErrMsg) { 7821 if (!trySkipToken(Kind)) { 7822 Error(getLoc(), ErrMsg); 7823 return false; 7824 } 7825 return true; 7826 } 7827 7828 bool 7829 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7830 SMLoc S = getLoc(); 7831 7832 const MCExpr *Expr; 7833 if (Parser.parseExpression(Expr)) 7834 return false; 7835 7836 if (Expr->evaluateAsAbsolute(Imm)) 7837 return true; 7838 7839 if (Expected.empty()) { 7840 Error(S, "expected absolute expression"); 7841 } else { 7842 Error(S, Twine("expected ", Expected) + 7843 Twine(" or an absolute expression")); 7844 } 7845 return false; 7846 } 7847 7848 bool 7849 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7850 SMLoc S = getLoc(); 7851 7852 const MCExpr *Expr; 7853 if (Parser.parseExpression(Expr)) 7854 return false; 7855 7856 int64_t IntVal; 7857 if (Expr->evaluateAsAbsolute(IntVal)) { 7858 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7859 } else { 7860 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7861 } 7862 return true; 7863 } 7864 7865 bool 7866 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7867 if (isToken(AsmToken::String)) { 7868 Val = getToken().getStringContents(); 7869 lex(); 7870 return true; 7871 } 7872 Error(getLoc(), ErrMsg); 7873 return false; 7874 } 7875 7876 bool 7877 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7878 if (isToken(AsmToken::Identifier)) { 7879 Val = getTokenStr(); 7880 lex(); 7881 return true; 7882 } 7883 if (!ErrMsg.empty()) 7884 Error(getLoc(), ErrMsg); 7885 return false; 7886 } 7887 7888 AsmToken 7889 AMDGPUAsmParser::getToken() const { 7890 return Parser.getTok(); 7891 } 7892 7893 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7894 return isToken(AsmToken::EndOfStatement) 7895 ? 
getToken() 7896 : getLexer().peekTok(ShouldSkipSpace); 7897 } 7898 7899 void 7900 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7901 auto TokCount = getLexer().peekTokens(Tokens); 7902 7903 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7904 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7905 } 7906 7907 AsmToken::TokenKind 7908 AMDGPUAsmParser::getTokenKind() const { 7909 return getLexer().getKind(); 7910 } 7911 7912 SMLoc 7913 AMDGPUAsmParser::getLoc() const { 7914 return getToken().getLoc(); 7915 } 7916 7917 StringRef 7918 AMDGPUAsmParser::getTokenStr() const { 7919 return getToken().getString(); 7920 } 7921 7922 void 7923 AMDGPUAsmParser::lex() { 7924 Parser.Lex(); 7925 } 7926 7927 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7928 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7929 } 7930 7931 SMLoc 7932 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7933 const OperandVector &Operands) const { 7934 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7935 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7936 if (Test(Op)) 7937 return Op.getStartLoc(); 7938 } 7939 return getInstLoc(Operands); 7940 } 7941 7942 SMLoc 7943 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7944 const OperandVector &Operands) const { 7945 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7946 return getOperandLoc(Test, Operands); 7947 } 7948 7949 SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg, 7950 const OperandVector &Operands) const { 7951 auto Test = [=](const AMDGPUOperand& Op) { 7952 return Op.isRegKind() && Op.getReg() == Reg; 7953 }; 7954 return getOperandLoc(Test, Operands); 7955 } 7956 7957 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7958 bool SearchMandatoryLiterals) const { 7959 auto Test = [](const AMDGPUOperand& Op) { 7960 return Op.IsImmKindLiteral() || Op.isExpr(); 7961 }; 7962 SMLoc Loc = getOperandLoc(Test, Operands); 7963 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7964 Loc = getMandatoryLitLoc(Operands); 7965 return Loc; 7966 } 7967 7968 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7969 auto Test = [](const AMDGPUOperand &Op) { 7970 return Op.IsImmKindMandatoryLiteral(); 7971 }; 7972 return getOperandLoc(Test, Operands); 7973 } 7974 7975 SMLoc 7976 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7977 auto Test = [](const AMDGPUOperand& Op) { 7978 return Op.isImmKindConst(); 7979 }; 7980 return getOperandLoc(Test, Operands); 7981 } 7982 7983 ParseStatus 7984 AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) { 7985 if (!trySkipToken(AsmToken::LCurly)) 7986 return ParseStatus::NoMatch; 7987 7988 bool First = true; 7989 while (!trySkipToken(AsmToken::RCurly)) { 7990 if (!First && 7991 !skipToken(AsmToken::Comma, "comma or closing brace expected")) 7992 return ParseStatus::Failure; 7993 7994 StringRef Id = getTokenStr(); 7995 SMLoc IdLoc = getLoc(); 7996 if (!skipToken(AsmToken::Identifier, "field name expected") || 7997 !skipToken(AsmToken::Colon, "colon expected")) 7998 return ParseStatus::Failure; 7999 8000 const auto *I = 8001 find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; }); 8002 if (I == Fields.end()) 8003 return Error(IdLoc, "unknown field"); 8004 if ((*I)->IsDefined) 8005 return Error(IdLoc, "duplicate field"); 8006 8007 // TODO: Support symbolic values. 
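// (Each value is parsed with parseExpr below, so only absolute numeric
// expressions are accepted for now, e.g. {id: 5, offset: 0, size: 32} for a
// hwreg operand; illustrative.)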
8008 (*I)->Loc = getLoc(); 8009 if (!parseExpr((*I)->Val)) 8010 return ParseStatus::Failure; 8011 (*I)->IsDefined = true; 8012 8013 First = false; 8014 } 8015 return ParseStatus::Success; 8016 } 8017 8018 bool AMDGPUAsmParser::validateStructuredOpFields( 8019 ArrayRef<const StructuredOpField *> Fields) { 8020 return all_of(Fields, [this](const StructuredOpField *F) { 8021 return F->validate(*this); 8022 }); 8023 } 8024 8025 //===----------------------------------------------------------------------===// 8026 // swizzle 8027 //===----------------------------------------------------------------------===// 8028 8029 LLVM_READNONE 8030 static unsigned 8031 encodeBitmaskPerm(const unsigned AndMask, 8032 const unsigned OrMask, 8033 const unsigned XorMask) { 8034 using namespace llvm::AMDGPU::Swizzle; 8035 8036 return BITMASK_PERM_ENC | 8037 (AndMask << BITMASK_AND_SHIFT) | 8038 (OrMask << BITMASK_OR_SHIFT) | 8039 (XorMask << BITMASK_XOR_SHIFT); 8040 } 8041 8042 bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal, 8043 const unsigned MaxVal, 8044 const Twine &ErrMsg, SMLoc &Loc) { 8045 if (!skipToken(AsmToken::Comma, "expected a comma")) { 8046 return false; 8047 } 8048 Loc = getLoc(); 8049 if (!parseExpr(Op)) { 8050 return false; 8051 } 8052 if (Op < MinVal || Op > MaxVal) { 8053 Error(Loc, ErrMsg); 8054 return false; 8055 } 8056 8057 return true; 8058 } 8059 8060 bool 8061 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 8062 const unsigned MinVal, 8063 const unsigned MaxVal, 8064 const StringRef ErrMsg) { 8065 SMLoc Loc; 8066 for (unsigned i = 0; i < OpNum; ++i) { 8067 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 8068 return false; 8069 } 8070 8071 return true; 8072 } 8073 8074 bool 8075 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 8076 using namespace llvm::AMDGPU::Swizzle; 8077 8078 int64_t Lane[LANE_NUM]; 8079 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 8080 "expected a 2-bit lane id")) { 8081 Imm = QUAD_PERM_ENC; 8082 for (unsigned I = 0; I < LANE_NUM; ++I) { 8083 Imm |= Lane[I] << (LANE_SHIFT * I); 8084 } 8085 return true; 8086 } 8087 return false; 8088 } 8089 8090 bool 8091 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 8092 using namespace llvm::AMDGPU::Swizzle; 8093 8094 SMLoc Loc; 8095 int64_t GroupSize; 8096 int64_t LaneIdx; 8097 8098 if (!parseSwizzleOperand(GroupSize, 8099 2, 32, 8100 "group size must be in the interval [2,32]", 8101 Loc)) { 8102 return false; 8103 } 8104 if (!isPowerOf2_64(GroupSize)) { 8105 Error(Loc, "group size must be a power of two"); 8106 return false; 8107 } 8108 if (parseSwizzleOperand(LaneIdx, 8109 0, GroupSize - 1, 8110 "lane id must be in the interval [0,group size - 1]", 8111 Loc)) { 8112 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 8113 return true; 8114 } 8115 return false; 8116 } 8117 8118 bool 8119 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 8120 using namespace llvm::AMDGPU::Swizzle; 8121 8122 SMLoc Loc; 8123 int64_t GroupSize; 8124 8125 if (!parseSwizzleOperand(GroupSize, 8126 2, 32, 8127 "group size must be in the interval [2,32]", 8128 Loc)) { 8129 return false; 8130 } 8131 if (!isPowerOf2_64(GroupSize)) { 8132 Error(Loc, "group size must be a power of two"); 8133 return false; 8134 } 8135 8136 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 8137 return true; 8138 } 8139 8140 bool 8141 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 8142 using namespace llvm::AMDGPU::Swizzle; 8143 8144 SMLoc Loc; 8145 int64_t 
GroupSize; 8146 8147 if (!parseSwizzleOperand(GroupSize, 8148 1, 16, 8149 "group size must be in the interval [1,16]", 8150 Loc)) { 8151 return false; 8152 } 8153 if (!isPowerOf2_64(GroupSize)) { 8154 Error(Loc, "group size must be a power of two"); 8155 return false; 8156 } 8157 8158 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 8159 return true; 8160 } 8161 8162 bool 8163 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 8164 using namespace llvm::AMDGPU::Swizzle; 8165 8166 if (!skipToken(AsmToken::Comma, "expected a comma")) { 8167 return false; 8168 } 8169 8170 StringRef Ctl; 8171 SMLoc StrLoc = getLoc(); 8172 if (!parseString(Ctl)) { 8173 return false; 8174 } 8175 if (Ctl.size() != BITMASK_WIDTH) { 8176 Error(StrLoc, "expected a 5-character mask"); 8177 return false; 8178 } 8179 8180 unsigned AndMask = 0; 8181 unsigned OrMask = 0; 8182 unsigned XorMask = 0; 8183 8184 for (size_t i = 0; i < Ctl.size(); ++i) { 8185 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 8186 switch(Ctl[i]) { 8187 default: 8188 Error(StrLoc, "invalid mask"); 8189 return false; 8190 case '0': 8191 break; 8192 case '1': 8193 OrMask |= Mask; 8194 break; 8195 case 'p': 8196 AndMask |= Mask; 8197 break; 8198 case 'i': 8199 AndMask |= Mask; 8200 XorMask |= Mask; 8201 break; 8202 } 8203 } 8204 8205 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 8206 return true; 8207 } 8208 8209 bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) { 8210 using namespace llvm::AMDGPU::Swizzle; 8211 8212 if (!AMDGPU::isGFX9Plus(getSTI())) { 8213 Error(getLoc(), "FFT mode swizzle not supported on this GPU"); 8214 return false; 8215 } 8216 8217 int64_t Swizzle; 8218 SMLoc Loc; 8219 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX, 8220 "FFT swizzle must be in the interval [0," + 8221 Twine(FFT_SWIZZLE_MAX) + Twine(']'), 8222 Loc)) 8223 return false; 8224 8225 Imm = FFT_MODE_ENC | Swizzle; 8226 return true; 8227 } 8228 8229 bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) { 8230 using namespace llvm::AMDGPU::Swizzle; 8231 8232 if (!AMDGPU::isGFX9Plus(getSTI())) { 8233 Error(getLoc(), "Rotate mode swizzle not supported on this GPU"); 8234 return false; 8235 } 8236 8237 SMLoc Loc; 8238 int64_t Direction; 8239 8240 if (!parseSwizzleOperand(Direction, 0, 1, 8241 "direction must be 0 (left) or 1 (right)", Loc)) 8242 return false; 8243 8244 int64_t RotateSize; 8245 if (!parseSwizzleOperand( 8246 RotateSize, 0, ROTATE_MAX_SIZE, 8247 "number of threads to rotate must be in the interval [0," + 8248 Twine(ROTATE_MAX_SIZE) + Twine(']'), 8249 Loc)) 8250 return false; 8251 8252 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) | 8253 (RotateSize << ROTATE_SIZE_SHIFT); 8254 return true; 8255 } 8256 8257 bool 8258 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 8259 8260 SMLoc OffsetLoc = getLoc(); 8261 8262 if (!parseExpr(Imm, "a swizzle macro")) { 8263 return false; 8264 } 8265 if (!isUInt<16>(Imm)) { 8266 Error(OffsetLoc, "expected a 16-bit offset"); 8267 return false; 8268 } 8269 return true; 8270 } 8271 8272 bool 8273 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 8274 using namespace llvm::AMDGPU::Swizzle; 8275 8276 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 8277 8278 SMLoc ModeLoc = getLoc(); 8279 bool Ok = false; 8280 8281 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 8282 Ok = parseSwizzleQuadPerm(Imm); 8283 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 8284 Ok = parseSwizzleBitmaskPerm(Imm); 8285 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 8286 Ok = 
parseSwizzleBroadcast(Imm); 8287 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 8288 Ok = parseSwizzleSwap(Imm); 8289 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 8290 Ok = parseSwizzleReverse(Imm); 8291 } else if (trySkipId(IdSymbolic[ID_FFT])) { 8292 Ok = parseSwizzleFFT(Imm); 8293 } else if (trySkipId(IdSymbolic[ID_ROTATE])) { 8294 Ok = parseSwizzleRotate(Imm); 8295 } else { 8296 Error(ModeLoc, "expected a swizzle mode"); 8297 } 8298 8299 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 8300 } 8301 8302 return false; 8303 } 8304 8305 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { 8306 SMLoc S = getLoc(); 8307 int64_t Imm = 0; 8308 8309 if (trySkipId("offset")) { 8310 8311 bool Ok = false; 8312 if (skipToken(AsmToken::Colon, "expected a colon")) { 8313 if (trySkipId("swizzle")) { 8314 Ok = parseSwizzleMacro(Imm); 8315 } else { 8316 Ok = parseSwizzleOffset(Imm); 8317 } 8318 } 8319 8320 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 8321 8322 return Ok ? ParseStatus::Success : ParseStatus::Failure; 8323 } 8324 return ParseStatus::NoMatch; 8325 } 8326 8327 bool 8328 AMDGPUOperand::isSwizzle() const { 8329 return isImmTy(ImmTySwizzle); 8330 } 8331 8332 //===----------------------------------------------------------------------===// 8333 // VGPR Index Mode 8334 //===----------------------------------------------------------------------===// 8335 8336 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 8337 8338 using namespace llvm::AMDGPU::VGPRIndexMode; 8339 8340 if (trySkipToken(AsmToken::RParen)) { 8341 return OFF; 8342 } 8343 8344 int64_t Imm = 0; 8345 8346 while (true) { 8347 unsigned Mode = 0; 8348 SMLoc S = getLoc(); 8349 8350 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 8351 if (trySkipId(IdSymbolic[ModeId])) { 8352 Mode = 1 << ModeId; 8353 break; 8354 } 8355 } 8356 8357 if (Mode == 0) { 8358 Error(S, (Imm == 0)? 
8359 "expected a VGPR index mode or a closing parenthesis" : 8360 "expected a VGPR index mode"); 8361 return UNDEF; 8362 } 8363 8364 if (Imm & Mode) { 8365 Error(S, "duplicate VGPR index mode"); 8366 return UNDEF; 8367 } 8368 Imm |= Mode; 8369 8370 if (trySkipToken(AsmToken::RParen)) 8371 break; 8372 if (!skipToken(AsmToken::Comma, 8373 "expected a comma or a closing parenthesis")) 8374 return UNDEF; 8375 } 8376 8377 return Imm; 8378 } 8379 8380 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 8381 8382 using namespace llvm::AMDGPU::VGPRIndexMode; 8383 8384 int64_t Imm = 0; 8385 SMLoc S = getLoc(); 8386 8387 if (trySkipId("gpr_idx", AsmToken::LParen)) { 8388 Imm = parseGPRIdxMacro(); 8389 if (Imm == UNDEF) 8390 return ParseStatus::Failure; 8391 } else { 8392 if (getParser().parseAbsoluteExpression(Imm)) 8393 return ParseStatus::Failure; 8394 if (Imm < 0 || !isUInt<4>(Imm)) 8395 return Error(S, "invalid immediate: only 4-bit values are legal"); 8396 } 8397 8398 Operands.push_back( 8399 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 8400 return ParseStatus::Success; 8401 } 8402 8403 bool AMDGPUOperand::isGPRIdxMode() const { 8404 return isImmTy(ImmTyGprIdxMode); 8405 } 8406 8407 //===----------------------------------------------------------------------===// 8408 // sopp branch targets 8409 //===----------------------------------------------------------------------===// 8410 8411 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 8412 8413 // Make sure we are not parsing something 8414 // that looks like a label or an expression but is not. 8415 // This will improve error messages. 8416 if (isRegister() || isModifier()) 8417 return ParseStatus::NoMatch; 8418 8419 if (!parseExpr(Operands)) 8420 return ParseStatus::Failure; 8421 8422 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 8423 assert(Opr.isImm() || Opr.isExpr()); 8424 SMLoc Loc = Opr.getStartLoc(); 8425 8426 // Currently we do not support arbitrary expressions as branch targets. 8427 // Only labels and absolute expressions are accepted. 8428 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 8429 Error(Loc, "expected an absolute expression or a label"); 8430 } else if (Opr.isImm() && !Opr.isS16Imm()) { 8431 Error(Loc, "expected a 16-bit signed jump offset"); 8432 } 8433 8434 return ParseStatus::Success; 8435 } 8436 8437 //===----------------------------------------------------------------------===// 8438 // Boolean holding registers 8439 //===----------------------------------------------------------------------===// 8440 8441 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 8442 return parseReg(Operands); 8443 } 8444 8445 //===----------------------------------------------------------------------===// 8446 // mubuf 8447 //===----------------------------------------------------------------------===// 8448 8449 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 8450 const OperandVector &Operands, 8451 bool IsAtomic) { 8452 OptionalImmIndexMap OptionalIdx; 8453 unsigned FirstOperandIdx = 1; 8454 bool IsAtomicReturn = false; 8455 8456 if (IsAtomic) { 8457 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 8458 SIInstrFlags::IsAtomicRet; 8459 } 8460 8461 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 8462 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8463 8464 // Add the register arguments 8465 if (Op.isReg()) { 8466 Op.addRegOperands(Inst, 1); 8467 // Insert a tied src for atomic return dst. 
8468 // This cannot be postponed as subsequent calls to 8469 // addImmOperands rely on correct number of MC operands. 8470 if (IsAtomicReturn && i == FirstOperandIdx) 8471 Op.addRegOperands(Inst, 1); 8472 continue; 8473 } 8474 8475 // Handle the case where soffset is an immediate 8476 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 8477 Op.addImmOperands(Inst, 1); 8478 continue; 8479 } 8480 8481 // Handle tokens like 'offen' which are sometimes hard-coded into the 8482 // asm string. There are no MCInst operands for these. 8483 if (Op.isToken()) { 8484 continue; 8485 } 8486 assert(Op.isImm()); 8487 8488 // Handle optional arguments 8489 OptionalIdx[Op.getImmTy()] = i; 8490 } 8491 8492 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 8493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 8494 } 8495 8496 //===----------------------------------------------------------------------===// 8497 // smrd 8498 //===----------------------------------------------------------------------===// 8499 8500 bool AMDGPUOperand::isSMRDOffset8() const { 8501 return isImmLiteral() && isUInt<8>(getImm()); 8502 } 8503 8504 bool AMDGPUOperand::isSMEMOffset() const { 8505 // Offset range is checked later by validator. 8506 return isImmLiteral(); 8507 } 8508 8509 bool AMDGPUOperand::isSMRDLiteralOffset() const { 8510 // 32-bit literals are only supported on CI and we only want to use them 8511 // when the offset is > 8-bits. 8512 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 8513 } 8514 8515 //===----------------------------------------------------------------------===// 8516 // vop3 8517 //===----------------------------------------------------------------------===// 8518 8519 static bool ConvertOmodMul(int64_t &Mul) { 8520 if (Mul != 1 && Mul != 2 && Mul != 4) 8521 return false; 8522 8523 Mul >>= 1; 8524 return true; 8525 } 8526 8527 static bool ConvertOmodDiv(int64_t &Div) { 8528 if (Div == 1) { 8529 Div = 0; 8530 return true; 8531 } 8532 8533 if (Div == 2) { 8534 Div = 3; 8535 return true; 8536 } 8537 8538 return false; 8539 } 8540 8541 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8542 // This is intentional and ensures compatibility with sp3. 8543 // See bug 35397 for details. 8544 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 8545 if (BoundCtrl == 0 || BoundCtrl == 1) { 8546 if (!isGFX11Plus()) 8547 BoundCtrl = 1; 8548 return true; 8549 } 8550 return false; 8551 } 8552 8553 void AMDGPUAsmParser::onBeginOfFile() { 8554 if (!getParser().getStreamer().getTargetStreamer() || 8555 getSTI().getTargetTriple().getArch() == Triple::r600) 8556 return; 8557 8558 if (!getTargetStreamer().getTargetID()) 8559 getTargetStreamer().initializeTargetID(getSTI(), 8560 getSTI().getFeatureString()); 8561 8562 if (isHsaAbi(getSTI())) 8563 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8564 } 8565 8566 /// Parse AMDGPU specific expressions. 8567 /// 8568 /// expr ::= or(expr, ...) | 8569 /// max(expr, ...) 
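///          (also extrasgprs, totalnumvgprs, alignto and occupancy; the full
///          list is the StringSwitch in the implementation below)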
8570 /// 8571 bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 8572 using AGVK = AMDGPUMCExpr::VariantKind; 8573 8574 if (isToken(AsmToken::Identifier)) { 8575 StringRef TokenId = getTokenStr(); 8576 AGVK VK = StringSwitch<AGVK>(TokenId) 8577 .Case("max", AGVK::AGVK_Max) 8578 .Case("or", AGVK::AGVK_Or) 8579 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs) 8580 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs) 8581 .Case("alignto", AGVK::AGVK_AlignTo) 8582 .Case("occupancy", AGVK::AGVK_Occupancy) 8583 .Default(AGVK::AGVK_None); 8584 8585 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) { 8586 SmallVector<const MCExpr *, 4> Exprs; 8587 uint64_t CommaCount = 0; 8588 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.) 8589 lex(); // Eat '(' 8590 while (true) { 8591 if (trySkipToken(AsmToken::RParen)) { 8592 if (Exprs.empty()) { 8593 Error(getToken().getLoc(), 8594 "empty " + Twine(TokenId) + " expression"); 8595 return true; 8596 } 8597 if (CommaCount + 1 != Exprs.size()) { 8598 Error(getToken().getLoc(), 8599 "mismatch of commas in " + Twine(TokenId) + " expression"); 8600 return true; 8601 } 8602 Res = AMDGPUMCExpr::create(VK, Exprs, getContext()); 8603 return false; 8604 } 8605 const MCExpr *Expr; 8606 if (getParser().parseExpression(Expr, EndLoc)) 8607 return true; 8608 Exprs.push_back(Expr); 8609 bool LastTokenWasComma = trySkipToken(AsmToken::Comma); 8610 if (LastTokenWasComma) 8611 CommaCount++; 8612 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) { 8613 Error(getToken().getLoc(), 8614 "unexpected token in " + Twine(TokenId) + " expression"); 8615 return true; 8616 } 8617 } 8618 } 8619 } 8620 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 8621 } 8622 8623 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 8624 StringRef Name = getTokenStr(); 8625 if (Name == "mul") { 8626 return parseIntWithPrefix("mul", Operands, 8627 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8628 } 8629 8630 if (Name == "div") { 8631 return parseIntWithPrefix("div", Operands, 8632 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8633 } 8634 8635 return ParseStatus::NoMatch; 8636 } 8637 8638 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8639 // the number of src operands present, then copies that bit into src0_modifiers. 
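// E.g. with three source operands the destination select is op_sel bit 3;
// with a single source operand it is bit 1 (this matches the SrcNum scan
// below).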
8640 static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) { 8641 int Opc = Inst.getOpcode(); 8642 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8643 if (OpSelIdx == -1) 8644 return; 8645 8646 int SrcNum; 8647 const int Ops[] = { AMDGPU::OpName::src0, 8648 AMDGPU::OpName::src1, 8649 AMDGPU::OpName::src2 }; 8650 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 8651 ++SrcNum) 8652 ; 8653 assert(SrcNum > 0); 8654 8655 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8656 8657 int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst); 8658 if (DstIdx == -1) 8659 return; 8660 8661 const MCOperand &DstOp = Inst.getOperand(DstIdx); 8662 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8663 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8664 if (DstOp.isReg() && 8665 MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) { 8666 if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI)) 8667 ModVal |= SISrcMods::DST_OP_SEL; 8668 } else { 8669 if ((OpSel & (1 << SrcNum)) != 0) 8670 ModVal |= SISrcMods::DST_OP_SEL; 8671 } 8672 Inst.getOperand(ModIdx).setImm(ModVal); 8673 } 8674 8675 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8676 const OperandVector &Operands) { 8677 cvtVOP3P(Inst, Operands); 8678 cvtVOP3DstOpSelOnly(Inst, *getMRI()); 8679 } 8680 8681 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8682 OptionalImmIndexMap &OptionalIdx) { 8683 cvtVOP3P(Inst, Operands, OptionalIdx); 8684 cvtVOP3DstOpSelOnly(Inst, *getMRI()); 8685 } 8686 8687 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8688 return 8689 // 1. This operand is input modifiers 8690 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8691 // 2. This is not last operand 8692 && Desc.NumOperands > (OpNum + 1) 8693 // 3. Next operand is register class 8694 && Desc.operands()[OpNum + 1].RegClass != -1 8695 // 4. 
Next register is not tied to any other operand 8696 && Desc.getOperandConstraint(OpNum + 1, 8697 MCOI::OperandConstraint::TIED_TO) == -1; 8698 } 8699 8700 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8701 { 8702 OptionalImmIndexMap OptionalIdx; 8703 unsigned Opc = Inst.getOpcode(); 8704 8705 unsigned I = 1; 8706 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8707 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8708 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8709 } 8710 8711 for (unsigned E = Operands.size(); I != E; ++I) { 8712 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8713 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8714 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8715 } else if (Op.isInterpSlot() || Op.isInterpAttr() || 8716 Op.isInterpAttrChan()) { 8717 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8718 } else if (Op.isImmModifier()) { 8719 OptionalIdx[Op.getImmTy()] = I; 8720 } else { 8721 llvm_unreachable("unhandled operand type"); 8722 } 8723 } 8724 8725 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 8726 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8727 AMDGPUOperand::ImmTyHigh); 8728 8729 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8730 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8731 AMDGPUOperand::ImmTyClamp); 8732 8733 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8734 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8735 AMDGPUOperand::ImmTyOModSI); 8736 } 8737 8738 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8739 { 8740 OptionalImmIndexMap OptionalIdx; 8741 unsigned Opc = Inst.getOpcode(); 8742 8743 unsigned I = 1; 8744 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8745 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8746 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8747 } 8748 8749 for (unsigned E = Operands.size(); I != E; ++I) { 8750 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8751 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8752 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8753 } else if (Op.isImmModifier()) { 8754 OptionalIdx[Op.getImmTy()] = I; 8755 } else { 8756 llvm_unreachable("unhandled operand type"); 8757 } 8758 } 8759 8760 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp); 8761 8762 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8763 if (OpSelIdx != -1) 8764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8765 8766 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8767 8768 if (OpSelIdx == -1) 8769 return; 8770 8771 const int Ops[] = { AMDGPU::OpName::src0, 8772 AMDGPU::OpName::src1, 8773 AMDGPU::OpName::src2 }; 8774 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8775 AMDGPU::OpName::src1_modifiers, 8776 AMDGPU::OpName::src2_modifiers }; 8777 8778 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8779 8780 for (int J = 0; J < 3; ++J) { 8781 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8782 if (OpIdx == -1) 8783 break; 8784 8785 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8786 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8787 8788 if ((OpSel & (1 << J)) != 0) 8789 ModVal |= SISrcMods::OP_SEL_0; 8790 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8791 (OpSel & (1 << 3)) != 0) 8792 ModVal |= SISrcMods::DST_OP_SEL; 8793 8794 Inst.getOperand(ModIdx).setImm(ModVal); 8795 } 8796 } 8797 
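// For reference, the op_sel lowering performed by cvtVINTERP above:
// op_sel bit J (J = 0..2) sets OP_SEL_0 in srcJ_modifiers, and op_sel bit 3
// sets DST_OP_SEL in src0_modifiers.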
8798 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8799 OptionalImmIndexMap &OptionalIdx) { 8800 unsigned Opc = Inst.getOpcode(); 8801 8802 unsigned I = 1; 8803 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8804 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8805 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8806 } 8807 8808 for (unsigned E = Operands.size(); I != E; ++I) { 8809 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8810 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8811 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8812 } else if (Op.isImmModifier()) { 8813 OptionalIdx[Op.getImmTy()] = I; 8814 } else { 8815 Op.addRegOrImmOperands(Inst, 1); 8816 } 8817 } 8818 8819 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) { 8820 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) 8821 Inst.addOperand(Inst.getOperand(0)); 8822 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8823 AMDGPUOperand::ImmTyByteSel); 8824 } 8825 8826 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8827 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8828 AMDGPUOperand::ImmTyClamp); 8829 8830 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8831 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8832 AMDGPUOperand::ImmTyOModSI); 8833 8834 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8835 // it has src2 register operand that is tied to dst operand 8836 // we don't allow modifiers for this operand in assembler so src2_modifiers 8837 // should be 0. 8838 if (isMAC(Opc)) { 8839 auto *it = Inst.begin(); 8840 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8841 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8842 ++it; 8843 // Copy the operand to ensure it's not invalidated when Inst grows. 8844 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8845 } 8846 } 8847 8848 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8849 OptionalImmIndexMap OptionalIdx; 8850 cvtVOP3(Inst, Operands, OptionalIdx); 8851 } 8852 8853 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8854 OptionalImmIndexMap &OptIdx) { 8855 const int Opc = Inst.getOpcode(); 8856 const MCInstrDesc &Desc = MII.get(Opc); 8857 8858 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8859 8860 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi || 8861 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi || 8862 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8863 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || 8864 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || 8865 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { 8866 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8867 Inst.addOperand(Inst.getOperand(0)); 8868 } 8869 8870 // Adding vdst_in operand is already covered for these DPP instructions in 8871 // cvtVOP3DPP. 
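  // For the remaining opcodes that do carry a tied vdst_in operand, reuse the
  // already-parsed dst register as its value.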
8872 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && 8873 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || 8874 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || 8875 Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || 8876 Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 || 8877 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || 8878 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || 8879 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || 8880 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { 8881 Inst.addOperand(Inst.getOperand(0)); 8882 } 8883 8884 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3); 8885 if (BitOp3Idx != -1) { 8886 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3); 8887 } 8888 8889 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8890 // instruction, and then figure out where to actually put the modifiers 8891 8892 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8893 if (OpSelIdx != -1) { 8894 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8895 } 8896 8897 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8898 if (OpSelHiIdx != -1) { 8899 int DefaultVal = IsPacked ? -1 : 0; 8900 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8901 DefaultVal); 8902 } 8903 8904 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8905 if (NegLoIdx != -1) 8906 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8907 8908 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8909 if (NegHiIdx != -1) 8910 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8911 8912 const int Ops[] = { AMDGPU::OpName::src0, 8913 AMDGPU::OpName::src1, 8914 AMDGPU::OpName::src2 }; 8915 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8916 AMDGPU::OpName::src1_modifiers, 8917 AMDGPU::OpName::src2_modifiers }; 8918 8919 unsigned OpSel = 0; 8920 unsigned OpSelHi = 0; 8921 unsigned NegLo = 0; 8922 unsigned NegHi = 0; 8923 8924 if (OpSelIdx != -1) 8925 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8926 8927 if (OpSelHiIdx != -1) 8928 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8929 8930 if (NegLoIdx != -1) 8931 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8932 8933 if (NegHiIdx != -1) 8934 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8935 8936 for (int J = 0; J < 3; ++J) { 8937 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8938 if (OpIdx == -1) 8939 break; 8940 8941 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8942 8943 if (ModIdx == -1) 8944 continue; 8945 8946 uint32_t ModVal = 0; 8947 8948 const MCOperand &SrcOp = Inst.getOperand(OpIdx); 8949 if (SrcOp.isReg() && getMRI() 8950 ->getRegClass(AMDGPU::VGPR_16RegClassID) 8951 .contains(SrcOp.getReg())) { 8952 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI()); 8953 if (VGPRSuffixIsHi) 8954 ModVal |= SISrcMods::OP_SEL_0; 8955 } else { 8956 if ((OpSel & (1 << J)) != 0) 8957 ModVal |= SISrcMods::OP_SEL_0; 8958 } 8959 8960 if ((OpSelHi & (1 << J)) != 0) 8961 ModVal |= SISrcMods::OP_SEL_1; 8962 8963 if ((NegLo & (1 << J)) != 0) 8964 ModVal |= SISrcMods::NEG; 8965 8966 if ((NegHi & (1 << J)) != 0) 8967 ModVal |= SISrcMods::NEG_HI; 8968 8969 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8970 } 8971 } 8972 8973 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8974 OptionalImmIndexMap OptIdx; 8975 
cvtVOP3(Inst, Operands, OptIdx); 8976 cvtVOP3P(Inst, Operands, OptIdx); 8977 } 8978 8979 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, 8980 unsigned i, unsigned Opc, unsigned OpName) { 8981 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1) 8982 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2); 8983 else 8984 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1); 8985 } 8986 8987 void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { 8988 unsigned Opc = Inst.getOpcode(); 8989 8990 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); 8991 addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers); 8992 addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers); 8993 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef 8994 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2 8995 8996 OptionalImmIndexMap OptIdx; 8997 for (unsigned i = 5; i < Operands.size(); ++i) { 8998 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8999 OptIdx[Op.getImmTy()] = i; 9000 } 9001 9002 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit)) 9003 addOptionalImmOperand(Inst, Operands, OptIdx, 9004 AMDGPUOperand::ImmTyIndexKey8bit); 9005 9006 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit)) 9007 addOptionalImmOperand(Inst, Operands, OptIdx, 9008 AMDGPUOperand::ImmTyIndexKey16bit); 9009 9010 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 9011 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp); 9012 9013 cvtVOP3P(Inst, Operands, OptIdx); 9014 } 9015 9016 //===----------------------------------------------------------------------===// 9017 // VOPD 9018 //===----------------------------------------------------------------------===// 9019 9020 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 9021 if (!hasVOPD(getSTI())) 9022 return ParseStatus::NoMatch; 9023 9024 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 9025 SMLoc S = getLoc(); 9026 lex(); 9027 lex(); 9028 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 9029 SMLoc OpYLoc = getLoc(); 9030 StringRef OpYName; 9031 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 9032 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 9033 return ParseStatus::Success; 9034 } 9035 return Error(OpYLoc, "expected a VOPDY instruction after ::"); 9036 } 9037 return ParseStatus::NoMatch; 9038 } 9039 9040 // Create VOPD MCInst operands using parsed assembler operands. 
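// A VOPD source line pairs two component instructions with "::", e.g.
// (illustrative only) v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4.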
9041 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 9042 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 9043 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 9044 if (Op.isReg()) { 9045 Op.addRegOperands(Inst, 1); 9046 return; 9047 } 9048 if (Op.isImm()) { 9049 Op.addImmOperands(Inst, 1); 9050 return; 9051 } 9052 llvm_unreachable("Unhandled operand type in cvtVOPD"); 9053 }; 9054 9055 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 9056 9057 // MCInst operands are ordered as follows: 9058 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 9059 9060 for (auto CompIdx : VOPD::COMPONENTS) { 9061 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 9062 } 9063 9064 for (auto CompIdx : VOPD::COMPONENTS) { 9065 const auto &CInfo = InstInfo[CompIdx]; 9066 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 9067 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 9068 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 9069 if (CInfo.hasSrc2Acc()) 9070 addOp(CInfo.getIndexOfDstInParsedOperands()); 9071 } 9072 } 9073 9074 //===----------------------------------------------------------------------===// 9075 // dpp 9076 //===----------------------------------------------------------------------===// 9077 9078 bool AMDGPUOperand::isDPP8() const { 9079 return isImmTy(ImmTyDPP8); 9080 } 9081 9082 bool AMDGPUOperand::isDPPCtrl() const { 9083 using namespace AMDGPU::DPP; 9084 9085 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 9086 if (result) { 9087 int64_t Imm = getImm(); 9088 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 9089 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 9090 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 9091 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 9092 (Imm == DppCtrl::WAVE_SHL1) || 9093 (Imm == DppCtrl::WAVE_ROL1) || 9094 (Imm == DppCtrl::WAVE_SHR1) || 9095 (Imm == DppCtrl::WAVE_ROR1) || 9096 (Imm == DppCtrl::ROW_MIRROR) || 9097 (Imm == DppCtrl::ROW_HALF_MIRROR) || 9098 (Imm == DppCtrl::BCAST15) || 9099 (Imm == DppCtrl::BCAST31) || 9100 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 9101 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 9102 } 9103 return false; 9104 } 9105 9106 //===----------------------------------------------------------------------===// 9107 // mAI 9108 //===----------------------------------------------------------------------===// 9109 9110 bool AMDGPUOperand::isBLGP() const { 9111 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 9112 } 9113 9114 bool AMDGPUOperand::isS16Imm() const { 9115 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 9116 } 9117 9118 bool AMDGPUOperand::isU16Imm() const { 9119 return isImmLiteral() && isUInt<16>(getImm()); 9120 } 9121 9122 //===----------------------------------------------------------------------===// 9123 // dim 9124 //===----------------------------------------------------------------------===// 9125 9126 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 9127 // We want to allow "dim:1D" etc., 9128 // but the initial 1 is tokenized as an integer. 
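  // For example, "dim:2D" arrives as the integer token "2" immediately
  // followed by the identifier "D"; the two pieces are glued back together
  // below before the suffix lookup.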
9129 std::string Token; 9130 if (isToken(AsmToken::Integer)) { 9131 SMLoc Loc = getToken().getEndLoc(); 9132 Token = std::string(getTokenStr()); 9133 lex(); 9134 if (getLoc() != Loc) 9135 return false; 9136 } 9137 9138 StringRef Suffix; 9139 if (!parseId(Suffix)) 9140 return false; 9141 Token += Suffix; 9142 9143 StringRef DimId = Token; 9144 if (DimId.starts_with("SQ_RSRC_IMG_")) 9145 DimId = DimId.drop_front(12); 9146 9147 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 9148 if (!DimInfo) 9149 return false; 9150 9151 Encoding = DimInfo->Encoding; 9152 return true; 9153 } 9154 9155 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { 9156 if (!isGFX10Plus()) 9157 return ParseStatus::NoMatch; 9158 9159 SMLoc S = getLoc(); 9160 9161 if (!trySkipId("dim", AsmToken::Colon)) 9162 return ParseStatus::NoMatch; 9163 9164 unsigned Encoding; 9165 SMLoc Loc = getLoc(); 9166 if (!parseDimId(Encoding)) 9167 return Error(Loc, "invalid dim value"); 9168 9169 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 9170 AMDGPUOperand::ImmTyDim)); 9171 return ParseStatus::Success; 9172 } 9173 9174 //===----------------------------------------------------------------------===// 9175 // dpp 9176 //===----------------------------------------------------------------------===// 9177 9178 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 9179 SMLoc S = getLoc(); 9180 9181 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 9182 return ParseStatus::NoMatch; 9183 9184 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 9185 9186 int64_t Sels[8]; 9187 9188 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 9189 return ParseStatus::Failure; 9190 9191 for (size_t i = 0; i < 8; ++i) { 9192 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 9193 return ParseStatus::Failure; 9194 9195 SMLoc Loc = getLoc(); 9196 if (getParser().parseAbsoluteExpression(Sels[i])) 9197 return ParseStatus::Failure; 9198 if (0 > Sels[i] || 7 < Sels[i]) 9199 return Error(Loc, "expected a 3-bit value"); 9200 } 9201 9202 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 9203 return ParseStatus::Failure; 9204 9205 unsigned DPP8 = 0; 9206 for (size_t i = 0; i < 8; ++i) 9207 DPP8 |= (Sels[i] << (i * 3)); 9208 9209 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 9210 return ParseStatus::Success; 9211 } 9212 9213 bool 9214 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 9215 const OperandVector &Operands) { 9216 if (Ctrl == "row_newbcast") 9217 return isGFX90A(); 9218 9219 if (Ctrl == "row_share" || 9220 Ctrl == "row_xmask") 9221 return isGFX10Plus(); 9222 9223 if (Ctrl == "wave_shl" || 9224 Ctrl == "wave_shr" || 9225 Ctrl == "wave_rol" || 9226 Ctrl == "wave_ror" || 9227 Ctrl == "row_bcast") 9228 return isVI() || isGFX9(); 9229 9230 return Ctrl == "row_mirror" || 9231 Ctrl == "row_half_mirror" || 9232 Ctrl == "quad_perm" || 9233 Ctrl == "row_shl" || 9234 Ctrl == "row_shr" || 9235 Ctrl == "row_ror"; 9236 } 9237 9238 int64_t 9239 AMDGPUAsmParser::parseDPPCtrlPerm() { 9240 // quad_perm:[%d,%d,%d,%d] 9241 9242 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 9243 return -1; 9244 9245 int64_t Val = 0; 9246 for (int i = 0; i < 4; ++i) { 9247 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 9248 return -1; 9249 9250 int64_t Temp; 9251 SMLoc Loc = getLoc(); 9252 if (getParser().parseAbsoluteExpression(Temp)) 9253 return -1; 9254 if (Temp < 0 || Temp > 3) { 9255 Error(Loc, 
"expected a 2-bit value"); 9256 return -1; 9257 } 9258 9259 Val += (Temp << i * 2); 9260 } 9261 9262 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 9263 return -1; 9264 9265 return Val; 9266 } 9267 9268 int64_t 9269 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 9270 using namespace AMDGPU::DPP; 9271 9272 // sel:%d 9273 9274 int64_t Val; 9275 SMLoc Loc = getLoc(); 9276 9277 if (getParser().parseAbsoluteExpression(Val)) 9278 return -1; 9279 9280 struct DppCtrlCheck { 9281 int64_t Ctrl; 9282 int Lo; 9283 int Hi; 9284 }; 9285 9286 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 9287 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 9288 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 9289 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 9290 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 9291 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 9292 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 9293 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 9294 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 9295 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 9296 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 9297 .Default({-1, 0, 0}); 9298 9299 bool Valid; 9300 if (Check.Ctrl == -1) { 9301 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 9302 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 9303 } else { 9304 Valid = Check.Lo <= Val && Val <= Check.Hi; 9305 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 9306 } 9307 9308 if (!Valid) { 9309 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 9310 return -1; 9311 } 9312 9313 return Val; 9314 } 9315 9316 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 9317 using namespace AMDGPU::DPP; 9318 9319 if (!isToken(AsmToken::Identifier) || 9320 !isSupportedDPPCtrl(getTokenStr(), Operands)) 9321 return ParseStatus::NoMatch; 9322 9323 SMLoc S = getLoc(); 9324 int64_t Val = -1; 9325 StringRef Ctrl; 9326 9327 parseId(Ctrl); 9328 9329 if (Ctrl == "row_mirror") { 9330 Val = DppCtrl::ROW_MIRROR; 9331 } else if (Ctrl == "row_half_mirror") { 9332 Val = DppCtrl::ROW_HALF_MIRROR; 9333 } else { 9334 if (skipToken(AsmToken::Colon, "expected a colon")) { 9335 if (Ctrl == "quad_perm") { 9336 Val = parseDPPCtrlPerm(); 9337 } else { 9338 Val = parseDPPCtrlSel(Ctrl); 9339 } 9340 } 9341 } 9342 9343 if (Val == -1) 9344 return ParseStatus::Failure; 9345 9346 Operands.push_back( 9347 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 9348 return ParseStatus::Success; 9349 } 9350 9351 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 9352 bool IsDPP8) { 9353 OptionalImmIndexMap OptionalIdx; 9354 unsigned Opc = Inst.getOpcode(); 9355 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 9356 9357 // MAC instructions are special because they have 'old' 9358 // operand which is not tied to dst (but assumed to be). 9359 // They also have dummy unused src2_modifiers. 
9360 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 9361 int Src2ModIdx = 9362 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 9363 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 9364 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 9365 9366 unsigned I = 1; 9367 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 9368 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 9369 } 9370 9371 int Fi = 0; 9372 for (unsigned E = Operands.size(); I != E; ++I) { 9373 9374 if (IsMAC) { 9375 int NumOperands = Inst.getNumOperands(); 9376 if (OldIdx == NumOperands) { 9377 // Handle old operand 9378 constexpr int DST_IDX = 0; 9379 Inst.addOperand(Inst.getOperand(DST_IDX)); 9380 } else if (Src2ModIdx == NumOperands) { 9381 // Add unused dummy src2_modifiers 9382 Inst.addOperand(MCOperand::createImm(0)); 9383 } 9384 } 9385 9386 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); 9387 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) { 9388 Inst.addOperand(Inst.getOperand(0)); 9389 } 9390 9391 bool IsVOP3CvtSrDpp = 9392 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || 9393 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || 9394 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || 9395 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12; 9396 if (IsVOP3CvtSrDpp) { 9397 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { 9398 Inst.addOperand(MCOperand::createImm(0)); 9399 Inst.addOperand(MCOperand::createReg(MCRegister())); 9400 } 9401 } 9402 9403 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 9404 MCOI::TIED_TO); 9405 if (TiedTo != -1) { 9406 assert((unsigned)TiedTo < Inst.getNumOperands()); 9407 // handle tied old or src2 for MAC instructions 9408 Inst.addOperand(Inst.getOperand(TiedTo)); 9409 } 9410 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 9411 // Add the register arguments 9412 if (IsDPP8 && Op.isDppFI()) { 9413 Fi = Op.getImm(); 9414 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9415 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 9416 } else if (Op.isReg()) { 9417 Op.addRegOperands(Inst, 1); 9418 } else if (Op.isImm() && 9419 Desc.operands()[Inst.getNumOperands()].RegClass != -1) { 9420 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 9421 Op.addImmOperands(Inst, 1); 9422 } else if (Op.isImm()) { 9423 OptionalIdx[Op.getImmTy()] = I; 9424 } else { 9425 llvm_unreachable("unhandled operand type"); 9426 } 9427 } 9428 9429 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) 9430 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9431 AMDGPUOperand::ImmTyByteSel); 9432 9433 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 9434 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9435 AMDGPUOperand::ImmTyClamp); 9436 9437 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 9438 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 9439 9440 if (Desc.TSFlags & SIInstrFlags::VOP3P) 9441 cvtVOP3P(Inst, Operands, OptionalIdx); 9442 else if (Desc.TSFlags & SIInstrFlags::VOP3) 9443 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 9444 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { 9445 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 9446 } 9447 9448 if (IsDPP8) { 9449 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 9450 using namespace llvm::AMDGPU::DPP; 9451 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 9452 } else { 9453 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 9454 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 9455 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 9456 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 9457 9458 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) 9459 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9460 AMDGPUOperand::ImmTyDppFI); 9461 } 9462 } 9463 9464 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 9465 OptionalImmIndexMap OptionalIdx; 9466 9467 unsigned I = 1; 9468 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 9469 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 9470 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 9471 } 9472 9473 int Fi = 0; 9474 for (unsigned E = Operands.size(); I != E; ++I) { 9475 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 9476 MCOI::TIED_TO); 9477 if (TiedTo != -1) { 9478 assert((unsigned)TiedTo < Inst.getNumOperands()); 9479 // handle tied old or src2 for MAC instructions 9480 Inst.addOperand(Inst.getOperand(TiedTo)); 9481 } 9482 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 9483 // Add the register arguments 9484 if (Op.isReg() && validateVccOperand(Op.getReg())) { 9485 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 9486 // Skip it. 9487 continue; 9488 } 9489 9490 if (IsDPP8) { 9491 if (Op.isDPP8()) { 9492 Op.addImmOperands(Inst, 1); 9493 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9494 Op.addRegWithFPInputModsOperands(Inst, 2); 9495 } else if (Op.isDppFI()) { 9496 Fi = Op.getImm(); 9497 } else if (Op.isReg()) { 9498 Op.addRegOperands(Inst, 1); 9499 } else { 9500 llvm_unreachable("Invalid operand type"); 9501 } 9502 } else { 9503 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9504 Op.addRegWithFPInputModsOperands(Inst, 2); 9505 } else if (Op.isReg()) { 9506 Op.addRegOperands(Inst, 1); 9507 } else if (Op.isDPPCtrl()) { 9508 Op.addImmOperands(Inst, 1); 9509 } else if (Op.isImm()) { 9510 // Handle optional arguments 9511 OptionalIdx[Op.getImmTy()] = I; 9512 } else { 9513 llvm_unreachable("Invalid operand type"); 9514 } 9515 } 9516 } 9517 9518 if (IsDPP8) { 9519 using namespace llvm::AMDGPU::DPP; 9520 Inst.addOperand(MCOperand::createImm(Fi? 
DPP8_FI_1 : DPP8_FI_0)); 9521 } else { 9522 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 9523 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 9524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 9525 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { 9526 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9527 AMDGPUOperand::ImmTyDppFI); 9528 } 9529 } 9530 } 9531 9532 //===----------------------------------------------------------------------===// 9533 // sdwa 9534 //===----------------------------------------------------------------------===// 9535 9536 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, 9537 StringRef Prefix, 9538 AMDGPUOperand::ImmTy Type) { 9539 return parseStringOrIntWithPrefix( 9540 Operands, Prefix, 9541 {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"}, 9542 Type); 9543 } 9544 9545 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 9546 return parseStringOrIntWithPrefix( 9547 Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"}, 9548 AMDGPUOperand::ImmTySDWADstUnused); 9549 } 9550 9551 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 9552 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 9553 } 9554 9555 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 9556 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 9557 } 9558 9559 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 9560 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 9561 } 9562 9563 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 9564 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 9565 } 9566 9567 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 9568 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 9569 } 9570 9571 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 9572 uint64_t BasicInstType, 9573 bool SkipDstVcc, 9574 bool SkipSrcVcc) { 9575 using namespace llvm::AMDGPU::SDWA; 9576 9577 OptionalImmIndexMap OptionalIdx; 9578 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 9579 bool SkippedVcc = false; 9580 9581 unsigned I = 1; 9582 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 9583 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 9584 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 9585 } 9586 9587 for (unsigned E = Operands.size(); I != E; ++I) { 9588 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 9589 if (SkipVcc && !SkippedVcc && Op.isReg() && 9590 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 9591 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 9592 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 9593 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 9594 // Skip VCC only if we didn't skip it on previous iteration. 9595 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
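      // With the dst already emitted, an operand count of 1 means this vcc
      // would be the second written operand; a count of 5 (dst plus a modifier
      // slot and a register slot for each of src0 and src1) means it is the
      // trailing source vcc.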
9596 if (BasicInstType == SIInstrFlags::VOP2 && 9597 ((SkipDstVcc && Inst.getNumOperands() == 1) || 9598 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 9599 SkippedVcc = true; 9600 continue; 9601 } 9602 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { 9603 SkippedVcc = true; 9604 continue; 9605 } 9606 } 9607 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9608 Op.addRegOrImmWithInputModsOperands(Inst, 2); 9609 } else if (Op.isImm()) { 9610 // Handle optional arguments 9611 OptionalIdx[Op.getImmTy()] = I; 9612 } else { 9613 llvm_unreachable("Invalid operand type"); 9614 } 9615 SkippedVcc = false; 9616 } 9617 9618 const unsigned Opc = Inst.getOpcode(); 9619 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && 9620 Opc != AMDGPU::V_NOP_sdwa_vi) { 9621 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 9622 switch (BasicInstType) { 9623 case SIInstrFlags::VOP1: 9624 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 9625 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9626 AMDGPUOperand::ImmTyClamp, 0); 9627 9628 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 9629 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9630 AMDGPUOperand::ImmTyOModSI, 0); 9631 9632 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) 9633 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9634 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9635 9636 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) 9637 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9638 AMDGPUOperand::ImmTySDWADstUnused, 9639 DstUnused::UNUSED_PRESERVE); 9640 9641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9642 break; 9643 9644 case SIInstrFlags::VOP2: 9645 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9646 AMDGPUOperand::ImmTyClamp, 0); 9647 9648 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) 9649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 9650 9651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9652 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE); 9653 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9654 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 9655 break; 9656 9657 case SIInstrFlags::VOPC: 9658 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) 9659 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9660 AMDGPUOperand::ImmTyClamp, 0); 9661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 9663 break; 9664 9665 default: 9666 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 9667 } 9668 } 9669 9670 // special case v_mac_{f16, f32}: 9671 // it has src2 register operand that is tied to dst operand 9672 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 9673 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 9674 auto *it = Inst.begin(); 9675 std::advance( 9676 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 9677 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 9678 } 9679 } 9680 9681 /// Force static initialization. 
9682 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 9683 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); 9684 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 9685 } 9686 9687 #define GET_REGISTER_MATCHER 9688 #define GET_MATCHER_IMPLEMENTATION 9689 #define GET_MNEMONIC_SPELL_CHECKER 9690 #define GET_MNEMONIC_CHECKER 9691 #include "AMDGPUGenAsmMatcher.inc" 9692 9693 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, 9694 unsigned MCK) { 9695 switch (MCK) { 9696 case MCK_addr64: 9697 return parseTokenOp("addr64", Operands); 9698 case MCK_done: 9699 return parseTokenOp("done", Operands); 9700 case MCK_idxen: 9701 return parseTokenOp("idxen", Operands); 9702 case MCK_lds: 9703 return parseTokenOp("lds", Operands); 9704 case MCK_offen: 9705 return parseTokenOp("offen", Operands); 9706 case MCK_off: 9707 return parseTokenOp("off", Operands); 9708 case MCK_row_95_en: 9709 return parseTokenOp("row_en", Operands); 9710 case MCK_gds: 9711 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS); 9712 case MCK_tfe: 9713 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE); 9714 } 9715 return tryCustomParseOperand(Operands, MCK); 9716 } 9717 9718 // This function should be defined after auto-generated include so that we have 9719 // MatchClassKind enum defined 9720 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 9721 unsigned Kind) { 9722 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 9723 // But MatchInstructionImpl() expects to meet token and fails to validate 9724 // operand. This method checks if we are given immediate operand but expect to 9725 // get corresponding token. 9726 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 9727 switch (Kind) { 9728 case MCK_addr64: 9729 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 9730 case MCK_gds: 9731 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 9732 case MCK_lds: 9733 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 9734 case MCK_idxen: 9735 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 9736 case MCK_offen: 9737 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 9738 case MCK_tfe: 9739 return Operand.isTFE() ? Match_Success : Match_InvalidOperand; 9740 case MCK_SSrc_b32: 9741 // When operands have expression values, they will return true for isToken, 9742 // because it is not possible to distinguish between a token and an 9743 // expression at parse time. MatchInstructionImpl() will always try to 9744 // match an operand as a token, when isToken returns true, and when the 9745 // name of the expression is not a valid token, the match will fail, 9746 // so we need to handle it here. 9747 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand; 9748 case MCK_SSrc_f32: 9749 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand; 9750 case MCK_SOPPBrTarget: 9751 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; 9752 case MCK_VReg32OrOff: 9753 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 9754 case MCK_InterpSlot: 9755 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 9756 case MCK_InterpAttr: 9757 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 9758 case MCK_InterpAttrChan: 9759 return Operand.isInterpAttrChan() ? 
Match_Success : Match_InvalidOperand; 9760 case MCK_SReg_64: 9761 case MCK_SReg_64_XEXEC: 9762 // Null is defined as a 32-bit register but 9763 // it should also be enabled with 64-bit operands or larger. 9764 // The following code enables it for SReg_64 and larger operands 9765 // used as source and destination. Remaining source 9766 // operands are handled in isInlinableImm. 9767 case MCK_SReg_96: 9768 case MCK_SReg_128: 9769 case MCK_SReg_256: 9770 case MCK_SReg_512: 9771 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 9772 default: 9773 return Match_InvalidOperand; 9774 } 9775 } 9776 9777 //===----------------------------------------------------------------------===// 9778 // endpgm 9779 //===----------------------------------------------------------------------===// 9780 9781 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { 9782 SMLoc S = getLoc(); 9783 int64_t Imm = 0; 9784 9785 if (!parseExpr(Imm)) { 9786 // The operand is optional, if not present default to 0 9787 Imm = 0; 9788 } 9789 9790 if (!isUInt<16>(Imm)) 9791 return Error(S, "expected a 16-bit value"); 9792 9793 Operands.push_back( 9794 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 9795 return ParseStatus::Success; 9796 } 9797 9798 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 9799 9800 //===----------------------------------------------------------------------===// 9801 // Split Barrier 9802 //===----------------------------------------------------------------------===// 9803 9804 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); } 9805