//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;

/// Utility to store machine instructions worklist.
struct SIInstrWorklist {
  SIInstrWorklist() = default;

  void insert(MachineInstr *MI);

  MachineInstr *top() const {
    const auto *iter = InstrList.begin();
    return *iter;
  }

  void erase_top() {
    const auto *iter = InstrList.begin();
    InstrList.erase(iter);
  }

  bool empty() const { return InstrList.empty(); }

  void clear() {
    InstrList.clear();
    DeferredList.clear();
  }

  bool isDeferred(MachineInstr *MI);

  SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }

private:
  /// InstrList contains the MachineInstrs.
  SetVector<MachineInstr *> InstrList;
  /// Deferred instructions are MachineInstrs that the insert method holds back
  /// in this list instead of adding them to InstrList.
  SetVector<MachineInstr *> DeferredList;
};
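
// A minimal usage sketch of the worklist (illustrative only; the names below
// are placeholders, and moveToVALU() further down is the real consumer):
//
//   SIInstrWorklist Worklist;
//   Worklist.insert(&MI);
//   while (!Worklist.empty()) {
//     MachineInstr *Top = Worklist.top();
//     Worklist.erase_top();
//     // ... process Top, inserting any newly affected instructions ...
//   }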
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;

private:
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

  bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                  StringRef &ErrInfo) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another register, return the destination
  /// and source registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;
  bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
                     const MachineOperand *fromMO, unsigned toIdx,
                     const MachineOperand *toMO) const;
  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool isGlobalMemoryObject(const MachineInstr *MI) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1,
                               unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc, bool RenamableDest = false,
                   bool RenamableSrc = false) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;
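
  /// For illustration, a sketch (the names here are hypothetical) of
  /// materializing the condition (SrcReg != 0) ahead of a conditional branch:
  /// \code
  ///   Register CondReg = insertNE(&MBB, I, DL, SrcReg, /*Value=*/0);
  /// \endcode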
  void storeRegToStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
      bool isKill, int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  void loadRegFromStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
      int FrameIndex, const TargetRegisterClass *RC,
      const TargetRegisterInfo *TRI, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig,
                     const TargetRegisterInfo &TRI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control-flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
      SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask,
                            int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint16_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
  }
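
  // The TSFlags-based predicates below come in a static MachineInstr form and
  // an opcode form (which needs the instruction table, hence non-static). A
  // small illustrative sketch of typical use (hypothetical counter):
  //   if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isAtomic(MI))
  //     ++NumVMEMAccesses;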
  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  static bool isLDSDMA(const MachineInstr &MI) {
    return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
  }
  bool isLDSDMA(uint16_t Opcode) const {
    return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
  }

  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }
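
  // For illustration (a sketch, assuming these pseudo opcode names): GLOBAL_*
  // and SCRATCH_* opcodes are FLAT-encoded and segment-specific, while plain
  // FLAT_* opcodes are FLAT-encoded only:
  //   isFLAT(AMDGPU::GLOBAL_LOAD_DWORD)                // true
  //   isSegmentSpecificFLAT(AMDGPU::GLOBAL_LOAD_DWORD) // true
  //   isSegmentSpecificFLAT(AMDGPU::FLAT_LOAD_DWORD)   // false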
  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }
  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR are a special case:
  // they are SGPR spills, but they are implemented with VALU instructions.
  // Checking only the Spill bit together with the instruction type would
  // misclassify them, so they need an explicit check.
  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(MI) && isVALU(MI));
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(Opcode) && isVALU(Opcode));
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(MI) && isSALU(MI));
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(Opcode) && isSALU(Opcode));
  }

  bool isSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  static bool isSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Spill;
  }

  static bool isWWMRegSpillOpcode(uint16_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }
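
  // Illustration of the isMAI/isMFMA distinction above (a sketch; AccWriteMI
  // stands for a hypothetical V_ACCVGPR_WRITE_B32_e64 instruction):
  //   isMAI(AccWriteMI);  // true: the opcode carries the IsMAI flag
  //   isMFMA(AccWriteMI); // false: ACCVGPR read/write opcodes are excluded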
  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  // Most SOPK instructions treat the immediate as a signed 16-bit value;
  // some treat it as unsigned.
  static bool sopkIsZext(unsigned Opcode) {
    return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
           Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
           Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
           Opcode == AMDGPU::S_GETREG_B32;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }

  // Check whether an opcode is a barrier start. Before gfx12 this is just
  // S_BARRIER, but since the addition of S_BARRIER_SIGNAL* / S_BARRIER_WAIT
  // we also need to check for the barrier-start variants (S_BARRIER_SIGNAL*).
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
           Opcode == AMDGPU::S_BARRIER_LEAVE ||
           Opcode == AMDGPU::S_BARRIER_LEAVE_IMM ||
           Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
  }

  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool isIGLP(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_BARRIER ||
           Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }

  bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }

  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    default:
      return Opcode;
    }
  }

  bool isWaitcnt(unsigned Opcode) const {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }
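
  // For illustration: "soft" waitcnt opcodes are placeholders that later
  // passes may strengthen or drop; converting one to its hard form is a plain
  // opcode mapping (identity for anything that is not soft):
  //   unsigned Op = getNonSoftWaitcntOpcode(AMDGPU::S_WAITCNT_soft);
  //   // Op == AMDGPU::S_WAITCNT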
  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());
                        });
  }

  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// This function is used to determine if an instruction can be safely
  /// executed under EXEC = 0 without hardware error, indeterminate results,
  /// and/or visible effects on future vector execution or outside the shader.
  /// Note: as of 2024 the only use of this is SIPreEmitPeephole, where it is
  /// used in removing branches over short EXEC = 0 sequences.
  /// As such it embeds certain assumptions which may not apply to every case
  /// of EXEC = 0 execution.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI,
                   const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const;

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4 bytes; this function assumes that it
  // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }
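
  // For illustration: on AMDGPU the integer inline-constant range is [-16, 64]
  // (plus a handful of FP values), so e.g. (a sketch):
  //   isInlineConstant(APInt(32, 64)); // true:  encodable inline
  //   isInlineConstant(APInt(32, 65)); // false: needs a 32-bit literal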
  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI,
                       int OpIdx) const {
    return usesConstantBus(MRI, MI.getOperand(OpIdx),
                           MI.getDesc().operands()[OpIdx]);
  }

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
  /// Check if \p MO is a legal operand if it was the \p OpIdx operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description or operand index.
  /// The operand-index overload performs additional legality checks.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;
  bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
                         const MachineOperand &MO) const;
  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class
  /// for the dst register (\p DstRC) can be optionally supplied. This function
  /// can only be used when it is known that the value in \p SrcReg is the same
  /// across all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI,
                              const TargetRegisterClass *DstRC = nullptr) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
  /// was moved to VGPR. \returns true if succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
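
  /// A sketch of the typical lowering loop built on SIInstrWorklist and on
  /// moveToVALU() below (illustrative; SIFixSGPRCopies is the main user):
  /// \code
  ///   SIInstrWorklist Worklist;
  ///   Worklist.insert(&ScalarMI);
  ///   moveToVALU(Worklist, MDT); // rewrites ScalarMI and its SALU users
  /// \endcode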
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstrs
  /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Build instructions that simulate the behavior of an `s_trap 2`
  /// instruction for hardware (namely, gfx11) that runs in PRIV=1 mode, where
  /// s_trap is interpreted as a nop.
  MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
                                         MachineBasicBlock &MBB,
                                         MachineInstr &MI,
                                         const DebugLoc &DL) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p Op. If \p MI does not have an
  /// operand named \p Op, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }
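
  /// Typical named-operand access (a sketch):
  /// \code
  ///   if (const MachineOperand *Src0 =
  ///           getNamedOperand(MI, AMDGPU::OpName::src0))
  ///     ...; // the instruction has a src0 operand
  /// \endcode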
  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;
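
  /// Sketch of use: split an immediate that does not fit the MUBUF offset
  /// field into an SGPR soffset part and an immediate part:
  /// \code
  ///   uint32_t SOffset, ImmOffset;
  ///   if (splitMUBUFOffset(Imm, SOffset, ImmOffset))
  ///     ...; // materialize SOffset in an SGPR, encode ImmOffset directly
  /// \endcode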
  /// Returns true if \p Offset is legal for the subtarget as the offset field
  /// of a FLAT encoded instruction of the given \p FlatVariant accessing
  /// \p AddrSpace.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// Returns true if negative offsets are allowed for the given \p FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID,
                                         unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;

  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter)
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }

  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce even alignment of the \p OpName register operand if required by
  // the target. This is used when an operand is a 32-bit register but needs
  // to be even-aligned regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};

/// \brief Returns true if the reg:subreg pair \p P is of the register class
/// \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create RegSubRegPair from a register MachineOperand
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);
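
/// Sketch of combining the helpers above (illustrative only):
/// \code
///   TargetInstrInfo::RegSubRegPair P = getRegSubRegPair(MI.getOperand(1));
///   if (isOfRegClass(P, AMDGPU::VGPR_32RegClass, MRI))
///     ...; // operand reads a 32-bit VGPR (possibly via a subregister)
/// \endcode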
/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns earlyclobber version of a MAC MFMA if it exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns Version of an MFMA instruction which uses AGPRs for srcC and
/// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst.
LLVM_READONLY
int getMFMASrcCVDstAGPROp(uint16_t Opcode);
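
/// These tablegen-generated mappings return -1 when no counterpart exists, so
/// results must be checked before use (a sketch):
/// \code
///   int E32 = AMDGPU::getVOPe32(MI.getOpcode());
///   if (E32 != -1)
///     ...; // a 32-bit (e32) encoding of this VOP exists
/// \endcode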
/// \returns v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace AMDGPU {
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H