1 //===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This pass tries to apply several peephole SDWA patterns. 11 /// 12 /// E.g. original: 13 /// V_LSHRREV_B32_e32 %vreg0, 16, %vreg1 14 /// V_ADD_I32_e32 %vreg2, %vreg0, %vreg3 15 /// V_LSHLREV_B32_e32 %vreg4, 16, %vreg2 16 /// 17 /// Replace: 18 /// V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3 19 /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 20 /// 21 //===----------------------------------------------------------------------===// 22 23 24 #include "AMDGPU.h" 25 #include "AMDGPUSubtarget.h" 26 #include "SIDefines.h" 27 #include "SIInstrInfo.h" 28 #include "llvm/ADT/Statistic.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstrBuilder.h" 32 #include <unordered_map> 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "si-peephole-sdwa" 37 38 STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found."); 39 STATISTIC(NumSDWAInstructionsPeepholed, 40 "Number of instruction converted to SDWA."); 41 42 namespace { 43 44 class SDWAOperand; 45 46 class SIPeepholeSDWA : public MachineFunctionPass { 47 private: 48 MachineRegisterInfo *MRI; 49 const SIRegisterInfo *TRI; 50 const SIInstrInfo *TII; 51 52 std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands; 53 54 public: 55 static char ID; 56 57 typedef SmallVector<std::unique_ptr<SDWAOperand>, 4> SDWAOperandsVector; 58 59 SIPeepholeSDWA() : MachineFunctionPass(ID) { 60 initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry()); 61 } 62 63 bool runOnMachineFunction(MachineFunction &MF) override; 64 void matchSDWAOperands(MachineBasicBlock &MBB); 65 bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); 66 67 StringRef getPassName() const override { return "SI Peephole SDWA"; } 68 69 void getAnalysisUsage(AnalysisUsage &AU) const override { 70 AU.setPreservesCFG(); 71 MachineFunctionPass::getAnalysisUsage(AU); 72 } 73 }; 74 75 class SDWAOperand { 76 private: 77 MachineOperand *Target; // Operand that would be used in converted instruction 78 MachineOperand *Replaced; // Operand that would be replace by Target 79 80 public: 81 SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp) 82 : Target(TargetOp), Replaced(ReplacedOp) { 83 assert(Target->isReg()); 84 assert(Replaced->isReg()); 85 } 86 87 virtual ~SDWAOperand() {} 88 89 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0; 90 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0; 91 92 MachineOperand *getTargetOperand() const { return Target; } 93 MachineOperand *getReplacedOperand() const { return Replaced; } 94 MachineInstr *getParentInst() const { return Target->getParent(); } 95 MachineRegisterInfo *getMRI() const { 96 return &getParentInst()->getParent()->getParent()->getRegInfo(); 97 } 98 }; 99 100 using namespace AMDGPU::SDWA; 101 102 class SDWASrcOperand : public SDWAOperand { 103 private: 104 SdwaSel SrcSel; 105 bool Abs; 106 bool Neg; 107 bool Sext; 108 109 public: 110 SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, 111 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false, 112 bool Sext_ = false) 113 : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_), 114 Neg(Neg_), Sext(Sext_) {} 115 116 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; 117 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; 118 119 SdwaSel getSrcSel() const { return SrcSel; } 120 bool getAbs() const { return Abs; } 121 bool getNeg() const { return Neg; } 122 bool getSext() const { return Sext; } 123 124 uint64_t getSrcMods() const; 125 }; 126 127 class SDWADstOperand : public SDWAOperand { 128 private: 129 SdwaSel DstSel; 130 DstUnused DstUn; 131 132 public: 133 SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, 134 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD) 135 : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} 136 137 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; 138 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; 139 140 SdwaSel getDstSel() const { return DstSel; } 141 DstUnused getDstUnused() const { return DstUn; } 142 }; 143 144 } // End anonymous namespace. 145 146 INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false) 147 148 char SIPeepholeSDWA::ID = 0; 149 150 char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID; 151 152 FunctionPass *llvm::createSIPeepholeSDWAPass() { 153 return new SIPeepholeSDWA(); 154 } 155 156 #ifndef NDEBUG 157 158 static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) { 159 switch(Sel) { 160 case BYTE_0: OS << "BYTE_0"; break; 161 case BYTE_1: OS << "BYTE_1"; break; 162 case BYTE_2: OS << "BYTE_2"; break; 163 case BYTE_3: OS << "BYTE_3"; break; 164 case WORD_0: OS << "WORD_0"; break; 165 case WORD_1: OS << "WORD_1"; break; 166 case DWORD: OS << "DWORD"; break; 167 } 168 return OS; 169 } 170 171 static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) { 172 switch(Un) { 173 case UNUSED_PAD: OS << "UNUSED_PAD"; break; 174 case UNUSED_SEXT: OS << "UNUSED_SEXT"; break; 175 case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break; 176 } 177 return OS; 178 } 179 180 static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) { 181 OS << "SDWA src: " << *Src.getTargetOperand() 182 << " src_sel:" << Src.getSrcSel() 183 << " abs:" << Src.getAbs() << " neg:" << Src.getNeg() 184 << " sext:" << Src.getSext() << '\n'; 185 return OS; 186 } 187 188 static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) { 189 OS << "SDWA dst: " << *Dst.getTargetOperand() 190 << " dst_sel:" << Dst.getDstSel() 191 << " dst_unused:" << Dst.getDstUnused() << '\n'; 192 return OS; 193 } 194 195 #endif 196 197 static bool isSameBB(const MachineInstr *FirstMI, const MachineInstr *SecondMI) { 198 assert(FirstMI && SecondMI); 199 return FirstMI->getParent() == SecondMI->getParent(); 200 } 201 202 static void copyRegOperand(MachineOperand &To, const MachineOperand &From) { 203 assert(To.isReg() && From.isReg()); 204 To.setReg(From.getReg()); 205 To.setSubReg(From.getSubReg()); 206 To.setIsUndef(From.isUndef()); 207 if (To.isUse()) { 208 To.setIsKill(From.isKill()); 209 } else { 210 To.setIsDead(From.isDead()); 211 } 212 } 213 214 static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) { 215 return LHS.isReg() && 216 RHS.isReg() && 217 LHS.getReg() == RHS.getReg() && 218 LHS.getSubReg() == RHS.getSubReg(); 219 } 220 221 static bool isSubregOf(const MachineOperand &SubReg, 222 const MachineOperand &SuperReg, 223 const TargetRegisterInfo *TRI) { 224 225 if (!SuperReg.isReg() || !SubReg.isReg()) 226 return false; 227 228 if (isSameReg(SuperReg, SubReg)) 229 return true; 230 231 if (SuperReg.getReg() != SubReg.getReg()) 232 return false; 233 234 LaneBitmask::Type SuperMask = 235 TRI->getSubRegIndexLaneMask(SuperReg.getSubReg()).getAsInteger(); 236 LaneBitmask::Type SubMask = 237 TRI->getSubRegIndexLaneMask(SubReg.getSubReg()).getAsInteger(); 238 return TRI->regmaskSubsetEqual(&SubMask, &SuperMask); 239 } 240 241 uint64_t SDWASrcOperand::getSrcMods() const { 242 uint64_t Mods = 0; 243 if (Abs || Neg) { 244 assert(!Sext && 245 "Float and integer src modifiers can't be set simulteniously"); 246 Mods |= Abs ? SISrcMods::ABS : 0; 247 Mods |= Neg ? SISrcMods::NEG : 0; 248 } else if (Sext) { 249 Mods |= SISrcMods::SEXT; 250 } 251 252 return Mods; 253 } 254 255 MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) { 256 // For SDWA src operand potential instruction is one that use register 257 // defined by parent instruction 258 MachineRegisterInfo *MRI = getMRI(); 259 MachineOperand *Replaced = getReplacedOperand(); 260 assert(Replaced->isReg()); 261 262 MachineInstr *PotentialMI = nullptr; 263 for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) { 264 // If this is use of another subreg of dst reg then do nothing 265 if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo())) 266 continue; 267 268 // If there exist use of dst in another basic block or use of superreg of 269 // dst then we should not combine this opernad 270 if (!isSameBB(PotentialMO.getParent(), getParentInst()) || 271 !isSameReg(PotentialMO, *Replaced)) 272 return nullptr; 273 274 // Check that PotentialMI is only instruction that uses dst reg 275 if (PotentialMI == nullptr) { 276 PotentialMI = PotentialMO.getParent(); 277 } else if (PotentialMI != PotentialMO.getParent()) { 278 return nullptr; 279 } 280 } 281 282 return PotentialMI; 283 } 284 285 bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { 286 // Find operand in instruction that matches source operand and replace it with 287 // target operand. Set corresponding src_sel 288 289 MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0); 290 MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); 291 MachineOperand *SrcMods = 292 TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); 293 assert(Src && Src->isReg()); 294 if (!isSameReg(*Src, *getReplacedOperand())) { 295 // If this is not src0 then it should be src1 296 Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1); 297 SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); 298 SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); 299 300 assert(Src && Src->isReg()); 301 302 if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || 303 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && 304 !isSameReg(*Src, *getReplacedOperand())) { 305 // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to 306 // src2. This is not allowed. 307 return false; 308 } 309 310 assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods); 311 } 312 copyRegOperand(*Src, *getTargetOperand()); 313 SrcSel->setImm(getSrcSel()); 314 SrcMods->setImm(getSrcMods()); 315 getTargetOperand()->setIsKill(false); 316 return true; 317 } 318 319 MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) { 320 // For SDWA dst operand potential instruction is one that defines register 321 // that this operand uses 322 MachineRegisterInfo *MRI = getMRI(); 323 MachineInstr *ParentMI = getParentInst(); 324 MachineOperand *Replaced = getReplacedOperand(); 325 assert(Replaced->isReg()); 326 327 for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) { 328 if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo())) 329 continue; 330 331 if (!isSameBB(getParentInst(), PotentialMO.getParent()) || 332 !isSameReg(*Replaced, PotentialMO)) 333 return nullptr; 334 335 // Check that ParentMI is the only instruction that uses replaced register 336 for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) { 337 if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) && 338 UseMO.getParent() != ParentMI) { 339 return nullptr; 340 } 341 } 342 343 // Due to SSA this should be onle def of replaced register, so return it 344 return PotentialMO.getParent(); 345 } 346 347 return nullptr; 348 } 349 350 bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { 351 // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused 352 353 if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || 354 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && 355 getDstSel() != AMDGPU::SDWA::DWORD) { 356 // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD 357 return false; 358 } 359 360 MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); 361 assert(Operand && 362 Operand->isReg() && 363 isSameReg(*Operand, *getReplacedOperand())); 364 copyRegOperand(*Operand, *getTargetOperand()); 365 MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); 366 assert(DstSel); 367 DstSel->setImm(getDstSel()); 368 MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); 369 assert(DstUnused); 370 DstUnused->setImm(getDstUnused()); 371 372 // Remove original instruction because it would conflict with our new 373 // instruction by register definition 374 getParentInst()->eraseFromParent(); 375 return true; 376 } 377 378 void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) { 379 for (MachineInstr &MI : MBB) { 380 unsigned Opcode = MI.getOpcode(); 381 switch (Opcode) { 382 case AMDGPU::V_LSHRREV_B32_e32: 383 case AMDGPU::V_ASHRREV_I32_e32: 384 case AMDGPU::V_LSHLREV_B32_e32: { 385 // from: v_lshrrev_b32_e32 v1, 16/24, v0 386 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 387 388 // from: v_ashrrev_i32_e32 v1, 16/24, v0 389 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1 390 391 // from: v_lshlrev_b32_e32 v1, 16/24, v0 392 // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD 393 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); 394 if (!Src0->isImm()) 395 break; 396 397 int64_t Imm = Src0->getImm(); 398 if (Imm != 16 && Imm != 24) 399 break; 400 401 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); 402 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); 403 if (TRI->isPhysicalRegister(Src1->getReg()) || 404 TRI->isPhysicalRegister(Dst->getReg())) 405 break; 406 407 if (Opcode == AMDGPU::V_LSHLREV_B32_e32) { 408 auto SDWADst = make_unique<SDWADstOperand>( 409 Dst, Src1, Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); 410 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); 411 SDWAOperands[&MI] = std::move(SDWADst); 412 ++NumSDWAPatternsFound; 413 } else { 414 auto SDWASrc = make_unique<SDWASrcOperand>( 415 Src1, Dst, Imm == 16 ? WORD_1 : BYTE_3, false, false, 416 Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true); 417 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); 418 SDWAOperands[&MI] = std::move(SDWASrc); 419 ++NumSDWAPatternsFound; 420 } 421 break; 422 } 423 424 case AMDGPU::V_LSHRREV_B16_e32: 425 case AMDGPU::V_ASHRREV_I16_e32: 426 case AMDGPU::V_LSHLREV_B16_e32: { 427 // from: v_lshrrev_b16_e32 v1, 8, v0 428 // to SDWA src:v0 src_sel:BYTE_1 429 430 // from: v_ashrrev_i16_e32 v1, 8, v0 431 // to SDWA src:v0 src_sel:BYTE_1 sext:1 432 433 // from: v_lshlrev_b16_e32 v1, 8, v0 434 // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD 435 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); 436 if (!Src0->isImm() || Src0->getImm() != 8) 437 break; 438 439 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); 440 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); 441 442 if (TRI->isPhysicalRegister(Src1->getReg()) || 443 TRI->isPhysicalRegister(Dst->getReg())) 444 break; 445 446 if (Opcode == AMDGPU::V_LSHLREV_B16_e32) { 447 auto SDWADst = 448 make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD); 449 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); 450 SDWAOperands[&MI] = std::move(SDWADst); 451 ++NumSDWAPatternsFound; 452 } else { 453 auto SDWASrc = make_unique<SDWASrcOperand>( 454 Src1, Dst, BYTE_1, false, false, 455 Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true); 456 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); 457 SDWAOperands[&MI] = std::move(SDWASrc); 458 ++NumSDWAPatternsFound; 459 } 460 break; 461 } 462 463 case AMDGPU::V_BFE_I32: 464 case AMDGPU::V_BFE_U32: { 465 // e.g.: 466 // from: v_bfe_u32 v1, v0, 8, 8 467 // to SDWA src:v0 src_sel:BYTE_1 468 469 // offset | width | src_sel 470 // ------------------------ 471 // 0 | 8 | BYTE_0 472 // 0 | 16 | WORD_0 473 // 0 | 32 | DWORD ? 474 // 8 | 8 | BYTE_1 475 // 16 | 8 | BYTE_2 476 // 16 | 16 | WORD_1 477 // 24 | 8 | BYTE_3 478 479 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); 480 if (!Src1->isImm()) 481 break; 482 int64_t Offset = Src1->getImm(); 483 484 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); 485 if (!Src2->isImm()) 486 break; 487 int64_t Width = Src2->getImm(); 488 489 SdwaSel SrcSel = DWORD; 490 491 if (Offset == 0 && Width == 8) 492 SrcSel = BYTE_0; 493 else if (Offset == 0 && Width == 16) 494 SrcSel = WORD_0; 495 else if (Offset == 0 && Width == 32) 496 SrcSel = DWORD; 497 else if (Offset == 8 && Width == 8) 498 SrcSel = BYTE_1; 499 else if (Offset == 16 && Width == 8) 500 SrcSel = BYTE_2; 501 else if (Offset == 16 && Width == 16) 502 SrcSel = WORD_1; 503 else if (Offset == 24 && Width == 8) 504 SrcSel = BYTE_3; 505 else 506 break; 507 508 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); 509 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); 510 511 if (TRI->isPhysicalRegister(Src0->getReg()) || 512 TRI->isPhysicalRegister(Dst->getReg())) 513 break; 514 515 auto SDWASrc = make_unique<SDWASrcOperand>( 516 Src0, Dst, SrcSel, false, false, 517 Opcode == AMDGPU::V_BFE_U32 ? false : true); 518 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); 519 SDWAOperands[&MI] = std::move(SDWASrc); 520 ++NumSDWAPatternsFound; 521 break; 522 } 523 case AMDGPU::V_AND_B32_e32: { 524 // e.g.: 525 // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 526 // to SDWA src:v0 src_sel:WORD_0/BYTE_0 527 528 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); 529 if (!Src0->isImm()) 530 break; 531 532 int64_t Imm = Src0->getImm(); 533 if (Imm != 0x0000ffff && Imm != 0x000000ff) 534 break; 535 536 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); 537 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); 538 539 if (TRI->isPhysicalRegister(Src1->getReg()) || 540 TRI->isPhysicalRegister(Dst->getReg())) 541 break; 542 543 auto SDWASrc = make_unique<SDWASrcOperand>( 544 Src1, Dst, Imm == 0x0000ffff ? WORD_0 : BYTE_0); 545 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); 546 SDWAOperands[&MI] = std::move(SDWASrc); 547 ++NumSDWAPatternsFound; 548 break; 549 } 550 } 551 } 552 } 553 554 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, 555 const SDWAOperandsVector &SDWAOperands) { 556 // Check if this instruction can be converted to SDWA: 557 // 1. Does this opcode support SDWA 558 if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1) 559 return false; 560 561 // 2. Are all operands - VGPRs 562 for (const MachineOperand &Operand : MI.explicit_operands()) { 563 if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg())) 564 return false; 565 } 566 567 // Convert to sdwa 568 int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); 569 assert(SDWAOpcode != -1); 570 571 const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); 572 573 // Create SDWA version of instruction MI and initialize its operands 574 MachineInstrBuilder SDWAInst = 575 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); 576 577 // Copy dst, if it is present in original then should also be present in SDWA 578 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); 579 if (Dst) { 580 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); 581 SDWAInst.add(*Dst); 582 } else { 583 assert(TII->isVOPC(MI)); 584 } 585 586 // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and 587 // src0_modifiers (except for v_nop_sdwa, but it can't get here) 588 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); 589 assert( 590 Src0 && 591 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && 592 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); 593 SDWAInst.addImm(0); 594 SDWAInst.add(*Src0); 595 596 // Copy src1 if present, initialize src1_modifiers. 597 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); 598 if (Src1) { 599 assert( 600 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && 601 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); 602 SDWAInst.addImm(0); 603 SDWAInst.add(*Src1); 604 } else { 605 assert(TII->isVOP1(MI)); 606 } 607 608 if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || 609 SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) { 610 // v_mac_f16/32 has additional src2 operand tied to vdst 611 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); 612 assert(Src2); 613 SDWAInst.add(*Src2); 614 } 615 616 // Initialize clamp. 617 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); 618 SDWAInst.addImm(0); 619 620 // Initialize dst_sel and dst_unused if present 621 if (Dst) { 622 assert( 623 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 && 624 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1); 625 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); 626 SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); 627 } 628 629 // Initialize src0_sel 630 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1); 631 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); 632 633 634 // Initialize src1_sel if present 635 if (Src1) { 636 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1); 637 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); 638 } 639 640 // Apply all sdwa operand pattenrs 641 bool Converted = false; 642 for (auto &Operand : SDWAOperands) { 643 Converted |= Operand->convertToSDWA(*SDWAInst, TII); 644 } 645 if (!Converted) { 646 SDWAInst->eraseFromParent(); 647 return false; 648 } 649 650 DEBUG(dbgs() << "Convert instruction:" << MI 651 << "Into:" << *SDWAInst << '\n'); 652 ++NumSDWAInstructionsPeepholed; 653 654 MI.eraseFromParent(); 655 return true; 656 } 657 658 bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { 659 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 660 661 if (!ST.hasSDWA() || 662 !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9 663 return false; 664 } 665 666 MRI = &MF.getRegInfo(); 667 TRI = ST.getRegisterInfo(); 668 TII = ST.getInstrInfo(); 669 670 std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches; 671 672 // FIXME: For now we only combine instructions in one basic block 673 for (MachineBasicBlock &MBB : MF) { 674 SDWAOperands.clear(); 675 matchSDWAOperands(MBB); 676 677 PotentialMatches.clear(); 678 for (auto &OperandPair : SDWAOperands) { 679 auto &Operand = OperandPair.second; 680 MachineInstr *PotentialMI = Operand->potentialToConvert(TII); 681 if (PotentialMI) { 682 PotentialMatches[PotentialMI].push_back(std::move(Operand)); 683 } 684 } 685 686 for (auto &PotentialPair : PotentialMatches) { 687 MachineInstr &PotentialMI = *PotentialPair.first; 688 convertToSDWA(PotentialMI, PotentialPair.second); 689 } 690 } 691 return false; 692 } 693