//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = 5;
}

void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}
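
// Returns true for instructions that implicitly use M0 as a sendmsg, trace
// data, or GDS source. On subtargets with the read-M0 send-msg hazard these
// need wait states after an SALU write to M0 (see checkReadM0Hazards).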
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}

static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}

ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) &&
      checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}

unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);

  fixVMEMtoScalarWriteHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);

  CurrCycleInstr = nullptr;
  return W;
}

unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  return WaitStates;
}
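
// Each nullptr pushed onto EmittedInstrs models one wait state with no
// instruction occupying it; getWaitStatesSince() counts these entries when
// measuring the distance back to a hazard.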
void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  // Do not track non-instructions which do not affect the wait states.
  // If included, these instructions can lead to buffer overflow such that
  // detectable hazards are missed.
  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
      CurrCycleInstr->isKill())
    return;

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions.
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first. Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookAhead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns the minimum number of wait states since \p I, walking all
// predecessors. Only scans until \p IsExpired returns true. Can only be run
// in hazard recognizer mode.
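//
// For example, when searching for a hazardous VALU write two blocks up, the
// walk visits each predecessor block once (tracked in \p Visited), accumulates
// the wait states contributed by every instruction it passes, and takes the
// minimum across converging paths; a result of
// std::numeric_limits<int>::max() means no hazard was found before the search
// expired.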
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->rend(); I != E; ++I) {
    if (IsHazard(&*I))
      return WaitStates;

    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}

int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
                                               int Limit) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

static void addRegUnits(const SIRegisterInfo &TRI,
                        BitVector &BV, unsigned Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands?
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}
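
// Illustrative soft-clause hazard (hypothetical SGPRs): in the pair
//   s_load_dwordx2 s[4:5], s[0:1], 0x0
//   s_load_dword   s6,     s[4:5], 0x0
// the second load reads a register defined by the first within the same
// clause, so the clause must be broken with a non-SMEM instruction.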
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non-SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if
  // they use the same address. For now, just start a new clause whenever we
  // see a store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
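
// Illustrative SI-only SMRD hazard (hypothetical registers):
//   v_readfirstlane_b32 s0, v0    ; VALU write of an SGPR
//   s_nop 3                       ; the 4 required wait states
//   s_load_dword s2, s[0:1], 0x0  ; SMRD read of that SGPR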
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = 0;

  WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return WaitStatesNeeded;

  // A read of an SGPR by an SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) {
    return TII.isSALU(*MI);
  };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // s_mov writing a descriptor and s_buffer_load_dword reading the
    // descriptor needs some number of nops in between. We don't know how many
    // we need, but let's use 4. This wasn't discovered before probably because
    // the only case when this happens is when we expand a 64-bit pointer into
    // a full descriptor and use s_buffer_load_dword instead of s_load_dword,
    // which was probably never encountered in the closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
          SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                     IsBufferHazardDefFn,
                                                     SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVMEMHazards(MachineInstr *VMEM) {
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };

  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}
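
// Illustrative hwreg hazard (hypothetical operands): reading a hardware
// register too soon after it is set requires spacing, e.g.
//   s_setreg_b32 hwreg(HW_REG_MODE), s0
//   s_nop 1                              ; the 2 required wait states
//   s_getreg_b32 s1, hwreg(HW_REG_MODE)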
int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates =
      ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1).
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for
  // them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}

int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next
  // instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  unsigned Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
      VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}
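
// Illustrative 12-dword-store hazard (hypothetical registers): in
//   buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
//   v_mov_b32 v1, 0
// the VALU write lands on a register still holding store data, so one wait
// state must separate the two instructions.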
int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // See checkVALUHazards().
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded =
          std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  unsigned LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
                                              RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
}
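
// Illustrative s_mov_fed hazard checked below (hypothetical registers):
//   s_mov_fed_b32 s0, s1
//   s_nop 0                 ; the 1 required wait state
//   s_add_i32 s2, s0, s3    ; SGPR read of the s_mov_fed destination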
int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugInstr())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasSMovFedHazard())
    return 0;

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    auto IsHazardFn = [] (MachineInstr *MI) {
      return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
    };
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
                                                 MovFedWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                   SMovRelWaitStates);
}

bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
        continue;
      return true;
    }
    return false;
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}

bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    for (auto MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const unsigned SDSTReg = SDST->getReg();
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
  // between any at-risk SMEM and any SALU dependent on the SMEM results.
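  // Any other intervening SALU (SOPP and the opcodes excluded below do not
  // count) therefore expires the search; otherwise the hazard is broken by
  // writing the null SGPR.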
  auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        if (TII->isSOPP(*MI))
          return false;
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
        case AMDGPU::S_WAITCNT_LGKMCNT:
          return false;
        default:
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}

bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  auto IsHazardFn = [TRI] (MachineInstr *I) {
    if (SIInstrInfo::isVALU(*I))
      return false;
    return I->readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI)) {
      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
        return true;
      for (auto MO : MI->implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0xfffe);
  return true;
}

bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
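  // Break the hazard by inserting "s_waitcnt_vscnt null, 0", separating the
  // conflicting DS/VMEM pair across the branch.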
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
      .addImm(0);

  return true;
}

int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!Offset || (Offset->getImm() & 6) == 0)
    return 0;

  auto IsHazardFn = [TII] (MachineInstr *I) {
    if (!SIInstrInfo::isMIMG(*I))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(*I) >= 16;
  };

  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}