1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements hazard recognizers for scheduling on GCN processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "GCNHazardRecognizer.h" 15 #include "AMDGPUSubtarget.h" 16 #include "SIInstrInfo.h" 17 #include "llvm/CodeGen/ScheduleDAG.h" 18 #include "llvm/Support/Debug.h" 19 20 using namespace llvm; 21 22 //===----------------------------------------------------------------------===// 23 // Hazard Recoginizer Implementation 24 //===----------------------------------------------------------------------===// 25 26 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 27 CurrCycleInstr(nullptr), 28 MF(MF), 29 ST(MF.getSubtarget<SISubtarget>()) { 30 MaxLookAhead = 5; 31 } 32 33 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 34 EmitInstruction(SU->getInstr()); 35 } 36 37 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 38 CurrCycleInstr = MI; 39 } 40 41 static bool isDivFMas(unsigned Opcode) { 42 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64; 43 } 44 45 static bool isSGetReg(unsigned Opcode) { 46 return Opcode == AMDGPU::S_GETREG_B32; 47 } 48 49 static bool isSSetReg(unsigned Opcode) { 50 return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32; 51 } 52 53 static bool isRWLane(unsigned Opcode) { 54 return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32; 55 } 56 57 static bool getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 58 59 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 60 AMDGPU::OpName::simm16); 61 return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 62 } 63 64 ScheduleHazardRecognizer::HazardType 65 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 66 MachineInstr *MI = SU->getInstr(); 67 68 if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 69 return NoopHazard; 70 71 if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) 72 return NoopHazard; 73 74 if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) 75 return NoopHazard; 76 77 if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 78 return NoopHazard; 79 80 if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 81 return NoopHazard; 82 83 if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0) 84 return NoopHazard; 85 86 if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 87 return NoopHazard; 88 89 if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) 90 return NoopHazard; 91 92 return NoHazard; 93 } 94 95 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) { 96 return PreEmitNoops(SU->getInstr()); 97 } 98 99 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 100 if (SIInstrInfo::isSMRD(*MI)) 101 return std::max(0, checkSMRDHazards(MI)); 102 103 if (SIInstrInfo::isVALU(*MI)) { 104 int WaitStates = std::max(0, checkVALUHazards(MI)); 105 106 if (SIInstrInfo::isVMEM(*MI)) 107 WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); 108 109 if (SIInstrInfo::isDPP(*MI)) 110 WaitStates = std::max(WaitStates, checkDPPHazards(MI)); 111 112 if (isDivFMas(MI->getOpcode())) 113 WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); 114 115 if (isRWLane(MI->getOpcode())) 116 WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); 117 118 return WaitStates; 119 } 120 121 if (isSGetReg(MI->getOpcode())) 122 return std::max(0, checkGetRegHazards(MI)); 123 124 if (isSSetReg(MI->getOpcode())) 125 return std::max(0, checkSetRegHazards(MI)); 126 127 return 0; 128 } 129 130 void GCNHazardRecognizer::EmitNoop() { 131 EmittedInstrs.push_front(nullptr); 132 } 133 134 void GCNHazardRecognizer::AdvanceCycle() { 135 136 // When the scheduler detects a stall, it will call AdvanceCycle() without 137 // emitting any instructions. 138 if (!CurrCycleInstr) 139 return; 140 141 const SIInstrInfo *TII = ST.getInstrInfo(); 142 unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr); 143 144 // Keep track of emitted instructions 145 EmittedInstrs.push_front(CurrCycleInstr); 146 147 // Add a nullptr for each additional wait state after the first. Make sure 148 // not to add more than getMaxLookAhead() items to the list, since we 149 // truncate the list to that size right after this loop. 150 for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 151 i < e; ++i) { 152 EmittedInstrs.push_front(nullptr); 153 } 154 155 // getMaxLookahead() is the largest number of wait states we will ever need 156 // to insert, so there is no point in keeping track of more than that many 157 // wait states. 158 EmittedInstrs.resize(getMaxLookAhead()); 159 160 CurrCycleInstr = nullptr; 161 } 162 163 void GCNHazardRecognizer::RecedeCycle() { 164 llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 165 } 166 167 //===----------------------------------------------------------------------===// 168 // Helper Functions 169 //===----------------------------------------------------------------------===// 170 171 int GCNHazardRecognizer::getWaitStatesSince( 172 function_ref<bool(MachineInstr *)> IsHazard) { 173 174 int WaitStates = -1; 175 for (MachineInstr *MI : EmittedInstrs) { 176 ++WaitStates; 177 if (!MI || !IsHazard(MI)) 178 continue; 179 return WaitStates; 180 } 181 return std::numeric_limits<int>::max(); 182 } 183 184 int GCNHazardRecognizer::getWaitStatesSinceDef( 185 unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { 186 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 187 188 auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) { 189 return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI); 190 }; 191 192 return getWaitStatesSince(IsHazardFn); 193 } 194 195 int GCNHazardRecognizer::getWaitStatesSinceSetReg( 196 function_ref<bool(MachineInstr *)> IsHazard) { 197 198 auto IsHazardFn = [IsHazard] (MachineInstr *MI) { 199 return isSSetReg(MI->getOpcode()) && IsHazard(MI); 200 }; 201 202 return getWaitStatesSince(IsHazardFn); 203 } 204 205 //===----------------------------------------------------------------------===// 206 // No-op Hazard Detection 207 //===----------------------------------------------------------------------===// 208 209 static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops, 210 std::set<unsigned> &Set) { 211 for (const MachineOperand &Op : Ops) { 212 if (Op.isReg()) 213 Set.insert(Op.getReg()); 214 } 215 } 216 217 int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { 218 // SMEM soft clause are only present on VI+ 219 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 220 return 0; 221 222 // A soft-clause is any group of consecutive SMEM instructions. The 223 // instructions in this group may return out of order and/or may be 224 // replayed (i.e. the same instruction issued more than once). 225 // 226 // In order to handle these situations correctly we need to make sure 227 // that when a clause has more than one instruction, no instruction in the 228 // clause writes to a register that is read another instruction in the clause 229 // (including itself). If we encounter this situaion, we need to break the 230 // clause by inserting a non SMEM instruction. 231 232 std::set<unsigned> ClauseDefs; 233 std::set<unsigned> ClauseUses; 234 235 for (MachineInstr *MI : EmittedInstrs) { 236 237 // When we hit a non-SMEM instruction then we have passed the start of the 238 // clause and we can stop. 239 if (!MI || !SIInstrInfo::isSMRD(*MI)) 240 break; 241 242 addRegsToSet(MI->defs(), ClauseDefs); 243 addRegsToSet(MI->uses(), ClauseUses); 244 } 245 246 if (ClauseDefs.empty()) 247 return 0; 248 249 // FIXME: When we support stores, we need to make sure not to put loads and 250 // stores in the same clause if they use the same address. For now, just 251 // start a new clause whenever we see a store. 252 if (SMEM->mayStore()) 253 return 1; 254 255 addRegsToSet(SMEM->defs(), ClauseDefs); 256 addRegsToSet(SMEM->uses(), ClauseUses); 257 258 std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size())); 259 std::vector<unsigned>::iterator End; 260 261 End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(), 262 ClauseUses.begin(), ClauseUses.end(), Result.begin()); 263 264 // If the set of defs and uses intersect then we cannot add this instruction 265 // to the clause, so we have a hazard. 266 if (End != Result.begin()) 267 return 1; 268 269 return 0; 270 } 271 272 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 273 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 274 const SIInstrInfo *TII = ST.getInstrInfo(); 275 int WaitStatesNeeded = 0; 276 277 WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD); 278 279 // This SMRD hazard only affects SI. 280 if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS) 281 return WaitStatesNeeded; 282 283 // A read of an SGPR by SMRD instruction requires 4 wait states when the 284 // SGPR was written by a VALU instruction. 285 int SmrdSgprWaitStates = 4; 286 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 287 288 for (const MachineOperand &Use : SMRD->uses()) { 289 if (!Use.isReg()) 290 continue; 291 int WaitStatesNeededForUse = 292 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 293 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 294 } 295 return WaitStatesNeeded; 296 } 297 298 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 299 const SIInstrInfo *TII = ST.getInstrInfo(); 300 301 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 302 return 0; 303 304 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 305 306 // A read of an SGPR by a VMEM instruction requires 5 wait states when the 307 // SGPR was written by a VALU Instruction. 308 int VmemSgprWaitStates = 5; 309 int WaitStatesNeeded = 0; 310 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 311 312 for (const MachineOperand &Use : VMEM->uses()) { 313 if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 314 continue; 315 316 int WaitStatesNeededForUse = 317 VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 318 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 319 } 320 return WaitStatesNeeded; 321 } 322 323 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 324 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 325 326 // Check for DPP VGPR read after VALU VGPR write. 327 int DppVgprWaitStates = 2; 328 int WaitStatesNeeded = 0; 329 330 for (const MachineOperand &Use : DPP->uses()) { 331 if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 332 continue; 333 int WaitStatesNeededForUse = 334 DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg()); 335 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 336 } 337 338 return WaitStatesNeeded; 339 } 340 341 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 342 const SIInstrInfo *TII = ST.getInstrInfo(); 343 344 // v_div_fmas requires 4 wait states after a write to vcc from a VALU 345 // instruction. 346 const int DivFMasWaitStates = 4; 347 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 348 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn); 349 350 return DivFMasWaitStates - WaitStatesNeeded; 351 } 352 353 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 354 const SIInstrInfo *TII = ST.getInstrInfo(); 355 unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 356 357 const int GetRegWaitStates = 2; 358 auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) { 359 return GetRegHWReg == getHWReg(TII, *MI); 360 }; 361 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 362 363 return GetRegWaitStates - WaitStatesNeeded; 364 } 365 366 int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { 367 const SIInstrInfo *TII = ST.getInstrInfo(); 368 unsigned HWReg = getHWReg(TII, *SetRegInstr); 369 370 const int SetRegWaitStates = 371 ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2; 372 auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) { 373 return HWReg == getHWReg(TII, *MI); 374 }; 375 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 376 return SetRegWaitStates - WaitStatesNeeded; 377 } 378 379 int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { 380 if (!MI.mayStore()) 381 return -1; 382 383 const SIInstrInfo *TII = ST.getInstrInfo(); 384 unsigned Opcode = MI.getOpcode(); 385 const MCInstrDesc &Desc = MI.getDesc(); 386 387 int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 388 int VDataRCID = -1; 389 if (VDataIdx != -1) 390 VDataRCID = Desc.OpInfo[VDataIdx].RegClass; 391 392 if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { 393 // For MUBUF/MTBUF instructions this hazard only exists if the 394 // instruction is not using a register in the soffset field. 395 const MachineOperand *SOffset = 396 TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 397 // If we have no soffset operand, then assume this field has been 398 // hardcoded to zero. 399 if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && 400 (!SOffset || !SOffset->isReg())) 401 return VDataIdx; 402 } 403 404 // MIMG instructions create a hazard if they don't use a 256-bit T# and 405 // the store size is greater than 8 bytes and they have more than two bits 406 // of their dmask set. 407 // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. 408 if (TII->isMIMG(MI)) { 409 int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); 410 assert(SRsrcIdx != -1 && 411 AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); 412 (void)SRsrcIdx; 413 } 414 415 if (TII->isFLAT(MI)) { 416 int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::data); 417 if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) 418 return DataIdx; 419 } 420 421 return -1; 422 } 423 424 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { 425 // This checks for the hazard where VMEM instructions that store more than 426 // 8 bytes can have there store data over written by the next instruction. 427 if (!ST.has12DWordStoreHazard()) 428 return 0; 429 430 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 431 const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo(); 432 433 const int VALUWaitStates = 1; 434 int WaitStatesNeeded = 0; 435 436 for (const MachineOperand &Def : VALU->defs()) { 437 if (!TRI->isVGPR(MRI, Def.getReg())) 438 continue; 439 unsigned Reg = Def.getReg(); 440 auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { 441 int DataIdx = createsVALUHazard(*MI); 442 return DataIdx >= 0 && 443 TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); 444 }; 445 int WaitStatesNeededForDef = 446 VALUWaitStates - getWaitStatesSince(IsHazardFn); 447 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 448 } 449 return WaitStatesNeeded; 450 } 451 452 int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { 453 const SIInstrInfo *TII = ST.getInstrInfo(); 454 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 455 const MachineRegisterInfo &MRI = 456 RWLane->getParent()->getParent()->getRegInfo(); 457 458 const MachineOperand *LaneSelectOp = 459 TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); 460 461 if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg())) 462 return 0; 463 464 unsigned LaneSelectReg = LaneSelectOp->getReg(); 465 auto IsHazardFn = [TII] (MachineInstr *MI) { 466 return TII->isVALU(*MI); 467 }; 468 469 const int RWLaneWaitStates = 4; 470 int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn); 471 return RWLaneWaitStates - WaitStatesSince; 472 } 473