1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements hazard recognizers for scheduling on GCN processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "GCNHazardRecognizer.h" 15 #include "AMDGPUSubtarget.h" 16 #include "SIInstrInfo.h" 17 #include "llvm/CodeGen/ScheduleDAG.h" 18 #include "llvm/Support/Debug.h" 19 20 using namespace llvm; 21 22 //===----------------------------------------------------------------------===// 23 // Hazard Recoginizer Implementation 24 //===----------------------------------------------------------------------===// 25 26 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 27 CurrCycleInstr(nullptr), 28 MF(MF), 29 ST(MF.getSubtarget<SISubtarget>()) { 30 MaxLookAhead = 5; 31 } 32 33 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 34 EmitInstruction(SU->getInstr()); 35 } 36 37 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 38 CurrCycleInstr = MI; 39 } 40 41 static bool isDivFMas(unsigned Opcode) { 42 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64; 43 } 44 45 static bool isSGetReg(unsigned Opcode) { 46 return Opcode == AMDGPU::S_GETREG_B32; 47 } 48 49 static bool isSSetReg(unsigned Opcode) { 50 return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32; 51 } 52 53 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { 54 55 const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, 56 AMDGPU::OpName::simm16); 57 return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_; 58 } 59 60 ScheduleHazardRecognizer::HazardType 61 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 62 MachineInstr *MI = SU->getInstr(); 63 64 if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 65 return NoopHazard; 66 67 if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) 68 return NoopHazard; 69 70 if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0) 71 return NoopHazard; 72 73 if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 74 return NoopHazard; 75 76 if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 77 return NoopHazard; 78 79 if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0) 80 return NoopHazard; 81 82 if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0) 83 return NoopHazard; 84 85 return NoHazard; 86 } 87 88 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) { 89 return PreEmitNoops(SU->getInstr()); 90 } 91 92 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 93 if (SIInstrInfo::isSMRD(*MI)) 94 return std::max(0, checkSMRDHazards(MI)); 95 96 if (SIInstrInfo::isVALU(*MI)) { 97 int WaitStates = std::max(0, checkVALUHazards(MI)); 98 99 if (SIInstrInfo::isVMEM(*MI)) 100 WaitStates = std::max(WaitStates, checkVMEMHazards(MI)); 101 102 if (SIInstrInfo::isDPP(*MI)) 103 WaitStates = std::max(WaitStates, checkDPPHazards(MI)); 104 105 if (isDivFMas(MI->getOpcode())) 106 WaitStates = std::max(WaitStates, checkDivFMasHazards(MI)); 107 108 return WaitStates; 109 } 110 111 if (isSGetReg(MI->getOpcode())) 112 return std::max(0, checkGetRegHazards(MI)); 113 114 if (isSSetReg(MI->getOpcode())) 115 return std::max(0, checkSetRegHazards(MI)); 116 117 return 0; 118 } 119 120 void GCNHazardRecognizer::EmitNoop() { 121 EmittedInstrs.push_front(nullptr); 122 } 123 124 void GCNHazardRecognizer::AdvanceCycle() { 125 126 // When the scheduler detects a stall, it will call AdvanceCycle() without 127 // emitting any instructions. 128 if (!CurrCycleInstr) 129 return; 130 131 const SIInstrInfo *TII = ST.getInstrInfo(); 132 unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr); 133 134 // Keep track of emitted instructions 135 EmittedInstrs.push_front(CurrCycleInstr); 136 137 // Add a nullptr for each additional wait state after the first. Make sure 138 // not to add more than getMaxLookAhead() items to the list, since we 139 // truncate the list to that size right after this loop. 140 for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 141 i < e; ++i) { 142 EmittedInstrs.push_front(nullptr); 143 } 144 145 // getMaxLookahead() is the largest number of wait states we will ever need 146 // to insert, so there is no point in keeping track of more than that many 147 // wait states. 148 EmittedInstrs.resize(getMaxLookAhead()); 149 150 CurrCycleInstr = nullptr; 151 } 152 153 void GCNHazardRecognizer::RecedeCycle() { 154 llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 155 } 156 157 //===----------------------------------------------------------------------===// 158 // Helper Functions 159 //===----------------------------------------------------------------------===// 160 161 int GCNHazardRecognizer::getWaitStatesSince( 162 function_ref<bool(MachineInstr *)> IsHazard) { 163 164 int WaitStates = -1; 165 for (MachineInstr *MI : EmittedInstrs) { 166 ++WaitStates; 167 if (!MI || !IsHazard(MI)) 168 continue; 169 return WaitStates; 170 } 171 return std::numeric_limits<int>::max(); 172 } 173 174 int GCNHazardRecognizer::getWaitStatesSinceDef( 175 unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { 176 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 177 178 auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) { 179 return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI); 180 }; 181 182 return getWaitStatesSince(IsHazardFn); 183 } 184 185 int GCNHazardRecognizer::getWaitStatesSinceSetReg( 186 function_ref<bool(MachineInstr *)> IsHazard) { 187 188 auto IsHazardFn = [IsHazard] (MachineInstr *MI) { 189 return isSSetReg(MI->getOpcode()) && IsHazard(MI); 190 }; 191 192 return getWaitStatesSince(IsHazardFn); 193 } 194 195 //===----------------------------------------------------------------------===// 196 // No-op Hazard Detection 197 //===----------------------------------------------------------------------===// 198 199 static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops, 200 std::set<unsigned> &Set) { 201 for (const MachineOperand &Op : Ops) { 202 if (Op.isReg()) 203 Set.insert(Op.getReg()); 204 } 205 } 206 207 int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { 208 // SMEM soft clause are only present on VI+ 209 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 210 return 0; 211 212 // A soft-clause is any group of consecutive SMEM instructions. The 213 // instructions in this group may return out of order and/or may be 214 // replayed (i.e. the same instruction issued more than once). 215 // 216 // In order to handle these situations correctly we need to make sure 217 // that when a clause has more than one instruction, no instruction in the 218 // clause writes to a register that is read another instruction in the clause 219 // (including itself). If we encounter this situaion, we need to break the 220 // clause by inserting a non SMEM instruction. 221 222 std::set<unsigned> ClauseDefs; 223 std::set<unsigned> ClauseUses; 224 225 for (MachineInstr *MI : EmittedInstrs) { 226 227 // When we hit a non-SMEM instruction then we have passed the start of the 228 // clause and we can stop. 229 if (!MI || !SIInstrInfo::isSMRD(*MI)) 230 break; 231 232 addRegsToSet(MI->defs(), ClauseDefs); 233 addRegsToSet(MI->uses(), ClauseUses); 234 } 235 236 if (ClauseDefs.empty()) 237 return 0; 238 239 // FIXME: When we support stores, we need to make sure not to put loads and 240 // stores in the same clause if they use the same address. For now, just 241 // start a new clause whenever we see a store. 242 if (SMEM->mayStore()) 243 return 1; 244 245 addRegsToSet(SMEM->defs(), ClauseDefs); 246 addRegsToSet(SMEM->uses(), ClauseUses); 247 248 std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size())); 249 std::vector<unsigned>::iterator End; 250 251 End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(), 252 ClauseUses.begin(), ClauseUses.end(), Result.begin()); 253 254 // If the set of defs and uses intersect then we cannot add this instruction 255 // to the clause, so we have a hazard. 256 if (End != Result.begin()) 257 return 1; 258 259 return 0; 260 } 261 262 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 263 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 264 const SIInstrInfo *TII = ST.getInstrInfo(); 265 int WaitStatesNeeded = 0; 266 267 WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD); 268 269 // This SMRD hazard only affects SI. 270 if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS) 271 return WaitStatesNeeded; 272 273 // A read of an SGPR by SMRD instruction requires 4 wait states when the 274 // SGPR was written by a VALU instruction. 275 int SmrdSgprWaitStates = 4; 276 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 277 278 for (const MachineOperand &Use : SMRD->uses()) { 279 if (!Use.isReg()) 280 continue; 281 int WaitStatesNeededForUse = 282 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 283 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 284 } 285 return WaitStatesNeeded; 286 } 287 288 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 289 const SIInstrInfo *TII = ST.getInstrInfo(); 290 291 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 292 return 0; 293 294 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 295 296 // A read of an SGPR by a VMEM instruction requires 5 wait states when the 297 // SGPR was written by a VALU Instruction. 298 int VmemSgprWaitStates = 5; 299 int WaitStatesNeeded = 0; 300 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 301 302 for (const MachineOperand &Use : VMEM->uses()) { 303 if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 304 continue; 305 306 int WaitStatesNeededForUse = 307 VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 308 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 309 } 310 return WaitStatesNeeded; 311 } 312 313 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 314 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 315 316 // Check for DPP VGPR read after VALU VGPR write. 317 int DppVgprWaitStates = 2; 318 int WaitStatesNeeded = 0; 319 320 for (const MachineOperand &Use : DPP->uses()) { 321 if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 322 continue; 323 int WaitStatesNeededForUse = 324 DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg()); 325 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 326 } 327 328 return WaitStatesNeeded; 329 } 330 331 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 332 const SIInstrInfo *TII = ST.getInstrInfo(); 333 334 // v_div_fmas requires 4 wait states after a write to vcc from a VALU 335 // instruction. 336 const int DivFMasWaitStates = 4; 337 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 338 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn); 339 340 return DivFMasWaitStates - WaitStatesNeeded; 341 } 342 343 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) { 344 const SIInstrInfo *TII = ST.getInstrInfo(); 345 unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr); 346 347 const int GetRegWaitStates = 2; 348 auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) { 349 return GetRegHWReg == getHWReg(TII, *MI); 350 }; 351 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 352 353 return GetRegWaitStates - WaitStatesNeeded; 354 } 355 356 int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) { 357 const SIInstrInfo *TII = ST.getInstrInfo(); 358 unsigned HWReg = getHWReg(TII, *SetRegInstr); 359 360 const int SetRegWaitStates = 361 ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2; 362 auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) { 363 return HWReg == getHWReg(TII, *MI); 364 }; 365 int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn); 366 return SetRegWaitStates - WaitStatesNeeded; 367 } 368 369 int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { 370 if (!MI.mayStore()) 371 return -1; 372 373 const SIInstrInfo *TII = ST.getInstrInfo(); 374 unsigned Opcode = MI.getOpcode(); 375 const MCInstrDesc &Desc = MI.getDesc(); 376 377 int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata); 378 int VDataRCID = -1; 379 if (VDataIdx != -1) 380 VDataRCID = Desc.OpInfo[VDataIdx].RegClass; 381 382 if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) { 383 // For MUBUF/MTBUF instructions this hazard only exists if the 384 // instruction is not using a register in the soffset field. 385 const MachineOperand *SOffset = 386 TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 387 // If we have no soffset operand, then assume this field has been 388 // hardcoded to zero. 389 if (AMDGPU::getRegBitWidth(VDataRCID) > 64 && 390 (!SOffset || !SOffset->isReg())) 391 return VDataIdx; 392 } 393 394 // MIMG instructions create a hazard if they don't use a 256-bit T# and 395 // the store size is greater than 8 bytes and they have more than two bits 396 // of their dmask set. 397 // All our MIMG definitions use a 256-bit T#, so we can skip checking for them. 398 if (TII->isMIMG(MI)) { 399 int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc); 400 assert(SRsrcIdx != -1 && 401 AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256); 402 (void)SRsrcIdx; 403 } 404 405 if (TII->isFLAT(MI)) { 406 int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::data); 407 if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64) 408 return DataIdx; 409 } 410 411 return -1; 412 } 413 414 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { 415 // This checks for the hazard where VMEM instructions that store more than 416 // 8 bytes can have there store data over written by the next instruction. 417 if (!ST.has12DWordStoreHazard()) 418 return 0; 419 420 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 421 const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo(); 422 423 const int VALUWaitStates = 1; 424 int WaitStatesNeeded = 0; 425 426 for (const MachineOperand &Def : VALU->defs()) { 427 if (!TRI->isVGPR(MRI, Def.getReg())) 428 continue; 429 unsigned Reg = Def.getReg(); 430 auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { 431 int DataIdx = createsVALUHazard(*MI); 432 return DataIdx >= 0 && 433 TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); 434 }; 435 int WaitStatesNeededForDef = 436 VALUWaitStates - getWaitStatesSince(IsHazardFn); 437 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); 438 } 439 return WaitStatesNeeded; 440 } 441