1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements hazard recognizers for scheduling on GCN processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "GCNHazardRecognizer.h" 15 #include "AMDGPUSubtarget.h" 16 #include "SIInstrInfo.h" 17 #include "llvm/CodeGen/ScheduleDAG.h" 18 #include "llvm/Support/Debug.h" 19 20 using namespace llvm; 21 22 //===----------------------------------------------------------------------===// 23 // Hazard Recoginizer Implementation 24 //===----------------------------------------------------------------------===// 25 26 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) : 27 CurrCycleInstr(nullptr), 28 MF(MF), 29 ST(MF.getSubtarget<SISubtarget>()) { 30 MaxLookAhead = 5; 31 } 32 33 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) { 34 EmitInstruction(SU->getInstr()); 35 } 36 37 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) { 38 CurrCycleInstr = MI; 39 } 40 41 static bool isDivFMas(unsigned Opcode) { 42 return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64; 43 } 44 45 ScheduleHazardRecognizer::HazardType 46 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 47 MachineInstr *MI = SU->getInstr(); 48 49 if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0) 50 return NoopHazard; 51 52 if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0) 53 return NoopHazard; 54 55 if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0) 56 return NoopHazard; 57 58 if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0) 59 return NoopHazard; 60 61 return NoHazard; 62 } 63 64 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) { 65 return PreEmitNoops(SU->getInstr()); 66 } 67 68 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { 69 if (SIInstrInfo::isSMRD(*MI)) 70 return std::max(0, checkSMRDHazards(MI)); 71 72 if (SIInstrInfo::isVMEM(*MI)) 73 return std::max(0, checkVMEMHazards(MI)); 74 75 if (SIInstrInfo::isDPP(*MI)) 76 return std::max(0, checkDPPHazards(MI)); 77 78 if (isDivFMas(MI->getOpcode())) 79 return std::max(0, checkDivFMasHazards(MI)); 80 81 return 0; 82 } 83 84 void GCNHazardRecognizer::EmitNoop() { 85 EmittedInstrs.push_front(nullptr); 86 } 87 88 void GCNHazardRecognizer::AdvanceCycle() { 89 90 // When the scheduler detects a stall, it will call AdvanceCycle() without 91 // emitting any instructions. 92 if (!CurrCycleInstr) 93 return; 94 95 const SIInstrInfo *TII = ST.getInstrInfo(); 96 unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr); 97 98 // Keep track of emitted instructions 99 EmittedInstrs.push_front(CurrCycleInstr); 100 101 // Add a nullptr for each additional wait state after the first. Make sure 102 // not to add more than getMaxLookAhead() items to the list, since we 103 // truncate the list to that size right after this loop. 104 for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead()); 105 i < e; ++i) { 106 EmittedInstrs.push_front(nullptr); 107 } 108 109 // getMaxLookahead() is the largest number of wait states we will ever need 110 // to insert, so there is no point in keeping track of more than that many 111 // wait states. 112 EmittedInstrs.resize(getMaxLookAhead()); 113 114 CurrCycleInstr = nullptr; 115 } 116 117 void GCNHazardRecognizer::RecedeCycle() { 118 llvm_unreachable("hazard recognizer does not support bottom-up scheduling."); 119 } 120 121 //===----------------------------------------------------------------------===// 122 // Helper Functions 123 //===----------------------------------------------------------------------===// 124 125 int GCNHazardRecognizer::getWaitStatesSinceDef( 126 unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) { 127 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 128 129 int WaitStates = -1; 130 for (MachineInstr *MI : EmittedInstrs) { 131 ++WaitStates; 132 if (!MI || !IsHazardDef(MI)) 133 continue; 134 if (MI->modifiesRegister(Reg, TRI)) 135 return WaitStates; 136 } 137 return std::numeric_limits<int>::max(); 138 } 139 140 //===----------------------------------------------------------------------===// 141 // No-op Hazard Detection 142 //===----------------------------------------------------------------------===// 143 144 static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops, 145 std::set<unsigned> &Set) { 146 for (const MachineOperand &Op : Ops) { 147 if (Op.isReg()) 148 Set.insert(Op.getReg()); 149 } 150 } 151 152 int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) { 153 // SMEM soft clause are only present on VI+ 154 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 155 return 0; 156 157 // A soft-clause is any group of consecutive SMEM instructions. The 158 // instructions in this group may return out of order and/or may be 159 // replayed (i.e. the same instruction issued more than once). 160 // 161 // In order to handle these situations correctly we need to make sure 162 // that when a clause has more than one instruction, no instruction in the 163 // clause writes to a register that is read another instruction in the clause 164 // (including itself). If we encounter this situaion, we need to break the 165 // clause by inserting a non SMEM instruction. 166 167 std::set<unsigned> ClauseDefs; 168 std::set<unsigned> ClauseUses; 169 170 for (MachineInstr *MI : EmittedInstrs) { 171 172 // When we hit a non-SMEM instruction then we have passed the start of the 173 // clause and we can stop. 174 if (!MI || !SIInstrInfo::isSMRD(*MI)) 175 break; 176 177 addRegsToSet(MI->defs(), ClauseDefs); 178 addRegsToSet(MI->uses(), ClauseUses); 179 } 180 181 if (ClauseDefs.empty()) 182 return 0; 183 184 // FIXME: When we support stores, we need to make sure not to put loads and 185 // stores in the same clause if they use the same address. For now, just 186 // start a new clause whenever we see a store. 187 if (SMEM->mayStore()) 188 return 1; 189 190 addRegsToSet(SMEM->defs(), ClauseDefs); 191 addRegsToSet(SMEM->uses(), ClauseUses); 192 193 std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size())); 194 std::vector<unsigned>::iterator End; 195 196 End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(), 197 ClauseUses.begin(), ClauseUses.end(), Result.begin()); 198 199 // If the set of defs and uses intersect then we cannot add this instruction 200 // to the clause, so we have a hazard. 201 if (End != Result.begin()) 202 return 1; 203 204 return 0; 205 } 206 207 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) { 208 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 209 const SIInstrInfo *TII = ST.getInstrInfo(); 210 int WaitStatesNeeded = 0; 211 212 WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD); 213 214 // This SMRD hazard only affects SI. 215 if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS) 216 return WaitStatesNeeded; 217 218 // A read of an SGPR by SMRD instruction requires 4 wait states when the 219 // SGPR was written by a VALU instruction. 220 int SmrdSgprWaitStates = 4; 221 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 222 223 for (const MachineOperand &Use : SMRD->uses()) { 224 if (!Use.isReg()) 225 continue; 226 int WaitStatesNeededForUse = 227 SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 228 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 229 } 230 return WaitStatesNeeded; 231 } 232 233 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) { 234 const SIInstrInfo *TII = ST.getInstrInfo(); 235 236 if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) 237 return 0; 238 239 const SIRegisterInfo &TRI = TII->getRegisterInfo(); 240 241 // A read of an SGPR by a VMEM instruction requires 5 wait states when the 242 // SGPR was written by a VALU Instruction. 243 int VmemSgprWaitStates = 5; 244 int WaitStatesNeeded = 0; 245 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 246 247 for (const MachineOperand &Use : VMEM->uses()) { 248 if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg())) 249 continue; 250 251 int WaitStatesNeededForUse = 252 VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn); 253 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 254 } 255 return WaitStatesNeeded; 256 } 257 258 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) { 259 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 260 261 // Check for DPP VGPR read after VALU VGPR write. 262 int DppVgprWaitStates = 2; 263 int WaitStatesNeeded = 0; 264 265 for (const MachineOperand &Use : DPP->uses()) { 266 if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg())) 267 continue; 268 int WaitStatesNeededForUse = 269 DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg()); 270 WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); 271 } 272 273 return WaitStatesNeeded; 274 } 275 276 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) { 277 const SIInstrInfo *TII = ST.getInstrInfo(); 278 279 // v_div_fmas requires 4 wait states after a write to vcc from a VALU 280 // instruction. 281 const int DivFMasWaitStates = 4; 282 auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); }; 283 int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn); 284 285 return DivFMasWaitStates - WaitStatesNeeded; 286 } 287