xref: /llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (revision 04051b5fad96e340d6de5a028356530f881b2bcc)
1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements hazard recognizers for scheduling on GCN processors.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "GCNHazardRecognizer.h"
15 #include "AMDGPUSubtarget.h"
16 #include "SIInstrInfo.h"
17 #include "llvm/CodeGen/ScheduleDAG.h"
18 #include "llvm/Support/Debug.h"
19 
20 using namespace llvm;
21 
22 //===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
24 //===----------------------------------------------------------------------===//
25 
26 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
27   CurrCycleInstr(nullptr),
28   MF(MF),
29   ST(MF.getSubtarget<SISubtarget>()) {
30   MaxLookAhead = 5;
31 }
32 
33 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
34   EmitInstruction(SU->getInstr());
35 }
36 
37 void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
38   CurrCycleInstr = MI;
39 }
40 
41 static bool isDivFMas(unsigned Opcode) {
42   return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
43 }
44 
45 static bool isSGetReg(unsigned Opcode) {
46   return Opcode == AMDGPU::S_GETREG_B32;
47 }
48 
49 static bool isSSetReg(unsigned Opcode) {
50   return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
51 }
52 
53 static bool isRWLane(unsigned Opcode) {
54   return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
55 }
56 
57 static bool getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
58 
59   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
60                                                      AMDGPU::OpName::simm16);
61   return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
62 }
63 
64 ScheduleHazardRecognizer::HazardType
65 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
66   MachineInstr *MI = SU->getInstr();
67 
68   if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
69     return NoopHazard;
70 
71   if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
72     return NoopHazard;
73 
74   if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
75     return NoopHazard;
76 
77   if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
78     return NoopHazard;
79 
80   if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
81     return NoopHazard;
82 
83   if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
84     return NoopHazard;
85 
86   if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
87     return NoopHazard;
88 
89   if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
90     return NoopHazard;
91 
92   return NoHazard;
93 }
94 
95 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
96   return PreEmitNoops(SU->getInstr());
97 }
98 
99 unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
100   if (SIInstrInfo::isSMRD(*MI))
101     return std::max(0, checkSMRDHazards(MI));
102 
103   if (SIInstrInfo::isVALU(*MI)) {
104     int WaitStates = std::max(0, checkVALUHazards(MI));
105 
106     if (SIInstrInfo::isVMEM(*MI))
107       WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
108 
109     if (SIInstrInfo::isDPP(*MI))
110       WaitStates = std::max(WaitStates, checkDPPHazards(MI));
111 
112     if (isDivFMas(MI->getOpcode()))
113       WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
114 
115     if (isRWLane(MI->getOpcode()))
116       WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
117 
118     return WaitStates;
119   }
120 
121   if (isSGetReg(MI->getOpcode()))
122     return std::max(0, checkGetRegHazards(MI));
123 
124   if (isSSetReg(MI->getOpcode()))
125     return std::max(0, checkSetRegHazards(MI));
126 
127   return 0;
128 }
129 
130 void GCNHazardRecognizer::EmitNoop() {
131   EmittedInstrs.push_front(nullptr);
132 }
133 
134 void GCNHazardRecognizer::AdvanceCycle() {
135 
136   // When the scheduler detects a stall, it will call AdvanceCycle() without
137   // emitting any instructions.
138   if (!CurrCycleInstr)
139     return;
140 
141   const SIInstrInfo *TII = ST.getInstrInfo();
142   unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
143 
144   // Keep track of emitted instructions
145   EmittedInstrs.push_front(CurrCycleInstr);
146 
147   // Add a nullptr for each additional wait state after the first.  Make sure
148   // not to add more than getMaxLookAhead() items to the list, since we
149   // truncate the list to that size right after this loop.
150   for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
151        i < e; ++i) {
152     EmittedInstrs.push_front(nullptr);
153   }
154 
155   // getMaxLookahead() is the largest number of wait states we will ever need
156   // to insert, so there is no point in keeping track of more than that many
157   // wait states.
158   EmittedInstrs.resize(getMaxLookAhead());
159 
160   CurrCycleInstr = nullptr;
161 }
162 
163 void GCNHazardRecognizer::RecedeCycle() {
164   llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
165 }
166 
167 //===----------------------------------------------------------------------===//
168 // Helper Functions
169 //===----------------------------------------------------------------------===//
170 
171 int GCNHazardRecognizer::getWaitStatesSince(
172     function_ref<bool(MachineInstr *)> IsHazard) {
173 
174   int WaitStates = -1;
175   for (MachineInstr *MI : EmittedInstrs) {
176     ++WaitStates;
177     if (!MI || !IsHazard(MI))
178       continue;
179     return WaitStates;
180   }
181   return std::numeric_limits<int>::max();
182 }
183 
184 int GCNHazardRecognizer::getWaitStatesSinceDef(
185     unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
186   const SIRegisterInfo *TRI = ST.getRegisterInfo();
187 
188   auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
189     return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
190   };
191 
192   return getWaitStatesSince(IsHazardFn);
193 }
194 
195 int GCNHazardRecognizer::getWaitStatesSinceSetReg(
196     function_ref<bool(MachineInstr *)> IsHazard) {
197 
198   auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
199     return isSSetReg(MI->getOpcode()) && IsHazard(MI);
200   };
201 
202   return getWaitStatesSince(IsHazardFn);
203 }
204 
205 //===----------------------------------------------------------------------===//
206 // No-op Hazard Detection
207 //===----------------------------------------------------------------------===//
208 
209 static void addRegsToSet(iterator_range<MachineInstr::const_mop_iterator> Ops,
210                          std::set<unsigned> &Set) {
211   for (const MachineOperand &Op : Ops) {
212     if (Op.isReg())
213       Set.insert(Op.getReg());
214   }
215 }
216 
217 int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
218   // SMEM soft clause are only present on VI+
219   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
220     return 0;
221 
222   // A soft-clause is any group of consecutive SMEM instructions.  The
223   // instructions in this group may return out of order and/or may be
224   // replayed (i.e. the same instruction issued more than once).
225   //
226   // In order to handle these situations correctly we need to make sure
227   // that when a clause has more than one instruction, no instruction in the
228   // clause writes to a register that is read another instruction in the clause
229   // (including itself). If we encounter this situaion, we need to break the
230   // clause by inserting a non SMEM instruction.
231 
232   std::set<unsigned> ClauseDefs;
233   std::set<unsigned> ClauseUses;
234 
235   for (MachineInstr *MI : EmittedInstrs) {
236 
237     // When we hit a non-SMEM instruction then we have passed the start of the
238     // clause and we can stop.
239     if (!MI || !SIInstrInfo::isSMRD(*MI))
240       break;
241 
242     addRegsToSet(MI->defs(), ClauseDefs);
243     addRegsToSet(MI->uses(), ClauseUses);
244   }
245 
246   if (ClauseDefs.empty())
247     return 0;
248 
249   // FIXME: When we support stores, we need to make sure not to put loads and
250   // stores in the same clause if they use the same address.  For now, just
251   // start a new clause whenever we see a store.
252   if (SMEM->mayStore())
253     return 1;
254 
255   addRegsToSet(SMEM->defs(), ClauseDefs);
256   addRegsToSet(SMEM->uses(), ClauseUses);
257 
258   std::vector<unsigned> Result(std::max(ClauseDefs.size(), ClauseUses.size()));
259   std::vector<unsigned>::iterator End;
260 
261   End = std::set_intersection(ClauseDefs.begin(), ClauseDefs.end(),
262                               ClauseUses.begin(), ClauseUses.end(), Result.begin());
263 
264   // If the set of defs and uses intersect then we cannot add this instruction
265   // to the clause, so we have a hazard.
266   if (End != Result.begin())
267     return 1;
268 
269   return 0;
270 }
271 
272 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
273   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
274   const SIInstrInfo *TII = ST.getInstrInfo();
275   int WaitStatesNeeded = 0;
276 
277   WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
278 
279   // This SMRD hazard only affects SI.
280   if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
281     return WaitStatesNeeded;
282 
283   // A read of an SGPR by SMRD instruction requires 4 wait states when the
284   // SGPR was written by a VALU instruction.
285   int SmrdSgprWaitStates = 4;
286   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
287 
288   for (const MachineOperand &Use : SMRD->uses()) {
289     if (!Use.isReg())
290       continue;
291     int WaitStatesNeededForUse =
292         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
293     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
294   }
295   return WaitStatesNeeded;
296 }
297 
298 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
299   const SIInstrInfo *TII = ST.getInstrInfo();
300 
301   if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
302     return 0;
303 
304   const SIRegisterInfo &TRI = TII->getRegisterInfo();
305 
306   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
307   // SGPR was written by a VALU Instruction.
308   int VmemSgprWaitStates = 5;
309   int WaitStatesNeeded = 0;
310   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
311 
312   for (const MachineOperand &Use : VMEM->uses()) {
313     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
314       continue;
315 
316     int WaitStatesNeededForUse =
317         VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
318     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
319   }
320   return WaitStatesNeeded;
321 }
322 
323 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
324   const SIRegisterInfo *TRI = ST.getRegisterInfo();
325 
326   // Check for DPP VGPR read after VALU VGPR write.
327   int DppVgprWaitStates = 2;
328   int WaitStatesNeeded = 0;
329 
330   for (const MachineOperand &Use : DPP->uses()) {
331     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
332       continue;
333     int WaitStatesNeededForUse =
334         DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
335     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
336   }
337 
338   return WaitStatesNeeded;
339 }
340 
341 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
342   const SIInstrInfo *TII = ST.getInstrInfo();
343 
344   // v_div_fmas requires 4 wait states after a write to vcc from a VALU
345   // instruction.
346   const int DivFMasWaitStates = 4;
347   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
348   int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
349 
350   return DivFMasWaitStates - WaitStatesNeeded;
351 }
352 
353 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
354   const SIInstrInfo *TII = ST.getInstrInfo();
355   unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
356 
357   const int GetRegWaitStates = 2;
358   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
359     return GetRegHWReg == getHWReg(TII, *MI);
360   };
361   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
362 
363   return GetRegWaitStates - WaitStatesNeeded;
364 }
365 
366 int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
367   const SIInstrInfo *TII = ST.getInstrInfo();
368   unsigned HWReg = getHWReg(TII, *SetRegInstr);
369 
370   const int SetRegWaitStates =
371       ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
372   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
373     return HWReg == getHWReg(TII, *MI);
374   };
375   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
376   return SetRegWaitStates - WaitStatesNeeded;
377 }
378 
379 int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
380   if (!MI.mayStore())
381     return -1;
382 
383   const SIInstrInfo *TII = ST.getInstrInfo();
384   unsigned Opcode = MI.getOpcode();
385   const MCInstrDesc &Desc = MI.getDesc();
386 
387   int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
388   int VDataRCID = -1;
389   if (VDataIdx != -1)
390     VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
391 
392   if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
393     // For MUBUF/MTBUF instructions this hazard only exists if the
394     // instruction is not using a register in the soffset field.
395     const MachineOperand *SOffset =
396         TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
397     // If we have no soffset operand, then assume this field has been
398     // hardcoded to zero.
399     if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
400         (!SOffset || !SOffset->isReg()))
401       return VDataIdx;
402   }
403 
404   // MIMG instructions create a hazard if they don't use a 256-bit T# and
405   // the store size is greater than 8 bytes and they have more than two bits
406   // of their dmask set.
407   // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
408   if (TII->isMIMG(MI)) {
409     int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
410     assert(SRsrcIdx != -1 &&
411            AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
412     (void)SRsrcIdx;
413   }
414 
415   if (TII->isFLAT(MI)) {
416     int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::data);
417     if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
418       return DataIdx;
419   }
420 
421   return -1;
422 }
423 
424 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
425   // This checks for the hazard where VMEM instructions that store more than
426   // 8 bytes can have there store data over written by the next instruction.
427   if (!ST.has12DWordStoreHazard())
428     return 0;
429 
430   const SIRegisterInfo *TRI = ST.getRegisterInfo();
431   const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo();
432 
433   const int VALUWaitStates = 1;
434   int WaitStatesNeeded = 0;
435 
436   for (const MachineOperand &Def : VALU->defs()) {
437     if (!TRI->isVGPR(MRI, Def.getReg()))
438       continue;
439     unsigned Reg = Def.getReg();
440     auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
441       int DataIdx = createsVALUHazard(*MI);
442       return DataIdx >= 0 &&
443              TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
444     };
445     int WaitStatesNeededForDef =
446         VALUWaitStates - getWaitStatesSince(IsHazardFn);
447     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
448   }
449   return WaitStatesNeeded;
450 }
451 
452 int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
453   const SIInstrInfo *TII = ST.getInstrInfo();
454   const SIRegisterInfo *TRI = ST.getRegisterInfo();
455   const MachineRegisterInfo &MRI =
456       RWLane->getParent()->getParent()->getRegInfo();
457 
458   const MachineOperand *LaneSelectOp =
459       TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
460 
461   if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
462     return 0;
463 
464   unsigned LaneSelectReg = LaneSelectOp->getReg();
465   auto IsHazardFn = [TII] (MachineInstr *MI) {
466     return TII->isVALU(*MI);
467   };
468 
469   const int RWLaneWaitStates = 4;
470   int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
471   return RWLaneWaitStates - WaitStatesSince;
472 }
473