xref: /llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (revision 2946cd701067404b99c39fb29dc9c74bd7193eb3)
1 //===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hazard recognizers for scheduling on GCN processors.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "GCNHazardRecognizer.h"
14 #include "AMDGPUSubtarget.h"
15 #include "SIDefines.h"
16 #include "SIInstrInfo.h"
17 #include "SIRegisterInfo.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "Utils/AMDGPUBaseInfo.h"
20 #include "llvm/ADT/iterator_range.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/MachineOperand.h"
24 #include "llvm/CodeGen/ScheduleDAG.h"
25 #include "llvm/MC/MCInstrDesc.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <algorithm>
28 #include <cassert>
29 #include <limits>
30 #include <set>
31 #include <vector>
32 
33 using namespace llvm;
34 
35 //===----------------------------------------------------------------------===//
36 // Hazard Recognizer Implementation
37 //===----------------------------------------------------------------------===//
38 
// Construct a hazard recognizer for MF. The clause-tracking bit vectors are
// sized to the target's register-unit count so any physical register can be
// recorded.
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  // Largest number of wait states any check*Hazards routine below can
  // request; the scheduler uses this to bound its lookahead window.
  MaxLookAhead = 5;
}
49 
50 void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
51   EmitInstruction(SU->getInstr());
52 }
53 
// Record the instruction issued this cycle; AdvanceCycle() consumes it and
// pushes it onto the emitted-instruction history.
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}
57 
58 static bool isDivFMas(unsigned Opcode) {
59   return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
60 }
61 
62 static bool isSGetReg(unsigned Opcode) {
63   return Opcode == AMDGPU::S_GETREG_B32;
64 }
65 
66 static bool isSSetReg(unsigned Opcode) {
67   return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
68 }
69 
70 static bool isRWLane(unsigned Opcode) {
71   return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
72 }
73 
74 static bool isRFE(unsigned Opcode) {
75   return Opcode == AMDGPU::S_RFE_B64;
76 }
77 
78 static bool isSMovRel(unsigned Opcode) {
79   switch (Opcode) {
80   case AMDGPU::S_MOVRELS_B32:
81   case AMDGPU::S_MOVRELS_B64:
82   case AMDGPU::S_MOVRELD_B32:
83   case AMDGPU::S_MOVRELD_B64:
84     return true;
85   default:
86     return false;
87   }
88 }
89 
90 static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
91                                     const MachineInstr &MI) {
92   if (TII.isAlwaysGDS(MI.getOpcode()))
93     return true;
94 
95   switch (MI.getOpcode()) {
96   case AMDGPU::S_SENDMSG:
97   case AMDGPU::S_SENDMSGHALT:
98   case AMDGPU::S_TTRACEDATA:
99     return true;
100   // These DS opcodes don't support GDS.
101   case AMDGPU::DS_NOP:
102   case AMDGPU::DS_PERMUTE_B32:
103   case AMDGPU::DS_BPERMUTE_B32:
104     return false;
105   default:
106     if (TII.isDS(MI.getOpcode())) {
107       int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
108                                            AMDGPU::OpName::gds);
109       if (MI.getOperand(GDS).getImm())
110         return true;
111     }
112     return false;
113   }
114 }
115 
116 static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
117   const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
118                                                      AMDGPU::OpName::simm16);
119   return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
120 }
121 
// Top-of-cycle query from the scheduler: report NoopHazard if issuing SU's
// instruction now would violate any of the wait-state requirements checked
// below, NoHazard otherwise. Each check*Hazards routine returns the number of
// wait states still required; any positive value means a hazard.
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  // M0-reading instructions (v_interp / s_movrel family) on subtargets with
  // that hazard.
  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  // s_sendmsg / trace-data / GDS instructions also read M0 on some
  // subtargets.
  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  // Inline asm may contain anything; check its register defs explicitly.
  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}
173 
174 unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
175   return PreEmitNoops(SU->getInstr());
176 }
177 
// Return the number of no-ops that must be emitted before MI to satisfy all
// applicable wait-state requirements. Mirrors the checks in getHazardType():
// some categories return immediately (SMRD, inline asm, the s_*reg and
// M0-related cases), while the others accumulate into WaitStatesNeeded via
// std::max so MI gets the largest requirement of any matching category.
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  // checkAnyInstHazards may return a negative slack value; clamp to zero.
  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  return WaitStates;
}
220 
// A no-op consumes one wait state; represent it in the history with a null
// entry so getWaitStatesSince() counts it.
void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}
224 
225 void GCNHazardRecognizer::AdvanceCycle() {
226   // When the scheduler detects a stall, it will call AdvanceCycle() without
227   // emitting any instructions.
228   if (!CurrCycleInstr)
229     return;
230 
231   // Do not track non-instructions which do not affect the wait states.
232   // If included, these instructions can lead to buffer overflow such that
233   // detectable hazards are missed.
234   if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
235     return;
236   else if (CurrCycleInstr->isDebugInstr())
237     return;
238 
239   unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
240 
241   // Keep track of emitted instructions
242   EmittedInstrs.push_front(CurrCycleInstr);
243 
244   // Add a nullptr for each additional wait state after the first.  Make sure
245   // not to add more than getMaxLookAhead() items to the list, since we
246   // truncate the list to that size right after this loop.
247   for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
248        i < e; ++i) {
249     EmittedInstrs.push_front(nullptr);
250   }
251 
252   // getMaxLookahead() is the largest number of wait states we will ever need
253   // to insert, so there is no point in keeping track of more than that many
254   // wait states.
255   EmittedInstrs.resize(getMaxLookAhead());
256 
257   CurrCycleInstr = nullptr;
258 }
259 
// Bottom-up scheduling is not supported by this recognizer; the history
// model in AdvanceCycle() only works top-down.
void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}
263 
264 //===----------------------------------------------------------------------===//
265 // Helper Functions
266 //===----------------------------------------------------------------------===//
267 
268 int GCNHazardRecognizer::getWaitStatesSince(
269     function_ref<bool(MachineInstr *)> IsHazard) {
270   int WaitStates = 0;
271   for (MachineInstr *MI : EmittedInstrs) {
272     if (MI) {
273       if (IsHazard(MI))
274         return WaitStates;
275 
276       unsigned Opcode = MI->getOpcode();
277       if (Opcode == AMDGPU::INLINEASM)
278         continue;
279     }
280     ++WaitStates;
281   }
282   return std::numeric_limits<int>::max();
283 }
284 
285 int GCNHazardRecognizer::getWaitStatesSinceDef(
286     unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
287   const SIRegisterInfo *TRI = ST.getRegisterInfo();
288 
289   auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
290     return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
291   };
292 
293   return getWaitStatesSince(IsHazardFn);
294 }
295 
296 int GCNHazardRecognizer::getWaitStatesSinceSetReg(
297     function_ref<bool(MachineInstr *)> IsHazard) {
298   auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
299     return isSSetReg(MI->getOpcode()) && IsHazard(MI);
300   };
301 
302   return getWaitStatesSince(IsHazardFn);
303 }
304 
305 //===----------------------------------------------------------------------===//
306 // No-op Hazard Detection
307 //===----------------------------------------------------------------------===//
308 
309 static void addRegUnits(const SIRegisterInfo &TRI,
310                         BitVector &BV, unsigned Reg) {
311   for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
312     BV.set(*RUI);
313 }
314 
315 static void addRegsToSet(const SIRegisterInfo &TRI,
316                          iterator_range<MachineInstr::const_mop_iterator> Ops,
317                          BitVector &Set) {
318   for (const MachineOperand &Op : Ops) {
319     if (Op.isReg())
320       addRegUnits(TRI, Set, Op.getReg());
321   }
322 }
323 
// Accumulate MI's defs and uses into the current clause's def/use register
// sets (explicit operands only).
void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}
329 
// Returns 1 if appending MEM to the current soft clause would create a
// def/use conflict (so the clause must be broken with a non-memory
// instruction), 0 otherwise.
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clause are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions.  The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure
  // that when a clause has more than one instruction, no instruction in the
  // clause writes to a register that is read by another instruction in the
  // clause (including itself). If we encounter this situation, we need to
  // break the clause by inserting a non SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    // A mix of SMRD and non-SMRD memory ops also ends the clause.
    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  // No defs collected means no conflict is possible.
  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
377 
378 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
379   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
380   int WaitStatesNeeded = 0;
381 
382   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
383 
384   // This SMRD hazard only affects SI.
385   if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
386     return WaitStatesNeeded;
387 
388   // A read of an SGPR by SMRD instruction requires 4 wait states when the
389   // SGPR was written by a VALU instruction.
390   int SmrdSgprWaitStates = 4;
391   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
392   auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
393 
394   bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
395 
396   for (const MachineOperand &Use : SMRD->uses()) {
397     if (!Use.isReg())
398       continue;
399     int WaitStatesNeededForUse =
400         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
401     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
402 
403     // This fixes what appears to be undocumented hardware behavior in SI where
404     // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
405     // needs some number of nops in between. We don't know how many we need, but
406     // let's use 4. This wasn't discovered before probably because the only
407     // case when this happens is when we expand a 64-bit pointer into a full
408     // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
409     // probably never encountered in the closed-source land.
410     if (IsBufferSMRD) {
411       int WaitStatesNeededForUse =
412         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
413                                                    IsBufferHazardDefFn);
414       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
415     }
416   }
417 
418   return WaitStatesNeeded;
419 }
420 
421 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
422   if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
423     return 0;
424 
425   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
426 
427   // A read of an SGPR by a VMEM instruction requires 5 wait states when the
428   // SGPR was written by a VALU Instruction.
429   const int VmemSgprWaitStates = 5;
430   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
431 
432   for (const MachineOperand &Use : VMEM->uses()) {
433     if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
434       continue;
435 
436     int WaitStatesNeededForUse =
437         VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
438     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
439   }
440   return WaitStatesNeeded;
441 }
442 
443 int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
444   const SIRegisterInfo *TRI = ST.getRegisterInfo();
445   const SIInstrInfo *TII = ST.getInstrInfo();
446 
447   // Check for DPP VGPR read after VALU VGPR write and EXEC write.
448   int DppVgprWaitStates = 2;
449   int DppExecWaitStates = 5;
450   int WaitStatesNeeded = 0;
451   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
452 
453   for (const MachineOperand &Use : DPP->uses()) {
454     if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
455       continue;
456     int WaitStatesNeededForUse =
457         DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
458     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
459   }
460 
461   WaitStatesNeeded = std::max(
462       WaitStatesNeeded,
463       DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
464 
465   return WaitStatesNeeded;
466 }
467 
468 int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
469   const SIInstrInfo *TII = ST.getInstrInfo();
470 
471   // v_div_fmas requires 4 wait states after a write to vcc from a VALU
472   // instruction.
473   const int DivFMasWaitStates = 4;
474   auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
475   int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
476 
477   return DivFMasWaitStates - WaitStatesNeeded;
478 }
479 
480 int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
481   const SIInstrInfo *TII = ST.getInstrInfo();
482   unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
483 
484   const int GetRegWaitStates = 2;
485   auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
486     return GetRegHWReg == getHWReg(TII, *MI);
487   };
488   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
489 
490   return GetRegWaitStates - WaitStatesNeeded;
491 }
492 
493 int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
494   const SIInstrInfo *TII = ST.getInstrInfo();
495   unsigned HWReg = getHWReg(TII, *SetRegInstr);
496 
497   const int SetRegWaitStates =
498       ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
499   auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
500     return HWReg == getHWReg(TII, *MI);
501   };
502   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
503   return SetRegWaitStates - WaitStatesNeeded;
504 }
505 
// If MI is a store that is subject to the "store data overwritten by the
// next VALU" hazard, return the operand index of its store-data operand;
// otherwise return -1.
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  // Only stores can create this hazard.
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    // NOTE(review): this assumes every FLAT store has a vdata operand
    // (DataIdx != -1) — confirm against the FLAT instruction definitions.
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}
554 
555 int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
556 						const MachineRegisterInfo &MRI) {
557   // Helper to check for the hazard where VMEM instructions that store more than
558   // 8 bytes can have there store data over written by the next instruction.
559   const SIRegisterInfo *TRI = ST.getRegisterInfo();
560 
561   const int VALUWaitStates = 1;
562   int WaitStatesNeeded = 0;
563 
564   if (!TRI->isVGPR(MRI, Def.getReg()))
565     return WaitStatesNeeded;
566   unsigned Reg = Def.getReg();
567   auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
568     int DataIdx = createsVALUHazard(*MI);
569     return DataIdx >= 0 &&
570     TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
571   };
572   int WaitStatesNeededForDef =
573     VALUWaitStates - getWaitStatesSince(IsHazardFn);
574   WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
575 
576   return WaitStatesNeeded;
577 }
578 
579 int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
580   // This checks for the hazard where VMEM instructions that store more than
581   // 8 bytes can have there store data over written by the next instruction.
582   if (!ST.has12DWordStoreHazard())
583     return 0;
584 
585   const MachineRegisterInfo &MRI = MF.getRegInfo();
586   int WaitStatesNeeded = 0;
587 
588   for (const MachineOperand &Def : VALU->defs()) {
589     WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
590   }
591 
592   return WaitStatesNeeded;
593 }
594 
595 int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
596   // This checks for hazards associated with inline asm statements.
597   // Since inline asms can contain just about anything, we use this
598   // to call/leverage other check*Hazard routines. Note that
599   // this function doesn't attempt to address all possible inline asm
600   // hazards (good luck), but is a collection of what has been
601   // problematic thus far.
602 
603   // see checkVALUHazards()
604   if (!ST.has12DWordStoreHazard())
605     return 0;
606 
607   const MachineRegisterInfo &MRI = MF.getRegInfo();
608   int WaitStatesNeeded = 0;
609 
610   for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
611        I != E; ++I) {
612     const MachineOperand &Op = IA->getOperand(I);
613     if (Op.isReg() && Op.isDef()) {
614       WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
615     }
616   }
617 
618   return WaitStatesNeeded;
619 }
620 
621 int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
622   const SIInstrInfo *TII = ST.getInstrInfo();
623   const SIRegisterInfo *TRI = ST.getRegisterInfo();
624   const MachineRegisterInfo &MRI = MF.getRegInfo();
625 
626   const MachineOperand *LaneSelectOp =
627       TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
628 
629   if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
630     return 0;
631 
632   unsigned LaneSelectReg = LaneSelectOp->getReg();
633   auto IsHazardFn = [TII] (MachineInstr *MI) {
634     return TII->isVALU(*MI);
635   };
636 
637   const int RWLaneWaitStates = 4;
638   int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
639   return RWLaneWaitStates - WaitStatesSince;
640 }
641 
642 int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
643   if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
644     return 0;
645 
646   const SIInstrInfo *TII = ST.getInstrInfo();
647 
648   const int RFEWaitStates = 1;
649 
650   auto IsHazardFn = [TII] (MachineInstr *MI) {
651     return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
652   };
653   int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
654   return RFEWaitStates - WaitStatesNeeded;
655 }
656 
657 int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
658   if (MI->isDebugInstr())
659     return 0;
660 
661   const SIRegisterInfo *TRI = ST.getRegisterInfo();
662   if (!ST.hasSMovFedHazard())
663     return 0;
664 
665   // Check for any instruction reading an SGPR after a write from
666   // s_mov_fed_b32.
667   int MovFedWaitStates = 1;
668   int WaitStatesNeeded = 0;
669 
670   for (const MachineOperand &Use : MI->uses()) {
671     if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
672       continue;
673     auto IsHazardFn = [] (MachineInstr *MI) {
674       return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
675     };
676     int WaitStatesNeededForUse =
677         MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
678     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
679   }
680 
681   return WaitStatesNeeded;
682 }
683 
684 int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
685   const SIInstrInfo *TII = ST.getInstrInfo();
686   const int SMovRelWaitStates = 1;
687   auto IsHazardFn = [TII] (MachineInstr *MI) {
688     return TII->isSALU(*MI);
689   };
690   return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
691 }
692