//===-- GCNHazardRecognizer.cpp - GCN Hazard Recognizer Impls -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>

using namespace llvm;
//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = 5;
}
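
// Note: MaxLookAhead bounds how many emitted wait states are tracked when
// running inside the scheduler. Five covers the deepest wait-state window
// checked in this file (e.g. the 5 wait states required between a VALU SGPR
// write and a VMEM read of that SGPR, or between an EXEC write and DPP).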

void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}

static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}
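
// For example, "ds_add_u32 v0, v1 gds" has its gds operand set and is treated
// as a GDS access here, while ds_permute_b32 has no GDS form and never is.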

static bool isPermlane(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  return Opcode == AMDGPU::V_PERMLANE16_B32 ||
         Opcode == AMDGPU::V_PERMLANEX16_B32;
}

static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}
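
// The simm16 operand of s_getreg/s_setreg packs the hardware register ID
// together with offset and size fields; masking with ID_MASK_ compares only
// the register ID, so accesses to different bit fields of the same hardware
// register are still treated as touching the same register.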

ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();
  if (MI->isBundle())
    return NoHazard;

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) ||
       SIInstrInfo::isFLAT(*MI))
      && checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (checkFPAtomicToDenormModeHazard(MI) > 0)
    return NoopHazard;

  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}

static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
      .addImm(0);
}

void GCNHazardRecognizer::processBundle() {
  MachineBasicBlock::instr_iterator MI =
      std::next(CurrCycleInstr->getIterator());
  MachineBasicBlock::instr_iterator E =
      CurrCycleInstr->getParent()->instr_end();
  // Check bundled MachineInstr's for hazards.
  for (; MI != E && MI->isInsideBundle(); ++MI) {
    CurrCycleInstr = &*MI;
    unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);

    if (IsHazardRecognizerMode)
      fixHazards(CurrCycleInstr);

    for (unsigned i = 0; i < WaitStates; ++i)
      insertNoopInBundle(CurrCycleInstr, TII);

    // It's unnecessary to track more than MaxLookAhead instructions. Since we
    // include the bundled MI directly after, only add a maximum of
    // (MaxLookAhead - 1) noops to EmittedInstrs.
    for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
      EmittedInstrs.push_front(nullptr);

    EmittedInstrs.push_front(CurrCycleInstr);
    EmittedInstrs.resize(MaxLookAhead);
  }
  CurrCycleInstr = nullptr;
}

unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);
  fixHazards(MI);
  CurrCycleInstr = nullptr;
  return W;
}

unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  if (MI->isBundle())
    return 0;

  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));

  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  return WaitStates;
}

void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  // Do not track non-instructions which do not affect the wait states.
  // If included, these instructions can push real instructions out of the
  // fixed-size tracking buffer, causing detectable hazards to be missed.
  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
      CurrCycleInstr->isKill())
    return;

  if (CurrCycleInstr->isBundle()) {
    processBundle();
    return;
  }

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first. Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookAhead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns the minimum number of wait states since \p I, walking all
// predecessors. Only scans until \p IsExpired returns true. Can only be run
// in hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->instr_rend(); I != E; ++I) {
    // Don't add WaitStates for parent BUNDLE instructions.
    if (I->isBundle())
      continue;

    if (IsHazard(&*I))
      return WaitStates;

    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}
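
// Note on the search above: wait states accumulate along each backward path
// from the starting point. When a block has multiple predecessors, the path
// with the fewest accumulated wait states is the binding one, since it is the
// soonest the hazard-producing instruction may have executed. A path on which
// IsExpired fires before a hazard is found contributes "no hazard" (INT_MAX).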

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}

int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}
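
// Note: when IsHazardRecognizerMode is set (the MachineInstr form of
// PreEmitNoops), the search walks the actual instruction stream, including
// across basic-block boundaries; otherwise it can only consult EmittedInstrs,
// the bounded history of instructions already issued this scheduling region.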

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
                                               int Limit) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

static void addRegUnits(const SIRegisterInfo &TRI,
                        BitVector &BV, unsigned Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands?
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if XNACK is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions. The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non-SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if
  // they use the same address. For now, just start a new clause whenever we
  // see a store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}
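
// Illustrative clause hazard (hypothetical sequence): with XNACK enabled, in
//   s_load_dwordx2 s[4:5], s[0:1], 0x0
//   s_load_dword   s6, s[4:5], 0x0
// the second load reads s[4:5], which the first load in the same soft clause
// defines, so the checker reports a hazard and a non-SMEM instruction is
// inserted to break the clause.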

int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  int WaitStatesNeeded = checkSoftClauseHazards(SMRD);

  // This SMRD hazard only affects SI.
  if (!ST.hasSMRDReadVALUDefHazard())
    return WaitStatesNeeded;

  // A read of an SGPR by an SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  int SmrdSgprWaitStates = 4;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  auto IsBufferHazardDefFn = [this] (MachineInstr *MI) {
    return TII.isSALU(*MI);
  };

  bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);

  for (const MachineOperand &Use : SMRD->uses()) {
    if (!Use.isReg())
      continue;
    int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   SmrdSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);

    // This fixes what appears to be undocumented hardware behavior in SI where
    // an s_mov writing a descriptor and an s_buffer_load_dword reading that
    // descriptor need some number of nops in between. We don't know how many
    // are required, so use 4. This likely went unnoticed before because the
    // only case where it happens is when we expand a 64-bit pointer into a
    // full descriptor and use s_buffer_load_dword instead of s_load_dword,
    // which was probably never encountered in closed-source land.
    if (IsBufferSMRD) {
      int WaitStatesNeededForUse =
        SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                                                   IsBufferHazardDefFn,
                                                   SmrdSgprWaitStates);
      WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
    }
  }

  return WaitStatesNeeded;
}
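
// Illustrative SMRD hazard (hypothetical SI sequence):
//   v_readfirstlane_b32 s4, v0      ; VALU writes s4
//   ...                             ; 4 wait states required (e.g. s_nop 3)
//   s_load_dword s5, s[0:1], s4     ; SMRD reads s4 as its SGPR offset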

int GCNHazardRecognizer::checkVMEMHazards(MachineInstr *VMEM) {
  if (!ST.hasVMEMReadSGPRVALUDefHazard())
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}
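
// Illustrative VMEM hazard (hypothetical sequence):
//   v_readfirstlane_b32 s4, v0                   ; VALU writes s4
//   buffer_load_dword v1, v2, s[8:11], s4 offen  ; VMEM reads SGPR s4
// needs 5 wait states between the two instructions on affected targets.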

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}
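
// Typical producer of this hazard (illustrative): v_div_scale_f32 writes VCC,
// which the following v_div_fmas_f32 reads implicitly, so up to 4 wait states
// must separate the two instructions.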

int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates = ST.getSetRegWaitStates();
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}

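// Returns the operand index of the store-data operand that is subject to the
// VALU hazard (a VMEM store of more than 64 bits whose data register could be
// overwritten too early), or -1 if this instruction cannot create the hazard.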
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1).
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T#, the
  // store size is greater than 8 bytes, and more than two bits of the dmask
  // are set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for
  // them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}

int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next
  // instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  unsigned Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}
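
// Illustrative 12-dword-store hazard (hypothetical sequence):
//   buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen  ; stores > 8 bytes
//   v_mov_b32 v1, 0                                   ; overwrites store data
// One wait state is required between the store and the VALU write of v1.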

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // see checkVALUHazards()
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded =
          std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  unsigned LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
                                              RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (!ST.hasRFEHazards())
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugInstr())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasSMovFedHazard())
    return 0;

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    auto IsHazardFn = [] (MachineInstr *MI) {
      return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
    };
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
                                                 MovFedWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                   SMovRelWaitStates);
}

void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
  fixVMEMtoScalarWriteHazards(MI);
  fixVcmpxPermlaneHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);
}

bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
  if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVOPC(*MI);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    unsigned Opc = MI->getOpcode();
    return SIInstrInfo::isVALU(*MI) &&
           Opc != AMDGPU::V_NOP_e32 &&
           Opc != AMDGPU::V_NOP_e64 &&
           Opc != AMDGPU::V_NOP_sdwa;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  // V_NOP will be discarded by SQ.
  // Use V_MOV_B32 v?, v?. The register must be alive, so use src0 of
  // V_PERMLANE*, which is always a VGPR and available.
  auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
  unsigned Reg = Src0->getReg();
  bool IsUndef = Src0->isUndef();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::V_MOV_B32_e32))
    .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
    .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);

  return true;
}
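
// Illustrative gfx10 sequence the fix above handles (hypothetical):
//   v_cmpx_le_f32 ...                 ; VOPC writing EXEC
//   v_permlane16_b32 v0, v1, s0, s1
// becomes
//   v_cmpx_le_f32 ...
//   v_mov_b32 v1, v1                  ; dummy VALU on the permlane's src0
//   v_permlane16_b32 v0, v1, s0, s1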

bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
        continue;
      return true;
    }
    return false;
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}

bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const unsigned SDSTReg = SDST->getReg();
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
          // These instructions cannot mitigate the hazard.
          return false;
        case AMDGPU::S_WAITCNT_LGKMCNT:
          // Reducing lgkmcnt count to 0 always mitigates the hazard.
          return (MI->getOperand(1).getImm() == 0) &&
                 (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
        case AMDGPU::S_WAITCNT: {
          const int64_t Imm = MI->getOperand(0).getImm();
          AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
          return (Decoded.LgkmCnt == 0);
        }
        default:
          // SOPP instructions cannot mitigate the hazard.
          if (TII->isSOPP(*MI))
            return false;
          // At this point the SALU can be assumed to mitigate the hazard
          // because either:
          // (a) it is independent of the at risk SMEM (breaking chain),
          // or
          // (b) it is dependent on the SMEM, in which case an appropriate
          //     s_waitcnt lgkmcnt _must_ exist between it and the at risk
          //     SMEM instruction.
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}
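
// Illustrative WAR sequence the fix above handles (hypothetical):
//   s_load_dword s7, s[4:5], 0x0    ; SMEM reads s4 and s5
//   v_readfirstlane_b32 s4, v0      ; VALU writes s4 (WAR on the SMEM read)
// The inserted "s_mov_b32 null, 0" before the VALU write mitigates it.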

bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  auto IsHazardFn = [TRI] (MachineInstr *I) {
    if (SIInstrInfo::isVALU(*I))
      return false;
    return I->readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI)) {
      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
        return true;
      for (const auto &MO : MI->implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
    .addImm(0xfffe);
  return true;
}

bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
    .addImm(0);

  return true;
}
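
// Illustrative pattern the fix above handles (hypothetical):
//   ds_read_b32 v0, v1          ; LDS access
//   s_cbranch_scc1 BB1          ; branch between the two accesses
// BB1:
//   buffer_load_dword ...       ; VMEM access, WAR with the earlier LDS op
// An "s_waitcnt_vscnt null, 0" is inserted before the VMEM access.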

int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!Offset || (Offset->getImm() & 6) == 0)
    return 0;

  auto IsHazardFn = [TII] (MachineInstr *I) {
    if (!SIInstrInfo::isMIMG(*I))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(*I) >= 16;
  };

  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}
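
// Note: the check above only fires when the buffer instruction's immediate
// offset has bit 1 or bit 2 set (offset & 6) and the preceding MIMG
// instruction uses the gfx10 NSA encoding and encodes to 16 bytes or more.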

int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
  int FPAtomicToDenormModeWaitStates = 3;

  if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
    return 0;

  auto IsHazardFn = [] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
      return false;
    return SIInstrInfo::isFPAtomic(*I);
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
    if (WaitStates >= 3)
      return true;

    // MI may be null when this callback is evaluated at a block boundary.
    if (!MI)
      return false;

    if (SIInstrInfo::isVALU(*MI))
      return true;

    switch (MI->getOpcode()) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAITCNT_IDLE:
      return true;
    default:
      break;
    }

    return false;
  };

  return FPAtomicToDenormModeWaitStates -
         ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
}
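
// Illustrative sequence (hypothetical gfx10):
//   buffer_atomic_fmax v0, v1, s[0:3], 0 offen  ; FP atomic
//   s_denorm_mode 0xf                           ; needs 3 wait states, a VALU,
//                                               ; or an s_waitcnt in between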