//===-- GCNHazardRecognizer.cpp - GCN Hazard Recognizer Impls -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <set>
#include <vector>

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
  IsHazardRecognizerMode(false),
  CurrCycleInstr(nullptr),
  MF(MF),
  ST(MF.getSubtarget<GCNSubtarget>()),
  TII(*ST.getInstrInfo()),
  TRI(TII.getRegisterInfo()),
  ClauseUses(TRI.getNumRegUnits()),
  ClauseDefs(TRI.getNumRegUnits()) {
  MaxLookAhead = 5;
}

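// The scheduler calls EmitInstruction for each instruction it issues; we only
// record the instruction here and account for its wait states later, when
// AdvanceCycle() is called.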
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  EmitInstruction(SU->getInstr());
}

void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

static bool isDivFMas(unsigned Opcode) {
  return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
}

static bool isSGetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_GETREG_B32;
}

static bool isSSetReg(unsigned Opcode) {
  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
}

static bool isRWLane(unsigned Opcode) {
  return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
}

static bool isRFE(unsigned Opcode) {
  return Opcode == AMDGPU::S_RFE_B64;
}

static bool isSMovRel(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::S_MOVRELS_B32:
  case AMDGPU::S_MOVRELS_B64:
  case AMDGPU::S_MOVRELD_B32:
  case AMDGPU::S_MOVRELD_B64:
    return true;
  default:
    return false;
  }
}

static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
                                    const MachineInstr &MI) {
  if (TII.isAlwaysGDS(MI.getOpcode()))
    return true;

  switch (MI.getOpcode()) {
  case AMDGPU::S_SENDMSG:
  case AMDGPU::S_SENDMSGHALT:
  case AMDGPU::S_TTRACEDATA:
    return true;
  // These DS opcodes don't support GDS.
  case AMDGPU::DS_NOP:
  case AMDGPU::DS_PERMUTE_B32:
  case AMDGPU::DS_BPERMUTE_B32:
    return false;
  default:
    if (TII.isDS(MI.getOpcode())) {
      int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::gds);
      if (MI.getOperand(GDS).getImm())
        return true;
    }
    return false;
  }
}

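// Returns the hardware register ID encoded in the simm16 operand of an
// s_getreg/s_setreg style instruction.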
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
                                                     AMDGPU::OpName::simm16);
  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
}

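// Top-level hazard query used by the scheduler: run every hazard check that
// applies to this instruction and report NoopHazard if any of them would
// require wait states.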
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *MI = SU->getInstr();

  if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
    return NoopHazard;

  // FIXME: Should flat be considered vmem?
  if ((SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI)) &&
      checkVMEMHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
    return NoopHazard;

  if (ST.hasNoDataDepHazard())
    return NoHazard;

  if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
    return NoopHazard;

  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
    return NoopHazard;

  if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
    return NoopHazard;

  if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
    return NoopHazard;

  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
    return NoopHazard;

  if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
    return NoopHazard;

  if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0MovRelInterpHazard() &&
      (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
      checkReadM0Hazards(MI) > 0)
    return NoopHazard;

  if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
    return NoopHazard;

  if (checkAnyInstHazards(MI) > 0)
    return NoopHazard;

  return NoHazard;
}

unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  IsHazardRecognizerMode = false;
  return PreEmitNoopsCommon(SU->getInstr());
}

unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  IsHazardRecognizerMode = true;
  CurrCycleInstr = MI;
  unsigned W = PreEmitNoopsCommon(MI);

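  // These hazards are fixed by inserting new instructions rather than by
  // counting noops, so they are only handled here, in hazard recognizer mode.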
  fixVMEMtoScalarWriteHazards(MI);
  fixSMEMtoVectorWriteHazards(MI);
  fixVcmpxExecWARHazard(MI);
  fixLdsBranchVmemWARHazard(MI);

  CurrCycleInstr = nullptr;
  return W;
}

unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
  int WaitStates = std::max(0, checkAnyInstHazards(MI));

  if (SIInstrInfo::isSMRD(*MI))
    return std::max(WaitStates, checkSMRDHazards(MI));

  if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
    WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

  if (ST.hasNSAtoVMEMBug())
    WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));

  if (ST.hasNoDataDepHazard())
    return WaitStates;

  if (SIInstrInfo::isVALU(*MI))
    WaitStates = std::max(WaitStates, checkVALUHazards(MI));

  if (SIInstrInfo::isDPP(*MI))
    WaitStates = std::max(WaitStates, checkDPPHazards(MI));

  if (isDivFMas(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));

  if (isRWLane(MI->getOpcode()))
    WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

  if (MI->isInlineAsm())
    return std::max(WaitStates, checkInlineAsmHazards(MI));

  if (isSGetReg(MI->getOpcode()))
    return std::max(WaitStates, checkGetRegHazards(MI));

  if (isSSetReg(MI->getOpcode()))
    return std::max(WaitStates, checkSetRegHazards(MI));

  if (isRFE(MI->getOpcode()))
    return std::max(WaitStates, checkRFEHazards(MI));

  if (ST.hasReadM0MovRelInterpHazard() && (TII.isVINTRP(*MI) ||
                                           isSMovRel(MI->getOpcode())))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
    return std::max(WaitStates, checkReadM0Hazards(MI));

  return WaitStates;
}

void GCNHazardRecognizer::EmitNoop() {
  EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {
  // When the scheduler detects a stall, it will call AdvanceCycle() without
  // emitting any instructions.
  if (!CurrCycleInstr)
    return;

  // Do not track non-instructions which do not affect the wait states.
  // If included, these instructions can lead to buffer overflow such that
  // detectable hazards are missed.
  if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
      CurrCycleInstr->isKill())
    return;

  unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);

  // Keep track of emitted instructions
  EmittedInstrs.push_front(CurrCycleInstr);

  // Add a nullptr for each additional wait state after the first.  Make sure
  // not to add more than getMaxLookAhead() items to the list, since we
  // truncate the list to that size right after this loop.
  for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
       i < e; ++i) {
    EmittedInstrs.push_front(nullptr);
  }

  // getMaxLookAhead() is the largest number of wait states we will ever need
  // to insert, so there is no point in keeping track of more than that many
  // wait states.
  EmittedInstrs.resize(getMaxLookAhead());

  CurrCycleInstr = nullptr;
}

void GCNHazardRecognizer::RecedeCycle() {
  llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

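// Callback used by the backwards scans below: given the instruction being
// examined and the wait states accumulated so far, it returns true once the
// scan has gone far enough back that it can safely stop.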
typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;

// Returns the minimum number of wait states since \p I, walking all
// predecessor blocks. Scanning stops once \p IsExpired returns true.
// Can only be run in hazard recognizer mode.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock::reverse_instr_iterator I,
                              int WaitStates,
                              IsExpiredFn IsExpired,
                              DenseSet<const MachineBasicBlock *> &Visited) {
  for (auto E = MBB->rend(); I != E; ++I) {
    if (IsHazard(&*I))
      return WaitStates;

    if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
      continue;

    WaitStates += SIInstrInfo::getNumWaitStates(*I);

    if (IsExpired(&*I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  int MinWaitStates = WaitStates;
  bool Found = false;
  for (MachineBasicBlock *Pred : MBB->predecessors()) {
    if (!Visited.insert(Pred).second)
      continue;

    int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
                               WaitStates, IsExpired, Visited);

    if (W == std::numeric_limits<int>::max())
      continue;

    MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
    if (IsExpired(nullptr, MinWaitStates))
      return MinWaitStates;

    Found = true;
  }

  if (Found)
    return MinWaitStates;

  return std::numeric_limits<int>::max();
}

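// Convenience overload that starts the scan at the instruction immediately
// preceding \p MI in its basic block.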
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
                              MachineInstr *MI,
                              IsExpiredFn IsExpired) {
  DenseSet<const MachineBasicBlock *> Visited;
  return getWaitStatesSince(IsHazard, MI->getParent(),
                            std::next(MI->getReverseIterator()),
                            0, IsExpired, Visited);
}

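// In hazard recognizer mode we can scan the whole CFG backwards from the
// current instruction; otherwise we are limited to the scheduler's queue of
// already emitted instructions.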
int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
  if (IsHazardRecognizerMode) {
    auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
      return WaitStates >= Limit;
    };
    return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
  }

  int WaitStates = 0;
  for (MachineInstr *MI : EmittedInstrs) {
    if (MI) {
      if (IsHazard(MI))
        return WaitStates;

      if (MI->isInlineAsm())
        continue;
    }
    ++WaitStates;

    if (WaitStates >= Limit)
      break;
  }
  return std::numeric_limits<int>::max();
}

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
                                               IsHazardFn IsHazardDef,
                                               int Limit) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
    return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
                                                  int Limit) {
  auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
    return isSSetReg(MI->getOpcode()) && IsHazard(MI);
  };

  return getWaitStatesSince(IsHazardFn, Limit);
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

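// Clause defs and uses are tracked at register-unit granularity so that
// anyCommon() also catches overlaps between aliasing sub- and super-registers.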
static void addRegUnits(const SIRegisterInfo &TRI,
                        BitVector &BV, unsigned Reg) {
  for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
    BV.set(*RUI);
}

static void addRegsToSet(const SIRegisterInfo &TRI,
                         iterator_range<MachineInstr::const_mop_iterator> Ops,
                         BitVector &Set) {
  for (const MachineOperand &Op : Ops) {
    if (Op.isReg())
      addRegUnits(TRI, Set, Op.getReg());
  }
}

void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
  // XXX: Do we need to worry about implicit operands?
  addRegsToSet(TRI, MI.defs(), ClauseDefs);
  addRegsToSet(TRI, MI.uses(), ClauseUses);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
  // SMEM soft clauses are only present on VI+, and only matter if xnack is
  // enabled.
  if (!ST.isXNACKEnabled())
    return 0;

  bool IsSMRD = TII.isSMRD(*MEM);

  resetClause();

  // A soft-clause is any group of consecutive SMEM instructions.  The
  // instructions in this group may return out of order and/or may be
  // replayed (i.e. the same instruction issued more than once).
  //
  // In order to handle these situations correctly we need to make sure that
  // when a clause has more than one instruction, no instruction in the clause
  // writes to a register that is read by another instruction in the clause
  // (including itself). If we encounter this situation, we need to break the
  // clause by inserting a non-SMEM instruction.

  for (MachineInstr *MI : EmittedInstrs) {
    // When we hit a non-SMEM instruction then we have passed the start of the
    // clause and we can stop.
    if (!MI)
      break;

    if (IsSMRD != SIInstrInfo::isSMRD(*MI))
      break;

    addClauseInst(*MI);
  }

  if (ClauseDefs.none())
    return 0;

  // We need to make sure not to put loads and stores in the same clause if they
  // use the same address. For now, just start a new clause whenever we see a
  // store.
  if (MEM->mayStore())
    return 1;

  addClauseInst(*MEM);

  // If the set of defs and uses intersect then we cannot add this instruction
  // to the clause, so we have a hazard.
  return ClauseDefs.anyCommon(ClauseUses) ? 1 : 0;
}

475   int WaitStatesNeeded = 0;
476 
477   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
478 
479   // This SMRD hazard only affects SI.
480   if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
481     return WaitStatesNeeded;
482 
483   // A read of an SGPR by SMRD instruction requires 4 wait states when the
484   // SGPR was written by a VALU instruction.
485   int SmrdSgprWaitStates = 4;
486   auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
487   auto IsBufferHazardDefFn = [this] (MachineInstr *MI) { return TII.isSALU(*MI); };
488 
489   bool IsBufferSMRD = TII.isBufferSMRD(*SMRD);
490 
491   for (const MachineOperand &Use : SMRD->uses()) {
492     if (!Use.isReg())
493       continue;
494     int WaitStatesNeededForUse =
495         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
496                                                    SmrdSgprWaitStates);
497     WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
498 
499     // This fixes what appears to be undocumented hardware behavior in SI where
500     // s_mov writing a descriptor and s_buffer_load_dword reading the descriptor
501     // needs some number of nops in between. We don't know how many we need, but
502     // let's use 4. This wasn't discovered before probably because the only
503     // case when this happens is when we expand a 64-bit pointer into a full
504     // descriptor and use s_buffer_load_dword instead of s_load_dword, which was
505     // probably never encountered in the closed-source land.
506     if (IsBufferSMRD) {
507       int WaitStatesNeededForUse =
508         SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
509                                                    IsBufferHazardDefFn,
510                                                    SmrdSgprWaitStates);
511       WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
512     }
513   }
514 
515   return WaitStatesNeeded;
516 }
517 
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr *VMEM) {
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  int WaitStatesNeeded = checkSoftClauseHazards(VMEM);

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU instruction.
  const int VmemSgprWaitStates = 5;
  auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };

  for (const MachineOperand &Use : VMEM->uses()) {
    if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;

    int WaitStatesNeededForUse =
        VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
                                                   VmemSgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }
  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // Check for DPP VGPR read after VALU VGPR write and EXEC write.
  int DppVgprWaitStates = 2;
  int DppExecWaitStates = 5;
  int WaitStatesNeeded = 0;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };

  for (const MachineOperand &Use : DPP->uses()) {
    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    int WaitStatesNeededForUse =
        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
                              [](MachineInstr *) { return true; },
                              DppVgprWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  WaitStatesNeeded = std::max(
      WaitStatesNeeded,
      DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
                                                DppExecWaitStates));

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
  const SIInstrInfo *TII = ST.getInstrInfo();

  // v_div_fmas requires 4 wait states after a write to vcc from a VALU
  // instruction.
  const int DivFMasWaitStates = 4;
  auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
  int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
                                               DivFMasWaitStates);

  return DivFMasWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);

  const int GetRegWaitStates = 2;
  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
    return GetRegHWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);

  return GetRegWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned HWReg = getHWReg(TII, *SetRegInstr);

  const int SetRegWaitStates =
      ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
  auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
    return HWReg == getHWReg(TII, *MI);
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
  return SetRegWaitStates - WaitStatesNeeded;
}

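// Returns the operand index of the store data whose register could be
// clobbered by a following VALU write (see checkVALUHazards), or -1 if \p MI
// cannot create such a hazard.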
int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
  if (!MI.mayStore())
    return -1;

  const SIInstrInfo *TII = ST.getInstrInfo();
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
  int VDataRCID = -1;
  if (VDataIdx != -1)
    VDataRCID = Desc.OpInfo[VDataIdx].RegClass;

  if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
    // There is no hazard if the instruction does not use vector regs
    // (like wbinvl1)
    if (VDataIdx == -1)
      return -1;
    // For MUBUF/MTBUF instructions this hazard only exists if the
    // instruction is not using a register in the soffset field.
    const MachineOperand *SOffset =
        TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
    // If we have no soffset operand, then assume this field has been
    // hardcoded to zero.
    if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
        (!SOffset || !SOffset->isReg()))
      return VDataIdx;
  }

  // MIMG instructions create a hazard if they don't use a 256-bit T# and
  // the store size is greater than 8 bytes and they have more than two bits
  // of their dmask set.
  // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
  if (TII->isMIMG(MI)) {
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
    assert(SRsrcIdx != -1 &&
           AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
    (void)SRsrcIdx;
  }

  if (TII->isFLAT(MI)) {
    int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
    if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
      return DataIdx;
  }

  return -1;
}

int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
                                                const MachineRegisterInfo &MRI) {
  // Helper to check for the hazard where VMEM instructions that store more
  // than 8 bytes can have their store data overwritten by the next instruction.
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  const int VALUWaitStates = 1;
  int WaitStatesNeeded = 0;

  if (!TRI->isVGPR(MRI, Def.getReg()))
    return WaitStatesNeeded;
  unsigned Reg = Def.getReg();
  auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
    int DataIdx = createsVALUHazard(*MI);
    return DataIdx >= 0 &&
           TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
  };
  int WaitStatesNeededForDef =
    VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
  WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
  // This checks for the hazard where VMEM instructions that store more than
  // 8 bytes can have their store data overwritten by the next instruction.
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Def : VALU->defs()) {
    WaitStatesNeeded =
        std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI));
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
  // This checks for hazards associated with inline asm statements.
  // Since inline asms can contain just about anything, we use this
  // to call/leverage other check*Hazard routines. Note that
  // this function doesn't attempt to address all possible inline asm
  // hazards (good luck), but is a collection of what has been
  // problematic thus far.

  // see checkVALUHazards()
  if (!ST.has12DWordStoreHazard())
    return 0;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  int WaitStatesNeeded = 0;

  for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
       I != E; ++I) {
    const MachineOperand &Op = IA->getOperand(I);
    if (Op.isReg() && Op.isDef()) {
      WaitStatesNeeded =
          std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
    }
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  const MachineOperand *LaneSelectOp =
      TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);

  if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
    return 0;

  unsigned LaneSelectReg = LaneSelectOp->getReg();
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isVALU(*MI);
  };

  const int RWLaneWaitStates = 4;
  int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
                                              RWLaneWaitStates);
  return RWLaneWaitStates - WaitStatesSince;
}

int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();

  const int RFEWaitStates = 1;

  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
  };
  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
  return RFEWaitStates - WaitStatesNeeded;
}

int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
  if (MI->isDebugInstr())
    return 0;

  if (!ST.hasSMovFedHazard())
    return 0;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Check for any instruction reading an SGPR after a write from
  // s_mov_fed_b32.
  int MovFedWaitStates = 1;
  int WaitStatesNeeded = 0;

  for (const MachineOperand &Use : MI->uses()) {
    if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
      continue;
    auto IsHazardFn = [] (MachineInstr *MI) {
      return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
    };
    int WaitStatesNeededForUse =
        MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
                                                 MovFedWaitStates);
    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
  }

  return WaitStatesNeeded;
}

int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const int SMovRelWaitStates = 1;
  auto IsHazardFn = [TII] (MachineInstr *MI) {
    return TII->isSALU(*MI);
  };
  return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
                                                   SMovRelWaitStates);
}

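// Breaks the hazard where an SALU/SMEM write follows a VMEM, DS or FLAT
// access that still reads the written register, by inserting a V_NOP when
// neither a VALU instruction nor an s_waitcnt 0 already sits in between.
// Returns true if an instruction was inserted.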
bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
  if (!ST.hasVMEMtoScalarWriteHazard())
    return false;

  if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
    return false;

  if (MI->getNumDefs() == 0)
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
    if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
        !SIInstrInfo::isFLAT(*I))
      return false;

    for (const MachineOperand &Def : MI->defs()) {
      MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
        continue;
      return true;
    }
    return false;
  };

  auto IsExpiredFn = [] (MachineInstr *MI, int) {
    return MI && (SIInstrInfo::isVALU(*MI) ||
                  (MI->getOpcode() == AMDGPU::S_WAITCNT &&
                   !MI->getOperand(0).getImm()));
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
  return true;
}

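// Breaks the hazard where a VALU writes an SGPR that a still outstanding SMEM
// load is reading, by inserting "s_mov_b32 null, 0" when no instruction known
// to resolve the hazard is found in between. Returns true if an instruction
// was inserted.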
bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
  if (!ST.hasSMEMtoVectorWriteHazard())
    return false;

  if (!SIInstrInfo::isVALU(*MI))
    return false;

  unsigned SDSTName;
  switch (MI->getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_READFIRSTLANE_B32:
    SDSTName = AMDGPU::OpName::vdst;
    break;
  default:
    SDSTName = AMDGPU::OpName::sdst;
    break;
  }

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
  if (!SDST) {
    for (const auto &MO : MI->implicit_operands()) {
      if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
        SDST = &MO;
        break;
      }
    }
  }

  if (!SDST)
    return false;

  const unsigned SDSTReg = SDST->getReg();
  auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
    return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
  };

  // This assumes that there will be s_waitcnt lgkmcnt(0) or equivalent
  // between any at risk SMEM and any SALU dependent on the SMEM results.
  auto IsExpiredFn = [TII] (MachineInstr *MI, int) {
    if (MI) {
      if (TII->isSALU(*MI)) {
        if (TII->isSOPP(*MI))
          return false;
        switch (MI->getOpcode()) {
        case AMDGPU::S_SETVSKIP:
        case AMDGPU::S_VERSION:
        case AMDGPU::S_WAITCNT_VSCNT:
        case AMDGPU::S_WAITCNT_VMCNT:
        case AMDGPU::S_WAITCNT_EXPCNT:
        case AMDGPU::S_WAITCNT_LGKMCNT:
          return false;
        default:
          return true;
        }
      }
    }
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
      .addImm(0);
  return true;
}

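// Breaks the EXEC write-after-read hazard: a VALU write to EXEC after a
// non-VALU read of EXEC needs an intervening VALU SGPR write or a suitable
// s_waitcnt_depctr; insert the latter when neither is found. Returns true if
// an instruction was inserted.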
bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
  if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
    return false;

  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
    return false;

  auto IsHazardFn = [TRI] (MachineInstr *I) {
    if (SIInstrInfo::isVALU(*I))
      return false;
    return I->readsRegister(AMDGPU::EXEC, TRI);
  };

  const SIInstrInfo *TII = ST.getInstrInfo();
  auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
    if (!MI)
      return false;
    if (SIInstrInfo::isVALU(*MI)) {
      if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
        return true;
      for (const auto &MO : MI->implicit_operands())
        if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
          return true;
    }
    if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
        (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
      return true;
    return false;
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
    .addImm(0xfffe);
  return true;
}

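// Breaks the LDS/VMEM write-after-read hazard that arises when a DS access on
// one side of a branch is paired with a VMEM (or segment-specific FLAT)
// access on the other, by inserting "s_waitcnt_vscnt null, 0". Returns true
// if an instruction was inserted.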
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
  if (!ST.hasLdsBranchVmemWARHazard())
    return false;

  auto IsHazardInst = [] (const MachineInstr *MI) {
    if (SIInstrInfo::isDS(*MI))
      return 1;
    if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
      return 2;
    return 0;
  };

  auto InstType = IsHazardInst(MI);
  if (!InstType)
    return false;

  auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
    return I && (IsHazardInst(I) ||
                 (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
                  I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
                  !I->getOperand(1).getImm()));
  };

  auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
    if (!I->isBranch())
      return false;

    auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
      auto InstType2 = IsHazardInst(I);
      return InstType2 && InstType != InstType2;
    };

    auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
      if (!I)
        return false;

      auto InstType2 = IsHazardInst(I);
      if (InstType == InstType2)
        return true;

      return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
             I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
             !I->getOperand(1).getImm();
    };

    return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
           std::numeric_limits<int>::max();
  };

  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_VSCNT))
    .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
    .addImm(0);

  return true;
}

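// An NSA-encoded MIMG instruction (one whose encoding is at least 16 bytes
// long) immediately followed by a MUBUF/MTBUF instruction with bit 1 or 2 of
// its offset set appears to trip a hardware bug, so require one wait state in
// between.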
int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
  int NSAtoVMEMWaitStates = 1;

  if (!ST.hasNSAtoVMEMBug())
    return 0;

  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
    return 0;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
  if (!Offset || (Offset->getImm() & 6) == 0)
    return 0;

  auto IsHazardFn = [TII] (MachineInstr *I) {
    if (!SIInstrInfo::isMIMG(*I))
      return false;
    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
    return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
           TII->getInstSizeInBytes(*I) >= 16;
  };

  return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
}
1037