//===--- AMDGPUIGroupLP.cpp - AMDGPU IGroupLP ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file This file defines a set of schedule DAG mutations that can be used to
// override default scheduler behavior to enforce specific scheduling patterns.
// They should be used in cases where runtime performance considerations, such
// as inter-wavefront interactions, mean that compile-time heuristics cannot
// predict the optimal instruction ordering, or in kernels where optimal
// instruction scheduling is important enough to warrant manual intervention.
//
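// A typical way to enable these mutations is to register them with the
// scheduler's DAG through the factory functions declared in AMDGPUIGroupLP.h
// (a sketch; the exact hook depends on how the target sets up its
// MachineScheduler, and createIGroupLPDAGMutation() returns nullptr unless
// -amdgpu-igrouplp is set):
//
//   if (auto IGLPMutation = createIGroupLPDAGMutation())
//     DAG->addMutation(std::move(IGLPMutation));
//   DAG->addMutation(createSchedBarrierDAGMutation());
//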
//===----------------------------------------------------------------------===//

#include "AMDGPUIGroupLP.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

static cl::opt<bool>
    EnableIGroupLP("amdgpu-igrouplp",
                   cl::desc("Enable construction of Instruction Groups and "
                            "their ordering for scheduling"),
                   cl::init(false));

static cl::opt<Optional<unsigned>>
    VMEMGroupMaxSize("amdgpu-igrouplp-vmem-group-size", cl::init(None),
                     cl::Hidden,
                     cl::desc("The maximum number of instructions to include "
                              "in VMEM group."));

static cl::opt<Optional<unsigned>>
    MFMAGroupMaxSize("amdgpu-igrouplp-mfma-group-size", cl::init(None),
                     cl::Hidden,
                     cl::desc("The maximum number of instructions to include "
                              "in MFMA group."));

static cl::opt<Optional<unsigned>>
    LDRGroupMaxSize("amdgpu-igrouplp-ldr-group-size", cl::init(None),
                    cl::Hidden,
                    cl::desc("The maximum number of instructions to include "
                             "in lds/gds read group."));

static cl::opt<Optional<unsigned>>
    LDWGroupMaxSize("amdgpu-igrouplp-ldw-group-size", cl::init(None),
                    cl::Hidden,
                    cl::desc("The maximum number of instructions to include "
                             "in lds/gds write group."));
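
// For example, the instruction-group pipeline above can be enabled and tuned
// from the command line (a sketch; with clang the flags are forwarded via
// -mllvm):
//
//   llc -amdgpu-igrouplp -amdgpu-igrouplp-mfma-group-size=4 input.ll
//
// Here input.ll stands for any AMDGPU input module.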

typedef function_ref<bool(const MachineInstr &, const SIInstrInfo *)>
    CanAddMIFn;

// Classify instructions into groups to enable fine-tuned control over the
// scheduler. These groups may be more specific than current SchedModel
// instruction classes.
class SchedGroup {
private:
  // Function that returns true if a non-bundle MI may be inserted into this
  // group.
  const CanAddMIFn canAddMI;

  // Maximum number of SUnits that can be added to this group.
  Optional<unsigned> MaxSize;

  // Collection of SUnits that are classified as members of this group.
  SmallVector<SUnit *, 32> Collection;

  ScheduleDAGInstrs *DAG;

  void tryAddEdge(SUnit *A, SUnit *B) {
    if (A != B && DAG->canAddEdge(B, A)) {
      DAG->addEdge(B, SDep(A, SDep::Artificial));
      LLVM_DEBUG(dbgs() << "Adding edge...\n"
                        << "from: SU(" << A->NodeNum << ") " << *A->getInstr()
                        << "to: SU(" << B->NodeNum << ") " << *B->getInstr());
    }
  }

public:
  // Add DAG dependencies between all SUnits in this SchedGroup and this SU.
  // If MakePred is true, SU will be a predecessor of the SUnits in this
  // SchedGroup, otherwise SU will be a successor.
  void link(SUnit &SU, bool MakePred = false) {
    for (auto A : Collection) {
      SUnit *B = &SU;
      if (MakePred)
        std::swap(A, B);

      tryAddEdge(A, B);
    }
  }

  // Add DAG dependencies between all SUnits in this SchedGroup and this SU.
  // Use the predicate to determine whether SU should be a predecessor (P =
  // true) or a successor (P = false) of this SchedGroup.
  void link(SUnit &SU, function_ref<bool(const SUnit *A, const SUnit *B)> P) {
    for (auto A : Collection) {
      SUnit *B = &SU;
      if (P(A, B))
        std::swap(A, B);

      tryAddEdge(A, B);
    }
  }

  // Add DAG dependencies such that SUnits in this group shall be ordered
  // before SUnits in OtherGroup.
  void link(SchedGroup &OtherGroup) {
    for (auto B : OtherGroup.Collection)
      link(*B);
  }

  // Returns true if no more instructions may be added to this group.
  bool isFull() { return MaxSize && Collection.size() >= *MaxSize; }

  // Returns true if SU can be added to this SchedGroup.
  bool canAddSU(SUnit &SU, const SIInstrInfo *TII) {
    if (isFull())
      return false;

    MachineInstr &MI = *SU.getInstr();
    if (MI.getOpcode() != TargetOpcode::BUNDLE)
      return canAddMI(MI, TII);

    // Special case for bundled MIs.
    const MachineBasicBlock *MBB = MI.getParent();
    MachineBasicBlock::instr_iterator B = MI.getIterator(), E = ++B;
    while (E != MBB->end() && E->isBundledWithPred())
      ++E;

    // Return true if all of the bundled MIs can be added to this group.
    return std::all_of(
        B, E, [this, TII](MachineInstr &MI) { return canAddMI(MI, TII); });
  }

  void add(SUnit &SU) { Collection.push_back(&SU); }

  SchedGroup(CanAddMIFn canAddMI, Optional<unsigned> MaxSize,
             ScheduleDAGInstrs *DAG)
      : canAddMI(canAddMI), MaxSize(MaxSize), DAG(DAG) {}
};

bool isMFMASGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isMFMA(MI);
}

bool isVALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isVALU(MI) && !TII->isMFMA(MI);
}

bool isSALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isSALU(MI);
}

bool isVMEMSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI));
}

bool isVMEMReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayLoad() &&
         (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
}

bool isVMEMWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayStore() &&
         (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
}

bool isDSWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayStore() && TII->isDS(MI);
}

bool isDSReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayLoad() && TII->isDS(MI);
}

class IGroupLPDAGMutation : public ScheduleDAGMutation {
public:
  const SIInstrInfo *TII;
  ScheduleDAGMI *DAG;

  IGroupLPDAGMutation() = default;
  void apply(ScheduleDAGInstrs *DAGInstrs) override;
};

// DAG mutation that coordinates with the SCHED_BARRIER instruction and
// corresponding builtin. The mutation adds edges from specific instruction
// classes determined by the SCHED_BARRIER mask so that they cannot be
// scheduled around the SCHED_BARRIER.
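// For example (a sketch based on the mask handling below), a SCHED_BARRIER
// with a mask of 0 builds a SchedGroup for every class handled here and links
// each of them to the barrier, so none of those instructions may be scheduled
// across it.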
class SchedBarrierDAGMutation : public ScheduleDAGMutation {
private:
  const SIInstrInfo *TII;

  ScheduleDAGMI *DAG;

  // Components of the mask that determines which instructions may not be
  // scheduled across the SCHED_BARRIER.
  enum class SchedBarrierMasks {
    NONE = 0u,
    ALU = 1u << 0,
    VALU = 1u << 1,
    SALU = 1u << 2,
    MFMA = 1u << 3,
    VMEM = 1u << 4,
    VMEM_READ = 1u << 5,
    VMEM_WRITE = 1u << 6,
    DS = 1u << 7,
    DS_READ = 1u << 8,
    DS_WRITE = 1u << 9,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ DS_WRITE)
  };

  // Cache SchedGroups of each type if we have multiple SCHED_BARRIERs in a
  // region.
  //
  std::unique_ptr<SchedGroup> MFMASchedGroup = nullptr;
  std::unique_ptr<SchedGroup> VALUSchedGroup = nullptr;
  std::unique_ptr<SchedGroup> SALUSchedGroup = nullptr;
  std::unique_ptr<SchedGroup> VMEMReadSchedGroup = nullptr;
  std::unique_ptr<SchedGroup> VMEMWriteSchedGroup = nullptr;
  std::unique_ptr<SchedGroup> DSWriteSchedGroup = nullptr;
  std::unique_ptr<SchedGroup> DSReadSchedGroup = nullptr;

  // Use a SCHED_BARRIER's mask to identify instruction SchedGroups that should
  // not be reordered across the SCHED_BARRIER.
  void getSchedGroupsFromMask(int32_t Mask,
                              SmallVectorImpl<SchedGroup *> &SchedGroups);

  // Add DAG edges that enforce SCHED_BARRIER ordering.
  void addSchedBarrierEdges(SUnit &SU);

  // Classify instructions and add them to the SchedGroup.
  void initSchedGroup(SchedGroup *SG);

  // Remove all existing edges from a SCHED_BARRIER.
  void resetSchedBarrierEdges(SUnit &SU);

public:
  void apply(ScheduleDAGInstrs *DAGInstrs) override;

  SchedBarrierDAGMutation() = default;
};

void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
  const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
  if (!TSchedModel || DAG->SUnits.empty())
    return;

  LLVM_DEBUG(dbgs() << "Applying IGroupLPDAGMutation...\n");

  // The order of InstructionGroups in this vector defines the
  // order in which edges will be added. In other words, given the
  // present ordering, we will try to make each VMEM instruction
  // a predecessor of each DSRead instruction, and so on.
  SmallVector<SchedGroup, 4> PipelineOrderGroups = {
      SchedGroup(isVMEMSGMember, VMEMGroupMaxSize, DAG),
      SchedGroup(isDSReadSGMember, LDRGroupMaxSize, DAG),
      SchedGroup(isMFMASGMember, MFMAGroupMaxSize, DAG),
      SchedGroup(isDSWriteSGMember, LDWGroupMaxSize, DAG)};

  for (SUnit &SU : DAG->SUnits) {
    LLVM_DEBUG(dbgs() << "Checking Node"; DAG->dumpNode(SU));
    for (auto &SG : PipelineOrderGroups)
      if (SG.canAddSU(SU, TII))
        SG.add(SU);
  }

  for (unsigned i = 0; i < PipelineOrderGroups.size() - 1; i++) {
    auto &GroupA = PipelineOrderGroups[i];
    for (unsigned j = i + 1; j < PipelineOrderGroups.size(); j++) {
      auto &GroupB = PipelineOrderGroups[j];
      GroupA.link(GroupB);
    }
  }
}

void SchedBarrierDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
  if (!TSchedModel || DAGInstrs->SUnits.empty())
    return;

  LLVM_DEBUG(dbgs() << "Applying SchedBarrierDAGMutation...\n");

  const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
  for (auto &SU : DAG->SUnits)
    if (SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER)
      addSchedBarrierEdges(SU);
}

void SchedBarrierDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
  MachineInstr &MI = *SchedBarrier.getInstr();
  assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
  // Remove all existing edges from the SCHED_BARRIER that were added due to
  // the instruction having side effects.
  resetSchedBarrierEdges(SchedBarrier);
  SmallVector<SchedGroup *, 4> SchedGroups;
  int32_t Mask = MI.getOperand(0).getImm();
  getSchedGroupsFromMask(Mask, SchedGroups);
  for (auto SG : SchedGroups)
    SG->link(
        SchedBarrier, (function_ref<bool(const SUnit *A, const SUnit *B)>)[](
                          const SUnit *A, const SUnit *B) {
          return A->NodeNum > B->NodeNum;
        });
}

void SchedBarrierDAGMutation::getSchedGroupsFromMask(
    int32_t Mask, SmallVectorImpl<SchedGroup *> &SchedGroups) {
  SchedBarrierMasks SBMask = (SchedBarrierMasks)Mask;
  // See IntrinsicsAMDGPU.td for an explanation of these masks and their
  // mappings.
  //
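  // In this encoding a set bit means the corresponding instruction class may
  // be scheduled across the SCHED_BARRIER, so a SchedGroup (and its edges) is
  // only built when both the class-specific bit and its covering bit (ALU,
  // VMEM, or DS) are clear.
  //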
  if ((SBMask & SchedBarrierMasks::VALU) == SchedBarrierMasks::NONE &&
      (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
    if (!VALUSchedGroup) {
      VALUSchedGroup = std::make_unique<SchedGroup>(isVALUSGMember, None, DAG);
      initSchedGroup(VALUSchedGroup.get());
    }

    SchedGroups.push_back(VALUSchedGroup.get());
  }

  if ((SBMask & SchedBarrierMasks::SALU) == SchedBarrierMasks::NONE &&
      (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
    if (!SALUSchedGroup) {
      SALUSchedGroup = std::make_unique<SchedGroup>(isSALUSGMember, None, DAG);
      initSchedGroup(SALUSchedGroup.get());
    }

    SchedGroups.push_back(SALUSchedGroup.get());
  }

  if ((SBMask & SchedBarrierMasks::MFMA) == SchedBarrierMasks::NONE &&
      (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) {
    if (!MFMASchedGroup) {
      MFMASchedGroup = std::make_unique<SchedGroup>(isMFMASGMember, None, DAG);
      initSchedGroup(MFMASchedGroup.get());
    }

    SchedGroups.push_back(MFMASchedGroup.get());
  }

  if ((SBMask & SchedBarrierMasks::VMEM_READ) == SchedBarrierMasks::NONE &&
      (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
    if (!VMEMReadSchedGroup) {
      VMEMReadSchedGroup =
          std::make_unique<SchedGroup>(isVMEMReadSGMember, None, DAG);
      initSchedGroup(VMEMReadSchedGroup.get());
    }

    SchedGroups.push_back(VMEMReadSchedGroup.get());
  }

  if ((SBMask & SchedBarrierMasks::VMEM_WRITE) == SchedBarrierMasks::NONE &&
      (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) {
    if (!VMEMWriteSchedGroup) {
      VMEMWriteSchedGroup =
          std::make_unique<SchedGroup>(isVMEMWriteSGMember, None, DAG);
      initSchedGroup(VMEMWriteSchedGroup.get());
    }

    SchedGroups.push_back(VMEMWriteSchedGroup.get());
  }

  if ((SBMask & SchedBarrierMasks::DS_READ) == SchedBarrierMasks::NONE &&
      (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
    if (!DSReadSchedGroup) {
      DSReadSchedGroup =
          std::make_unique<SchedGroup>(isDSReadSGMember, None, DAG);
      initSchedGroup(DSReadSchedGroup.get());
    }

    SchedGroups.push_back(DSReadSchedGroup.get());
  }

  if ((SBMask & SchedBarrierMasks::DS_WRITE) == SchedBarrierMasks::NONE &&
      (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) {
    if (!DSWriteSchedGroup) {
      DSWriteSchedGroup =
          std::make_unique<SchedGroup>(isDSWriteSGMember, None, DAG);
      initSchedGroup(DSWriteSchedGroup.get());
    }

    SchedGroups.push_back(DSWriteSchedGroup.get());
  }
}

void SchedBarrierDAGMutation::initSchedGroup(SchedGroup *SG) {
  assert(SG);
  for (auto &SU : DAG->SUnits)
    if (SG->canAddSU(SU, TII))
      SG->add(SU);
}

void SchedBarrierDAGMutation::resetSchedBarrierEdges(SUnit &SU) {
  assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER);
  for (auto &P : SU.Preds)
    SU.removePred(P);

  for (auto &S : SU.Succs) {
    for (auto &SP : S.getSUnit()->Preds) {
      if (SP.getSUnit() == &SU) {
        S.getSUnit()->removePred(SP);
      }
    }
  }
}

} // namespace

namespace llvm {

std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation() {
  return EnableIGroupLP ? std::make_unique<IGroupLPDAGMutation>() : nullptr;
}

std::unique_ptr<ScheduleDAGMutation> createSchedBarrierDAGMutation() {
  return std::make_unique<SchedBarrierDAGMutation>();
}

} // end namespace llvm