1*81ad6265SDimitry Andric //===--- AMDGPUIGroupLP.cpp - AMDGPU IGroupLP ------------===// 2*81ad6265SDimitry Andric // 3*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*81ad6265SDimitry Andric // 7*81ad6265SDimitry Andric //===----------------------------------------------------------------------===// 8*81ad6265SDimitry Andric // 9*81ad6265SDimitry Andric // \file This file defines a set of schedule DAG mutations that can be used to 10*81ad6265SDimitry Andric // override default scheduler behavior to enforce specific scheduling patterns. 11*81ad6265SDimitry Andric // They should be used in cases where runtime performance considerations such as 12*81ad6265SDimitry Andric // inter-wavefront interactions, mean that compile-time heuristics cannot 13*81ad6265SDimitry Andric // predict the optimal instruction ordering, or in kernels where optimum 14*81ad6265SDimitry Andric // instruction scheduling is important enough to warrant manual intervention. 
//
//===----------------------------------------------------------------------===//

#include "AMDGPUIGroupLP.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

// Master switch for the IGroupLP mutation; off by default so stock scheduler
// heuristics apply unless the user opts in.
static cl::opt<bool>
    EnableIGroupLP("amdgpu-igrouplp",
                   cl::desc("Enable construction of Instruction Groups and "
                            "their ordering for scheduling"),
                   cl::init(false));

// Per-class group size caps. The default of None means "no limit" (see
// SchedGroup::isFull, which only enforces a bound when a value is present).
static cl::opt<Optional<unsigned>>
    VMEMGroupMaxSize("amdgpu-igrouplp-vmem-group-size", cl::init(None),
                     cl::Hidden,
                     cl::desc("The maximum number of instructions to include "
                              "in VMEM group."));

static cl::opt<Optional<unsigned>>
    MFMAGroupMaxSize("amdgpu-igrouplp-mfma-group-size", cl::init(None),
                     cl::Hidden,
                     cl::desc("The maximum number of instructions to include "
                              "in MFMA group."));

static cl::opt<Optional<unsigned>>
    LDRGroupMaxSize("amdgpu-igrouplp-ldr-group-size", cl::init(None),
                    cl::Hidden,
                    cl::desc("The maximum number of instructions to include "
                             "in lds/gds read group."));

static cl::opt<Optional<unsigned>>
    LDWGroupMaxSize("amdgpu-igrouplp-ldw-group-size", cl::init(None),
                    cl::Hidden,
                    cl::desc("The maximum number of instructions to include "
                             "in lds/gds write group."));

// Predicate deciding whether a single (non-bundle) MachineInstr belongs to a
// given group. Note this is a function_ref: callees must not outlive the
// callable they were constructed from.
typedef function_ref<bool(const MachineInstr &, const SIInstrInfo *)>
    CanAddMIFn;

// Classify instructions into groups to enable fine tuned control over the
// scheduler. These groups may be more specific than current SchedModel
// instruction classes.
class SchedGroup {
private:
  // Function that returns true if a non-bundle MI may be inserted into this
  // group.
  const CanAddMIFn canAddMI;

  // Maximum number of SUnits that can be added to this group. None means
  // unbounded.
  Optional<unsigned> MaxSize;

  // Collection of SUnits that are classified as members of this group.
  SmallVector<SUnit *, 32> Collection;

  ScheduleDAGInstrs *DAG;

  // Add an artificial edge A -> B (A must be scheduled before B). The edge is
  // skipped when A == B or when canAddEdge rejects it (e.g. it would create a
  // cycle in the DAG).
  void tryAddEdge(SUnit *A, SUnit *B) {
    if (A != B && DAG->canAddEdge(B, A)) {
      DAG->addEdge(B, SDep(A, SDep::Artificial));
      LLVM_DEBUG(dbgs() << "Adding edge...\n"
                        << "from: SU(" << A->NodeNum << ") " << *A->getInstr()
                        << "to: SU(" << B->NodeNum << ") " << *B->getInstr());
    }
  }

public:
  // Add DAG dependencies from all SUnits in this SchedGroup and this SU. If
  // MakePred is true, SU will be a predecessor of the SUnits in this
  // SchedGroup, otherwise SU will be a successor.
  void link(SUnit &SU, bool MakePred = false) {
    for (auto A : Collection) {
      SUnit *B = &SU;
      if (MakePred)
        std::swap(A, B);

      tryAddEdge(A, B);
    }
  }

  // Add DAG dependencies from all SUnits in this SchedGroup and this SU. Use
  // the predicate to determine whether SU should be a predecessor (P = true)
  // or a successor (P = false) of this SchedGroup.
  void link(SUnit &SU, function_ref<bool(const SUnit *A, const SUnit *B)> P) {
    for (auto A : Collection) {
      SUnit *B = &SU;
      // P decides edge direction per group member, so ordering can depend on
      // the relative position of SU and each member (e.g. NodeNum order).
      if (P(A, B))
        std::swap(A, B);

      tryAddEdge(A, B);
    }
  }

  // Add DAG dependencies such that SUnits in this group shall be ordered
  // before SUnits in OtherGroup.
  void link(SchedGroup &OtherGroup) {
    for (auto B : OtherGroup.Collection)
      link(*B);
  }

  // Returns true if no more instructions may be added to this group.
  // A group with no MaxSize (None) is never full.
  bool isFull() { return MaxSize && Collection.size() >= *MaxSize; }

  // Returns true if SU can be added to this SchedGroup.
  bool canAddSU(SUnit &SU, const SIInstrInfo *TII) {
    if (isFull())
      return false;

    MachineInstr &MI = *SU.getInstr();
    if (MI.getOpcode() != TargetOpcode::BUNDLE)
      return canAddMI(MI, TII);

    // Special case for bundled MIs.
    // Note: the ++B below runs before E is initialized, so both B and E start
    // just past the BUNDLE header; [B, E) therefore covers only the bundled
    // instructions, not the BUNDLE pseudo itself.
    const MachineBasicBlock *MBB = MI.getParent();
    MachineBasicBlock::instr_iterator B = MI.getIterator(), E = ++B;
    while (E != MBB->end() && E->isBundledWithPred())
      ++E;

    // Return true if all of the bundled MIs can be added to this group.
    return std::all_of(
        B, E, [this, TII](MachineInstr &MI) { return canAddMI(MI, TII); });
  }

  // Unconditionally record SU as a member; callers are expected to check
  // canAddSU first.
  void add(SUnit &SU) { Collection.push_back(&SU); }

  SchedGroup(CanAddMIFn canAddMI, Optional<unsigned> MaxSize,
             ScheduleDAGInstrs *DAG)
      : canAddMI(canAddMI), MaxSize(MaxSize), DAG(DAG) {}
};

// Membership predicates for each instruction class, matching the group kinds
// selected above. MFMA is excluded from VALU so the two groups are disjoint.
bool isMFMASGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isMFMA(MI);
}

bool isVALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isVALU(MI) && !TII->isMFMA(MI);
}

bool isSALUSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isSALU(MI);
}

// VMEM here includes FLAT accesses, except FLAT instructions that are DS
// (LDS/GDS) accesses, which belong to the DS groups below.
bool isVMEMSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI));
}

bool isVMEMReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayLoad() &&
         (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
}

bool isVMEMWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayStore() &&
         (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI)));
}

bool isDSWriteSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayStore() && TII->isDS(MI);
}

bool isDSReadSGMember(const MachineInstr &MI, const SIInstrInfo *TII) {
  return MI.mayLoad() && TII->isDS(MI);
}

// DAG mutation that partitions a region's instructions into pipeline-ordered
// SchedGroups and links the groups with artificial edges (see apply()).
class IGroupLPDAGMutation : public ScheduleDAGMutation {
public:
  const SIInstrInfo *TII;
  ScheduleDAGMI *DAG;

  IGroupLPDAGMutation() = default;
  void apply(ScheduleDAGInstrs *DAGInstrs) override;
};

// DAG mutation that coordinates with the SCHED_BARRIER instruction and
// corresponding builtin. The mutation adds edges from specific instruction
// classes determined by the SCHED_BARRIER mask so that they cannot be
// scheduled around the SCHED_BARRIER.
203*81ad6265SDimitry Andric class SchedBarrierDAGMutation : public ScheduleDAGMutation { 204*81ad6265SDimitry Andric private: 205*81ad6265SDimitry Andric const SIInstrInfo *TII; 206*81ad6265SDimitry Andric 207*81ad6265SDimitry Andric ScheduleDAGMI *DAG; 208*81ad6265SDimitry Andric 209*81ad6265SDimitry Andric // Components of the mask that determines which instructions may not be 210*81ad6265SDimitry Andric // scheduled across the SCHED_BARRIER. 211*81ad6265SDimitry Andric enum class SchedBarrierMasks { 212*81ad6265SDimitry Andric NONE = 0u, 213*81ad6265SDimitry Andric ALU = 1u << 0, 214*81ad6265SDimitry Andric VALU = 1u << 1, 215*81ad6265SDimitry Andric SALU = 1u << 2, 216*81ad6265SDimitry Andric MFMA = 1u << 3, 217*81ad6265SDimitry Andric VMEM = 1u << 4, 218*81ad6265SDimitry Andric VMEM_READ = 1u << 5, 219*81ad6265SDimitry Andric VMEM_WRITE = 1u << 6, 220*81ad6265SDimitry Andric DS = 1u << 7, 221*81ad6265SDimitry Andric DS_READ = 1u << 8, 222*81ad6265SDimitry Andric DS_WRITE = 1u << 9, 223*81ad6265SDimitry Andric LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ DS_WRITE) 224*81ad6265SDimitry Andric }; 225*81ad6265SDimitry Andric 226*81ad6265SDimitry Andric // Cache SchedGroups of each type if we have multiple SCHED_BARRIERs in a 227*81ad6265SDimitry Andric // region. 
228*81ad6265SDimitry Andric // 229*81ad6265SDimitry Andric std::unique_ptr<SchedGroup> MFMASchedGroup = nullptr; 230*81ad6265SDimitry Andric std::unique_ptr<SchedGroup> VALUSchedGroup = nullptr; 231*81ad6265SDimitry Andric std::unique_ptr<SchedGroup> SALUSchedGroup = nullptr; 232*81ad6265SDimitry Andric std::unique_ptr<SchedGroup> VMEMReadSchedGroup = nullptr; 233*81ad6265SDimitry Andric std::unique_ptr<SchedGroup> VMEMWriteSchedGroup = nullptr; 234*81ad6265SDimitry Andric std::unique_ptr<SchedGroup> DSWriteSchedGroup = nullptr; 235*81ad6265SDimitry Andric std::unique_ptr<SchedGroup> DSReadSchedGroup = nullptr; 236*81ad6265SDimitry Andric 237*81ad6265SDimitry Andric // Use a SCHED_BARRIER's mask to identify instruction SchedGroups that should 238*81ad6265SDimitry Andric // not be reordered accross the SCHED_BARRIER. 239*81ad6265SDimitry Andric void getSchedGroupsFromMask(int32_t Mask, 240*81ad6265SDimitry Andric SmallVectorImpl<SchedGroup *> &SchedGroups); 241*81ad6265SDimitry Andric 242*81ad6265SDimitry Andric // Add DAG edges that enforce SCHED_BARRIER ordering. 243*81ad6265SDimitry Andric void addSchedBarrierEdges(SUnit &SU); 244*81ad6265SDimitry Andric 245*81ad6265SDimitry Andric // Classify instructions and add them to the SchedGroup. 246*81ad6265SDimitry Andric void initSchedGroup(SchedGroup *SG); 247*81ad6265SDimitry Andric 248*81ad6265SDimitry Andric // Remove all existing edges from a SCHED_BARRIER. 
249*81ad6265SDimitry Andric void resetSchedBarrierEdges(SUnit &SU); 250*81ad6265SDimitry Andric 251*81ad6265SDimitry Andric public: 252*81ad6265SDimitry Andric void apply(ScheduleDAGInstrs *DAGInstrs) override; 253*81ad6265SDimitry Andric 254*81ad6265SDimitry Andric SchedBarrierDAGMutation() = default; 255*81ad6265SDimitry Andric }; 256*81ad6265SDimitry Andric 257*81ad6265SDimitry Andric void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) { 258*81ad6265SDimitry Andric const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>(); 259*81ad6265SDimitry Andric TII = ST.getInstrInfo(); 260*81ad6265SDimitry Andric DAG = static_cast<ScheduleDAGMI *>(DAGInstrs); 261*81ad6265SDimitry Andric const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel(); 262*81ad6265SDimitry Andric if (!TSchedModel || DAG->SUnits.empty()) 263*81ad6265SDimitry Andric return; 264*81ad6265SDimitry Andric 265*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Applying IGroupLPDAGMutation...\n"); 266*81ad6265SDimitry Andric 267*81ad6265SDimitry Andric // The order of InstructionGroups in this vector defines the 268*81ad6265SDimitry Andric // order in which edges will be added. In other words, given the 269*81ad6265SDimitry Andric // present ordering, we will try to make each VMEMRead instruction 270*81ad6265SDimitry Andric // a predecessor of each DSRead instruction, and so on. 
271*81ad6265SDimitry Andric SmallVector<SchedGroup, 4> PipelineOrderGroups = { 272*81ad6265SDimitry Andric SchedGroup(isVMEMSGMember, VMEMGroupMaxSize, DAG), 273*81ad6265SDimitry Andric SchedGroup(isDSReadSGMember, LDRGroupMaxSize, DAG), 274*81ad6265SDimitry Andric SchedGroup(isMFMASGMember, MFMAGroupMaxSize, DAG), 275*81ad6265SDimitry Andric SchedGroup(isDSWriteSGMember, LDWGroupMaxSize, DAG)}; 276*81ad6265SDimitry Andric 277*81ad6265SDimitry Andric for (SUnit &SU : DAG->SUnits) { 278*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Checking Node"; DAG->dumpNode(SU)); 279*81ad6265SDimitry Andric for (auto &SG : PipelineOrderGroups) 280*81ad6265SDimitry Andric if (SG.canAddSU(SU, TII)) 281*81ad6265SDimitry Andric SG.add(SU); 282*81ad6265SDimitry Andric } 283*81ad6265SDimitry Andric 284*81ad6265SDimitry Andric for (unsigned i = 0; i < PipelineOrderGroups.size() - 1; i++) { 285*81ad6265SDimitry Andric auto &GroupA = PipelineOrderGroups[i]; 286*81ad6265SDimitry Andric for (unsigned j = i + 1; j < PipelineOrderGroups.size(); j++) { 287*81ad6265SDimitry Andric auto &GroupB = PipelineOrderGroups[j]; 288*81ad6265SDimitry Andric GroupA.link(GroupB); 289*81ad6265SDimitry Andric } 290*81ad6265SDimitry Andric } 291*81ad6265SDimitry Andric } 292*81ad6265SDimitry Andric 293*81ad6265SDimitry Andric void SchedBarrierDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) { 294*81ad6265SDimitry Andric const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel(); 295*81ad6265SDimitry Andric if (!TSchedModel || DAGInstrs->SUnits.empty()) 296*81ad6265SDimitry Andric return; 297*81ad6265SDimitry Andric 298*81ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Applying SchedBarrierDAGMutation...\n"); 299*81ad6265SDimitry Andric 300*81ad6265SDimitry Andric const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>(); 301*81ad6265SDimitry Andric TII = ST.getInstrInfo(); 302*81ad6265SDimitry Andric DAG = static_cast<ScheduleDAGMI *>(DAGInstrs); 303*81ad6265SDimitry Andric for (auto &SU : 
DAG->SUnits) 304*81ad6265SDimitry Andric if (SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER) 305*81ad6265SDimitry Andric addSchedBarrierEdges(SU); 306*81ad6265SDimitry Andric } 307*81ad6265SDimitry Andric 308*81ad6265SDimitry Andric void SchedBarrierDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) { 309*81ad6265SDimitry Andric MachineInstr &MI = *SchedBarrier.getInstr(); 310*81ad6265SDimitry Andric assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER); 311*81ad6265SDimitry Andric // Remove all existing edges from the SCHED_BARRIER that were added due to the 312*81ad6265SDimitry Andric // instruction having side effects. 313*81ad6265SDimitry Andric resetSchedBarrierEdges(SchedBarrier); 314*81ad6265SDimitry Andric SmallVector<SchedGroup *, 4> SchedGroups; 315*81ad6265SDimitry Andric int32_t Mask = MI.getOperand(0).getImm(); 316*81ad6265SDimitry Andric getSchedGroupsFromMask(Mask, SchedGroups); 317*81ad6265SDimitry Andric for (auto SG : SchedGroups) 318*81ad6265SDimitry Andric SG->link( 319*81ad6265SDimitry Andric SchedBarrier, (function_ref<bool(const SUnit *A, const SUnit *B)>)[]( 320*81ad6265SDimitry Andric const SUnit *A, const SUnit *B) { 321*81ad6265SDimitry Andric return A->NodeNum > B->NodeNum; 322*81ad6265SDimitry Andric }); 323*81ad6265SDimitry Andric } 324*81ad6265SDimitry Andric 325*81ad6265SDimitry Andric void SchedBarrierDAGMutation::getSchedGroupsFromMask( 326*81ad6265SDimitry Andric int32_t Mask, SmallVectorImpl<SchedGroup *> &SchedGroups) { 327*81ad6265SDimitry Andric SchedBarrierMasks SBMask = (SchedBarrierMasks)Mask; 328*81ad6265SDimitry Andric // See IntrinsicsAMDGPU.td for an explanation of these masks and their 329*81ad6265SDimitry Andric // mappings. 
330*81ad6265SDimitry Andric // 331*81ad6265SDimitry Andric if ((SBMask & SchedBarrierMasks::VALU) == SchedBarrierMasks::NONE && 332*81ad6265SDimitry Andric (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) { 333*81ad6265SDimitry Andric if (!VALUSchedGroup) { 334*81ad6265SDimitry Andric VALUSchedGroup = std::make_unique<SchedGroup>(isVALUSGMember, None, DAG); 335*81ad6265SDimitry Andric initSchedGroup(VALUSchedGroup.get()); 336*81ad6265SDimitry Andric } 337*81ad6265SDimitry Andric 338*81ad6265SDimitry Andric SchedGroups.push_back(VALUSchedGroup.get()); 339*81ad6265SDimitry Andric } 340*81ad6265SDimitry Andric 341*81ad6265SDimitry Andric if ((SBMask & SchedBarrierMasks::SALU) == SchedBarrierMasks::NONE && 342*81ad6265SDimitry Andric (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) { 343*81ad6265SDimitry Andric if (!SALUSchedGroup) { 344*81ad6265SDimitry Andric SALUSchedGroup = std::make_unique<SchedGroup>(isSALUSGMember, None, DAG); 345*81ad6265SDimitry Andric initSchedGroup(SALUSchedGroup.get()); 346*81ad6265SDimitry Andric } 347*81ad6265SDimitry Andric 348*81ad6265SDimitry Andric SchedGroups.push_back(SALUSchedGroup.get()); 349*81ad6265SDimitry Andric } 350*81ad6265SDimitry Andric 351*81ad6265SDimitry Andric if ((SBMask & SchedBarrierMasks::MFMA) == SchedBarrierMasks::NONE && 352*81ad6265SDimitry Andric (SBMask & SchedBarrierMasks::ALU) == SchedBarrierMasks::NONE) { 353*81ad6265SDimitry Andric if (!MFMASchedGroup) { 354*81ad6265SDimitry Andric MFMASchedGroup = std::make_unique<SchedGroup>(isMFMASGMember, None, DAG); 355*81ad6265SDimitry Andric initSchedGroup(MFMASchedGroup.get()); 356*81ad6265SDimitry Andric } 357*81ad6265SDimitry Andric 358*81ad6265SDimitry Andric SchedGroups.push_back(MFMASchedGroup.get()); 359*81ad6265SDimitry Andric } 360*81ad6265SDimitry Andric 361*81ad6265SDimitry Andric if ((SBMask & SchedBarrierMasks::VMEM_READ) == SchedBarrierMasks::NONE && 362*81ad6265SDimitry Andric (SBMask & SchedBarrierMasks::VMEM) == 
SchedBarrierMasks::NONE) { 363*81ad6265SDimitry Andric if (!VMEMReadSchedGroup) { 364*81ad6265SDimitry Andric VMEMReadSchedGroup = 365*81ad6265SDimitry Andric std::make_unique<SchedGroup>(isVMEMReadSGMember, None, DAG); 366*81ad6265SDimitry Andric initSchedGroup(VMEMReadSchedGroup.get()); 367*81ad6265SDimitry Andric } 368*81ad6265SDimitry Andric 369*81ad6265SDimitry Andric SchedGroups.push_back(VMEMReadSchedGroup.get()); 370*81ad6265SDimitry Andric } 371*81ad6265SDimitry Andric 372*81ad6265SDimitry Andric if ((SBMask & SchedBarrierMasks::VMEM_WRITE) == SchedBarrierMasks::NONE && 373*81ad6265SDimitry Andric (SBMask & SchedBarrierMasks::VMEM) == SchedBarrierMasks::NONE) { 374*81ad6265SDimitry Andric if (!VMEMWriteSchedGroup) { 375*81ad6265SDimitry Andric VMEMWriteSchedGroup = 376*81ad6265SDimitry Andric std::make_unique<SchedGroup>(isVMEMWriteSGMember, None, DAG); 377*81ad6265SDimitry Andric initSchedGroup(VMEMWriteSchedGroup.get()); 378*81ad6265SDimitry Andric } 379*81ad6265SDimitry Andric 380*81ad6265SDimitry Andric SchedGroups.push_back(VMEMWriteSchedGroup.get()); 381*81ad6265SDimitry Andric } 382*81ad6265SDimitry Andric 383*81ad6265SDimitry Andric if ((SBMask & SchedBarrierMasks::DS_READ) == SchedBarrierMasks::NONE && 384*81ad6265SDimitry Andric (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) { 385*81ad6265SDimitry Andric if (!DSReadSchedGroup) { 386*81ad6265SDimitry Andric DSReadSchedGroup = 387*81ad6265SDimitry Andric std::make_unique<SchedGroup>(isDSReadSGMember, None, DAG); 388*81ad6265SDimitry Andric initSchedGroup(DSReadSchedGroup.get()); 389*81ad6265SDimitry Andric } 390*81ad6265SDimitry Andric 391*81ad6265SDimitry Andric SchedGroups.push_back(DSReadSchedGroup.get()); 392*81ad6265SDimitry Andric } 393*81ad6265SDimitry Andric 394*81ad6265SDimitry Andric if ((SBMask & SchedBarrierMasks::DS_WRITE) == SchedBarrierMasks::NONE && 395*81ad6265SDimitry Andric (SBMask & SchedBarrierMasks::DS) == SchedBarrierMasks::NONE) { 396*81ad6265SDimitry Andric if 
(!DSWriteSchedGroup) { 397*81ad6265SDimitry Andric DSWriteSchedGroup = 398*81ad6265SDimitry Andric std::make_unique<SchedGroup>(isDSWriteSGMember, None, DAG); 399*81ad6265SDimitry Andric initSchedGroup(DSWriteSchedGroup.get()); 400*81ad6265SDimitry Andric } 401*81ad6265SDimitry Andric 402*81ad6265SDimitry Andric SchedGroups.push_back(DSWriteSchedGroup.get()); 403*81ad6265SDimitry Andric } 404*81ad6265SDimitry Andric } 405*81ad6265SDimitry Andric 406*81ad6265SDimitry Andric void SchedBarrierDAGMutation::initSchedGroup(SchedGroup *SG) { 407*81ad6265SDimitry Andric assert(SG); 408*81ad6265SDimitry Andric for (auto &SU : DAG->SUnits) 409*81ad6265SDimitry Andric if (SG->canAddSU(SU, TII)) 410*81ad6265SDimitry Andric SG->add(SU); 411*81ad6265SDimitry Andric } 412*81ad6265SDimitry Andric 413*81ad6265SDimitry Andric void SchedBarrierDAGMutation::resetSchedBarrierEdges(SUnit &SU) { 414*81ad6265SDimitry Andric assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER); 415*81ad6265SDimitry Andric for (auto &P : SU.Preds) 416*81ad6265SDimitry Andric SU.removePred(P); 417*81ad6265SDimitry Andric 418*81ad6265SDimitry Andric for (auto &S : SU.Succs) { 419*81ad6265SDimitry Andric for (auto &SP : S.getSUnit()->Preds) { 420*81ad6265SDimitry Andric if (SP.getSUnit() == &SU) { 421*81ad6265SDimitry Andric S.getSUnit()->removePred(SP); 422*81ad6265SDimitry Andric } 423*81ad6265SDimitry Andric } 424*81ad6265SDimitry Andric } 425*81ad6265SDimitry Andric } 426*81ad6265SDimitry Andric 427*81ad6265SDimitry Andric } // namespace 428*81ad6265SDimitry Andric 429*81ad6265SDimitry Andric namespace llvm { 430*81ad6265SDimitry Andric 431*81ad6265SDimitry Andric std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation() { 432*81ad6265SDimitry Andric return EnableIGroupLP ? 
std::make_unique<IGroupLPDAGMutation>() : nullptr; 433*81ad6265SDimitry Andric } 434*81ad6265SDimitry Andric 435*81ad6265SDimitry Andric std::unique_ptr<ScheduleDAGMutation> createSchedBarrierDAGMutation() { 436*81ad6265SDimitry Andric return std::make_unique<SchedBarrierDAGMutation>(); 437*81ad6265SDimitry Andric } 438*81ad6265SDimitry Andric 439*81ad6265SDimitry Andric } // end namespace llvm 440