xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
181ad6265SDimitry Andric //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric //
981ad6265SDimitry Andric /// \file
/// Pass to temporarily raise the wave priority from the start of
/// the shader function until its last VMEM instructions to allow younger
/// waves to issue their VMEM instructions as well.
1381ad6265SDimitry Andric //
1481ad6265SDimitry Andric //===----------------------------------------------------------------------===//
1581ad6265SDimitry Andric 
1681ad6265SDimitry Andric #include "AMDGPU.h"
1781ad6265SDimitry Andric #include "GCNSubtarget.h"
1881ad6265SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1981ad6265SDimitry Andric #include "SIInstrInfo.h"
2081ad6265SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
2181ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
2281ad6265SDimitry Andric #include "llvm/InitializePasses.h"
2381ad6265SDimitry Andric #include "llvm/Support/Allocator.h"
2481ad6265SDimitry Andric 
2581ad6265SDimitry Andric using namespace llvm;
2681ad6265SDimitry Andric 
2781ad6265SDimitry Andric #define DEBUG_TYPE "amdgpu-set-wave-priority"
2881ad6265SDimitry Andric 
29*bdd1243dSDimitry Andric static cl::opt<unsigned> DefaultVALUInstsThreshold(
30*bdd1243dSDimitry Andric     "amdgpu-set-wave-priority-valu-insts-threshold",
31*bdd1243dSDimitry Andric     cl::desc("VALU instruction count threshold for adjusting wave priority"),
32*bdd1243dSDimitry Andric     cl::init(100), cl::Hidden);
33*bdd1243dSDimitry Andric 
3481ad6265SDimitry Andric namespace {
3581ad6265SDimitry Andric 
3681ad6265SDimitry Andric struct MBBInfo {
3781ad6265SDimitry Andric   MBBInfo() = default;
38*bdd1243dSDimitry Andric   unsigned NumVALUInstsAtStart = 0;
3981ad6265SDimitry Andric   bool MayReachVMEMLoad = false;
40*bdd1243dSDimitry Andric   MachineInstr *LastVMEMLoad = nullptr;
4181ad6265SDimitry Andric };
4281ad6265SDimitry Andric 
4381ad6265SDimitry Andric using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
4481ad6265SDimitry Andric 
4581ad6265SDimitry Andric class AMDGPUSetWavePriority : public MachineFunctionPass {
4681ad6265SDimitry Andric public:
4781ad6265SDimitry Andric   static char ID;
4881ad6265SDimitry Andric 
AMDGPUSetWavePriority()4981ad6265SDimitry Andric   AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
5081ad6265SDimitry Andric 
getPassName() const5181ad6265SDimitry Andric   StringRef getPassName() const override { return "Set wave priority"; }
5281ad6265SDimitry Andric 
5381ad6265SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
5481ad6265SDimitry Andric 
5581ad6265SDimitry Andric private:
56*bdd1243dSDimitry Andric   MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
57*bdd1243dSDimitry Andric                                MachineBasicBlock::iterator I,
58*bdd1243dSDimitry Andric                                unsigned priority) const;
5981ad6265SDimitry Andric 
6081ad6265SDimitry Andric   const SIInstrInfo *TII;
6181ad6265SDimitry Andric };
6281ad6265SDimitry Andric 
6381ad6265SDimitry Andric } // End anonymous namespace.
6481ad6265SDimitry Andric 
6581ad6265SDimitry Andric INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
6681ad6265SDimitry Andric                 false)
6781ad6265SDimitry Andric 
6881ad6265SDimitry Andric char AMDGPUSetWavePriority::ID = 0;
6981ad6265SDimitry Andric 
createAMDGPUSetWavePriorityPass()7081ad6265SDimitry Andric FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
7181ad6265SDimitry Andric   return new AMDGPUSetWavePriority();
7281ad6265SDimitry Andric }
7381ad6265SDimitry Andric 
74*bdd1243dSDimitry Andric MachineInstr *
BuildSetprioMI(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,unsigned priority) const75*bdd1243dSDimitry Andric AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
76*bdd1243dSDimitry Andric                                       MachineBasicBlock::iterator I,
7781ad6265SDimitry Andric                                       unsigned priority) const {
78*bdd1243dSDimitry Andric   return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO))
79*bdd1243dSDimitry Andric       .addImm(priority);
8081ad6265SDimitry Andric }
8181ad6265SDimitry Andric 
8281ad6265SDimitry Andric // Checks that for every predecessor Pred that can reach a VMEM load,
8381ad6265SDimitry Andric // none of Pred's successors can reach a VMEM load.
CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock & MBB,MBBInfoSet & MBBInfos)8481ad6265SDimitry Andric static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
8581ad6265SDimitry Andric                                                    MBBInfoSet &MBBInfos) {
8681ad6265SDimitry Andric   for (const MachineBasicBlock *Pred : MBB.predecessors()) {
8781ad6265SDimitry Andric     if (!MBBInfos[Pred].MayReachVMEMLoad)
8881ad6265SDimitry Andric       continue;
8981ad6265SDimitry Andric     for (const MachineBasicBlock *Succ : Pred->successors()) {
9081ad6265SDimitry Andric       if (MBBInfos[Succ].MayReachVMEMLoad)
9181ad6265SDimitry Andric         return false;
9281ad6265SDimitry Andric     }
9381ad6265SDimitry Andric   }
9481ad6265SDimitry Andric   return true;
9581ad6265SDimitry Andric }
9681ad6265SDimitry Andric 
isVMEMLoad(const MachineInstr & MI)9781ad6265SDimitry Andric static bool isVMEMLoad(const MachineInstr &MI) {
9881ad6265SDimitry Andric   return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
9981ad6265SDimitry Andric }
10081ad6265SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)10181ad6265SDimitry Andric bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
10281ad6265SDimitry Andric   const unsigned HighPriority = 3;
10381ad6265SDimitry Andric   const unsigned LowPriority = 0;
10481ad6265SDimitry Andric 
10581ad6265SDimitry Andric   Function &F = MF.getFunction();
10681ad6265SDimitry Andric   if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
10781ad6265SDimitry Andric     return false;
10881ad6265SDimitry Andric 
10981ad6265SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
11081ad6265SDimitry Andric   TII = ST.getInstrInfo();
11181ad6265SDimitry Andric 
112*bdd1243dSDimitry Andric   unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
113*bdd1243dSDimitry Andric   Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");
114*bdd1243dSDimitry Andric   if (A.isValid())
115*bdd1243dSDimitry Andric     A.getValueAsString().getAsInteger(0, VALUInstsThreshold);
116*bdd1243dSDimitry Andric 
117*bdd1243dSDimitry Andric   // Find VMEM loads that may be executed before long-enough sequences of
118*bdd1243dSDimitry Andric   // VALU instructions. We currently assume that backedges/loops, branch
119*bdd1243dSDimitry Andric   // probabilities and other details can be ignored, so we essentially
120*bdd1243dSDimitry Andric   // determine the largest number of VALU instructions along every
121*bdd1243dSDimitry Andric   // possible path from the start of the function that may potentially be
122*bdd1243dSDimitry Andric   // executed provided no backedge is ever taken.
12381ad6265SDimitry Andric   MBBInfoSet MBBInfos;
124*bdd1243dSDimitry Andric   for (MachineBasicBlock *MBB : post_order(&MF)) {
125*bdd1243dSDimitry Andric     bool AtStart = true;
126*bdd1243dSDimitry Andric     unsigned MaxNumVALUInstsInMiddle = 0;
127*bdd1243dSDimitry Andric     unsigned NumVALUInstsAtEnd = 0;
128*bdd1243dSDimitry Andric     for (MachineInstr &MI : *MBB) {
129*bdd1243dSDimitry Andric       if (isVMEMLoad(MI)) {
130*bdd1243dSDimitry Andric         AtStart = false;
131*bdd1243dSDimitry Andric         MBBInfo &Info = MBBInfos[MBB];
132*bdd1243dSDimitry Andric         Info.NumVALUInstsAtStart = 0;
133*bdd1243dSDimitry Andric         MaxNumVALUInstsInMiddle = 0;
134*bdd1243dSDimitry Andric         NumVALUInstsAtEnd = 0;
135*bdd1243dSDimitry Andric         Info.LastVMEMLoad = &MI;
136*bdd1243dSDimitry Andric       } else if (SIInstrInfo::isDS(MI)) {
137*bdd1243dSDimitry Andric         AtStart = false;
138*bdd1243dSDimitry Andric         MaxNumVALUInstsInMiddle =
139*bdd1243dSDimitry Andric             std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
140*bdd1243dSDimitry Andric         NumVALUInstsAtEnd = 0;
141*bdd1243dSDimitry Andric       } else if (SIInstrInfo::isVALU(MI)) {
142*bdd1243dSDimitry Andric         if (AtStart)
143*bdd1243dSDimitry Andric           ++MBBInfos[MBB].NumVALUInstsAtStart;
144*bdd1243dSDimitry Andric         ++NumVALUInstsAtEnd;
145*bdd1243dSDimitry Andric       }
14681ad6265SDimitry Andric     }
14781ad6265SDimitry Andric 
148*bdd1243dSDimitry Andric     bool SuccsMayReachVMEMLoad = false;
149*bdd1243dSDimitry Andric     unsigned NumFollowingVALUInsts = 0;
150*bdd1243dSDimitry Andric     for (const MachineBasicBlock *Succ : MBB->successors()) {
151*bdd1243dSDimitry Andric       SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;
152*bdd1243dSDimitry Andric       NumFollowingVALUInsts =
153*bdd1243dSDimitry Andric           std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart);
15481ad6265SDimitry Andric     }
155*bdd1243dSDimitry Andric     MBBInfo &Info = MBBInfos[MBB];
156*bdd1243dSDimitry Andric     if (AtStart)
157*bdd1243dSDimitry Andric       Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
158*bdd1243dSDimitry Andric     NumVALUInstsAtEnd += NumFollowingVALUInsts;
159*bdd1243dSDimitry Andric 
160*bdd1243dSDimitry Andric     unsigned MaxNumVALUInsts =
161*bdd1243dSDimitry Andric         std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
162*bdd1243dSDimitry Andric     Info.MayReachVMEMLoad =
163*bdd1243dSDimitry Andric         SuccsMayReachVMEMLoad ||
164*bdd1243dSDimitry Andric         (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
16581ad6265SDimitry Andric   }
16681ad6265SDimitry Andric 
16781ad6265SDimitry Andric   MachineBasicBlock &Entry = MF.front();
16881ad6265SDimitry Andric   if (!MBBInfos[&Entry].MayReachVMEMLoad)
16981ad6265SDimitry Andric     return false;
17081ad6265SDimitry Andric 
17181ad6265SDimitry Andric   // Raise the priority at the beginning of the shader.
17281ad6265SDimitry Andric   MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
17381ad6265SDimitry Andric   while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
17481ad6265SDimitry Andric     ++I;
175*bdd1243dSDimitry Andric   BuildSetprioMI(Entry, I, HighPriority);
17681ad6265SDimitry Andric 
17781ad6265SDimitry Andric   // Lower the priority on edges where control leaves blocks from which
178*bdd1243dSDimitry Andric   // the VMEM loads are reachable.
17981ad6265SDimitry Andric   SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
18081ad6265SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
18181ad6265SDimitry Andric     if (MBBInfos[&MBB].MayReachVMEMLoad) {
18281ad6265SDimitry Andric       if (MBB.succ_empty())
18381ad6265SDimitry Andric         PriorityLoweringBlocks.insert(&MBB);
18481ad6265SDimitry Andric       continue;
18581ad6265SDimitry Andric     }
18681ad6265SDimitry Andric 
18781ad6265SDimitry Andric     if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
18881ad6265SDimitry Andric       for (MachineBasicBlock *Pred : MBB.predecessors()) {
18981ad6265SDimitry Andric         if (MBBInfos[Pred].MayReachVMEMLoad)
19081ad6265SDimitry Andric           PriorityLoweringBlocks.insert(Pred);
19181ad6265SDimitry Andric       }
19281ad6265SDimitry Andric       continue;
19381ad6265SDimitry Andric     }
19481ad6265SDimitry Andric 
19581ad6265SDimitry Andric     // Where lowering the priority in predecessors is not possible, the
19681ad6265SDimitry Andric     // block receiving control either was not part of a loop in the first
19781ad6265SDimitry Andric     // place or the loop simplification/canonicalization pass should have
19881ad6265SDimitry Andric     // already tried to split the edge and insert a preheader, and if for
19981ad6265SDimitry Andric     // whatever reason it failed to do so, then this leaves us with the
20081ad6265SDimitry Andric     // only option of lowering the priority within the loop.
20181ad6265SDimitry Andric     PriorityLoweringBlocks.insert(&MBB);
20281ad6265SDimitry Andric   }
20381ad6265SDimitry Andric 
20481ad6265SDimitry Andric   for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
205*bdd1243dSDimitry Andric     BuildSetprioMI(
206*bdd1243dSDimitry Andric         *MBB,
207*bdd1243dSDimitry Andric         MBBInfos[MBB].LastVMEMLoad
208*bdd1243dSDimitry Andric             ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))
209*bdd1243dSDimitry Andric             : MBB->begin(),
210*bdd1243dSDimitry Andric         LowPriority);
21181ad6265SDimitry Andric   }
21281ad6265SDimitry Andric 
21381ad6265SDimitry Andric   return true;
21481ad6265SDimitry Andric }
215