xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1*fe6060f1SDimitry Andric //===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
2*fe6060f1SDimitry Andric //
3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*fe6060f1SDimitry Andric //
7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8*fe6060f1SDimitry Andric //
9*fe6060f1SDimitry Andric /// \file
10*fe6060f1SDimitry Andric /// This pass mainly lowers early terminate pseudo instructions.
11*fe6060f1SDimitry Andric //
12*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
13*fe6060f1SDimitry Andric 
14*fe6060f1SDimitry Andric #include "AMDGPU.h"
15*fe6060f1SDimitry Andric #include "GCNSubtarget.h"
16*fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17*fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
18*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
19*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
20*fe6060f1SDimitry Andric 
21*fe6060f1SDimitry Andric using namespace llvm;
22*fe6060f1SDimitry Andric 
23*fe6060f1SDimitry Andric #define DEBUG_TYPE "si-late-branch-lowering"
24*fe6060f1SDimitry Andric 
25*fe6060f1SDimitry Andric namespace {
26*fe6060f1SDimitry Andric 
27*fe6060f1SDimitry Andric class SILateBranchLowering : public MachineFunctionPass {
28*fe6060f1SDimitry Andric private:
29*fe6060f1SDimitry Andric   const SIRegisterInfo *TRI = nullptr;
30*fe6060f1SDimitry Andric   const SIInstrInfo *TII = nullptr;
31*fe6060f1SDimitry Andric   MachineDominatorTree *MDT = nullptr;
32*fe6060f1SDimitry Andric 
33*fe6060f1SDimitry Andric   void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
34*fe6060f1SDimitry Andric 
35*fe6060f1SDimitry Andric public:
36*fe6060f1SDimitry Andric   static char ID;
37*fe6060f1SDimitry Andric 
38*fe6060f1SDimitry Andric   unsigned MovOpc;
39*fe6060f1SDimitry Andric   Register ExecReg;
40*fe6060f1SDimitry Andric 
41*fe6060f1SDimitry Andric   SILateBranchLowering() : MachineFunctionPass(ID) {}
42*fe6060f1SDimitry Andric 
43*fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
44*fe6060f1SDimitry Andric 
45*fe6060f1SDimitry Andric   StringRef getPassName() const override {
46*fe6060f1SDimitry Andric     return "SI Final Branch Preparation";
47*fe6060f1SDimitry Andric   }
48*fe6060f1SDimitry Andric 
49*fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
50*fe6060f1SDimitry Andric     AU.addRequired<MachineDominatorTree>();
51*fe6060f1SDimitry Andric     AU.addPreserved<MachineDominatorTree>();
52*fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
53*fe6060f1SDimitry Andric   }
54*fe6060f1SDimitry Andric };
55*fe6060f1SDimitry Andric 
56*fe6060f1SDimitry Andric } // end anonymous namespace
57*fe6060f1SDimitry Andric 
58*fe6060f1SDimitry Andric char SILateBranchLowering::ID = 0;
59*fe6060f1SDimitry Andric 
60*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE,
61*fe6060f1SDimitry Andric                       "SI insert s_cbranch_execz instructions", false, false)
62*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
63*fe6060f1SDimitry Andric INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE,
64*fe6060f1SDimitry Andric                     "SI insert s_cbranch_execz instructions", false, false)
65*fe6060f1SDimitry Andric 
66*fe6060f1SDimitry Andric char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
67*fe6060f1SDimitry Andric 
68*fe6060f1SDimitry Andric static void generateEndPgm(MachineBasicBlock &MBB,
69*fe6060f1SDimitry Andric                            MachineBasicBlock::iterator I, DebugLoc DL,
70*fe6060f1SDimitry Andric                            const SIInstrInfo *TII, MachineFunction &MF) {
71*fe6060f1SDimitry Andric   const Function &F = MF.getFunction();
72*fe6060f1SDimitry Andric   bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
73*fe6060f1SDimitry Andric 
74*fe6060f1SDimitry Andric   // Check if hardware has been configured to expect color or depth exports.
75*fe6060f1SDimitry Andric   bool HasExports =
76*fe6060f1SDimitry Andric       AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F);
77*fe6060f1SDimitry Andric 
78*fe6060f1SDimitry Andric   // Prior to GFX10, hardware always expects at least one export for PS.
79*fe6060f1SDimitry Andric   bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
80*fe6060f1SDimitry Andric 
81*fe6060f1SDimitry Andric   if (IsPS && (HasExports || MustExport)) {
82*fe6060f1SDimitry Andric     // Generate "null export" if hardware is expecting PS to export.
83*fe6060f1SDimitry Andric     BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
84*fe6060f1SDimitry Andric         .addImm(AMDGPU::Exp::ET_NULL)
85*fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
86*fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
87*fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
88*fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
89*fe6060f1SDimitry Andric         .addImm(1)  // vm
90*fe6060f1SDimitry Andric         .addImm(0)  // compr
91*fe6060f1SDimitry Andric         .addImm(0); // en
92*fe6060f1SDimitry Andric   }
93*fe6060f1SDimitry Andric 
94*fe6060f1SDimitry Andric   // s_endpgm
95*fe6060f1SDimitry Andric   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
96*fe6060f1SDimitry Andric }
97*fe6060f1SDimitry Andric 
98*fe6060f1SDimitry Andric static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
99*fe6060f1SDimitry Andric                        MachineDominatorTree *MDT) {
100*fe6060f1SDimitry Andric   MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
101*fe6060f1SDimitry Andric 
102*fe6060f1SDimitry Andric   // Update dominator tree
103*fe6060f1SDimitry Andric   using DomTreeT = DomTreeBase<MachineBasicBlock>;
104*fe6060f1SDimitry Andric   SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
105*fe6060f1SDimitry Andric   for (MachineBasicBlock *Succ : SplitBB->successors()) {
106*fe6060f1SDimitry Andric     DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
107*fe6060f1SDimitry Andric     DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
108*fe6060f1SDimitry Andric   }
109*fe6060f1SDimitry Andric   DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
110*fe6060f1SDimitry Andric   MDT->getBase().applyUpdates(DTUpdates);
111*fe6060f1SDimitry Andric }
112*fe6060f1SDimitry Andric 
113*fe6060f1SDimitry Andric void SILateBranchLowering::earlyTerm(MachineInstr &MI,
114*fe6060f1SDimitry Andric                                      MachineBasicBlock *EarlyExitBlock) {
115*fe6060f1SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
116*fe6060f1SDimitry Andric   const DebugLoc DL = MI.getDebugLoc();
117*fe6060f1SDimitry Andric 
118*fe6060f1SDimitry Andric   auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
119*fe6060f1SDimitry Andric                       .addMBB(EarlyExitBlock);
120*fe6060f1SDimitry Andric   auto Next = std::next(MI.getIterator());
121*fe6060f1SDimitry Andric 
122*fe6060f1SDimitry Andric   if (Next != MBB.end() && !Next->isTerminator())
123*fe6060f1SDimitry Andric     splitBlock(MBB, *BranchMI, MDT);
124*fe6060f1SDimitry Andric 
125*fe6060f1SDimitry Andric   MBB.addSuccessor(EarlyExitBlock);
126*fe6060f1SDimitry Andric   MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
127*fe6060f1SDimitry Andric }
128*fe6060f1SDimitry Andric 
129*fe6060f1SDimitry Andric bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
130*fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
131*fe6060f1SDimitry Andric   TII = ST.getInstrInfo();
132*fe6060f1SDimitry Andric   TRI = &TII->getRegisterInfo();
133*fe6060f1SDimitry Andric   MDT = &getAnalysis<MachineDominatorTree>();
134*fe6060f1SDimitry Andric 
135*fe6060f1SDimitry Andric   MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
136*fe6060f1SDimitry Andric   ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
137*fe6060f1SDimitry Andric 
138*fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> EarlyTermInstrs;
139*fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 1> EpilogInstrs;
140*fe6060f1SDimitry Andric   bool MadeChange = false;
141*fe6060f1SDimitry Andric 
142*fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
143*fe6060f1SDimitry Andric     MachineBasicBlock::iterator I, Next;
144*fe6060f1SDimitry Andric     for (I = MBB.begin(); I != MBB.end(); I = Next) {
145*fe6060f1SDimitry Andric       Next = std::next(I);
146*fe6060f1SDimitry Andric       MachineInstr &MI = *I;
147*fe6060f1SDimitry Andric 
148*fe6060f1SDimitry Andric       switch (MI.getOpcode()) {
149*fe6060f1SDimitry Andric       case AMDGPU::S_BRANCH:
150*fe6060f1SDimitry Andric         // Optimize out branches to the next block.
151*fe6060f1SDimitry Andric         // This only occurs in -O0 when BranchFolding is not executed.
152*fe6060f1SDimitry Andric         if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
153*fe6060f1SDimitry Andric           assert(&MI == &MBB.back());
154*fe6060f1SDimitry Andric           MI.eraseFromParent();
155*fe6060f1SDimitry Andric           MadeChange = true;
156*fe6060f1SDimitry Andric         }
157*fe6060f1SDimitry Andric         break;
158*fe6060f1SDimitry Andric 
159*fe6060f1SDimitry Andric       case AMDGPU::SI_EARLY_TERMINATE_SCC0:
160*fe6060f1SDimitry Andric         EarlyTermInstrs.push_back(&MI);
161*fe6060f1SDimitry Andric         break;
162*fe6060f1SDimitry Andric 
163*fe6060f1SDimitry Andric       case AMDGPU::SI_RETURN_TO_EPILOG:
164*fe6060f1SDimitry Andric         EpilogInstrs.push_back(&MI);
165*fe6060f1SDimitry Andric         break;
166*fe6060f1SDimitry Andric 
167*fe6060f1SDimitry Andric       default:
168*fe6060f1SDimitry Andric         break;
169*fe6060f1SDimitry Andric       }
170*fe6060f1SDimitry Andric     }
171*fe6060f1SDimitry Andric   }
172*fe6060f1SDimitry Andric 
173*fe6060f1SDimitry Andric   // Lower any early exit branches first
174*fe6060f1SDimitry Andric   if (!EarlyTermInstrs.empty()) {
175*fe6060f1SDimitry Andric     MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock();
176*fe6060f1SDimitry Andric     DebugLoc DL;
177*fe6060f1SDimitry Andric 
178*fe6060f1SDimitry Andric     MF.insert(MF.end(), EarlyExitBlock);
179*fe6060f1SDimitry Andric     BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
180*fe6060f1SDimitry Andric             ExecReg)
181*fe6060f1SDimitry Andric         .addImm(0);
182*fe6060f1SDimitry Andric     generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
183*fe6060f1SDimitry Andric 
184*fe6060f1SDimitry Andric     for (MachineInstr *Instr : EarlyTermInstrs) {
185*fe6060f1SDimitry Andric       // Early termination in GS does nothing
186*fe6060f1SDimitry Andric       if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
187*fe6060f1SDimitry Andric         earlyTerm(*Instr, EarlyExitBlock);
188*fe6060f1SDimitry Andric       Instr->eraseFromParent();
189*fe6060f1SDimitry Andric     }
190*fe6060f1SDimitry Andric 
191*fe6060f1SDimitry Andric     EarlyTermInstrs.clear();
192*fe6060f1SDimitry Andric     MadeChange = true;
193*fe6060f1SDimitry Andric   }
194*fe6060f1SDimitry Andric 
195*fe6060f1SDimitry Andric   // Now check return to epilog instructions occur at function end
196*fe6060f1SDimitry Andric   if (!EpilogInstrs.empty()) {
197*fe6060f1SDimitry Andric     MachineBasicBlock *EmptyMBBAtEnd = nullptr;
198*fe6060f1SDimitry Andric     assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
199*fe6060f1SDimitry Andric 
200*fe6060f1SDimitry Andric     // If there are multiple returns to epilog then all will
201*fe6060f1SDimitry Andric     // become jumps to new empty end block.
202*fe6060f1SDimitry Andric     if (EpilogInstrs.size() > 1) {
203*fe6060f1SDimitry Andric       EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
204*fe6060f1SDimitry Andric       MF.insert(MF.end(), EmptyMBBAtEnd);
205*fe6060f1SDimitry Andric     }
206*fe6060f1SDimitry Andric 
207*fe6060f1SDimitry Andric     for (auto MI : EpilogInstrs) {
208*fe6060f1SDimitry Andric       auto MBB = MI->getParent();
209*fe6060f1SDimitry Andric       if (MBB == &MF.back() && MI == &MBB->back())
210*fe6060f1SDimitry Andric         continue;
211*fe6060f1SDimitry Andric 
212*fe6060f1SDimitry Andric       // SI_RETURN_TO_EPILOG is not the last instruction.
213*fe6060f1SDimitry Andric       // Jump to empty block at function end.
214*fe6060f1SDimitry Andric       if (!EmptyMBBAtEnd) {
215*fe6060f1SDimitry Andric         EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
216*fe6060f1SDimitry Andric         MF.insert(MF.end(), EmptyMBBAtEnd);
217*fe6060f1SDimitry Andric       }
218*fe6060f1SDimitry Andric 
219*fe6060f1SDimitry Andric       MBB->addSuccessor(EmptyMBBAtEnd);
220*fe6060f1SDimitry Andric       MDT->getBase().insertEdge(MBB, EmptyMBBAtEnd);
221*fe6060f1SDimitry Andric       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
222*fe6060f1SDimitry Andric           .addMBB(EmptyMBBAtEnd);
223*fe6060f1SDimitry Andric       MI->eraseFromParent();
224*fe6060f1SDimitry Andric       MadeChange = true;
225*fe6060f1SDimitry Andric     }
226*fe6060f1SDimitry Andric 
227*fe6060f1SDimitry Andric     EpilogInstrs.clear();
228*fe6060f1SDimitry Andric   }
229*fe6060f1SDimitry Andric 
230*fe6060f1SDimitry Andric   return MadeChange;
231*fe6060f1SDimitry Andric }
232