1*fe6060f1SDimitry Andric //===-- SILateBranchLowering.cpp - Final preparation of branches ----------===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*fe6060f1SDimitry Andric // 7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8*fe6060f1SDimitry Andric // 9*fe6060f1SDimitry Andric /// \file 10*fe6060f1SDimitry Andric /// This pass mainly lowers early terminate pseudo instructions. 11*fe6060f1SDimitry Andric // 12*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 13*fe6060f1SDimitry Andric 14*fe6060f1SDimitry Andric #include "AMDGPU.h" 15*fe6060f1SDimitry Andric #include "GCNSubtarget.h" 16*fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17*fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h" 18*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 19*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 20*fe6060f1SDimitry Andric 21*fe6060f1SDimitry Andric using namespace llvm; 22*fe6060f1SDimitry Andric 23*fe6060f1SDimitry Andric #define DEBUG_TYPE "si-late-branch-lowering" 24*fe6060f1SDimitry Andric 25*fe6060f1SDimitry Andric namespace { 26*fe6060f1SDimitry Andric 27*fe6060f1SDimitry Andric class SILateBranchLowering : public MachineFunctionPass { 28*fe6060f1SDimitry Andric private: 29*fe6060f1SDimitry Andric const SIRegisterInfo *TRI = nullptr; 30*fe6060f1SDimitry Andric const SIInstrInfo *TII = nullptr; 31*fe6060f1SDimitry Andric MachineDominatorTree *MDT = nullptr; 32*fe6060f1SDimitry Andric 33*fe6060f1SDimitry Andric void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); 34*fe6060f1SDimitry Andric 35*fe6060f1SDimitry Andric public: 36*fe6060f1SDimitry Andric static char ID; 37*fe6060f1SDimitry Andric 38*fe6060f1SDimitry Andric unsigned MovOpc; 39*fe6060f1SDimitry Andric Register ExecReg; 40*fe6060f1SDimitry Andric 41*fe6060f1SDimitry Andric SILateBranchLowering() : MachineFunctionPass(ID) {} 42*fe6060f1SDimitry Andric 43*fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 44*fe6060f1SDimitry Andric 45*fe6060f1SDimitry Andric StringRef getPassName() const override { 46*fe6060f1SDimitry Andric return "SI Final Branch Preparation"; 47*fe6060f1SDimitry Andric } 48*fe6060f1SDimitry Andric 49*fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 50*fe6060f1SDimitry Andric AU.addRequired<MachineDominatorTree>(); 51*fe6060f1SDimitry Andric AU.addPreserved<MachineDominatorTree>(); 52*fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 53*fe6060f1SDimitry Andric } 54*fe6060f1SDimitry Andric }; 55*fe6060f1SDimitry Andric 56*fe6060f1SDimitry Andric } // end anonymous namespace 57*fe6060f1SDimitry Andric 58*fe6060f1SDimitry Andric char SILateBranchLowering::ID = 0; 59*fe6060f1SDimitry Andric 60*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE, 61*fe6060f1SDimitry Andric "SI insert s_cbranch_execz instructions", false, false) 62*fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 63*fe6060f1SDimitry Andric INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE, 64*fe6060f1SDimitry Andric "SI insert s_cbranch_execz instructions", false, false) 65*fe6060f1SDimitry Andric 66*fe6060f1SDimitry Andric char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID; 67*fe6060f1SDimitry Andric 68*fe6060f1SDimitry Andric static void generateEndPgm(MachineBasicBlock &MBB, 69*fe6060f1SDimitry Andric MachineBasicBlock::iterator I, DebugLoc DL, 70*fe6060f1SDimitry Andric const SIInstrInfo *TII, MachineFunction &MF) { 71*fe6060f1SDimitry Andric const Function &F = MF.getFunction(); 72*fe6060f1SDimitry Andric bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS; 73*fe6060f1SDimitry Andric 74*fe6060f1SDimitry Andric // Check if hardware has been configured to expect color or depth exports. 75*fe6060f1SDimitry Andric bool HasExports = 76*fe6060f1SDimitry Andric AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F); 77*fe6060f1SDimitry Andric 78*fe6060f1SDimitry Andric // Prior to GFX10, hardware always expects at least one export for PS. 79*fe6060f1SDimitry Andric bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget()); 80*fe6060f1SDimitry Andric 81*fe6060f1SDimitry Andric if (IsPS && (HasExports || MustExport)) { 82*fe6060f1SDimitry Andric // Generate "null export" if hardware is expecting PS to export. 83*fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE)) 84*fe6060f1SDimitry Andric .addImm(AMDGPU::Exp::ET_NULL) 85*fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 86*fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 87*fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 88*fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 89*fe6060f1SDimitry Andric .addImm(1) // vm 90*fe6060f1SDimitry Andric .addImm(0) // compr 91*fe6060f1SDimitry Andric .addImm(0); // en 92*fe6060f1SDimitry Andric } 93*fe6060f1SDimitry Andric 94*fe6060f1SDimitry Andric // s_endpgm 95*fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0); 96*fe6060f1SDimitry Andric } 97*fe6060f1SDimitry Andric 98*fe6060f1SDimitry Andric static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, 99*fe6060f1SDimitry Andric MachineDominatorTree *MDT) { 100*fe6060f1SDimitry Andric MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true); 101*fe6060f1SDimitry Andric 102*fe6060f1SDimitry Andric // Update dominator tree 103*fe6060f1SDimitry Andric using DomTreeT = DomTreeBase<MachineBasicBlock>; 104*fe6060f1SDimitry Andric SmallVector<DomTreeT::UpdateType, 16> DTUpdates; 105*fe6060f1SDimitry Andric for (MachineBasicBlock *Succ : SplitBB->successors()) { 106*fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ}); 107*fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ}); 108*fe6060f1SDimitry Andric } 109*fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB}); 110*fe6060f1SDimitry Andric MDT->getBase().applyUpdates(DTUpdates); 111*fe6060f1SDimitry Andric } 112*fe6060f1SDimitry Andric 113*fe6060f1SDimitry Andric void SILateBranchLowering::earlyTerm(MachineInstr &MI, 114*fe6060f1SDimitry Andric MachineBasicBlock *EarlyExitBlock) { 115*fe6060f1SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 116*fe6060f1SDimitry Andric const DebugLoc DL = MI.getDebugLoc(); 117*fe6060f1SDimitry Andric 118*fe6060f1SDimitry Andric auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0)) 119*fe6060f1SDimitry Andric .addMBB(EarlyExitBlock); 120*fe6060f1SDimitry Andric auto Next = std::next(MI.getIterator()); 121*fe6060f1SDimitry Andric 122*fe6060f1SDimitry Andric if (Next != MBB.end() && !Next->isTerminator()) 123*fe6060f1SDimitry Andric splitBlock(MBB, *BranchMI, MDT); 124*fe6060f1SDimitry Andric 125*fe6060f1SDimitry Andric MBB.addSuccessor(EarlyExitBlock); 126*fe6060f1SDimitry Andric MDT->getBase().insertEdge(&MBB, EarlyExitBlock); 127*fe6060f1SDimitry Andric } 128*fe6060f1SDimitry Andric 129*fe6060f1SDimitry Andric bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { 130*fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 131*fe6060f1SDimitry Andric TII = ST.getInstrInfo(); 132*fe6060f1SDimitry Andric TRI = &TII->getRegisterInfo(); 133*fe6060f1SDimitry Andric MDT = &getAnalysis<MachineDominatorTree>(); 134*fe6060f1SDimitry Andric 135*fe6060f1SDimitry Andric MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 136*fe6060f1SDimitry Andric ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 137*fe6060f1SDimitry Andric 138*fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> EarlyTermInstrs; 139*fe6060f1SDimitry Andric SmallVector<MachineInstr *, 1> EpilogInstrs; 140*fe6060f1SDimitry Andric bool MadeChange = false; 141*fe6060f1SDimitry Andric 142*fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 143*fe6060f1SDimitry Andric MachineBasicBlock::iterator I, Next; 144*fe6060f1SDimitry Andric for (I = MBB.begin(); I != MBB.end(); I = Next) { 145*fe6060f1SDimitry Andric Next = std::next(I); 146*fe6060f1SDimitry Andric MachineInstr &MI = *I; 147*fe6060f1SDimitry Andric 148*fe6060f1SDimitry Andric switch (MI.getOpcode()) { 149*fe6060f1SDimitry Andric case AMDGPU::S_BRANCH: 150*fe6060f1SDimitry Andric // Optimize out branches to the next block. 151*fe6060f1SDimitry Andric // This only occurs in -O0 when BranchFolding is not executed. 152*fe6060f1SDimitry Andric if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) { 153*fe6060f1SDimitry Andric assert(&MI == &MBB.back()); 154*fe6060f1SDimitry Andric MI.eraseFromParent(); 155*fe6060f1SDimitry Andric MadeChange = true; 156*fe6060f1SDimitry Andric } 157*fe6060f1SDimitry Andric break; 158*fe6060f1SDimitry Andric 159*fe6060f1SDimitry Andric case AMDGPU::SI_EARLY_TERMINATE_SCC0: 160*fe6060f1SDimitry Andric EarlyTermInstrs.push_back(&MI); 161*fe6060f1SDimitry Andric break; 162*fe6060f1SDimitry Andric 163*fe6060f1SDimitry Andric case AMDGPU::SI_RETURN_TO_EPILOG: 164*fe6060f1SDimitry Andric EpilogInstrs.push_back(&MI); 165*fe6060f1SDimitry Andric break; 166*fe6060f1SDimitry Andric 167*fe6060f1SDimitry Andric default: 168*fe6060f1SDimitry Andric break; 169*fe6060f1SDimitry Andric } 170*fe6060f1SDimitry Andric } 171*fe6060f1SDimitry Andric } 172*fe6060f1SDimitry Andric 173*fe6060f1SDimitry Andric // Lower any early exit branches first 174*fe6060f1SDimitry Andric if (!EarlyTermInstrs.empty()) { 175*fe6060f1SDimitry Andric MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock(); 176*fe6060f1SDimitry Andric DebugLoc DL; 177*fe6060f1SDimitry Andric 178*fe6060f1SDimitry Andric MF.insert(MF.end(), EarlyExitBlock); 179*fe6060f1SDimitry Andric BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc), 180*fe6060f1SDimitry Andric ExecReg) 181*fe6060f1SDimitry Andric .addImm(0); 182*fe6060f1SDimitry Andric generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF); 183*fe6060f1SDimitry Andric 184*fe6060f1SDimitry Andric for (MachineInstr *Instr : EarlyTermInstrs) { 185*fe6060f1SDimitry Andric // Early termination in GS does nothing 186*fe6060f1SDimitry Andric if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS) 187*fe6060f1SDimitry Andric earlyTerm(*Instr, EarlyExitBlock); 188*fe6060f1SDimitry Andric Instr->eraseFromParent(); 189*fe6060f1SDimitry Andric } 190*fe6060f1SDimitry Andric 191*fe6060f1SDimitry Andric EarlyTermInstrs.clear(); 192*fe6060f1SDimitry Andric MadeChange = true; 193*fe6060f1SDimitry Andric } 194*fe6060f1SDimitry Andric 195*fe6060f1SDimitry Andric // Now check return to epilog instructions occur at function end 196*fe6060f1SDimitry Andric if (!EpilogInstrs.empty()) { 197*fe6060f1SDimitry Andric MachineBasicBlock *EmptyMBBAtEnd = nullptr; 198*fe6060f1SDimitry Andric assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid()); 199*fe6060f1SDimitry Andric 200*fe6060f1SDimitry Andric // If there are multiple returns to epilog then all will 201*fe6060f1SDimitry Andric // become jumps to new empty end block. 202*fe6060f1SDimitry Andric if (EpilogInstrs.size() > 1) { 203*fe6060f1SDimitry Andric EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); 204*fe6060f1SDimitry Andric MF.insert(MF.end(), EmptyMBBAtEnd); 205*fe6060f1SDimitry Andric } 206*fe6060f1SDimitry Andric 207*fe6060f1SDimitry Andric for (auto MI : EpilogInstrs) { 208*fe6060f1SDimitry Andric auto MBB = MI->getParent(); 209*fe6060f1SDimitry Andric if (MBB == &MF.back() && MI == &MBB->back()) 210*fe6060f1SDimitry Andric continue; 211*fe6060f1SDimitry Andric 212*fe6060f1SDimitry Andric // SI_RETURN_TO_EPILOG is not the last instruction. 213*fe6060f1SDimitry Andric // Jump to empty block at function end. 214*fe6060f1SDimitry Andric if (!EmptyMBBAtEnd) { 215*fe6060f1SDimitry Andric EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); 216*fe6060f1SDimitry Andric MF.insert(MF.end(), EmptyMBBAtEnd); 217*fe6060f1SDimitry Andric } 218*fe6060f1SDimitry Andric 219*fe6060f1SDimitry Andric MBB->addSuccessor(EmptyMBBAtEnd); 220*fe6060f1SDimitry Andric MDT->getBase().insertEdge(MBB, EmptyMBBAtEnd); 221*fe6060f1SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH)) 222*fe6060f1SDimitry Andric .addMBB(EmptyMBBAtEnd); 223*fe6060f1SDimitry Andric MI->eraseFromParent(); 224*fe6060f1SDimitry Andric MadeChange = true; 225*fe6060f1SDimitry Andric } 226*fe6060f1SDimitry Andric 227*fe6060f1SDimitry Andric EpilogInstrs.clear(); 228*fe6060f1SDimitry Andric } 229*fe6060f1SDimitry Andric 230*fe6060f1SDimitry Andric return MadeChange; 231*fe6060f1SDimitry Andric } 232