1fe6060f1SDimitry Andric //===-- SILateBranchLowering.cpp - Final preparation of branches ----------===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric /// \file 10fe6060f1SDimitry Andric /// This pass mainly lowers early terminate pseudo instructions. 11fe6060f1SDimitry Andric // 12fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 13fe6060f1SDimitry Andric 14fe6060f1SDimitry Andric #include "AMDGPU.h" 15fe6060f1SDimitry Andric #include "GCNSubtarget.h" 16fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h" 18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 19fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 20fe6060f1SDimitry Andric 21fe6060f1SDimitry Andric using namespace llvm; 22fe6060f1SDimitry Andric 23fe6060f1SDimitry Andric #define DEBUG_TYPE "si-late-branch-lowering" 24fe6060f1SDimitry Andric 25fe6060f1SDimitry Andric namespace { 26fe6060f1SDimitry Andric 27fe6060f1SDimitry Andric class SILateBranchLowering : public MachineFunctionPass { 28fe6060f1SDimitry Andric private: 29fe6060f1SDimitry Andric const SIRegisterInfo *TRI = nullptr; 30fe6060f1SDimitry Andric const SIInstrInfo *TII = nullptr; 31fe6060f1SDimitry Andric MachineDominatorTree *MDT = nullptr; 32fe6060f1SDimitry Andric 335f757f3fSDimitry Andric void expandChainCall(MachineInstr &MI); 34fe6060f1SDimitry Andric void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); 35fe6060f1SDimitry Andric 36fe6060f1SDimitry Andric public: 37fe6060f1SDimitry Andric static char ID; 38fe6060f1SDimitry Andric 39fe6060f1SDimitry Andric unsigned MovOpc; 40fe6060f1SDimitry Andric Register ExecReg; 41fe6060f1SDimitry Andric 42fe6060f1SDimitry Andric SILateBranchLowering() : MachineFunctionPass(ID) {} 43fe6060f1SDimitry Andric 44fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 45fe6060f1SDimitry Andric 46fe6060f1SDimitry Andric StringRef getPassName() const override { 47fe6060f1SDimitry Andric return "SI Final Branch Preparation"; 48fe6060f1SDimitry Andric } 49fe6060f1SDimitry Andric 50fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 51*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>(); 52*0fca6ea1SDimitry Andric AU.addPreserved<MachineDominatorTreeWrapperPass>(); 53fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 54fe6060f1SDimitry Andric } 55fe6060f1SDimitry Andric }; 56fe6060f1SDimitry Andric 57fe6060f1SDimitry Andric } // end anonymous namespace 58fe6060f1SDimitry Andric 59fe6060f1SDimitry Andric char SILateBranchLowering::ID = 0; 60fe6060f1SDimitry Andric 61fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE, 62fe6060f1SDimitry Andric "SI insert s_cbranch_execz instructions", false, false) 63*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 64fe6060f1SDimitry Andric INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE, 65fe6060f1SDimitry Andric "SI insert s_cbranch_execz instructions", false, false) 66fe6060f1SDimitry Andric 67fe6060f1SDimitry Andric char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID; 68fe6060f1SDimitry Andric 69fe6060f1SDimitry Andric static void generateEndPgm(MachineBasicBlock &MBB, 70fe6060f1SDimitry Andric MachineBasicBlock::iterator I, DebugLoc DL, 71fe6060f1SDimitry Andric const SIInstrInfo *TII, MachineFunction &MF) { 72fe6060f1SDimitry Andric const Function &F = MF.getFunction(); 73fe6060f1SDimitry Andric bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS; 74fe6060f1SDimitry Andric 75fe6060f1SDimitry Andric // Check if hardware has been configured to expect color or depth exports. 7681ad6265SDimitry Andric bool HasColorExports = AMDGPU::getHasColorExport(F); 7781ad6265SDimitry Andric bool HasDepthExports = AMDGPU::getHasDepthExport(F); 7881ad6265SDimitry Andric bool HasExports = HasColorExports || HasDepthExports; 79fe6060f1SDimitry Andric 80fe6060f1SDimitry Andric // Prior to GFX10, hardware always expects at least one export for PS. 81fe6060f1SDimitry Andric bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget()); 82fe6060f1SDimitry Andric 83fe6060f1SDimitry Andric if (IsPS && (HasExports || MustExport)) { 84fe6060f1SDimitry Andric // Generate "null export" if hardware is expecting PS to export. 8581ad6265SDimitry Andric const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>(); 8681ad6265SDimitry Andric int Target = 8781ad6265SDimitry Andric ST.hasNullExportTarget() 8881ad6265SDimitry Andric ? AMDGPU::Exp::ET_NULL 8981ad6265SDimitry Andric : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ); 90fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE)) 9181ad6265SDimitry Andric .addImm(Target) 92fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 93fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 94fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 95fe6060f1SDimitry Andric .addReg(AMDGPU::VGPR0, RegState::Undef) 96fe6060f1SDimitry Andric .addImm(1) // vm 97fe6060f1SDimitry Andric .addImm(0) // compr 98fe6060f1SDimitry Andric .addImm(0); // en 99fe6060f1SDimitry Andric } 100fe6060f1SDimitry Andric 101fe6060f1SDimitry Andric // s_endpgm 102fe6060f1SDimitry Andric BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0); 103fe6060f1SDimitry Andric } 104fe6060f1SDimitry Andric 105fe6060f1SDimitry Andric static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, 106fe6060f1SDimitry Andric MachineDominatorTree *MDT) { 107fe6060f1SDimitry Andric MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true); 108fe6060f1SDimitry Andric 109fe6060f1SDimitry Andric // Update dominator tree 110fe6060f1SDimitry Andric using DomTreeT = DomTreeBase<MachineBasicBlock>; 111fe6060f1SDimitry Andric SmallVector<DomTreeT::UpdateType, 16> DTUpdates; 112fe6060f1SDimitry Andric for (MachineBasicBlock *Succ : SplitBB->successors()) { 113fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ}); 114fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ}); 115fe6060f1SDimitry Andric } 116fe6060f1SDimitry Andric DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB}); 117fe6060f1SDimitry Andric MDT->getBase().applyUpdates(DTUpdates); 118fe6060f1SDimitry Andric } 119fe6060f1SDimitry Andric 1205f757f3fSDimitry Andric void SILateBranchLowering::expandChainCall(MachineInstr &MI) { 1215f757f3fSDimitry Andric // This is a tail call that needs to be expanded into at least 1225f757f3fSDimitry Andric // 2 instructions, one for setting EXEC and one for the actual tail call. 1235f757f3fSDimitry Andric constexpr unsigned ExecIdx = 3; 1245f757f3fSDimitry Andric 1255f757f3fSDimitry Andric BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MovOpc), ExecReg) 1265f757f3fSDimitry Andric ->addOperand(MI.getOperand(ExecIdx)); 1275f757f3fSDimitry Andric MI.removeOperand(ExecIdx); 1285f757f3fSDimitry Andric 1295f757f3fSDimitry Andric MI.setDesc(TII->get(AMDGPU::SI_TCRETURN)); 1305f757f3fSDimitry Andric } 1315f757f3fSDimitry Andric 132fe6060f1SDimitry Andric void SILateBranchLowering::earlyTerm(MachineInstr &MI, 133fe6060f1SDimitry Andric MachineBasicBlock *EarlyExitBlock) { 134fe6060f1SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 135fe6060f1SDimitry Andric const DebugLoc DL = MI.getDebugLoc(); 136fe6060f1SDimitry Andric 137fe6060f1SDimitry Andric auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0)) 138fe6060f1SDimitry Andric .addMBB(EarlyExitBlock); 139fe6060f1SDimitry Andric auto Next = std::next(MI.getIterator()); 140fe6060f1SDimitry Andric 141fe6060f1SDimitry Andric if (Next != MBB.end() && !Next->isTerminator()) 142fe6060f1SDimitry Andric splitBlock(MBB, *BranchMI, MDT); 143fe6060f1SDimitry Andric 144fe6060f1SDimitry Andric MBB.addSuccessor(EarlyExitBlock); 145fe6060f1SDimitry Andric MDT->getBase().insertEdge(&MBB, EarlyExitBlock); 146fe6060f1SDimitry Andric } 147fe6060f1SDimitry Andric 148fe6060f1SDimitry Andric bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { 149fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 150fe6060f1SDimitry Andric TII = ST.getInstrInfo(); 151fe6060f1SDimitry Andric TRI = &TII->getRegisterInfo(); 152*0fca6ea1SDimitry Andric MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 153fe6060f1SDimitry Andric 154fe6060f1SDimitry Andric MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 155fe6060f1SDimitry Andric ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 156fe6060f1SDimitry Andric 157fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> EarlyTermInstrs; 158fe6060f1SDimitry Andric SmallVector<MachineInstr *, 1> EpilogInstrs; 159fe6060f1SDimitry Andric bool MadeChange = false; 160fe6060f1SDimitry Andric 161fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 162349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { 163fe6060f1SDimitry Andric switch (MI.getOpcode()) { 164fe6060f1SDimitry Andric case AMDGPU::S_BRANCH: 165fe6060f1SDimitry Andric // Optimize out branches to the next block. 166fe6060f1SDimitry Andric // This only occurs in -O0 when BranchFolding is not executed. 167fe6060f1SDimitry Andric if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) { 168fe6060f1SDimitry Andric assert(&MI == &MBB.back()); 169fe6060f1SDimitry Andric MI.eraseFromParent(); 170fe6060f1SDimitry Andric MadeChange = true; 171fe6060f1SDimitry Andric } 172fe6060f1SDimitry Andric break; 173fe6060f1SDimitry Andric 1745f757f3fSDimitry Andric case AMDGPU::SI_CS_CHAIN_TC_W32: 1755f757f3fSDimitry Andric case AMDGPU::SI_CS_CHAIN_TC_W64: 1765f757f3fSDimitry Andric expandChainCall(MI); 1775f757f3fSDimitry Andric MadeChange = true; 1785f757f3fSDimitry Andric break; 1795f757f3fSDimitry Andric 180fe6060f1SDimitry Andric case AMDGPU::SI_EARLY_TERMINATE_SCC0: 181fe6060f1SDimitry Andric EarlyTermInstrs.push_back(&MI); 182fe6060f1SDimitry Andric break; 183fe6060f1SDimitry Andric 184fe6060f1SDimitry Andric case AMDGPU::SI_RETURN_TO_EPILOG: 185fe6060f1SDimitry Andric EpilogInstrs.push_back(&MI); 186fe6060f1SDimitry Andric break; 187fe6060f1SDimitry Andric 188fe6060f1SDimitry Andric default: 189fe6060f1SDimitry Andric break; 190fe6060f1SDimitry Andric } 191fe6060f1SDimitry Andric } 192fe6060f1SDimitry Andric } 193fe6060f1SDimitry Andric 194fe6060f1SDimitry Andric // Lower any early exit branches first 195fe6060f1SDimitry Andric if (!EarlyTermInstrs.empty()) { 196fe6060f1SDimitry Andric MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock(); 197fe6060f1SDimitry Andric DebugLoc DL; 198fe6060f1SDimitry Andric 199fe6060f1SDimitry Andric MF.insert(MF.end(), EarlyExitBlock); 200fe6060f1SDimitry Andric BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc), 201fe6060f1SDimitry Andric ExecReg) 202fe6060f1SDimitry Andric .addImm(0); 203fe6060f1SDimitry Andric generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF); 204fe6060f1SDimitry Andric 205fe6060f1SDimitry Andric for (MachineInstr *Instr : EarlyTermInstrs) { 206fe6060f1SDimitry Andric // Early termination in GS does nothing 207fe6060f1SDimitry Andric if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS) 208fe6060f1SDimitry Andric earlyTerm(*Instr, EarlyExitBlock); 209fe6060f1SDimitry Andric Instr->eraseFromParent(); 210fe6060f1SDimitry Andric } 211fe6060f1SDimitry Andric 212fe6060f1SDimitry Andric EarlyTermInstrs.clear(); 213fe6060f1SDimitry Andric MadeChange = true; 214fe6060f1SDimitry Andric } 215fe6060f1SDimitry Andric 216fe6060f1SDimitry Andric // Now check return to epilog instructions occur at function end 217fe6060f1SDimitry Andric if (!EpilogInstrs.empty()) { 218fe6060f1SDimitry Andric MachineBasicBlock *EmptyMBBAtEnd = nullptr; 219fe6060f1SDimitry Andric assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid()); 220fe6060f1SDimitry Andric 221fe6060f1SDimitry Andric // If there are multiple returns to epilog then all will 222fe6060f1SDimitry Andric // become jumps to new empty end block. 223fe6060f1SDimitry Andric if (EpilogInstrs.size() > 1) { 224fe6060f1SDimitry Andric EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); 225fe6060f1SDimitry Andric MF.insert(MF.end(), EmptyMBBAtEnd); 226fe6060f1SDimitry Andric } 227fe6060f1SDimitry Andric 228bdd1243dSDimitry Andric for (auto *MI : EpilogInstrs) { 229fe6060f1SDimitry Andric auto MBB = MI->getParent(); 230fe6060f1SDimitry Andric if (MBB == &MF.back() && MI == &MBB->back()) 231fe6060f1SDimitry Andric continue; 232fe6060f1SDimitry Andric 233fe6060f1SDimitry Andric // SI_RETURN_TO_EPILOG is not the last instruction. 234fe6060f1SDimitry Andric // Jump to empty block at function end. 235fe6060f1SDimitry Andric if (!EmptyMBBAtEnd) { 236fe6060f1SDimitry Andric EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); 237fe6060f1SDimitry Andric MF.insert(MF.end(), EmptyMBBAtEnd); 238fe6060f1SDimitry Andric } 239fe6060f1SDimitry Andric 240fe6060f1SDimitry Andric MBB->addSuccessor(EmptyMBBAtEnd); 241fe6060f1SDimitry Andric MDT->getBase().insertEdge(MBB, EmptyMBBAtEnd); 242fe6060f1SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH)) 243fe6060f1SDimitry Andric .addMBB(EmptyMBBAtEnd); 244fe6060f1SDimitry Andric MI->eraseFromParent(); 245fe6060f1SDimitry Andric MadeChange = true; 246fe6060f1SDimitry Andric } 247fe6060f1SDimitry Andric 248fe6060f1SDimitry Andric EpilogInstrs.clear(); 249fe6060f1SDimitry Andric } 250fe6060f1SDimitry Andric 251fe6060f1SDimitry Andric return MadeChange; 252fe6060f1SDimitry Andric } 253