15ffd83dbSDimitry Andric //===-- SIPreEmitPeephole.cpp ------------------------------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric /// \file 105ffd83dbSDimitry Andric /// This pass performs the peephole optimizations before code emission. 115ffd83dbSDimitry Andric /// 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 145ffd83dbSDimitry Andric #include "AMDGPU.h" 15e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 165ffd83dbSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 175ffd83dbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric using namespace llvm; 205ffd83dbSDimitry Andric 215ffd83dbSDimitry Andric #define DEBUG_TYPE "si-pre-emit-peephole" 225ffd83dbSDimitry Andric 23fe6060f1SDimitry Andric static unsigned SkipThreshold; 24fe6060f1SDimitry Andric 25fe6060f1SDimitry Andric static cl::opt<unsigned, true> SkipThresholdFlag( 26fe6060f1SDimitry Andric "amdgpu-skip-threshold", cl::Hidden, 27fe6060f1SDimitry Andric cl::desc( 28fe6060f1SDimitry Andric "Number of instructions before jumping over divergent control flow"), 29fe6060f1SDimitry Andric cl::location(SkipThreshold), cl::init(12)); 30fe6060f1SDimitry Andric 315ffd83dbSDimitry Andric namespace { 325ffd83dbSDimitry Andric 335ffd83dbSDimitry Andric class SIPreEmitPeephole : public MachineFunctionPass { 345ffd83dbSDimitry Andric private: 355ffd83dbSDimitry Andric const SIInstrInfo *TII = nullptr; 365ffd83dbSDimitry Andric const SIRegisterInfo *TRI = nullptr; 375ffd83dbSDimitry Andric 385ffd83dbSDimitry Andric bool optimizeVccBranch(MachineInstr &MI) const; 395ffd83dbSDimitry Andric bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const; 40fe6060f1SDimitry Andric bool getBlockDestinations(MachineBasicBlock &SrcMBB, 41fe6060f1SDimitry Andric MachineBasicBlock *&TrueMBB, 42fe6060f1SDimitry Andric MachineBasicBlock *&FalseMBB, 43fe6060f1SDimitry Andric SmallVectorImpl<MachineOperand> &Cond); 44fe6060f1SDimitry Andric bool mustRetainExeczBranch(const MachineBasicBlock &From, 45fe6060f1SDimitry Andric const MachineBasicBlock &To) const; 46fe6060f1SDimitry Andric bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); 475ffd83dbSDimitry Andric 485ffd83dbSDimitry Andric public: 495ffd83dbSDimitry Andric static char ID; 505ffd83dbSDimitry Andric 515ffd83dbSDimitry Andric SIPreEmitPeephole() : MachineFunctionPass(ID) { 525ffd83dbSDimitry Andric initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); 535ffd83dbSDimitry Andric } 545ffd83dbSDimitry Andric 555ffd83dbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 565ffd83dbSDimitry Andric }; 575ffd83dbSDimitry Andric 585ffd83dbSDimitry Andric } // End anonymous namespace. 595ffd83dbSDimitry Andric 605ffd83dbSDimitry Andric INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE, 615ffd83dbSDimitry Andric "SI peephole optimizations", false, false) 625ffd83dbSDimitry Andric 635ffd83dbSDimitry Andric char SIPreEmitPeephole::ID = 0; 645ffd83dbSDimitry Andric 655ffd83dbSDimitry Andric char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID; 665ffd83dbSDimitry Andric 675ffd83dbSDimitry Andric bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { 685ffd83dbSDimitry Andric // Match: 695ffd83dbSDimitry Andric // sreg = -1 or 0 705ffd83dbSDimitry Andric // vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg 715ffd83dbSDimitry Andric // S_CBRANCH_VCC[N]Z 725ffd83dbSDimitry Andric // => 735ffd83dbSDimitry Andric // S_CBRANCH_EXEC[N]Z 745ffd83dbSDimitry Andric // We end up with this pattern sometimes after basic block placement. 755ffd83dbSDimitry Andric // It happens while combining a block which assigns -1 or 0 to a saved mask 765ffd83dbSDimitry Andric // and another block which consumes that saved mask and then a branch. 7781ad6265SDimitry Andric // 7881ad6265SDimitry Andric // While searching this also performs the following substitution: 7981ad6265SDimitry Andric // vcc = V_CMP 8081ad6265SDimitry Andric // vcc = S_AND exec, vcc 8181ad6265SDimitry Andric // S_CBRANCH_VCC[N]Z 8281ad6265SDimitry Andric // => 8381ad6265SDimitry Andric // vcc = V_CMP 8481ad6265SDimitry Andric // S_CBRANCH_VCC[N]Z 8581ad6265SDimitry Andric 865ffd83dbSDimitry Andric bool Changed = false; 875ffd83dbSDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 885ffd83dbSDimitry Andric const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>(); 895ffd83dbSDimitry Andric const bool IsWave32 = ST.isWave32(); 905ffd83dbSDimitry Andric const unsigned CondReg = TRI->getVCC(); 915ffd83dbSDimitry Andric const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 925ffd83dbSDimitry Andric const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; 935ffd83dbSDimitry Andric const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64; 94e8d8bef9SDimitry Andric const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 955ffd83dbSDimitry Andric 965ffd83dbSDimitry Andric MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(), 975ffd83dbSDimitry Andric E = MBB.rend(); 985ffd83dbSDimitry Andric bool ReadsCond = false; 995ffd83dbSDimitry Andric unsigned Threshold = 5; 1005ffd83dbSDimitry Andric for (++A; A != E; ++A) { 1015ffd83dbSDimitry Andric if (!--Threshold) 1025ffd83dbSDimitry Andric return false; 1035ffd83dbSDimitry Andric if (A->modifiesRegister(ExecReg, TRI)) 1045ffd83dbSDimitry Andric return false; 1055ffd83dbSDimitry Andric if (A->modifiesRegister(CondReg, TRI)) { 1065ffd83dbSDimitry Andric if (!A->definesRegister(CondReg, TRI) || 1075ffd83dbSDimitry Andric (A->getOpcode() != And && A->getOpcode() != AndN2)) 1085ffd83dbSDimitry Andric return false; 1095ffd83dbSDimitry Andric break; 1105ffd83dbSDimitry Andric } 1115ffd83dbSDimitry Andric ReadsCond |= A->readsRegister(CondReg, TRI); 1125ffd83dbSDimitry Andric } 1135ffd83dbSDimitry Andric if (A == E) 1145ffd83dbSDimitry Andric return false; 1155ffd83dbSDimitry Andric 1165ffd83dbSDimitry Andric MachineOperand &Op1 = A->getOperand(1); 1175ffd83dbSDimitry Andric MachineOperand &Op2 = A->getOperand(2); 1185ffd83dbSDimitry Andric if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) { 1195ffd83dbSDimitry Andric TII->commuteInstruction(*A); 1205ffd83dbSDimitry Andric Changed = true; 1215ffd83dbSDimitry Andric } 1225ffd83dbSDimitry Andric if (Op1.getReg() != ExecReg) 1235ffd83dbSDimitry Andric return Changed; 1245ffd83dbSDimitry Andric if (Op2.isImm() && !(Op2.getImm() == -1 || Op2.getImm() == 0)) 1255ffd83dbSDimitry Andric return Changed; 1265ffd83dbSDimitry Andric 1275ffd83dbSDimitry Andric int64_t MaskValue = 0; 1285ffd83dbSDimitry Andric Register SReg; 1295ffd83dbSDimitry Andric if (Op2.isReg()) { 1305ffd83dbSDimitry Andric SReg = Op2.getReg(); 1315ffd83dbSDimitry Andric auto M = std::next(A); 1325ffd83dbSDimitry Andric bool ReadsSreg = false; 13381ad6265SDimitry Andric bool ModifiesExec = false; 1345ffd83dbSDimitry Andric for (; M != E; ++M) { 1355ffd83dbSDimitry Andric if (M->definesRegister(SReg, TRI)) 1365ffd83dbSDimitry Andric break; 1375ffd83dbSDimitry Andric if (M->modifiesRegister(SReg, TRI)) 1385ffd83dbSDimitry Andric return Changed; 1395ffd83dbSDimitry Andric ReadsSreg |= M->readsRegister(SReg, TRI); 14081ad6265SDimitry Andric ModifiesExec |= M->modifiesRegister(ExecReg, TRI); 1415ffd83dbSDimitry Andric } 14281ad6265SDimitry Andric if (M == E) 14381ad6265SDimitry Andric return Changed; 14481ad6265SDimitry Andric // If SReg is VCC and SReg definition is a VALU comparison. 14581ad6265SDimitry Andric // This means S_AND with EXEC is not required. 14681ad6265SDimitry Andric // Erase the S_AND and return. 14781ad6265SDimitry Andric // Note: isVOPC is used instead of isCompare to catch V_CMP_CLASS 14881ad6265SDimitry Andric if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec && 14981ad6265SDimitry Andric TII->isVOPC(*M)) { 15081ad6265SDimitry Andric A->eraseFromParent(); 15181ad6265SDimitry Andric return true; 15281ad6265SDimitry Andric } 15381ad6265SDimitry Andric if (!M->isMoveImmediate() || !M->getOperand(1).isImm() || 1545ffd83dbSDimitry Andric (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0)) 1555ffd83dbSDimitry Andric return Changed; 1565ffd83dbSDimitry Andric MaskValue = M->getOperand(1).getImm(); 1575ffd83dbSDimitry Andric // First if sreg is only used in the AND instruction fold the immediate 15881ad6265SDimitry Andric // into the AND. 1595ffd83dbSDimitry Andric if (!ReadsSreg && Op2.isKill()) { 1605ffd83dbSDimitry Andric A->getOperand(2).ChangeToImmediate(MaskValue); 1615ffd83dbSDimitry Andric M->eraseFromParent(); 1625ffd83dbSDimitry Andric } 1635ffd83dbSDimitry Andric } else if (Op2.isImm()) { 1645ffd83dbSDimitry Andric MaskValue = Op2.getImm(); 1655ffd83dbSDimitry Andric } else { 1665ffd83dbSDimitry Andric llvm_unreachable("Op2 must be register or immediate"); 1675ffd83dbSDimitry Andric } 1685ffd83dbSDimitry Andric 1695ffd83dbSDimitry Andric // Invert mask for s_andn2 1705ffd83dbSDimitry Andric assert(MaskValue == 0 || MaskValue == -1); 1715ffd83dbSDimitry Andric if (A->getOpcode() == AndN2) 1725ffd83dbSDimitry Andric MaskValue = ~MaskValue; 1735ffd83dbSDimitry Andric 174*0fca6ea1SDimitry Andric if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) { 175e8d8bef9SDimitry Andric if (!MI.killsRegister(CondReg, TRI)) { 176e8d8bef9SDimitry Andric // Replace AND with MOV 177e8d8bef9SDimitry Andric if (MaskValue == 0) { 178e8d8bef9SDimitry Andric BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg) 179e8d8bef9SDimitry Andric .addImm(0); 180e8d8bef9SDimitry Andric } else { 181e8d8bef9SDimitry Andric BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg) 182e8d8bef9SDimitry Andric .addReg(ExecReg); 183e8d8bef9SDimitry Andric } 184e8d8bef9SDimitry Andric } 185e8d8bef9SDimitry Andric // Remove AND instruction 1865ffd83dbSDimitry Andric A->eraseFromParent(); 187e8d8bef9SDimitry Andric } 1885ffd83dbSDimitry Andric 1895ffd83dbSDimitry Andric bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ; 1905ffd83dbSDimitry Andric if (SReg == ExecReg) { 1915ffd83dbSDimitry Andric // EXEC is updated directly 1925ffd83dbSDimitry Andric if (IsVCCZ) { 1935ffd83dbSDimitry Andric MI.eraseFromParent(); 1945ffd83dbSDimitry Andric return true; 1955ffd83dbSDimitry Andric } 1965ffd83dbSDimitry Andric MI.setDesc(TII->get(AMDGPU::S_BRANCH)); 1975ffd83dbSDimitry Andric } else if (IsVCCZ && MaskValue == 0) { 1985ffd83dbSDimitry Andric // Will always branch 199349cc55cSDimitry Andric // Remove all successors shadowed by new unconditional branch 2005ffd83dbSDimitry Andric MachineBasicBlock *Parent = MI.getParent(); 2015ffd83dbSDimitry Andric SmallVector<MachineInstr *, 4> ToRemove; 2025ffd83dbSDimitry Andric bool Found = false; 2035ffd83dbSDimitry Andric for (MachineInstr &Term : Parent->terminators()) { 2045ffd83dbSDimitry Andric if (Found) { 2055ffd83dbSDimitry Andric if (Term.isBranch()) 2065ffd83dbSDimitry Andric ToRemove.push_back(&Term); 2075ffd83dbSDimitry Andric } else { 2085ffd83dbSDimitry Andric Found = Term.isIdenticalTo(MI); 2095ffd83dbSDimitry Andric } 2105ffd83dbSDimitry Andric } 2115ffd83dbSDimitry Andric assert(Found && "conditional branch is not terminator"); 212bdd1243dSDimitry Andric for (auto *BranchMI : ToRemove) { 2135ffd83dbSDimitry Andric MachineOperand &Dst = BranchMI->getOperand(0); 2145ffd83dbSDimitry Andric assert(Dst.isMBB() && "destination is not basic block"); 2155ffd83dbSDimitry Andric Parent->removeSuccessor(Dst.getMBB()); 2165ffd83dbSDimitry Andric BranchMI->eraseFromParent(); 2175ffd83dbSDimitry Andric } 2185ffd83dbSDimitry Andric 2195ffd83dbSDimitry Andric if (MachineBasicBlock *Succ = Parent->getFallThrough()) { 2205ffd83dbSDimitry Andric Parent->removeSuccessor(Succ); 2215ffd83dbSDimitry Andric } 2225ffd83dbSDimitry Andric 2235ffd83dbSDimitry Andric // Rewrite to unconditional branch 2245ffd83dbSDimitry Andric MI.setDesc(TII->get(AMDGPU::S_BRANCH)); 2255ffd83dbSDimitry Andric } else if (!IsVCCZ && MaskValue == 0) { 2265ffd83dbSDimitry Andric // Will never branch 2275ffd83dbSDimitry Andric MachineOperand &Dst = MI.getOperand(0); 2285ffd83dbSDimitry Andric assert(Dst.isMBB() && "destination is not basic block"); 2295ffd83dbSDimitry Andric MI.getParent()->removeSuccessor(Dst.getMBB()); 2305ffd83dbSDimitry Andric MI.eraseFromParent(); 2315ffd83dbSDimitry Andric return true; 2325ffd83dbSDimitry Andric } else if (MaskValue == -1) { 2335ffd83dbSDimitry Andric // Depends only on EXEC 2345ffd83dbSDimitry Andric MI.setDesc( 2355ffd83dbSDimitry Andric TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ)); 2365ffd83dbSDimitry Andric } 2375ffd83dbSDimitry Andric 238*0fca6ea1SDimitry Andric MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, TRI, false /*Kill*/)); 2395ffd83dbSDimitry Andric MI.addImplicitDefUseOperands(*MBB.getParent()); 2405ffd83dbSDimitry Andric 2415ffd83dbSDimitry Andric return true; 2425ffd83dbSDimitry Andric } 2435ffd83dbSDimitry Andric 2445ffd83dbSDimitry Andric bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First, 2455ffd83dbSDimitry Andric MachineInstr &MI) const { 2465ffd83dbSDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 2475ffd83dbSDimitry Andric const MachineFunction &MF = *MBB.getParent(); 2485ffd83dbSDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 2495ffd83dbSDimitry Andric MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::src0); 2505ffd83dbSDimitry Andric Register IdxReg = Idx->isReg() ? Idx->getReg() : Register(); 2515ffd83dbSDimitry Andric SmallVector<MachineInstr *, 4> ToRemove; 2525ffd83dbSDimitry Andric bool IdxOn = true; 2535ffd83dbSDimitry Andric 2545ffd83dbSDimitry Andric if (!MI.isIdenticalTo(First)) 2555ffd83dbSDimitry Andric return false; 2565ffd83dbSDimitry Andric 2575ffd83dbSDimitry Andric // Scan back to find an identical S_SET_GPR_IDX_ON 258fe6060f1SDimitry Andric for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()), 259fe6060f1SDimitry Andric E = MI.getIterator(); 260fe6060f1SDimitry Andric I != E; ++I) { 261fe6060f1SDimitry Andric if (I->isBundle()) 262fe6060f1SDimitry Andric continue; 2635ffd83dbSDimitry Andric switch (I->getOpcode()) { 2645ffd83dbSDimitry Andric case AMDGPU::S_SET_GPR_IDX_MODE: 2655ffd83dbSDimitry Andric return false; 2665ffd83dbSDimitry Andric case AMDGPU::S_SET_GPR_IDX_OFF: 2675ffd83dbSDimitry Andric IdxOn = false; 2685ffd83dbSDimitry Andric ToRemove.push_back(&*I); 2695ffd83dbSDimitry Andric break; 2705ffd83dbSDimitry Andric default: 2715ffd83dbSDimitry Andric if (I->modifiesRegister(AMDGPU::M0, TRI)) 2725ffd83dbSDimitry Andric return false; 2735ffd83dbSDimitry Andric if (IdxReg && I->modifiesRegister(IdxReg, TRI)) 2745ffd83dbSDimitry Andric return false; 2755ffd83dbSDimitry Andric if (llvm::any_of(I->operands(), 2765ffd83dbSDimitry Andric [&MRI, this](const MachineOperand &MO) { 2775ffd83dbSDimitry Andric return MO.isReg() && 2785ffd83dbSDimitry Andric TRI->isVectorRegister(MRI, MO.getReg()); 2795ffd83dbSDimitry Andric })) { 2805ffd83dbSDimitry Andric // The only exception allowed here is another indirect vector move 2815ffd83dbSDimitry Andric // with the same mode. 282349cc55cSDimitry Andric if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write || 283349cc55cSDimitry Andric I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read)) 2845ffd83dbSDimitry Andric return false; 2855ffd83dbSDimitry Andric } 2865ffd83dbSDimitry Andric } 2875ffd83dbSDimitry Andric } 2885ffd83dbSDimitry Andric 289fe6060f1SDimitry Andric MI.eraseFromBundle(); 2905ffd83dbSDimitry Andric for (MachineInstr *RI : ToRemove) 291fe6060f1SDimitry Andric RI->eraseFromBundle(); 292fe6060f1SDimitry Andric return true; 293fe6060f1SDimitry Andric } 294fe6060f1SDimitry Andric 295fe6060f1SDimitry Andric bool SIPreEmitPeephole::getBlockDestinations( 296fe6060f1SDimitry Andric MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB, 297fe6060f1SDimitry Andric MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) { 298fe6060f1SDimitry Andric if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond)) 299fe6060f1SDimitry Andric return false; 300fe6060f1SDimitry Andric 301fe6060f1SDimitry Andric if (!FalseMBB) 302fe6060f1SDimitry Andric FalseMBB = SrcMBB.getNextNode(); 303fe6060f1SDimitry Andric 304fe6060f1SDimitry Andric return true; 305fe6060f1SDimitry Andric } 306fe6060f1SDimitry Andric 307fe6060f1SDimitry Andric bool SIPreEmitPeephole::mustRetainExeczBranch( 308fe6060f1SDimitry Andric const MachineBasicBlock &From, const MachineBasicBlock &To) const { 309fe6060f1SDimitry Andric unsigned NumInstr = 0; 310fe6060f1SDimitry Andric const MachineFunction *MF = From.getParent(); 311fe6060f1SDimitry Andric 312fe6060f1SDimitry Andric for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end(); 313fe6060f1SDimitry Andric MBBI != End && MBBI != ToI; ++MBBI) { 314fe6060f1SDimitry Andric const MachineBasicBlock &MBB = *MBBI; 315fe6060f1SDimitry Andric 3164824e7fdSDimitry Andric for (const MachineInstr &MI : MBB) { 317fe6060f1SDimitry Andric // When a uniform loop is inside non-uniform control flow, the branch 318fe6060f1SDimitry Andric // leaving the loop might never be taken when EXEC = 0. 319fe6060f1SDimitry Andric // Hence we should retain cbranch out of the loop lest it become infinite. 3204824e7fdSDimitry Andric if (MI.isConditionalBranch()) 321fe6060f1SDimitry Andric return true; 322fe6060f1SDimitry Andric 3235f757f3fSDimitry Andric if (MI.isMetaInstruction()) 3245f757f3fSDimitry Andric continue; 3255f757f3fSDimitry Andric 3264824e7fdSDimitry Andric if (TII->hasUnwantedEffectsWhenEXECEmpty(MI)) 327fe6060f1SDimitry Andric return true; 328fe6060f1SDimitry Andric 329fe6060f1SDimitry Andric // These instructions are potentially expensive even if EXEC = 0. 3304824e7fdSDimitry Andric if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) || 331*0fca6ea1SDimitry Andric TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode())) 332fe6060f1SDimitry Andric return true; 333fe6060f1SDimitry Andric 334fe6060f1SDimitry Andric ++NumInstr; 335fe6060f1SDimitry Andric if (NumInstr >= SkipThreshold) 336fe6060f1SDimitry Andric return true; 337fe6060f1SDimitry Andric } 338fe6060f1SDimitry Andric } 339fe6060f1SDimitry Andric 340fe6060f1SDimitry Andric return false; 341fe6060f1SDimitry Andric } 342fe6060f1SDimitry Andric 343fe6060f1SDimitry Andric // Returns true if the skip branch instruction is removed. 344fe6060f1SDimitry Andric bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI, 345fe6060f1SDimitry Andric MachineBasicBlock &SrcMBB) { 346fe6060f1SDimitry Andric MachineBasicBlock *TrueMBB = nullptr; 347fe6060f1SDimitry Andric MachineBasicBlock *FalseMBB = nullptr; 348fe6060f1SDimitry Andric SmallVector<MachineOperand, 1> Cond; 349fe6060f1SDimitry Andric 350fe6060f1SDimitry Andric if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond)) 351fe6060f1SDimitry Andric return false; 352fe6060f1SDimitry Andric 353fe6060f1SDimitry Andric // Consider only the forward branches. 354fe6060f1SDimitry Andric if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) || 355fe6060f1SDimitry Andric mustRetainExeczBranch(*FalseMBB, *TrueMBB)) 356fe6060f1SDimitry Andric return false; 357fe6060f1SDimitry Andric 358fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI); 359fe6060f1SDimitry Andric MI.eraseFromParent(); 360fe6060f1SDimitry Andric SrcMBB.removeSuccessor(TrueMBB); 361fe6060f1SDimitry Andric 3625ffd83dbSDimitry Andric return true; 3635ffd83dbSDimitry Andric } 3645ffd83dbSDimitry Andric 3655ffd83dbSDimitry Andric bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) { 3665ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 3675ffd83dbSDimitry Andric TII = ST.getInstrInfo(); 3685ffd83dbSDimitry Andric TRI = &TII->getRegisterInfo(); 3695ffd83dbSDimitry Andric bool Changed = false; 3705ffd83dbSDimitry Andric 371fe6060f1SDimitry Andric MF.RenumberBlocks(); 372fe6060f1SDimitry Andric 3735ffd83dbSDimitry Andric for (MachineBasicBlock &MBB : MF) { 374fe6060f1SDimitry Andric MachineBasicBlock::iterator TermI = MBB.getFirstTerminator(); 375fe6060f1SDimitry Andric // Check first terminator for branches to optimize 376eaeb601bSDimitry Andric if (TermI != MBB.end()) { 377eaeb601bSDimitry Andric MachineInstr &MI = *TermI; 3785ffd83dbSDimitry Andric switch (MI.getOpcode()) { 3795ffd83dbSDimitry Andric case AMDGPU::S_CBRANCH_VCCZ: 3805ffd83dbSDimitry Andric case AMDGPU::S_CBRANCH_VCCNZ: 3815ffd83dbSDimitry Andric Changed |= optimizeVccBranch(MI); 382fe6060f1SDimitry Andric break; 383fe6060f1SDimitry Andric case AMDGPU::S_CBRANCH_EXECZ: 384fe6060f1SDimitry Andric Changed |= removeExeczBranch(MI, MBB); 385eaeb601bSDimitry Andric break; 386eaeb601bSDimitry Andric } 387eaeb601bSDimitry Andric } 3885ffd83dbSDimitry Andric 3895ffd83dbSDimitry Andric if (!ST.hasVGPRIndexMode()) 3905ffd83dbSDimitry Andric continue; 3915ffd83dbSDimitry Andric 3925ffd83dbSDimitry Andric MachineInstr *SetGPRMI = nullptr; 3935ffd83dbSDimitry Andric const unsigned Threshold = 20; 3945ffd83dbSDimitry Andric unsigned Count = 0; 3955ffd83dbSDimitry Andric // Scan the block for two S_SET_GPR_IDX_ON instructions to see if a 3965ffd83dbSDimitry Andric // second is not needed. Do expensive checks in the optimizeSetGPR() 3975ffd83dbSDimitry Andric // and limit the distance to 20 instructions for compile time purposes. 398fe6060f1SDimitry Andric // Note: this needs to work on bundles as S_SET_GPR_IDX* instructions 399fe6060f1SDimitry Andric // may be bundled with the instructions they modify. 400*0fca6ea1SDimitry Andric for (auto &MI : make_early_inc_range(MBB.instrs())) { 4015ffd83dbSDimitry Andric if (Count == Threshold) 4025ffd83dbSDimitry Andric SetGPRMI = nullptr; 4035ffd83dbSDimitry Andric else 4045ffd83dbSDimitry Andric ++Count; 4055ffd83dbSDimitry Andric 4065ffd83dbSDimitry Andric if (MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON) 4075ffd83dbSDimitry Andric continue; 4085ffd83dbSDimitry Andric 4095ffd83dbSDimitry Andric Count = 0; 4105ffd83dbSDimitry Andric if (!SetGPRMI) { 4115ffd83dbSDimitry Andric SetGPRMI = &MI; 4125ffd83dbSDimitry Andric continue; 4135ffd83dbSDimitry Andric } 4145ffd83dbSDimitry Andric 4155ffd83dbSDimitry Andric if (optimizeSetGPR(*SetGPRMI, MI)) 4165ffd83dbSDimitry Andric Changed = true; 4175ffd83dbSDimitry Andric else 4185ffd83dbSDimitry Andric SetGPRMI = &MI; 4195ffd83dbSDimitry Andric } 4205ffd83dbSDimitry Andric } 4215ffd83dbSDimitry Andric 4225ffd83dbSDimitry Andric return Changed; 4235ffd83dbSDimitry Andric } 424