10b57cec5SDimitry Andric //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass performs peephole optimizations to cleanup ugly code sequences at
100b57cec5SDimitry Andric // MachineInstruction layer.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // Currently, there are two optimizations implemented:
130b57cec5SDimitry Andric // - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
140b57cec5SDimitry Andric // zero extend 32-bit subregisters to 64-bit registers, if the compiler
150b57cec5SDimitry Andric // could prove the subregisters is defined by 32-bit operations in which
160b57cec5SDimitry Andric // case the upper half of the underlying 64-bit registers were zeroed
170b57cec5SDimitry Andric // implicitly.
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // - One post-RA PreEmit pass to do final cleanup on some redundant
200b57cec5SDimitry Andric // instructions generated due to bad RA on subregister.
210b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
220b57cec5SDimitry Andric
230b57cec5SDimitry Andric #include "BPF.h"
240b57cec5SDimitry Andric #include "BPFInstrInfo.h"
250b57cec5SDimitry Andric #include "BPFTargetMachine.h"
260b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
2781ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
308bcb0991SDimitry Andric #include "llvm/Support/Debug.h"
315ffd83dbSDimitry Andric #include <set>
320b57cec5SDimitry Andric
330b57cec5SDimitry Andric using namespace llvm;
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric #define DEBUG_TYPE "bpf-mi-zext-elim"
360b57cec5SDimitry Andric
37*5f757f3fSDimitry Andric static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
38*5f757f3fSDimitry Andric cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));
39*5f757f3fSDimitry Andric
400b57cec5SDimitry Andric STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
410b57cec5SDimitry Andric
420b57cec5SDimitry Andric namespace {
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric struct BPFMIPeephole : public MachineFunctionPass {
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric static char ID;
470b57cec5SDimitry Andric const BPFInstrInfo *TII;
480b57cec5SDimitry Andric MachineFunction *MF;
490b57cec5SDimitry Andric MachineRegisterInfo *MRI;
500b57cec5SDimitry Andric
BPFMIPeephole__anon0fe2a75f0111::BPFMIPeephole510b57cec5SDimitry Andric BPFMIPeephole() : MachineFunctionPass(ID) {
520b57cec5SDimitry Andric initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
530b57cec5SDimitry Andric }
540b57cec5SDimitry Andric
550b57cec5SDimitry Andric private:
560b57cec5SDimitry Andric // Initialize class variables.
570b57cec5SDimitry Andric void initialize(MachineFunction &MFParm);
580b57cec5SDimitry Andric
59480093f4SDimitry Andric bool isCopyFrom32Def(MachineInstr *CopyMI);
60480093f4SDimitry Andric bool isInsnFrom32Def(MachineInstr *DefInsn);
61480093f4SDimitry Andric bool isPhiFrom32Def(MachineInstr *MovMI);
620b57cec5SDimitry Andric bool isMovFrom32Def(MachineInstr *MovMI);
6304eeddc0SDimitry Andric bool eliminateZExtSeq();
6404eeddc0SDimitry Andric bool eliminateZExt();
650b57cec5SDimitry Andric
66480093f4SDimitry Andric std::set<MachineInstr *> PhiInsns;
67480093f4SDimitry Andric
680b57cec5SDimitry Andric public:
690b57cec5SDimitry Andric
700b57cec5SDimitry Andric // Main entry point for this pass.
runOnMachineFunction__anon0fe2a75f0111::BPFMIPeephole710b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override {
720b57cec5SDimitry Andric if (skipFunction(MF.getFunction()))
730b57cec5SDimitry Andric return false;
740b57cec5SDimitry Andric
750b57cec5SDimitry Andric initialize(MF);
760b57cec5SDimitry Andric
775ffd83dbSDimitry Andric // First try to eliminate (zext, lshift, rshift) and then
785ffd83dbSDimitry Andric // try to eliminate zext.
795ffd83dbSDimitry Andric bool ZExtSeqExist, ZExtExist;
805ffd83dbSDimitry Andric ZExtSeqExist = eliminateZExtSeq();
815ffd83dbSDimitry Andric ZExtExist = eliminateZExt();
825ffd83dbSDimitry Andric return ZExtSeqExist || ZExtExist;
830b57cec5SDimitry Andric }
840b57cec5SDimitry Andric };
850b57cec5SDimitry Andric
860b57cec5SDimitry Andric // Initialize class variables.
initialize(MachineFunction & MFParm)870b57cec5SDimitry Andric void BPFMIPeephole::initialize(MachineFunction &MFParm) {
880b57cec5SDimitry Andric MF = &MFParm;
890b57cec5SDimitry Andric MRI = &MF->getRegInfo();
900b57cec5SDimitry Andric TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
918bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric
isCopyFrom32Def(MachineInstr * CopyMI)94480093f4SDimitry Andric bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
95480093f4SDimitry Andric {
96480093f4SDimitry Andric MachineOperand &opnd = CopyMI->getOperand(1);
97480093f4SDimitry Andric
98480093f4SDimitry Andric if (!opnd.isReg())
99480093f4SDimitry Andric return false;
100480093f4SDimitry Andric
101480093f4SDimitry Andric // Return false if getting value from a 32bit physical register.
102480093f4SDimitry Andric // Most likely, this physical register is aliased to
103480093f4SDimitry Andric // function call return value or current function parameters.
104480093f4SDimitry Andric Register Reg = opnd.getReg();
105bdd1243dSDimitry Andric if (!Reg.isVirtual())
106480093f4SDimitry Andric return false;
107480093f4SDimitry Andric
108480093f4SDimitry Andric if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
109480093f4SDimitry Andric return false;
110480093f4SDimitry Andric
111480093f4SDimitry Andric MachineInstr *DefInsn = MRI->getVRegDef(Reg);
112480093f4SDimitry Andric if (!isInsnFrom32Def(DefInsn))
113480093f4SDimitry Andric return false;
114480093f4SDimitry Andric
115480093f4SDimitry Andric return true;
116480093f4SDimitry Andric }
117480093f4SDimitry Andric
isPhiFrom32Def(MachineInstr * PhiMI)118480093f4SDimitry Andric bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
119480093f4SDimitry Andric {
120480093f4SDimitry Andric for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
121480093f4SDimitry Andric MachineOperand &opnd = PhiMI->getOperand(i);
122480093f4SDimitry Andric
123480093f4SDimitry Andric if (!opnd.isReg())
124480093f4SDimitry Andric return false;
125480093f4SDimitry Andric
126480093f4SDimitry Andric MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
127480093f4SDimitry Andric if (!PhiDef)
128480093f4SDimitry Andric return false;
129480093f4SDimitry Andric if (PhiDef->isPHI()) {
13081ad6265SDimitry Andric if (!PhiInsns.insert(PhiDef).second)
131480093f4SDimitry Andric return false;
132480093f4SDimitry Andric if (!isPhiFrom32Def(PhiDef))
133480093f4SDimitry Andric return false;
134480093f4SDimitry Andric }
135480093f4SDimitry Andric if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
136480093f4SDimitry Andric return false;
137480093f4SDimitry Andric }
138480093f4SDimitry Andric
139480093f4SDimitry Andric return true;
140480093f4SDimitry Andric }
141480093f4SDimitry Andric
142480093f4SDimitry Andric // The \p DefInsn instruction defines a virtual register.
isInsnFrom32Def(MachineInstr * DefInsn)143480093f4SDimitry Andric bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
144480093f4SDimitry Andric {
145480093f4SDimitry Andric if (!DefInsn)
146480093f4SDimitry Andric return false;
147480093f4SDimitry Andric
148480093f4SDimitry Andric if (DefInsn->isPHI()) {
14981ad6265SDimitry Andric if (!PhiInsns.insert(DefInsn).second)
150480093f4SDimitry Andric return false;
151480093f4SDimitry Andric if (!isPhiFrom32Def(DefInsn))
152480093f4SDimitry Andric return false;
153480093f4SDimitry Andric } else if (DefInsn->getOpcode() == BPF::COPY) {
154480093f4SDimitry Andric if (!isCopyFrom32Def(DefInsn))
155480093f4SDimitry Andric return false;
156480093f4SDimitry Andric }
157480093f4SDimitry Andric
158480093f4SDimitry Andric return true;
159480093f4SDimitry Andric }
160480093f4SDimitry Andric
isMovFrom32Def(MachineInstr * MovMI)1610b57cec5SDimitry Andric bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
1620b57cec5SDimitry Andric {
1630b57cec5SDimitry Andric MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
1640b57cec5SDimitry Andric
1650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Def of Mov Src:");
1660b57cec5SDimitry Andric LLVM_DEBUG(DefInsn->dump());
1670b57cec5SDimitry Andric
168480093f4SDimitry Andric PhiInsns.clear();
169480093f4SDimitry Andric if (!isInsnFrom32Def(DefInsn))
1700b57cec5SDimitry Andric return false;
1710b57cec5SDimitry Andric
1720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
1730b57cec5SDimitry Andric
1740b57cec5SDimitry Andric return true;
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric
eliminateZExtSeq()17704eeddc0SDimitry Andric bool BPFMIPeephole::eliminateZExtSeq() {
1780b57cec5SDimitry Andric MachineInstr* ToErase = nullptr;
1790b57cec5SDimitry Andric bool Eliminated = false;
1800b57cec5SDimitry Andric
1810b57cec5SDimitry Andric for (MachineBasicBlock &MBB : *MF) {
1820b57cec5SDimitry Andric for (MachineInstr &MI : MBB) {
1830b57cec5SDimitry Andric // If the previous instruction was marked for elimination, remove it now.
1840b57cec5SDimitry Andric if (ToErase) {
1850b57cec5SDimitry Andric ToErase->eraseFromParent();
1860b57cec5SDimitry Andric ToErase = nullptr;
1870b57cec5SDimitry Andric }
1880b57cec5SDimitry Andric
1890b57cec5SDimitry Andric // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
1900b57cec5SDimitry Andric //
1910b57cec5SDimitry Andric // MOV_32_64 rB, wA
1920b57cec5SDimitry Andric // SLL_ri rB, rB, 32
1930b57cec5SDimitry Andric // SRL_ri rB, rB, 32
1940b57cec5SDimitry Andric if (MI.getOpcode() == BPF::SRL_ri &&
1950b57cec5SDimitry Andric MI.getOperand(2).getImm() == 32) {
1968bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg();
1978bcb0991SDimitry Andric Register ShfReg = MI.getOperand(1).getReg();
1980b57cec5SDimitry Andric MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Starting SRL found:");
2010b57cec5SDimitry Andric LLVM_DEBUG(MI.dump());
2020b57cec5SDimitry Andric
2030b57cec5SDimitry Andric if (!SllMI ||
2040b57cec5SDimitry Andric SllMI->isPHI() ||
2050b57cec5SDimitry Andric SllMI->getOpcode() != BPF::SLL_ri ||
2060b57cec5SDimitry Andric SllMI->getOperand(2).getImm() != 32)
2070b57cec5SDimitry Andric continue;
2080b57cec5SDimitry Andric
2090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " SLL found:");
2100b57cec5SDimitry Andric LLVM_DEBUG(SllMI->dump());
2110b57cec5SDimitry Andric
2120b57cec5SDimitry Andric MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
2130b57cec5SDimitry Andric if (!MovMI ||
2140b57cec5SDimitry Andric MovMI->isPHI() ||
2150b57cec5SDimitry Andric MovMI->getOpcode() != BPF::MOV_32_64)
2160b57cec5SDimitry Andric continue;
2170b57cec5SDimitry Andric
2180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Type cast Mov found:");
2190b57cec5SDimitry Andric LLVM_DEBUG(MovMI->dump());
2200b57cec5SDimitry Andric
2218bcb0991SDimitry Andric Register SubReg = MovMI->getOperand(1).getReg();
2220b57cec5SDimitry Andric if (!isMovFrom32Def(MovMI)) {
2230b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
2240b57cec5SDimitry Andric << " One ZExt elim sequence failed qualifying elim.\n");
2250b57cec5SDimitry Andric continue;
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric
2280b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
2290b57cec5SDimitry Andric .addImm(0).addReg(SubReg).addImm(BPF::sub_32);
2300b57cec5SDimitry Andric
2310b57cec5SDimitry Andric SllMI->eraseFromParent();
2320b57cec5SDimitry Andric MovMI->eraseFromParent();
2330b57cec5SDimitry Andric // MI is the right shift, we can't erase it in it's own iteration.
2340b57cec5SDimitry Andric // Mark it to ToErase, and erase in the next iteration.
2350b57cec5SDimitry Andric ToErase = &MI;
2360b57cec5SDimitry Andric ZExtElemNum++;
2370b57cec5SDimitry Andric Eliminated = true;
2380b57cec5SDimitry Andric }
2390b57cec5SDimitry Andric }
2400b57cec5SDimitry Andric }
2410b57cec5SDimitry Andric
2420b57cec5SDimitry Andric return Eliminated;
2430b57cec5SDimitry Andric }
2440b57cec5SDimitry Andric
eliminateZExt()24504eeddc0SDimitry Andric bool BPFMIPeephole::eliminateZExt() {
2465ffd83dbSDimitry Andric MachineInstr* ToErase = nullptr;
2475ffd83dbSDimitry Andric bool Eliminated = false;
2485ffd83dbSDimitry Andric
2495ffd83dbSDimitry Andric for (MachineBasicBlock &MBB : *MF) {
2505ffd83dbSDimitry Andric for (MachineInstr &MI : MBB) {
2515ffd83dbSDimitry Andric // If the previous instruction was marked for elimination, remove it now.
2525ffd83dbSDimitry Andric if (ToErase) {
2535ffd83dbSDimitry Andric ToErase->eraseFromParent();
2545ffd83dbSDimitry Andric ToErase = nullptr;
2555ffd83dbSDimitry Andric }
2565ffd83dbSDimitry Andric
2575ffd83dbSDimitry Andric if (MI.getOpcode() != BPF::MOV_32_64)
2585ffd83dbSDimitry Andric continue;
2595ffd83dbSDimitry Andric
2605ffd83dbSDimitry Andric // Eliminate MOV_32_64 if possible.
2615ffd83dbSDimitry Andric // MOV_32_64 rA, wB
2625ffd83dbSDimitry Andric //
2635ffd83dbSDimitry Andric // If wB has been zero extended, replace it with a SUBREG_TO_REG.
2645ffd83dbSDimitry Andric // This is to workaround BPF programs where pkt->{data, data_end}
2655ffd83dbSDimitry Andric // is encoded as u32, but actually the verifier populates them
2665ffd83dbSDimitry Andric // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
2675ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
2685ffd83dbSDimitry Andric LLVM_DEBUG(MI.dump());
2695ffd83dbSDimitry Andric
2705ffd83dbSDimitry Andric if (!isMovFrom32Def(&MI))
2715ffd83dbSDimitry Andric continue;
2725ffd83dbSDimitry Andric
2735ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
2745ffd83dbSDimitry Andric
2755ffd83dbSDimitry Andric Register dst = MI.getOperand(0).getReg();
2765ffd83dbSDimitry Andric Register src = MI.getOperand(1).getReg();
2775ffd83dbSDimitry Andric
2785ffd83dbSDimitry Andric // Build a SUBREG_TO_REG instruction.
2795ffd83dbSDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
2805ffd83dbSDimitry Andric .addImm(0).addReg(src).addImm(BPF::sub_32);
2815ffd83dbSDimitry Andric
2825ffd83dbSDimitry Andric ToErase = &MI;
2835ffd83dbSDimitry Andric Eliminated = true;
2845ffd83dbSDimitry Andric }
2855ffd83dbSDimitry Andric }
2865ffd83dbSDimitry Andric
2875ffd83dbSDimitry Andric return Eliminated;
2885ffd83dbSDimitry Andric }
2895ffd83dbSDimitry Andric
2900b57cec5SDimitry Andric } // end default namespace
2910b57cec5SDimitry Andric
2920b57cec5SDimitry Andric INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
2938bcb0991SDimitry Andric "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
2948bcb0991SDimitry Andric false, false)
2950b57cec5SDimitry Andric
2960b57cec5SDimitry Andric char BPFMIPeephole::ID = 0;
createBPFMIPeepholePass()2970b57cec5SDimitry Andric FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
2980b57cec5SDimitry Andric
2990b57cec5SDimitry Andric STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
3000b57cec5SDimitry Andric
3010b57cec5SDimitry Andric namespace {
3020b57cec5SDimitry Andric
3030b57cec5SDimitry Andric struct BPFMIPreEmitPeephole : public MachineFunctionPass {
3040b57cec5SDimitry Andric
3050b57cec5SDimitry Andric static char ID;
3060b57cec5SDimitry Andric MachineFunction *MF;
3070b57cec5SDimitry Andric const TargetRegisterInfo *TRI;
308*5f757f3fSDimitry Andric const BPFInstrInfo *TII;
309*5f757f3fSDimitry Andric bool SupportGotol;
3100b57cec5SDimitry Andric
BPFMIPreEmitPeephole__anon0fe2a75f0211::BPFMIPreEmitPeephole3110b57cec5SDimitry Andric BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
3120b57cec5SDimitry Andric initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
3130b57cec5SDimitry Andric }
3140b57cec5SDimitry Andric
3150b57cec5SDimitry Andric private:
3160b57cec5SDimitry Andric // Initialize class variables.
3170b57cec5SDimitry Andric void initialize(MachineFunction &MFParm);
3180b57cec5SDimitry Andric
319*5f757f3fSDimitry Andric bool in16BitRange(int Num);
32004eeddc0SDimitry Andric bool eliminateRedundantMov();
321*5f757f3fSDimitry Andric bool adjustBranch();
3220b57cec5SDimitry Andric
3230b57cec5SDimitry Andric public:
3240b57cec5SDimitry Andric
3250b57cec5SDimitry Andric // Main entry point for this pass.
runOnMachineFunction__anon0fe2a75f0211::BPFMIPreEmitPeephole3260b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override {
3270b57cec5SDimitry Andric if (skipFunction(MF.getFunction()))
3280b57cec5SDimitry Andric return false;
3290b57cec5SDimitry Andric
3300b57cec5SDimitry Andric initialize(MF);
3310b57cec5SDimitry Andric
332*5f757f3fSDimitry Andric bool Changed;
333*5f757f3fSDimitry Andric Changed = eliminateRedundantMov();
334*5f757f3fSDimitry Andric if (SupportGotol)
335*5f757f3fSDimitry Andric Changed = adjustBranch() || Changed;
336*5f757f3fSDimitry Andric return Changed;
3370b57cec5SDimitry Andric }
3380b57cec5SDimitry Andric };
3390b57cec5SDimitry Andric
3400b57cec5SDimitry Andric // Initialize class variables.
initialize(MachineFunction & MFParm)3410b57cec5SDimitry Andric void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
3420b57cec5SDimitry Andric MF = &MFParm;
343*5f757f3fSDimitry Andric TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
3440b57cec5SDimitry Andric TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
345*5f757f3fSDimitry Andric SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
3460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
3470b57cec5SDimitry Andric }
3480b57cec5SDimitry Andric
eliminateRedundantMov()34904eeddc0SDimitry Andric bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
3500b57cec5SDimitry Andric MachineInstr* ToErase = nullptr;
3510b57cec5SDimitry Andric bool Eliminated = false;
3520b57cec5SDimitry Andric
3530b57cec5SDimitry Andric for (MachineBasicBlock &MBB : *MF) {
3540b57cec5SDimitry Andric for (MachineInstr &MI : MBB) {
3550b57cec5SDimitry Andric // If the previous instruction was marked for elimination, remove it now.
3560b57cec5SDimitry Andric if (ToErase) {
3570b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
3580b57cec5SDimitry Andric LLVM_DEBUG(ToErase->dump());
3590b57cec5SDimitry Andric ToErase->eraseFromParent();
3600b57cec5SDimitry Andric ToErase = nullptr;
3610b57cec5SDimitry Andric }
3620b57cec5SDimitry Andric
3630b57cec5SDimitry Andric // Eliminate identical move:
3640b57cec5SDimitry Andric //
3650b57cec5SDimitry Andric // MOV rA, rA
3660b57cec5SDimitry Andric //
3675ffd83dbSDimitry Andric // Note that we cannot remove
3685ffd83dbSDimitry Andric // MOV_32_64 rA, wA
3695ffd83dbSDimitry Andric // MOV_rr_32 wA, wA
3705ffd83dbSDimitry Andric // as these two instructions having side effects, zeroing out
3715ffd83dbSDimitry Andric // top 32 bits of rA.
3728bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode();
3735ffd83dbSDimitry Andric if (Opcode == BPF::MOV_rr) {
3748bcb0991SDimitry Andric Register dst = MI.getOperand(0).getReg();
3758bcb0991SDimitry Andric Register src = MI.getOperand(1).getReg();
3760b57cec5SDimitry Andric
3778bcb0991SDimitry Andric if (dst != src)
3780b57cec5SDimitry Andric continue;
3790b57cec5SDimitry Andric
3800b57cec5SDimitry Andric ToErase = &MI;
3810b57cec5SDimitry Andric RedundantMovElemNum++;
3820b57cec5SDimitry Andric Eliminated = true;
3830b57cec5SDimitry Andric }
3840b57cec5SDimitry Andric }
3850b57cec5SDimitry Andric }
3860b57cec5SDimitry Andric
3870b57cec5SDimitry Andric return Eliminated;
3880b57cec5SDimitry Andric }
3890b57cec5SDimitry Andric
in16BitRange(int Num)390*5f757f3fSDimitry Andric bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
391*5f757f3fSDimitry Andric // Well, the cut-off is not precisely at 16bit range since
392*5f757f3fSDimitry Andric // new codes are added during the transformation. So let us
393*5f757f3fSDimitry Andric // a little bit conservative.
394*5f757f3fSDimitry Andric return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
395*5f757f3fSDimitry Andric }
396*5f757f3fSDimitry Andric
397*5f757f3fSDimitry Andric // Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
398*5f757f3fSDimitry Andric // is supported for both unconditional (JMP) and condition (JEQ, JSGT,
399*5f757f3fSDimitry Andric // etc.) branches. In certain cases, e.g., full unrolling, the branch
400*5f757f3fSDimitry Andric // target offset might exceed 16bit range. If this happens, the llvm
401*5f757f3fSDimitry Andric // will generate incorrect code as the offset is truncated to 16bit.
402*5f757f3fSDimitry Andric //
403*5f757f3fSDimitry Andric // To fix this rare case, a new insn JMPL is introduced. This new
404*5f757f3fSDimitry Andric // insn supports supports 32bit branch target offset. The compiler
405*5f757f3fSDimitry Andric // does not use this insn during insn selection. Rather, BPF backend
406*5f757f3fSDimitry Andric // will estimate the branch target offset and do JMP -> JMPL and
407*5f757f3fSDimitry Andric // JEQ -> JEQ + JMPL conversion if the estimated branch target offset
408*5f757f3fSDimitry Andric // is beyond 16bit.
adjustBranch()409*5f757f3fSDimitry Andric bool BPFMIPreEmitPeephole::adjustBranch() {
410*5f757f3fSDimitry Andric bool Changed = false;
411*5f757f3fSDimitry Andric int CurrNumInsns = 0;
412*5f757f3fSDimitry Andric DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
413*5f757f3fSDimitry Andric DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
414*5f757f3fSDimitry Andric std::vector<MachineBasicBlock *> MBBs;
415*5f757f3fSDimitry Andric
416*5f757f3fSDimitry Andric MachineBasicBlock *PrevBB = nullptr;
417*5f757f3fSDimitry Andric for (MachineBasicBlock &MBB : *MF) {
418*5f757f3fSDimitry Andric // MBB.size() is the number of insns in this basic block, including some
419*5f757f3fSDimitry Andric // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
420*5f757f3fSDimitry Andric // Typically we have way more normal insns than DEBUG_VALUE insns.
421*5f757f3fSDimitry Andric // Also, if we indeed need to convert conditional branch like JEQ to
422*5f757f3fSDimitry Andric // JEQ + JMPL, we actually introduced some new insns like below.
423*5f757f3fSDimitry Andric CurrNumInsns += (int)MBB.size();
424*5f757f3fSDimitry Andric SoFarNumInsns[&MBB] = CurrNumInsns;
425*5f757f3fSDimitry Andric if (PrevBB != nullptr)
426*5f757f3fSDimitry Andric FollowThroughBB[PrevBB] = &MBB;
427*5f757f3fSDimitry Andric PrevBB = &MBB;
428*5f757f3fSDimitry Andric // A list of original BBs to make later traveral easier.
429*5f757f3fSDimitry Andric MBBs.push_back(&MBB);
430*5f757f3fSDimitry Andric }
431*5f757f3fSDimitry Andric FollowThroughBB[PrevBB] = nullptr;
432*5f757f3fSDimitry Andric
433*5f757f3fSDimitry Andric for (unsigned i = 0; i < MBBs.size(); i++) {
434*5f757f3fSDimitry Andric // We have four cases here:
435*5f757f3fSDimitry Andric // (1). no terminator, simple follow through.
436*5f757f3fSDimitry Andric // (2). jmp to another bb.
437*5f757f3fSDimitry Andric // (3). conditional jmp to another bb or follow through.
438*5f757f3fSDimitry Andric // (4). conditional jmp followed by an unconditional jmp.
439*5f757f3fSDimitry Andric MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
440*5f757f3fSDimitry Andric
441*5f757f3fSDimitry Andric MachineBasicBlock *MBB = MBBs[i];
442*5f757f3fSDimitry Andric for (MachineInstr &Term : MBB->terminators()) {
443*5f757f3fSDimitry Andric if (Term.isConditionalBranch()) {
444*5f757f3fSDimitry Andric assert(CondJmp == nullptr);
445*5f757f3fSDimitry Andric CondJmp = &Term;
446*5f757f3fSDimitry Andric } else if (Term.isUnconditionalBranch()) {
447*5f757f3fSDimitry Andric assert(UncondJmp == nullptr);
448*5f757f3fSDimitry Andric UncondJmp = &Term;
449*5f757f3fSDimitry Andric }
450*5f757f3fSDimitry Andric }
451*5f757f3fSDimitry Andric
452*5f757f3fSDimitry Andric // (1). no terminator, simple follow through.
453*5f757f3fSDimitry Andric if (!CondJmp && !UncondJmp)
454*5f757f3fSDimitry Andric continue;
455*5f757f3fSDimitry Andric
456*5f757f3fSDimitry Andric MachineBasicBlock *CondTargetBB, *JmpBB;
457*5f757f3fSDimitry Andric CurrNumInsns = SoFarNumInsns[MBB];
458*5f757f3fSDimitry Andric
459*5f757f3fSDimitry Andric // (2). jmp to another bb.
460*5f757f3fSDimitry Andric if (!CondJmp && UncondJmp) {
461*5f757f3fSDimitry Andric JmpBB = UncondJmp->getOperand(0).getMBB();
462*5f757f3fSDimitry Andric if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
463*5f757f3fSDimitry Andric continue;
464*5f757f3fSDimitry Andric
465*5f757f3fSDimitry Andric // replace this insn as a JMPL.
466*5f757f3fSDimitry Andric BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
467*5f757f3fSDimitry Andric UncondJmp->eraseFromParent();
468*5f757f3fSDimitry Andric Changed = true;
469*5f757f3fSDimitry Andric continue;
470*5f757f3fSDimitry Andric }
471*5f757f3fSDimitry Andric
472*5f757f3fSDimitry Andric const BasicBlock *TermBB = MBB->getBasicBlock();
473*5f757f3fSDimitry Andric int Dist;
474*5f757f3fSDimitry Andric
475*5f757f3fSDimitry Andric // (3). conditional jmp to another bb or follow through.
476*5f757f3fSDimitry Andric if (!UncondJmp) {
477*5f757f3fSDimitry Andric CondTargetBB = CondJmp->getOperand(2).getMBB();
478*5f757f3fSDimitry Andric MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
479*5f757f3fSDimitry Andric Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
480*5f757f3fSDimitry Andric if (in16BitRange(Dist))
481*5f757f3fSDimitry Andric continue;
482*5f757f3fSDimitry Andric
483*5f757f3fSDimitry Andric // We have
484*5f757f3fSDimitry Andric // B2: ...
485*5f757f3fSDimitry Andric // if (cond) goto B5
486*5f757f3fSDimitry Andric // B3: ...
487*5f757f3fSDimitry Andric // where B2 -> B5 is beyond 16bit range.
488*5f757f3fSDimitry Andric //
489*5f757f3fSDimitry Andric // We do not have 32bit cond jmp insn. So we try to do
490*5f757f3fSDimitry Andric // the following.
491*5f757f3fSDimitry Andric // B2: ...
492*5f757f3fSDimitry Andric // if (cond) goto New_B1
493*5f757f3fSDimitry Andric // New_B0 goto B3
494*5f757f3fSDimitry Andric // New_B1: gotol B5
495*5f757f3fSDimitry Andric // B3: ...
496*5f757f3fSDimitry Andric // Basically two new basic blocks are created.
497*5f757f3fSDimitry Andric MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
498*5f757f3fSDimitry Andric MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);
499*5f757f3fSDimitry Andric
500*5f757f3fSDimitry Andric // Insert New_B0 and New_B1 into function block list.
501*5f757f3fSDimitry Andric MachineFunction::iterator MBB_I = ++MBB->getIterator();
502*5f757f3fSDimitry Andric MF->insert(MBB_I, New_B0);
503*5f757f3fSDimitry Andric MF->insert(MBB_I, New_B1);
504*5f757f3fSDimitry Andric
505*5f757f3fSDimitry Andric // replace B2 cond jump
506*5f757f3fSDimitry Andric if (CondJmp->getOperand(1).isReg())
507*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
508*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg())
509*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(1).getReg())
510*5f757f3fSDimitry Andric .addMBB(New_B1);
511*5f757f3fSDimitry Andric else
512*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
513*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg())
514*5f757f3fSDimitry Andric .addImm(CondJmp->getOperand(1).getImm())
515*5f757f3fSDimitry Andric .addMBB(New_B1);
516*5f757f3fSDimitry Andric
517*5f757f3fSDimitry Andric // it is possible that CondTargetBB and FollowBB are the same. But the
518*5f757f3fSDimitry Andric // above Dist checking should already filtered this case.
519*5f757f3fSDimitry Andric MBB->removeSuccessor(CondTargetBB);
520*5f757f3fSDimitry Andric MBB->removeSuccessor(FollowBB);
521*5f757f3fSDimitry Andric MBB->addSuccessor(New_B0);
522*5f757f3fSDimitry Andric MBB->addSuccessor(New_B1);
523*5f757f3fSDimitry Andric
524*5f757f3fSDimitry Andric // Populate insns in New_B0 and New_B1.
525*5f757f3fSDimitry Andric BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
526*5f757f3fSDimitry Andric BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
527*5f757f3fSDimitry Andric .addMBB(CondTargetBB);
528*5f757f3fSDimitry Andric
529*5f757f3fSDimitry Andric New_B0->addSuccessor(FollowBB);
530*5f757f3fSDimitry Andric New_B1->addSuccessor(CondTargetBB);
531*5f757f3fSDimitry Andric CondJmp->eraseFromParent();
532*5f757f3fSDimitry Andric Changed = true;
533*5f757f3fSDimitry Andric continue;
534*5f757f3fSDimitry Andric }
535*5f757f3fSDimitry Andric
536*5f757f3fSDimitry Andric // (4). conditional jmp followed by an unconditional jmp.
537*5f757f3fSDimitry Andric CondTargetBB = CondJmp->getOperand(2).getMBB();
538*5f757f3fSDimitry Andric JmpBB = UncondJmp->getOperand(0).getMBB();
539*5f757f3fSDimitry Andric
540*5f757f3fSDimitry Andric // We have
541*5f757f3fSDimitry Andric // B2: ...
542*5f757f3fSDimitry Andric // if (cond) goto B5
543*5f757f3fSDimitry Andric // JMP B7
544*5f757f3fSDimitry Andric // B3: ...
545*5f757f3fSDimitry Andric //
546*5f757f3fSDimitry Andric // If only B2->B5 is out of 16bit range, we can do
547*5f757f3fSDimitry Andric // B2: ...
548*5f757f3fSDimitry Andric // if (cond) goto new_B
549*5f757f3fSDimitry Andric // JMP B7
550*5f757f3fSDimitry Andric // New_B: gotol B5
551*5f757f3fSDimitry Andric // B3: ...
552*5f757f3fSDimitry Andric //
553*5f757f3fSDimitry Andric // If only 'JMP B7' is out of 16bit range, we can replace
554*5f757f3fSDimitry Andric // 'JMP B7' with 'JMPL B7'.
555*5f757f3fSDimitry Andric //
556*5f757f3fSDimitry Andric // If both B2->B5 and 'JMP B7' is out of range, just do
557*5f757f3fSDimitry Andric // both the above transformations.
558*5f757f3fSDimitry Andric Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
559*5f757f3fSDimitry Andric if (!in16BitRange(Dist)) {
560*5f757f3fSDimitry Andric MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);
561*5f757f3fSDimitry Andric
562*5f757f3fSDimitry Andric // Insert New_B0 into function block list.
563*5f757f3fSDimitry Andric MF->insert(++MBB->getIterator(), New_B);
564*5f757f3fSDimitry Andric
565*5f757f3fSDimitry Andric // replace B2 cond jump
566*5f757f3fSDimitry Andric if (CondJmp->getOperand(1).isReg())
567*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
568*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg())
569*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(1).getReg())
570*5f757f3fSDimitry Andric .addMBB(New_B);
571*5f757f3fSDimitry Andric else
572*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
573*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg())
574*5f757f3fSDimitry Andric .addImm(CondJmp->getOperand(1).getImm())
575*5f757f3fSDimitry Andric .addMBB(New_B);
576*5f757f3fSDimitry Andric
577*5f757f3fSDimitry Andric if (CondTargetBB != JmpBB)
578*5f757f3fSDimitry Andric MBB->removeSuccessor(CondTargetBB);
579*5f757f3fSDimitry Andric MBB->addSuccessor(New_B);
580*5f757f3fSDimitry Andric
581*5f757f3fSDimitry Andric // Populate insn in New_B.
582*5f757f3fSDimitry Andric BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);
583*5f757f3fSDimitry Andric
584*5f757f3fSDimitry Andric New_B->addSuccessor(CondTargetBB);
585*5f757f3fSDimitry Andric CondJmp->eraseFromParent();
586*5f757f3fSDimitry Andric Changed = true;
587*5f757f3fSDimitry Andric }
588*5f757f3fSDimitry Andric
589*5f757f3fSDimitry Andric if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
590*5f757f3fSDimitry Andric BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
591*5f757f3fSDimitry Andric UncondJmp->eraseFromParent();
592*5f757f3fSDimitry Andric Changed = true;
593*5f757f3fSDimitry Andric }
594*5f757f3fSDimitry Andric }
595*5f757f3fSDimitry Andric
596*5f757f3fSDimitry Andric return Changed;
597*5f757f3fSDimitry Andric }
598*5f757f3fSDimitry Andric
5990b57cec5SDimitry Andric } // end default namespace
6000b57cec5SDimitry Andric
6010b57cec5SDimitry Andric INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
6020b57cec5SDimitry Andric "BPF PreEmit Peephole Optimization", false, false)
6030b57cec5SDimitry Andric
6040b57cec5SDimitry Andric char BPFMIPreEmitPeephole::ID = 0;
createBPFMIPreEmitPeepholePass()6050b57cec5SDimitry Andric FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
6060b57cec5SDimitry Andric {
6070b57cec5SDimitry Andric return new BPFMIPreEmitPeephole();
6080b57cec5SDimitry Andric }
609