//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs below peephole optimizations on MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we could try to split the constant operand of mov
//    instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
//    multiple `mov` + `and/add/sub` instructions.
//
// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If AArch64's 32-bit form of instruction defines the source operand of
//    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
//    operand are set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//     ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// 6. %intermediate:gpr32 = COPY %src:fpr128
//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//     ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//    In cases where a source FPR is copied to a GPR in order to be copied
//    to a destination FPR, we can directly copy the values between the FPRs,
//    eliminating the use of the Integer unit. When we match a pattern of
//    INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
//    source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
//    instructions.
//
// 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
//    64-bits.
For example, 5106c3fb27SDimitry Andric // 5206c3fb27SDimitry Andric // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr 5306c3fb27SDimitry Andric // %2:fpr64 = MOVID 0 5406c3fb27SDimitry Andric // %4:fpr128 = IMPLICIT_DEF 5506c3fb27SDimitry Andric // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub 5606c3fb27SDimitry Andric // %6:fpr128 = IMPLICIT_DEF 5706c3fb27SDimitry Andric // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub 5806c3fb27SDimitry Andric // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0 5906c3fb27SDimitry Andric // ==> 6006c3fb27SDimitry Andric // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr 6106c3fb27SDimitry Andric // %6:fpr128 = IMPLICIT_DEF 6206c3fb27SDimitry Andric // %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub 6306c3fb27SDimitry Andric // 64349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 65349cc55cSDimitry Andric 66349cc55cSDimitry Andric #include "AArch64ExpandImm.h" 67349cc55cSDimitry Andric #include "AArch64InstrInfo.h" 68349cc55cSDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 69349cc55cSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 70349cc55cSDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 71349cc55cSDimitry Andric 72349cc55cSDimitry Andric using namespace llvm; 73349cc55cSDimitry Andric 74349cc55cSDimitry Andric #define DEBUG_TYPE "aarch64-mi-peephole-opt" 75349cc55cSDimitry Andric 76349cc55cSDimitry Andric namespace { 77349cc55cSDimitry Andric 78349cc55cSDimitry Andric struct AArch64MIPeepholeOpt : public MachineFunctionPass { 79349cc55cSDimitry Andric static char ID; 80349cc55cSDimitry Andric 81349cc55cSDimitry Andric AArch64MIPeepholeOpt() : MachineFunctionPass(ID) { 82349cc55cSDimitry Andric initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry()); 83349cc55cSDimitry Andric } 
84349cc55cSDimitry Andric 85349cc55cSDimitry Andric const AArch64InstrInfo *TII; 8604eeddc0SDimitry Andric const AArch64RegisterInfo *TRI; 87349cc55cSDimitry Andric MachineLoopInfo *MLI; 88349cc55cSDimitry Andric MachineRegisterInfo *MRI; 89349cc55cSDimitry Andric 9081ad6265SDimitry Andric using OpcodePair = std::pair<unsigned, unsigned>; 91349cc55cSDimitry Andric template <typename T> 9204eeddc0SDimitry Andric using SplitAndOpcFunc = 93bdd1243dSDimitry Andric std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>; 9404eeddc0SDimitry Andric using BuildMIFunc = 9581ad6265SDimitry Andric std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned, 9681ad6265SDimitry Andric Register, Register, Register)>; 9704eeddc0SDimitry Andric 9804eeddc0SDimitry Andric /// For instructions where an immediate operand could be split into two 9904eeddc0SDimitry Andric /// separate immediate instructions, use the splitTwoPartImm two handle the 10004eeddc0SDimitry Andric /// optimization. 10104eeddc0SDimitry Andric /// 10204eeddc0SDimitry Andric /// To implement, the following function types must be passed to 10304eeddc0SDimitry Andric /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if 10404eeddc0SDimitry Andric /// splitting the immediate is valid and returns the associated new opcode. A 10504eeddc0SDimitry Andric /// BuildMIFunc must be implemented to build the two immediate instructions. 10604eeddc0SDimitry Andric /// 10704eeddc0SDimitry Andric /// Example Pattern (where IMM would require 2+ MOV instructions): 10804eeddc0SDimitry Andric /// %dst = <Instr>rr %src IMM [...] 10904eeddc0SDimitry Andric /// becomes: 11004eeddc0SDimitry Andric /// %tmp = <Instr>ri %src (encode half IMM) [...] 11104eeddc0SDimitry Andric /// %dst = <Instr>ri %tmp (encode half IMM) [...] 
11204eeddc0SDimitry Andric template <typename T> 11304eeddc0SDimitry Andric bool splitTwoPartImm(MachineInstr &MI, 11404eeddc0SDimitry Andric SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr); 11504eeddc0SDimitry Andric 11604eeddc0SDimitry Andric bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI, 11704eeddc0SDimitry Andric MachineInstr *&SubregToRegMI); 11804eeddc0SDimitry Andric 11904eeddc0SDimitry Andric template <typename T> 12081ad6265SDimitry Andric bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI); 12104eeddc0SDimitry Andric template <typename T> 12281ad6265SDimitry Andric bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI); 12381ad6265SDimitry Andric 12481ad6265SDimitry Andric template <typename T> 12581ad6265SDimitry Andric bool visitAND(unsigned Opc, MachineInstr &MI); 12681ad6265SDimitry Andric bool visitORR(MachineInstr &MI); 127bdd1243dSDimitry Andric bool visitINSERT(MachineInstr &MI); 12806c3fb27SDimitry Andric bool visitINSviGPR(MachineInstr &MI, unsigned Opc); 12906c3fb27SDimitry Andric bool visitINSvi64lane(MachineInstr &MI); 130*0fca6ea1SDimitry Andric bool visitFMOVDr(MachineInstr &MI); 131*0fca6ea1SDimitry Andric bool visitCopy(MachineInstr &MI); 132349cc55cSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 133349cc55cSDimitry Andric 134349cc55cSDimitry Andric StringRef getPassName() const override { 135349cc55cSDimitry Andric return "AArch64 MI Peephole Optimization pass"; 136349cc55cSDimitry Andric } 137349cc55cSDimitry Andric 138349cc55cSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 139349cc55cSDimitry Andric AU.setPreservesCFG(); 140*0fca6ea1SDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>(); 141349cc55cSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 142349cc55cSDimitry Andric } 143349cc55cSDimitry Andric }; 144349cc55cSDimitry Andric 145349cc55cSDimitry Andric char AArch64MIPeepholeOpt::ID = 0; 
146349cc55cSDimitry Andric 147349cc55cSDimitry Andric } // end anonymous namespace 148349cc55cSDimitry Andric 149349cc55cSDimitry Andric INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", 150349cc55cSDimitry Andric "AArch64 MI Peephole Optimization", false, false) 151349cc55cSDimitry Andric 152349cc55cSDimitry Andric template <typename T> 153349cc55cSDimitry Andric static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { 154349cc55cSDimitry Andric T UImm = static_cast<T>(Imm); 155349cc55cSDimitry Andric if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) 156349cc55cSDimitry Andric return false; 157349cc55cSDimitry Andric 158349cc55cSDimitry Andric // If this immediate can be handled by one instruction, do not split it. 159349cc55cSDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 160349cc55cSDimitry Andric AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); 161349cc55cSDimitry Andric if (Insn.size() == 1) 162349cc55cSDimitry Andric return false; 163349cc55cSDimitry Andric 164349cc55cSDimitry Andric // The bitmask immediate consists of consecutive ones. Let's say there is 165349cc55cSDimitry Andric // constant 0b00000000001000000000010000000000 which does not consist of 166349cc55cSDimitry Andric // consecutive ones. We can split it in to two bitmask immediate like 167349cc55cSDimitry Andric // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111. 168349cc55cSDimitry Andric // If we do AND with these two bitmask immediate, we can see original one. 16906c3fb27SDimitry Andric unsigned LowestBitSet = llvm::countr_zero(UImm); 170349cc55cSDimitry Andric unsigned HighestBitSet = Log2_64(UImm); 171349cc55cSDimitry Andric 172349cc55cSDimitry Andric // Create a mask which is filled with one from the position of lowest bit set 173349cc55cSDimitry Andric // to the position of highest bit set. 
174349cc55cSDimitry Andric T NewImm1 = (static_cast<T>(2) << HighestBitSet) - 175349cc55cSDimitry Andric (static_cast<T>(1) << LowestBitSet); 176349cc55cSDimitry Andric // Create a mask which is filled with one outside the position of lowest bit 177349cc55cSDimitry Andric // set and the position of highest bit set. 178349cc55cSDimitry Andric T NewImm2 = UImm | ~NewImm1; 179349cc55cSDimitry Andric 180349cc55cSDimitry Andric // If the split value is not valid bitmask immediate, do not split this 181349cc55cSDimitry Andric // constant. 182349cc55cSDimitry Andric if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) 183349cc55cSDimitry Andric return false; 184349cc55cSDimitry Andric 185349cc55cSDimitry Andric Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); 186349cc55cSDimitry Andric Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); 187349cc55cSDimitry Andric return true; 188349cc55cSDimitry Andric } 189349cc55cSDimitry Andric 190349cc55cSDimitry Andric template <typename T> 191349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitAND( 19281ad6265SDimitry Andric unsigned Opc, MachineInstr &MI) { 193349cc55cSDimitry Andric // Try below transformation. 194349cc55cSDimitry Andric // 195349cc55cSDimitry Andric // MOVi32imm + ANDWrr ==> ANDWri + ANDWri 196349cc55cSDimitry Andric // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 197349cc55cSDimitry Andric // 198349cc55cSDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions 199349cc55cSDimitry Andric // later. Let's try to split the constant operand of mov instruction into two 200349cc55cSDimitry Andric // bitmask immediates. It makes only two AND instructions intead of multiple 201349cc55cSDimitry Andric // mov + and instructions. 
202349cc55cSDimitry Andric 20304eeddc0SDimitry Andric return splitTwoPartImm<T>( 20481ad6265SDimitry Andric MI, 205bdd1243dSDimitry Andric [Opc](T Imm, unsigned RegSize, T &Imm0, 206bdd1243dSDimitry Andric T &Imm1) -> std::optional<OpcodePair> { 20704eeddc0SDimitry Andric if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) 20881ad6265SDimitry Andric return std::make_pair(Opc, Opc); 209bdd1243dSDimitry Andric return std::nullopt; 21004eeddc0SDimitry Andric }, 21181ad6265SDimitry Andric [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 21204eeddc0SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 21304eeddc0SDimitry Andric Register NewDstReg) { 214349cc55cSDimitry Andric DebugLoc DL = MI.getDebugLoc(); 21504eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 21681ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 217349cc55cSDimitry Andric .addReg(SrcReg) 21804eeddc0SDimitry Andric .addImm(Imm0); 21981ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 220349cc55cSDimitry Andric .addReg(NewTmpReg) 22104eeddc0SDimitry Andric .addImm(Imm1); 22204eeddc0SDimitry Andric }); 223349cc55cSDimitry Andric } 224349cc55cSDimitry Andric 22581ad6265SDimitry Andric bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { 226349cc55cSDimitry Andric // Check this ORR comes from below zero-extend pattern. 
227349cc55cSDimitry Andric // 228349cc55cSDimitry Andric // def : Pat<(i64 (zext GPR32:$src)), 229349cc55cSDimitry Andric // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 230349cc55cSDimitry Andric if (MI.getOperand(3).getImm() != 0) 231349cc55cSDimitry Andric return false; 232349cc55cSDimitry Andric 233349cc55cSDimitry Andric if (MI.getOperand(1).getReg() != AArch64::WZR) 234349cc55cSDimitry Andric return false; 235349cc55cSDimitry Andric 236349cc55cSDimitry Andric MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 237349cc55cSDimitry Andric if (!SrcMI) 238349cc55cSDimitry Andric return false; 239349cc55cSDimitry Andric 240349cc55cSDimitry Andric // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 241349cc55cSDimitry Andric // 242349cc55cSDimitry Andric // When you use the 32-bit form of an instruction, the upper 32 bits of the 243349cc55cSDimitry Andric // source registers are ignored and the upper 32 bits of the destination 244349cc55cSDimitry Andric // register are set to zero. 245349cc55cSDimitry Andric // 246349cc55cSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of 247349cc55cSDimitry Andric // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 248349cc55cSDimitry Andric // real AArch64 instruction and if it is not, do not process the opcode 249349cc55cSDimitry Andric // conservatively. 25081ad6265SDimitry Andric if (SrcMI->getOpcode() == TargetOpcode::COPY && 25181ad6265SDimitry Andric SrcMI->getOperand(1).getReg().isVirtual()) { 25281ad6265SDimitry Andric const TargetRegisterClass *RC = 25381ad6265SDimitry Andric MRI->getRegClass(SrcMI->getOperand(1).getReg()); 25481ad6265SDimitry Andric 25581ad6265SDimitry Andric // A COPY from an FPR will become a FMOVSWr, so do so now so that we know 25681ad6265SDimitry Andric // that the upper bits are zero. 
25781ad6265SDimitry Andric if (RC != &AArch64::FPR32RegClass && 25881ad6265SDimitry Andric ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) || 25981ad6265SDimitry Andric SrcMI->getOperand(1).getSubReg() != AArch64::ssub)) 26081ad6265SDimitry Andric return false; 26181ad6265SDimitry Andric Register CpySrc = SrcMI->getOperand(1).getReg(); 26281ad6265SDimitry Andric if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) { 26381ad6265SDimitry Andric CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass); 26481ad6265SDimitry Andric BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), 26581ad6265SDimitry Andric TII->get(TargetOpcode::COPY), CpySrc) 26681ad6265SDimitry Andric .add(SrcMI->getOperand(1)); 26781ad6265SDimitry Andric } 26881ad6265SDimitry Andric BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), 26981ad6265SDimitry Andric TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg()) 27081ad6265SDimitry Andric .addReg(CpySrc); 27181ad6265SDimitry Andric SrcMI->eraseFromParent(); 27281ad6265SDimitry Andric } 27381ad6265SDimitry Andric else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) 274349cc55cSDimitry Andric return false; 275349cc55cSDimitry Andric 276349cc55cSDimitry Andric Register DefReg = MI.getOperand(0).getReg(); 277349cc55cSDimitry Andric Register SrcReg = MI.getOperand(2).getReg(); 278349cc55cSDimitry Andric MRI->replaceRegWith(DefReg, SrcReg); 279349cc55cSDimitry Andric MRI->clearKillFlags(SrcReg); 28004eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n"); 28181ad6265SDimitry Andric MI.eraseFromParent(); 28204eeddc0SDimitry Andric 28304eeddc0SDimitry Andric return true; 28404eeddc0SDimitry Andric } 28504eeddc0SDimitry Andric 286bdd1243dSDimitry Andric bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) { 287bdd1243dSDimitry Andric // Check this INSERT_SUBREG comes from below zero-extend pattern. 
288bdd1243dSDimitry Andric // 289bdd1243dSDimitry Andric // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx 290bdd1243dSDimitry Andric // To %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx 291bdd1243dSDimitry Andric // 292bdd1243dSDimitry Andric // We're assuming the first operand to INSERT_SUBREG is irrelevant because a 293bdd1243dSDimitry Andric // COPY would destroy the upper part of the register anyway 294bdd1243dSDimitry Andric if (!MI.isRegTiedToDefOperand(1)) 295bdd1243dSDimitry Andric return false; 296bdd1243dSDimitry Andric 297bdd1243dSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 298bdd1243dSDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(DstReg); 299bdd1243dSDimitry Andric MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 300bdd1243dSDimitry Andric if (!SrcMI) 301bdd1243dSDimitry Andric return false; 302bdd1243dSDimitry Andric 303bdd1243dSDimitry Andric // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 304bdd1243dSDimitry Andric // 305bdd1243dSDimitry Andric // When you use the 32-bit form of an instruction, the upper 32 bits of the 306bdd1243dSDimitry Andric // source registers are ignored and the upper 32 bits of the destination 307bdd1243dSDimitry Andric // register are set to zero. 308bdd1243dSDimitry Andric // 309bdd1243dSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of 310bdd1243dSDimitry Andric // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 311bdd1243dSDimitry Andric // real AArch64 instruction and if it is not, do not process the opcode 312bdd1243dSDimitry Andric // conservatively. 
313bdd1243dSDimitry Andric if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) || 314bdd1243dSDimitry Andric !AArch64::GPR64allRegClass.hasSubClassEq(RC)) 315bdd1243dSDimitry Andric return false; 316bdd1243dSDimitry Andric 317bdd1243dSDimitry Andric // Build a SUBREG_TO_REG instruction 318bdd1243dSDimitry Andric MachineInstr *SubregMI = 319bdd1243dSDimitry Andric BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), 320bdd1243dSDimitry Andric TII->get(TargetOpcode::SUBREG_TO_REG), DstReg) 321bdd1243dSDimitry Andric .addImm(0) 322bdd1243dSDimitry Andric .add(MI.getOperand(2)) 323bdd1243dSDimitry Andric .add(MI.getOperand(3)); 324bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n"); 325bdd1243dSDimitry Andric (void)SubregMI; 326bdd1243dSDimitry Andric MI.eraseFromParent(); 327bdd1243dSDimitry Andric 328bdd1243dSDimitry Andric return true; 329bdd1243dSDimitry Andric } 330bdd1243dSDimitry Andric 33104eeddc0SDimitry Andric template <typename T> 33204eeddc0SDimitry Andric static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { 33304eeddc0SDimitry Andric // The immediate must be in the form of ((imm0 << 12) + imm1), in which both 33404eeddc0SDimitry Andric // imm0 and imm1 are non-zero 12-bit unsigned int. 33504eeddc0SDimitry Andric if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || 33604eeddc0SDimitry Andric (Imm & ~static_cast<T>(0xffffff)) != 0) 33704eeddc0SDimitry Andric return false; 33804eeddc0SDimitry Andric 33904eeddc0SDimitry Andric // The immediate can not be composed via a single instruction. 
34004eeddc0SDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 34104eeddc0SDimitry Andric AArch64_IMM::expandMOVImm(Imm, RegSize, Insn); 34204eeddc0SDimitry Andric if (Insn.size() == 1) 34304eeddc0SDimitry Andric return false; 34404eeddc0SDimitry Andric 34504eeddc0SDimitry Andric // Split Imm into (Imm0 << 12) + Imm1; 34604eeddc0SDimitry Andric Imm0 = (Imm >> 12) & 0xfff; 34704eeddc0SDimitry Andric Imm1 = Imm & 0xfff; 34804eeddc0SDimitry Andric return true; 34904eeddc0SDimitry Andric } 35004eeddc0SDimitry Andric 35104eeddc0SDimitry Andric template <typename T> 35204eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSUB( 35381ad6265SDimitry Andric unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) { 35404eeddc0SDimitry Andric // Try below transformation. 35504eeddc0SDimitry Andric // 35606c3fb27SDimitry Andric // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri 35706c3fb27SDimitry Andric // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri 35804eeddc0SDimitry Andric // 35906c3fb27SDimitry Andric // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri 36006c3fb27SDimitry Andric // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri 36104eeddc0SDimitry Andric // 36204eeddc0SDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions 36304eeddc0SDimitry Andric // later. Let's try to split the constant operand of mov instruction into two 36404eeddc0SDimitry Andric // legal add/sub immediates. It makes only two ADD/SUB instructions intead of 36504eeddc0SDimitry Andric // multiple `mov` + `and/sub` instructions. 36604eeddc0SDimitry Andric 36706c3fb27SDimitry Andric // We can sometimes have ADDWrr WZR, MULi32imm that have not been constant 36806c3fb27SDimitry Andric // folded. Make sure that we don't generate invalid instructions that use XZR 36906c3fb27SDimitry Andric // in those cases. 
37006c3fb27SDimitry Andric if (MI.getOperand(1).getReg() == AArch64::XZR || 37106c3fb27SDimitry Andric MI.getOperand(1).getReg() == AArch64::WZR) 37206c3fb27SDimitry Andric return false; 37306c3fb27SDimitry Andric 37404eeddc0SDimitry Andric return splitTwoPartImm<T>( 37581ad6265SDimitry Andric MI, 37604eeddc0SDimitry Andric [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0, 377bdd1243dSDimitry Andric T &Imm1) -> std::optional<OpcodePair> { 37804eeddc0SDimitry Andric if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 37981ad6265SDimitry Andric return std::make_pair(PosOpc, PosOpc); 38004eeddc0SDimitry Andric if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 38181ad6265SDimitry Andric return std::make_pair(NegOpc, NegOpc); 382bdd1243dSDimitry Andric return std::nullopt; 38304eeddc0SDimitry Andric }, 38481ad6265SDimitry Andric [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 38504eeddc0SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 38604eeddc0SDimitry Andric Register NewDstReg) { 38704eeddc0SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 38804eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 38981ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 39004eeddc0SDimitry Andric .addReg(SrcReg) 39104eeddc0SDimitry Andric .addImm(Imm0) 39204eeddc0SDimitry Andric .addImm(12); 39381ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 39481ad6265SDimitry Andric .addReg(NewTmpReg) 39581ad6265SDimitry Andric .addImm(Imm1) 39681ad6265SDimitry Andric .addImm(0); 39781ad6265SDimitry Andric }); 39881ad6265SDimitry Andric } 39981ad6265SDimitry Andric 40081ad6265SDimitry Andric template <typename T> 40181ad6265SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSSUBS( 40281ad6265SDimitry Andric OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) { 40381ad6265SDimitry Andric // Try the same transformation as ADDSUB but with additional requirement 40481ad6265SDimitry Andric // that the 
condition code usages are only for Equal and Not Equal 40506c3fb27SDimitry Andric 40606c3fb27SDimitry Andric if (MI.getOperand(1).getReg() == AArch64::XZR || 40706c3fb27SDimitry Andric MI.getOperand(1).getReg() == AArch64::WZR) 40806c3fb27SDimitry Andric return false; 40906c3fb27SDimitry Andric 41081ad6265SDimitry Andric return splitTwoPartImm<T>( 41181ad6265SDimitry Andric MI, 412bdd1243dSDimitry Andric [PosOpcs, NegOpcs, &MI, &TRI = TRI, 413bdd1243dSDimitry Andric &MRI = MRI](T Imm, unsigned RegSize, T &Imm0, 414bdd1243dSDimitry Andric T &Imm1) -> std::optional<OpcodePair> { 41581ad6265SDimitry Andric OpcodePair OP; 41681ad6265SDimitry Andric if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 41781ad6265SDimitry Andric OP = PosOpcs; 41881ad6265SDimitry Andric else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 41981ad6265SDimitry Andric OP = NegOpcs; 42081ad6265SDimitry Andric else 421bdd1243dSDimitry Andric return std::nullopt; 42281ad6265SDimitry Andric // Check conditional uses last since it is expensive for scanning 42381ad6265SDimitry Andric // proceeding instructions 42481ad6265SDimitry Andric MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 425bdd1243dSDimitry Andric std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI); 42681ad6265SDimitry Andric if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V) 427bdd1243dSDimitry Andric return std::nullopt; 42881ad6265SDimitry Andric return OP; 42981ad6265SDimitry Andric }, 43081ad6265SDimitry Andric [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 43181ad6265SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 43281ad6265SDimitry Andric Register NewDstReg) { 43381ad6265SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 43481ad6265SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 43581ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 43681ad6265SDimitry Andric .addReg(SrcReg) 43781ad6265SDimitry Andric .addImm(Imm0) 
43881ad6265SDimitry Andric .addImm(12); 43981ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 44004eeddc0SDimitry Andric .addReg(NewTmpReg) 44104eeddc0SDimitry Andric .addImm(Imm1) 44204eeddc0SDimitry Andric .addImm(0); 44304eeddc0SDimitry Andric }); 44404eeddc0SDimitry Andric } 44504eeddc0SDimitry Andric 44604eeddc0SDimitry Andric // Checks if the corresponding MOV immediate instruction is applicable for 44704eeddc0SDimitry Andric // this peephole optimization. 44804eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI, 44904eeddc0SDimitry Andric MachineInstr *&MovMI, 45004eeddc0SDimitry Andric MachineInstr *&SubregToRegMI) { 45104eeddc0SDimitry Andric // Check whether current MBB is in loop and the AND is loop invariant. 45204eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 45304eeddc0SDimitry Andric MachineLoop *L = MLI->getLoopFor(MBB); 45404eeddc0SDimitry Andric if (L && !L->isLoopInvariant(MI)) 45504eeddc0SDimitry Andric return false; 45604eeddc0SDimitry Andric 45704eeddc0SDimitry Andric // Check whether current MI's operand is MOV with immediate. 45804eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 45904eeddc0SDimitry Andric if (!MovMI) 46004eeddc0SDimitry Andric return false; 46104eeddc0SDimitry Andric 46204eeddc0SDimitry Andric // If it is SUBREG_TO_REG, check its operand. 
46304eeddc0SDimitry Andric SubregToRegMI = nullptr; 46404eeddc0SDimitry Andric if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { 46504eeddc0SDimitry Andric SubregToRegMI = MovMI; 46604eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); 46704eeddc0SDimitry Andric if (!MovMI) 46804eeddc0SDimitry Andric return false; 46904eeddc0SDimitry Andric } 47004eeddc0SDimitry Andric 47104eeddc0SDimitry Andric if (MovMI->getOpcode() != AArch64::MOVi32imm && 47204eeddc0SDimitry Andric MovMI->getOpcode() != AArch64::MOVi64imm) 47304eeddc0SDimitry Andric return false; 47404eeddc0SDimitry Andric 47504eeddc0SDimitry Andric // If the MOV has multiple uses, do not split the immediate because it causes 47604eeddc0SDimitry Andric // more instructions. 47704eeddc0SDimitry Andric if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) 47804eeddc0SDimitry Andric return false; 47904eeddc0SDimitry Andric if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) 48004eeddc0SDimitry Andric return false; 48104eeddc0SDimitry Andric 48204eeddc0SDimitry Andric // It is OK to perform this peephole optimization. 48304eeddc0SDimitry Andric return true; 48404eeddc0SDimitry Andric } 48504eeddc0SDimitry Andric 48604eeddc0SDimitry Andric template <typename T> 48704eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::splitTwoPartImm( 48881ad6265SDimitry Andric MachineInstr &MI, 48904eeddc0SDimitry Andric SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) { 49004eeddc0SDimitry Andric unsigned RegSize = sizeof(T) * 8; 49104eeddc0SDimitry Andric assert((RegSize == 32 || RegSize == 64) && 49204eeddc0SDimitry Andric "Invalid RegSize for legal immediate peephole optimization"); 49304eeddc0SDimitry Andric 49404eeddc0SDimitry Andric // Perform several essential checks against current MI. 
49504eeddc0SDimitry Andric MachineInstr *MovMI, *SubregToRegMI; 49604eeddc0SDimitry Andric if (!checkMovImmInstr(MI, MovMI, SubregToRegMI)) 49704eeddc0SDimitry Andric return false; 49804eeddc0SDimitry Andric 49904eeddc0SDimitry Andric // Split the immediate to Imm0 and Imm1, and calculate the Opcode. 50004eeddc0SDimitry Andric T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1; 50104eeddc0SDimitry Andric // For the 32 bit form of instruction, the upper 32 bits of the destination 50204eeddc0SDimitry Andric // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits 50304eeddc0SDimitry Andric // of Imm to zero. This is essential if the Immediate value was a negative 50404eeddc0SDimitry Andric // number since it was sign extended when we assign to the 64-bit Imm. 50504eeddc0SDimitry Andric if (SubregToRegMI) 50604eeddc0SDimitry Andric Imm &= 0xFFFFFFFF; 50781ad6265SDimitry Andric OpcodePair Opcode; 50804eeddc0SDimitry Andric if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1)) 50981ad6265SDimitry Andric Opcode = *R; 51004eeddc0SDimitry Andric else 51104eeddc0SDimitry Andric return false; 51204eeddc0SDimitry Andric 51381ad6265SDimitry Andric // Create new MIs using the first and second opcodes. Opcodes might differ for 51481ad6265SDimitry Andric // flag setting operations that should only set flags on second instruction. 
51581ad6265SDimitry Andric // NewTmpReg = Opcode.first SrcReg Imm0 51681ad6265SDimitry Andric // NewDstReg = Opcode.second NewTmpReg Imm1 51781ad6265SDimitry Andric 51881ad6265SDimitry Andric // Determine register classes for destinations and register operands 51904eeddc0SDimitry Andric MachineFunction *MF = MI.getMF(); 52081ad6265SDimitry Andric const TargetRegisterClass *FirstInstrDstRC = 52181ad6265SDimitry Andric TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF); 52281ad6265SDimitry Andric const TargetRegisterClass *FirstInstrOperandRC = 52381ad6265SDimitry Andric TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF); 52481ad6265SDimitry Andric const TargetRegisterClass *SecondInstrDstRC = 52581ad6265SDimitry Andric (Opcode.first == Opcode.second) 52681ad6265SDimitry Andric ? FirstInstrDstRC 52781ad6265SDimitry Andric : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF); 52881ad6265SDimitry Andric const TargetRegisterClass *SecondInstrOperandRC = 52981ad6265SDimitry Andric (Opcode.first == Opcode.second) 53081ad6265SDimitry Andric ? FirstInstrOperandRC 53181ad6265SDimitry Andric : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF); 53281ad6265SDimitry Andric 53381ad6265SDimitry Andric // Get old registers destinations and new register destinations 53404eeddc0SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 53504eeddc0SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 53681ad6265SDimitry Andric Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC); 53781ad6265SDimitry Andric // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to 53881ad6265SDimitry Andric // reuse that same destination register. 53981ad6265SDimitry Andric Register NewDstReg = DstReg.isVirtual() 54081ad6265SDimitry Andric ? 
MRI->createVirtualRegister(SecondInstrDstRC) 54181ad6265SDimitry Andric : DstReg; 54204eeddc0SDimitry Andric 54381ad6265SDimitry Andric // Constrain registers based on their new uses 54481ad6265SDimitry Andric MRI->constrainRegClass(SrcReg, FirstInstrOperandRC); 54581ad6265SDimitry Andric MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC); 54681ad6265SDimitry Andric if (DstReg != NewDstReg) 54704eeddc0SDimitry Andric MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); 54804eeddc0SDimitry Andric 54981ad6265SDimitry Andric // Call the delegating operation to build the instruction 55004eeddc0SDimitry Andric BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg); 55104eeddc0SDimitry Andric 55204eeddc0SDimitry Andric // replaceRegWith changes MI's definition register. Keep it for SSA form until 55381ad6265SDimitry Andric // deleting MI. Only if we made a new destination register. 55481ad6265SDimitry Andric if (DstReg != NewDstReg) { 55581ad6265SDimitry Andric MRI->replaceRegWith(DstReg, NewDstReg); 55604eeddc0SDimitry Andric MI.getOperand(0).setReg(DstReg); 55781ad6265SDimitry Andric } 55804eeddc0SDimitry Andric 55904eeddc0SDimitry Andric // Record the MIs need to be removed. 
56081ad6265SDimitry Andric MI.eraseFromParent(); 56104eeddc0SDimitry Andric if (SubregToRegMI) 56281ad6265SDimitry Andric SubregToRegMI->eraseFromParent(); 56381ad6265SDimitry Andric MovMI->eraseFromParent(); 564349cc55cSDimitry Andric 565349cc55cSDimitry Andric return true; 566349cc55cSDimitry Andric } 567349cc55cSDimitry Andric 56806c3fb27SDimitry Andric bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) { 56906c3fb27SDimitry Andric // Check if this INSvi[X]gpr comes from COPY of a source FPR128 57006c3fb27SDimitry Andric // 57106c3fb27SDimitry Andric // From 57206c3fb27SDimitry Andric // %intermediate1:gpr64 = COPY %src:fpr128 57306c3fb27SDimitry Andric // %intermediate2:gpr32 = COPY %intermediate1:gpr64 57406c3fb27SDimitry Andric // %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32 57506c3fb27SDimitry Andric // To 57606c3fb27SDimitry Andric // %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128, 57706c3fb27SDimitry Andric // src_index 57806c3fb27SDimitry Andric // where src_index = 0, X = [8|16|32|64] 57906c3fb27SDimitry Andric 58006c3fb27SDimitry Andric MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg()); 58106c3fb27SDimitry Andric 58206c3fb27SDimitry Andric // For a chain of COPY instructions, find the initial source register 58306c3fb27SDimitry Andric // and check if it's an FPR128 58406c3fb27SDimitry Andric while (true) { 58506c3fb27SDimitry Andric if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY) 58606c3fb27SDimitry Andric return false; 58706c3fb27SDimitry Andric 58806c3fb27SDimitry Andric if (!SrcMI->getOperand(1).getReg().isVirtual()) 58906c3fb27SDimitry Andric return false; 59006c3fb27SDimitry Andric 59106c3fb27SDimitry Andric if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) == 59206c3fb27SDimitry Andric &AArch64::FPR128RegClass) { 59306c3fb27SDimitry Andric break; 59406c3fb27SDimitry Andric } 59506c3fb27SDimitry Andric SrcMI = 
MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg()); 59606c3fb27SDimitry Andric } 59706c3fb27SDimitry Andric 59806c3fb27SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 59906c3fb27SDimitry Andric Register SrcReg = SrcMI->getOperand(1).getReg(); 60006c3fb27SDimitry Andric MachineInstr *INSvilaneMI = 60106c3fb27SDimitry Andric BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg) 60206c3fb27SDimitry Andric .add(MI.getOperand(1)) 60306c3fb27SDimitry Andric .add(MI.getOperand(2)) 60406c3fb27SDimitry Andric .addUse(SrcReg, getRegState(SrcMI->getOperand(1))) 60506c3fb27SDimitry Andric .addImm(0); 60606c3fb27SDimitry Andric 60706c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n"); 60806c3fb27SDimitry Andric (void)INSvilaneMI; 60906c3fb27SDimitry Andric MI.eraseFromParent(); 61006c3fb27SDimitry Andric return true; 61106c3fb27SDimitry Andric } 61206c3fb27SDimitry Andric 61306c3fb27SDimitry Andric // All instructions that set a FPR64 will implicitly zero the top bits of the 61406c3fb27SDimitry Andric // register. 61506c3fb27SDimitry Andric static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, 61606c3fb27SDimitry Andric MachineRegisterInfo *MRI) { 61706c3fb27SDimitry Andric if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef()) 61806c3fb27SDimitry Andric return false; 61906c3fb27SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg()); 62006c3fb27SDimitry Andric if (RC != &AArch64::FPR64RegClass) 62106c3fb27SDimitry Andric return false; 62206c3fb27SDimitry Andric return MI->getOpcode() > TargetOpcode::GENERIC_OP_END; 62306c3fb27SDimitry Andric } 62406c3fb27SDimitry Andric 62506c3fb27SDimitry Andric bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) { 62606c3fb27SDimitry Andric // Check the MI for low 64-bits sets zero for high 64-bits implicitly. 62706c3fb27SDimitry Andric // We are expecting below case. 
62806c3fb27SDimitry Andric // 62906c3fb27SDimitry Andric // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr 63006c3fb27SDimitry Andric // %6:fpr128 = IMPLICIT_DEF 63106c3fb27SDimitry Andric // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub 63206c3fb27SDimitry Andric // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0 63306c3fb27SDimitry Andric MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 63406c3fb27SDimitry Andric if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG) 63506c3fb27SDimitry Andric return false; 63606c3fb27SDimitry Andric Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg()); 63706c3fb27SDimitry Andric if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI)) 63806c3fb27SDimitry Andric return false; 63906c3fb27SDimitry Andric 64006c3fb27SDimitry Andric // Check there is `mov 0` MI for high 64-bits. 64106c3fb27SDimitry Andric // We are expecting below cases. 64206c3fb27SDimitry Andric // 64306c3fb27SDimitry Andric // %2:fpr64 = MOVID 0 64406c3fb27SDimitry Andric // %4:fpr128 = IMPLICIT_DEF 64506c3fb27SDimitry Andric // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub 64606c3fb27SDimitry Andric // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0 64706c3fb27SDimitry Andric // or 64806c3fb27SDimitry Andric // %5:fpr128 = MOVIv2d_ns 0 64906c3fb27SDimitry Andric // %6:fpr64 = COPY %5.dsub:fpr128 65006c3fb27SDimitry Andric // %8:fpr128 = IMPLICIT_DEF 65106c3fb27SDimitry Andric // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub 65206c3fb27SDimitry Andric // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0 65306c3fb27SDimitry Andric MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg()); 65406c3fb27SDimitry Andric if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG) 65506c3fb27SDimitry Andric return false; 65606c3fb27SDimitry 
Andric High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg()); 65706c3fb27SDimitry Andric if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY) 65806c3fb27SDimitry Andric High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg()); 65906c3fb27SDimitry Andric if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID && 66006c3fb27SDimitry Andric High64MI->getOpcode() != AArch64::MOVIv2d_ns)) 66106c3fb27SDimitry Andric return false; 66206c3fb27SDimitry Andric if (High64MI->getOperand(1).getImm() != 0) 66306c3fb27SDimitry Andric return false; 66406c3fb27SDimitry Andric 66506c3fb27SDimitry Andric // Let's remove MIs for high 64-bits. 66606c3fb27SDimitry Andric Register OldDef = MI.getOperand(0).getReg(); 66706c3fb27SDimitry Andric Register NewDef = MI.getOperand(1).getReg(); 66806c3fb27SDimitry Andric MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef)); 66906c3fb27SDimitry Andric MRI->replaceRegWith(OldDef, NewDef); 67006c3fb27SDimitry Andric MI.eraseFromParent(); 67106c3fb27SDimitry Andric 67206c3fb27SDimitry Andric return true; 67306c3fb27SDimitry Andric } 67406c3fb27SDimitry Andric 675*0fca6ea1SDimitry Andric bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) { 676*0fca6ea1SDimitry Andric // An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR. 677*0fca6ea1SDimitry Andric MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 678*0fca6ea1SDimitry Andric if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI)) 679*0fca6ea1SDimitry Andric return false; 680*0fca6ea1SDimitry Andric 681*0fca6ea1SDimitry Andric // Let's remove MIs for high 64-bits. 
682*0fca6ea1SDimitry Andric Register OldDef = MI.getOperand(0).getReg(); 683*0fca6ea1SDimitry Andric Register NewDef = MI.getOperand(1).getReg(); 684*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n"); 685*0fca6ea1SDimitry Andric MRI->clearKillFlags(OldDef); 686*0fca6ea1SDimitry Andric MRI->clearKillFlags(NewDef); 687*0fca6ea1SDimitry Andric MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef)); 688*0fca6ea1SDimitry Andric MRI->replaceRegWith(OldDef, NewDef); 689*0fca6ea1SDimitry Andric MI.eraseFromParent(); 690*0fca6ea1SDimitry Andric 691*0fca6ea1SDimitry Andric return true; 692*0fca6ea1SDimitry Andric } 693*0fca6ea1SDimitry Andric 694*0fca6ea1SDimitry Andric // Across a basic-block we might have in i32 extract from a value that only 695*0fca6ea1SDimitry Andric // operates on upper bits (for example a sxtw). We can replace the COPY with a 696*0fca6ea1SDimitry Andric // new version skipping the sxtw. 697*0fca6ea1SDimitry Andric bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) { 698*0fca6ea1SDimitry Andric Register InputReg = MI.getOperand(1).getReg(); 699*0fca6ea1SDimitry Andric if (MI.getOperand(1).getSubReg() != AArch64::sub_32 || 700*0fca6ea1SDimitry Andric !MRI->hasOneNonDBGUse(InputReg)) 701*0fca6ea1SDimitry Andric return false; 702*0fca6ea1SDimitry Andric 703*0fca6ea1SDimitry Andric MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg); 704*0fca6ea1SDimitry Andric SmallPtrSet<MachineInstr *, 4> DeadInstrs; 705*0fca6ea1SDimitry Andric DeadInstrs.insert(SrcMI); 706*0fca6ea1SDimitry Andric while (SrcMI && SrcMI->isFullCopy() && 707*0fca6ea1SDimitry Andric MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) { 708*0fca6ea1SDimitry Andric SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg()); 709*0fca6ea1SDimitry Andric DeadInstrs.insert(SrcMI); 710*0fca6ea1SDimitry Andric } 711*0fca6ea1SDimitry Andric 712*0fca6ea1SDimitry Andric if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri || 713*0fca6ea1SDimitry Andric 
SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31) 714*0fca6ea1SDimitry Andric return false; 715*0fca6ea1SDimitry Andric 716*0fca6ea1SDimitry Andric Register SrcReg = SrcMI->getOperand(1).getReg(); 717*0fca6ea1SDimitry Andric MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg)); 718*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Optimizing: " << MI); 719*0fca6ea1SDimitry Andric MI.getOperand(1).setReg(SrcReg); 720*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " to: " << MI); 721*0fca6ea1SDimitry Andric for (auto *DeadMI : DeadInstrs) { 722*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Removing: " << *DeadMI); 723*0fca6ea1SDimitry Andric DeadMI->eraseFromParent(); 724*0fca6ea1SDimitry Andric } 725*0fca6ea1SDimitry Andric return true; 726*0fca6ea1SDimitry Andric } 727*0fca6ea1SDimitry Andric 728349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { 729349cc55cSDimitry Andric if (skipFunction(MF.getFunction())) 730349cc55cSDimitry Andric return false; 731349cc55cSDimitry Andric 732349cc55cSDimitry Andric TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 73304eeddc0SDimitry Andric TRI = static_cast<const AArch64RegisterInfo *>( 73404eeddc0SDimitry Andric MF.getSubtarget().getRegisterInfo()); 735*0fca6ea1SDimitry Andric MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 736349cc55cSDimitry Andric MRI = &MF.getRegInfo(); 737349cc55cSDimitry Andric 73804eeddc0SDimitry Andric assert(MRI->isSSA() && "Expected to be run on SSA form!"); 739349cc55cSDimitry Andric 740349cc55cSDimitry Andric bool Changed = false; 741349cc55cSDimitry Andric 742349cc55cSDimitry Andric for (MachineBasicBlock &MBB : MF) { 74381ad6265SDimitry Andric for (MachineInstr &MI : make_early_inc_range(MBB)) { 744349cc55cSDimitry Andric switch (MI.getOpcode()) { 745349cc55cSDimitry Andric default: 746349cc55cSDimitry Andric break; 747bdd1243dSDimitry Andric case AArch64::INSERT_SUBREG: 74806c3fb27SDimitry 
Andric Changed |= visitINSERT(MI); 749bdd1243dSDimitry Andric break; 750349cc55cSDimitry Andric case AArch64::ANDWrr: 75106c3fb27SDimitry Andric Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI); 752349cc55cSDimitry Andric break; 753349cc55cSDimitry Andric case AArch64::ANDXrr: 75406c3fb27SDimitry Andric Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI); 755349cc55cSDimitry Andric break; 756349cc55cSDimitry Andric case AArch64::ORRWrs: 75706c3fb27SDimitry Andric Changed |= visitORR(MI); 75804eeddc0SDimitry Andric break; 75904eeddc0SDimitry Andric case AArch64::ADDWrr: 76006c3fb27SDimitry Andric Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI); 76104eeddc0SDimitry Andric break; 76204eeddc0SDimitry Andric case AArch64::SUBWrr: 76306c3fb27SDimitry Andric Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI); 76404eeddc0SDimitry Andric break; 76504eeddc0SDimitry Andric case AArch64::ADDXrr: 76606c3fb27SDimitry Andric Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI); 76704eeddc0SDimitry Andric break; 76804eeddc0SDimitry Andric case AArch64::SUBXrr: 76906c3fb27SDimitry Andric Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI); 77081ad6265SDimitry Andric break; 77181ad6265SDimitry Andric case AArch64::ADDSWrr: 77206c3fb27SDimitry Andric Changed |= 77306c3fb27SDimitry Andric visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri}, 77406c3fb27SDimitry Andric {AArch64::SUBWri, AArch64::SUBSWri}, MI); 77581ad6265SDimitry Andric break; 77681ad6265SDimitry Andric case AArch64::SUBSWrr: 77706c3fb27SDimitry Andric Changed |= 77806c3fb27SDimitry Andric visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri}, 77906c3fb27SDimitry Andric {AArch64::ADDWri, AArch64::ADDSWri}, MI); 78081ad6265SDimitry Andric break; 78181ad6265SDimitry Andric case AArch64::ADDSXrr: 78206c3fb27SDimitry Andric Changed |= 78306c3fb27SDimitry Andric visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri}, 
78406c3fb27SDimitry Andric {AArch64::SUBXri, AArch64::SUBSXri}, MI); 78581ad6265SDimitry Andric break; 78681ad6265SDimitry Andric case AArch64::SUBSXrr: 78706c3fb27SDimitry Andric Changed |= 78806c3fb27SDimitry Andric visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri}, 78906c3fb27SDimitry Andric {AArch64::ADDXri, AArch64::ADDSXri}, MI); 79006c3fb27SDimitry Andric break; 79106c3fb27SDimitry Andric case AArch64::INSvi64gpr: 79206c3fb27SDimitry Andric Changed |= visitINSviGPR(MI, AArch64::INSvi64lane); 79306c3fb27SDimitry Andric break; 79406c3fb27SDimitry Andric case AArch64::INSvi32gpr: 79506c3fb27SDimitry Andric Changed |= visitINSviGPR(MI, AArch64::INSvi32lane); 79606c3fb27SDimitry Andric break; 79706c3fb27SDimitry Andric case AArch64::INSvi16gpr: 79806c3fb27SDimitry Andric Changed |= visitINSviGPR(MI, AArch64::INSvi16lane); 79906c3fb27SDimitry Andric break; 80006c3fb27SDimitry Andric case AArch64::INSvi8gpr: 80106c3fb27SDimitry Andric Changed |= visitINSviGPR(MI, AArch64::INSvi8lane); 80206c3fb27SDimitry Andric break; 80306c3fb27SDimitry Andric case AArch64::INSvi64lane: 80406c3fb27SDimitry Andric Changed |= visitINSvi64lane(MI); 80504eeddc0SDimitry Andric break; 806*0fca6ea1SDimitry Andric case AArch64::FMOVDr: 807*0fca6ea1SDimitry Andric Changed |= visitFMOVDr(MI); 808*0fca6ea1SDimitry Andric break; 809*0fca6ea1SDimitry Andric case AArch64::COPY: 810*0fca6ea1SDimitry Andric Changed |= visitCopy(MI); 811*0fca6ea1SDimitry Andric break; 812349cc55cSDimitry Andric } 813349cc55cSDimitry Andric } 814349cc55cSDimitry Andric } 815349cc55cSDimitry Andric 816349cc55cSDimitry Andric return Changed; 817349cc55cSDimitry Andric } 818349cc55cSDimitry Andric 819349cc55cSDimitry Andric FunctionPass *llvm::createAArch64MIPeepholeOptPass() { 820349cc55cSDimitry Andric return new AArch64MIPeepholeOpt(); 821349cc55cSDimitry Andric } 822