//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs below peephole optimizations on MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we could try to split the constant operand of mov
//    instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
//    multiple `mov` + `and/add/sub` instructions.
//
// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If AArch64's 32-bit form of instruction defines the source operand of
//    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
//    operand are set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//     ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// 6. %intermediate:gpr32 = COPY %src:fpr128
//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//     ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//    In cases where a source FPR is copied to a GPR in order to be copied
//    to a destination FPR, we can directly copy the values between the FPRs,
//    eliminating the use of the Integer unit. When we match a pattern of
//    INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
//    source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
//    instructions.
//
// 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
//    64-bits.
//    For example,
//
//   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//   %2:fpr64 = MOVID 0
//   %4:fpr128 = IMPLICIT_DEF
//   %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
//   %6:fpr128 = IMPLICIT_DEF
//   %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//   %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
//   ==>
//   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//   %6:fpr128 = IMPLICIT_DEF
//   %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//
// 8. Remove redundant CSELs that select between identical registers, by
//    replacing them with unconditional moves.
//
// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
//    LSR or LSL alias of UBFM.
6972901fe1SCsanád Hajdú // 7030caca39SJingu Kang //===----------------------------------------------------------------------===// 7130caca39SJingu Kang 7230caca39SJingu Kang #include "AArch64ExpandImm.h" 7330caca39SJingu Kang #include "AArch64InstrInfo.h" 7430caca39SJingu Kang #include "MCTargetDesc/AArch64AddressingModes.h" 7530caca39SJingu Kang #include "llvm/CodeGen/MachineDominators.h" 7630caca39SJingu Kang #include "llvm/CodeGen/MachineLoopInfo.h" 7730caca39SJingu Kang 7830caca39SJingu Kang using namespace llvm; 7930caca39SJingu Kang 8030caca39SJingu Kang #define DEBUG_TYPE "aarch64-mi-peephole-opt" 8130caca39SJingu Kang 8230caca39SJingu Kang namespace { 8330caca39SJingu Kang 8430caca39SJingu Kang struct AArch64MIPeepholeOpt : public MachineFunctionPass { 8530caca39SJingu Kang static char ID; 8630caca39SJingu Kang 8730caca39SJingu Kang AArch64MIPeepholeOpt() : MachineFunctionPass(ID) { 8830caca39SJingu Kang initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry()); 8930caca39SJingu Kang } 9030caca39SJingu Kang 9130caca39SJingu Kang const AArch64InstrInfo *TII; 92f65651ccSMicah Weston const AArch64RegisterInfo *TRI; 9330caca39SJingu Kang MachineLoopInfo *MLI; 9430caca39SJingu Kang MachineRegisterInfo *MRI; 9530caca39SJingu Kang 96c69af70fSMicah Weston using OpcodePair = std::pair<unsigned, unsigned>; 97f65651ccSMicah Weston template <typename T> 98f65651ccSMicah Weston using SplitAndOpcFunc = 99b0df7040SFangrui Song std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>; 100f65651ccSMicah Weston using BuildMIFunc = 101c69af70fSMicah Weston std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned, 102c69af70fSMicah Weston Register, Register, Register)>; 103f65651ccSMicah Weston 104f65651ccSMicah Weston /// For instructions where an immediate operand could be split into two 105f65651ccSMicah Weston /// separate immediate instructions, use the splitTwoPartImm two handle the 106f65651ccSMicah Weston /// optimization. 
107f65651ccSMicah Weston /// 108f65651ccSMicah Weston /// To implement, the following function types must be passed to 109f65651ccSMicah Weston /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if 110f65651ccSMicah Weston /// splitting the immediate is valid and returns the associated new opcode. A 111f65651ccSMicah Weston /// BuildMIFunc must be implemented to build the two immediate instructions. 112f65651ccSMicah Weston /// 113f65651ccSMicah Weston /// Example Pattern (where IMM would require 2+ MOV instructions): 114f65651ccSMicah Weston /// %dst = <Instr>rr %src IMM [...] 115f65651ccSMicah Weston /// becomes: 116f65651ccSMicah Weston /// %tmp = <Instr>ri %src (encode half IMM) [...] 117f65651ccSMicah Weston /// %dst = <Instr>ri %tmp (encode half IMM) [...] 118f65651ccSMicah Weston template <typename T> 119f65651ccSMicah Weston bool splitTwoPartImm(MachineInstr &MI, 120f65651ccSMicah Weston SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr); 121f65651ccSMicah Weston 12293deac2eSMicah Weston bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI, 12393deac2eSMicah Weston MachineInstr *&SubregToRegMI); 12493deac2eSMicah Weston 12593deac2eSMicah Weston template <typename T> 126a1aef4f3SDavid Green bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI); 12730caca39SJingu Kang template <typename T> 128a1aef4f3SDavid Green bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI); 129c69af70fSMicah Weston 130c69af70fSMicah Weston template <typename T> 131a1aef4f3SDavid Green bool visitAND(unsigned Opc, MachineInstr &MI); 132a1aef4f3SDavid Green bool visitORR(MachineInstr &MI); 13329763aa4SMarina Taylor bool visitCSEL(MachineInstr &MI); 134b6655333Szhongyunde bool visitINSERT(MachineInstr &MI); 13572105d10SNilanjana Basu bool visitINSviGPR(MachineInstr &MI, unsigned Opc); 136932911d6SJingu Kang bool visitINSvi64lane(MachineInstr &MI); 137f42e321bSDavid Green bool visitFMOVDr(MachineInstr &MI); 
13872901fe1SCsanád Hajdú bool visitUBFMXri(MachineInstr &MI); 139600d4937SDavid Green bool visitCopy(MachineInstr &MI); 14030caca39SJingu Kang bool runOnMachineFunction(MachineFunction &MF) override; 14130caca39SJingu Kang 14230caca39SJingu Kang StringRef getPassName() const override { 14330caca39SJingu Kang return "AArch64 MI Peephole Optimization pass"; 14430caca39SJingu Kang } 14530caca39SJingu Kang 14630caca39SJingu Kang void getAnalysisUsage(AnalysisUsage &AU) const override { 14730caca39SJingu Kang AU.setPreservesCFG(); 14879d0de2aSpaperchalice AU.addRequired<MachineLoopInfoWrapperPass>(); 14930caca39SJingu Kang MachineFunctionPass::getAnalysisUsage(AU); 15030caca39SJingu Kang } 15130caca39SJingu Kang }; 15230caca39SJingu Kang 15330caca39SJingu Kang char AArch64MIPeepholeOpt::ID = 0; 15430caca39SJingu Kang 15530caca39SJingu Kang } // end anonymous namespace 15630caca39SJingu Kang 15730caca39SJingu Kang INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", 15830caca39SJingu Kang "AArch64 MI Peephole Optimization", false, false) 15930caca39SJingu Kang 16030caca39SJingu Kang template <typename T> 16130caca39SJingu Kang static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { 16230caca39SJingu Kang T UImm = static_cast<T>(Imm); 16330caca39SJingu Kang if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) 16430caca39SJingu Kang return false; 16530caca39SJingu Kang 16630caca39SJingu Kang // If this immediate can be handled by one instruction, do not split it. 16730caca39SJingu Kang SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 16830caca39SJingu Kang AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); 16930caca39SJingu Kang if (Insn.size() == 1) 17030caca39SJingu Kang return false; 17130caca39SJingu Kang 17230caca39SJingu Kang // The bitmask immediate consists of consecutive ones. Let's say there is 17330caca39SJingu Kang // constant 0b00000000001000000000010000000000 which does not consist of 17430caca39SJingu Kang // consecutive ones. 
We can split it in to two bitmask immediate like 17530caca39SJingu Kang // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111. 17630caca39SJingu Kang // If we do AND with these two bitmask immediate, we can see original one. 177e0782018SKazu Hirata unsigned LowestBitSet = llvm::countr_zero(UImm); 17830caca39SJingu Kang unsigned HighestBitSet = Log2_64(UImm); 17930caca39SJingu Kang 18030caca39SJingu Kang // Create a mask which is filled with one from the position of lowest bit set 18130caca39SJingu Kang // to the position of highest bit set. 18230caca39SJingu Kang T NewImm1 = (static_cast<T>(2) << HighestBitSet) - 18330caca39SJingu Kang (static_cast<T>(1) << LowestBitSet); 18430caca39SJingu Kang // Create a mask which is filled with one outside the position of lowest bit 18530caca39SJingu Kang // set and the position of highest bit set. 18630caca39SJingu Kang T NewImm2 = UImm | ~NewImm1; 18730caca39SJingu Kang 18830caca39SJingu Kang // If the split value is not valid bitmask immediate, do not split this 18930caca39SJingu Kang // constant. 19030caca39SJingu Kang if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) 19130caca39SJingu Kang return false; 19230caca39SJingu Kang 19330caca39SJingu Kang Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); 19430caca39SJingu Kang Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); 19530caca39SJingu Kang return true; 19630caca39SJingu Kang } 19730caca39SJingu Kang 19830caca39SJingu Kang template <typename T> 19930caca39SJingu Kang bool AArch64MIPeepholeOpt::visitAND( 200a1aef4f3SDavid Green unsigned Opc, MachineInstr &MI) { 20130caca39SJingu Kang // Try below transformation. 
20230caca39SJingu Kang // 20330caca39SJingu Kang // MOVi32imm + ANDWrr ==> ANDWri + ANDWri 20430caca39SJingu Kang // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 20530caca39SJingu Kang // 20630caca39SJingu Kang // The mov pseudo instruction could be expanded to multiple mov instructions 20730caca39SJingu Kang // later. Let's try to split the constant operand of mov instruction into two 208*d6fc7d3aSJay Foad // bitmask immediates. It makes only two AND instructions instead of multiple 20930caca39SJingu Kang // mov + and instructions. 21030caca39SJingu Kang 211f65651ccSMicah Weston return splitTwoPartImm<T>( 212a1aef4f3SDavid Green MI, 213b0df7040SFangrui Song [Opc](T Imm, unsigned RegSize, T &Imm0, 214b0df7040SFangrui Song T &Imm1) -> std::optional<OpcodePair> { 215f65651ccSMicah Weston if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) 216c69af70fSMicah Weston return std::make_pair(Opc, Opc); 21720cde154SKazu Hirata return std::nullopt; 218f65651ccSMicah Weston }, 219c69af70fSMicah Weston [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 220f65651ccSMicah Weston unsigned Imm1, Register SrcReg, Register NewTmpReg, 221f65651ccSMicah Weston Register NewDstReg) { 22230caca39SJingu Kang DebugLoc DL = MI.getDebugLoc(); 22393deac2eSMicah Weston MachineBasicBlock *MBB = MI.getParent(); 224c69af70fSMicah Weston BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 22530caca39SJingu Kang .addReg(SrcReg) 226f65651ccSMicah Weston .addImm(Imm0); 227c69af70fSMicah Weston BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 22830caca39SJingu Kang .addReg(NewTmpReg) 229f65651ccSMicah Weston .addImm(Imm1); 230f65651ccSMicah Weston }); 23130caca39SJingu Kang } 23230caca39SJingu Kang 233a1aef4f3SDavid Green bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) { 234a5024362SJingu Kang // Check this ORR comes from below zero-extend pattern. 
235a5024362SJingu Kang // 236a5024362SJingu Kang // def : Pat<(i64 (zext GPR32:$src)), 237a5024362SJingu Kang // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 238a5024362SJingu Kang if (MI.getOperand(3).getImm() != 0) 239a5024362SJingu Kang return false; 240a5024362SJingu Kang 241a5024362SJingu Kang if (MI.getOperand(1).getReg() != AArch64::WZR) 242a5024362SJingu Kang return false; 243a5024362SJingu Kang 244a5024362SJingu Kang MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 245a5024362SJingu Kang if (!SrcMI) 246a5024362SJingu Kang return false; 247a5024362SJingu Kang 248a5024362SJingu Kang // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 249a5024362SJingu Kang // 250a5024362SJingu Kang // When you use the 32-bit form of an instruction, the upper 32 bits of the 251a5024362SJingu Kang // source registers are ignored and the upper 32 bits of the destination 252a5024362SJingu Kang // register are set to zero. 253a5024362SJingu Kang // 254a5024362SJingu Kang // If AArch64's 32-bit form of instruction defines the source operand of 255a5024362SJingu Kang // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 256a5024362SJingu Kang // real AArch64 instruction and if it is not, do not process the opcode 257a5024362SJingu Kang // conservatively. 258bccbf527SDavid Green if (SrcMI->getOpcode() == TargetOpcode::COPY && 259bccbf527SDavid Green SrcMI->getOperand(1).getReg().isVirtual()) { 260bccbf527SDavid Green const TargetRegisterClass *RC = 261bccbf527SDavid Green MRI->getRegClass(SrcMI->getOperand(1).getReg()); 262bccbf527SDavid Green 263bccbf527SDavid Green // A COPY from an FPR will become a FMOVSWr, so do so now so that we know 264bccbf527SDavid Green // that the upper bits are zero. 
265bccbf527SDavid Green if (RC != &AArch64::FPR32RegClass && 266bccbf527SDavid Green ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) || 267bccbf527SDavid Green SrcMI->getOperand(1).getSubReg() != AArch64::ssub)) 268bccbf527SDavid Green return false; 269bccbf527SDavid Green Register CpySrc = SrcMI->getOperand(1).getReg(); 270bccbf527SDavid Green if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) { 271bccbf527SDavid Green CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass); 272bccbf527SDavid Green BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), 273bccbf527SDavid Green TII->get(TargetOpcode::COPY), CpySrc) 274bccbf527SDavid Green .add(SrcMI->getOperand(1)); 275bccbf527SDavid Green } 276bccbf527SDavid Green BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(), 277bccbf527SDavid Green TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg()) 278bccbf527SDavid Green .addReg(CpySrc); 279a1aef4f3SDavid Green SrcMI->eraseFromParent(); 280bccbf527SDavid Green } 281bccbf527SDavid Green else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) 282a5024362SJingu Kang return false; 283a5024362SJingu Kang 284a5024362SJingu Kang Register DefReg = MI.getOperand(0).getReg(); 285a5024362SJingu Kang Register SrcReg = MI.getOperand(2).getReg(); 286a5024362SJingu Kang MRI->replaceRegWith(DefReg, SrcReg); 287a5024362SJingu Kang MRI->clearKillFlags(SrcReg); 28843e500d7SDavid Green LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n"); 289a1aef4f3SDavid Green MI.eraseFromParent(); 290a5024362SJingu Kang 291a5024362SJingu Kang return true; 292a5024362SJingu Kang } 293a5024362SJingu Kang 29429763aa4SMarina Taylor bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) { 29529763aa4SMarina Taylor // Replace CSEL with MOV when both inputs are the same register. 
29629763aa4SMarina Taylor if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg()) 29729763aa4SMarina Taylor return false; 29829763aa4SMarina Taylor 29929763aa4SMarina Taylor auto ZeroReg = 30029763aa4SMarina Taylor MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR; 30129763aa4SMarina Taylor auto OrOpcode = 30229763aa4SMarina Taylor MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs; 30329763aa4SMarina Taylor 30429763aa4SMarina Taylor BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode)) 30529763aa4SMarina Taylor .addReg(MI.getOperand(0).getReg(), RegState::Define) 30629763aa4SMarina Taylor .addReg(ZeroReg) 30729763aa4SMarina Taylor .addReg(MI.getOperand(1).getReg()) 30829763aa4SMarina Taylor .addImm(0); 30929763aa4SMarina Taylor 31029763aa4SMarina Taylor MI.eraseFromParent(); 31129763aa4SMarina Taylor return true; 31229763aa4SMarina Taylor } 31329763aa4SMarina Taylor 314b6655333Szhongyunde bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) { 315b6655333Szhongyunde // Check this INSERT_SUBREG comes from below zero-extend pattern. 
316b6655333Szhongyunde // 317b6655333Szhongyunde // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx 318b6655333Szhongyunde // To %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx 319b6655333Szhongyunde // 320b6655333Szhongyunde // We're assuming the first operand to INSERT_SUBREG is irrelevant because a 321b6655333Szhongyunde // COPY would destroy the upper part of the register anyway 322b6655333Szhongyunde if (!MI.isRegTiedToDefOperand(1)) 323b6655333Szhongyunde return false; 324b6655333Szhongyunde 325b6655333Szhongyunde Register DstReg = MI.getOperand(0).getReg(); 326b6655333Szhongyunde const TargetRegisterClass *RC = MRI->getRegClass(DstReg); 327b6655333Szhongyunde MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 328b6655333Szhongyunde if (!SrcMI) 329b6655333Szhongyunde return false; 330b6655333Szhongyunde 331b6655333Szhongyunde // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 332b6655333Szhongyunde // 333b6655333Szhongyunde // When you use the 32-bit form of an instruction, the upper 32 bits of the 334b6655333Szhongyunde // source registers are ignored and the upper 32 bits of the destination 335b6655333Szhongyunde // register are set to zero. 336b6655333Szhongyunde // 337b6655333Szhongyunde // If AArch64's 32-bit form of instruction defines the source operand of 338b6655333Szhongyunde // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 339b6655333Szhongyunde // real AArch64 instruction and if it is not, do not process the opcode 340b6655333Szhongyunde // conservatively. 
341b6655333Szhongyunde if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) || 342b6655333Szhongyunde !AArch64::GPR64allRegClass.hasSubClassEq(RC)) 343b6655333Szhongyunde return false; 344b6655333Szhongyunde 345b6655333Szhongyunde // Build a SUBREG_TO_REG instruction 346b6655333Szhongyunde MachineInstr *SubregMI = 347b6655333Szhongyunde BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), 348b6655333Szhongyunde TII->get(TargetOpcode::SUBREG_TO_REG), DstReg) 349b6655333Szhongyunde .addImm(0) 350b6655333Szhongyunde .add(MI.getOperand(2)) 351b6655333Szhongyunde .add(MI.getOperand(3)); 352b6655333Szhongyunde LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n"); 353f9b59249SFangrui Song (void)SubregMI; 354b6655333Szhongyunde MI.eraseFromParent(); 355b6655333Szhongyunde 356b6655333Szhongyunde return true; 357b6655333Szhongyunde } 358b6655333Szhongyunde 35993deac2eSMicah Weston template <typename T> 36093deac2eSMicah Weston static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { 36193deac2eSMicah Weston // The immediate must be in the form of ((imm0 << 12) + imm1), in which both 36293deac2eSMicah Weston // imm0 and imm1 are non-zero 12-bit unsigned int. 36393deac2eSMicah Weston if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || 36493deac2eSMicah Weston (Imm & ~static_cast<T>(0xffffff)) != 0) 36593deac2eSMicah Weston return false; 36693deac2eSMicah Weston 36793deac2eSMicah Weston // The immediate can not be composed via a single instruction. 
36893deac2eSMicah Weston SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 36993deac2eSMicah Weston AArch64_IMM::expandMOVImm(Imm, RegSize, Insn); 37093deac2eSMicah Weston if (Insn.size() == 1) 37193deac2eSMicah Weston return false; 37293deac2eSMicah Weston 37393deac2eSMicah Weston // Split Imm into (Imm0 << 12) + Imm1; 37493deac2eSMicah Weston Imm0 = (Imm >> 12) & 0xfff; 37593deac2eSMicah Weston Imm1 = Imm & 0xfff; 37693deac2eSMicah Weston return true; 37793deac2eSMicah Weston } 37893deac2eSMicah Weston 37993deac2eSMicah Weston template <typename T> 38093deac2eSMicah Weston bool AArch64MIPeepholeOpt::visitADDSUB( 381a1aef4f3SDavid Green unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) { 38293deac2eSMicah Weston // Try below transformation. 38393deac2eSMicah Weston // 384b134c62fSDavid Green // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri 385b134c62fSDavid Green // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri 38693deac2eSMicah Weston // 387b134c62fSDavid Green // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri 388b134c62fSDavid Green // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri 38993deac2eSMicah Weston // 39093deac2eSMicah Weston // The mov pseudo instruction could be expanded to multiple mov instructions 39193deac2eSMicah Weston // later. Let's try to split the constant operand of mov instruction into two 392*d6fc7d3aSJay Foad // legal add/sub immediates. It makes only two ADD/SUB instructions instead of 39393deac2eSMicah Weston // multiple `mov` + `and/sub` instructions. 39493deac2eSMicah Weston 395b134c62fSDavid Green // We can sometimes have ADDWrr WZR, MULi32imm that have not been constant 396b134c62fSDavid Green // folded. Make sure that we don't generate invalid instructions that use XZR 397b134c62fSDavid Green // in those cases. 
398b134c62fSDavid Green if (MI.getOperand(1).getReg() == AArch64::XZR || 399b134c62fSDavid Green MI.getOperand(1).getReg() == AArch64::WZR) 400b134c62fSDavid Green return false; 401b134c62fSDavid Green 402f65651ccSMicah Weston return splitTwoPartImm<T>( 403a1aef4f3SDavid Green MI, 404f65651ccSMicah Weston [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0, 405b0df7040SFangrui Song T &Imm1) -> std::optional<OpcodePair> { 406f65651ccSMicah Weston if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 407c69af70fSMicah Weston return std::make_pair(PosOpc, PosOpc); 408f65651ccSMicah Weston if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 409c69af70fSMicah Weston return std::make_pair(NegOpc, NegOpc); 41020cde154SKazu Hirata return std::nullopt; 411f65651ccSMicah Weston }, 412c69af70fSMicah Weston [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 413f65651ccSMicah Weston unsigned Imm1, Register SrcReg, Register NewTmpReg, 414f65651ccSMicah Weston Register NewDstReg) { 41593deac2eSMicah Weston DebugLoc DL = MI.getDebugLoc(); 41693deac2eSMicah Weston MachineBasicBlock *MBB = MI.getParent(); 417c69af70fSMicah Weston BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 41893deac2eSMicah Weston .addReg(SrcReg) 41993deac2eSMicah Weston .addImm(Imm0) 42093deac2eSMicah Weston .addImm(12); 421c69af70fSMicah Weston BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 422c69af70fSMicah Weston .addReg(NewTmpReg) 423c69af70fSMicah Weston .addImm(Imm1) 424c69af70fSMicah Weston .addImm(0); 425c69af70fSMicah Weston }); 426c69af70fSMicah Weston } 427c69af70fSMicah Weston 428c69af70fSMicah Weston template <typename T> 429c69af70fSMicah Weston bool AArch64MIPeepholeOpt::visitADDSSUBS( 430a1aef4f3SDavid Green OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) { 431c69af70fSMicah Weston // Try the same transformation as ADDSUB but with additional requirement 432c69af70fSMicah Weston // that the condition code usages are only for Equal and Not Equal 433b134c62fSDavid Green 
434b134c62fSDavid Green if (MI.getOperand(1).getReg() == AArch64::XZR || 435b134c62fSDavid Green MI.getOperand(1).getReg() == AArch64::WZR) 436b134c62fSDavid Green return false; 437b134c62fSDavid Green 438c69af70fSMicah Weston return splitTwoPartImm<T>( 439a1aef4f3SDavid Green MI, 440b0df7040SFangrui Song [PosOpcs, NegOpcs, &MI, &TRI = TRI, 441b0df7040SFangrui Song &MRI = MRI](T Imm, unsigned RegSize, T &Imm0, 442b0df7040SFangrui Song T &Imm1) -> std::optional<OpcodePair> { 443c69af70fSMicah Weston OpcodePair OP; 444c69af70fSMicah Weston if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 445c69af70fSMicah Weston OP = PosOpcs; 446c69af70fSMicah Weston else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 447c69af70fSMicah Weston OP = NegOpcs; 448c69af70fSMicah Weston else 44920cde154SKazu Hirata return std::nullopt; 450c69af70fSMicah Weston // Check conditional uses last since it is expensive for scanning 451c69af70fSMicah Weston // proceeding instructions 452c69af70fSMicah Weston MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 453b0df7040SFangrui Song std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI); 454c69af70fSMicah Weston if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V) 45520cde154SKazu Hirata return std::nullopt; 456c69af70fSMicah Weston return OP; 457c69af70fSMicah Weston }, 458c69af70fSMicah Weston [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, 459c69af70fSMicah Weston unsigned Imm1, Register SrcReg, Register NewTmpReg, 460c69af70fSMicah Weston Register NewDstReg) { 461c69af70fSMicah Weston DebugLoc DL = MI.getDebugLoc(); 462c69af70fSMicah Weston MachineBasicBlock *MBB = MI.getParent(); 463c69af70fSMicah Weston BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg) 464c69af70fSMicah Weston .addReg(SrcReg) 465c69af70fSMicah Weston .addImm(Imm0) 466c69af70fSMicah Weston .addImm(12); 467c69af70fSMicah Weston BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg) 46893deac2eSMicah Weston .addReg(NewTmpReg) 
46993deac2eSMicah Weston .addImm(Imm1) 47093deac2eSMicah Weston .addImm(0); 471f65651ccSMicah Weston }); 47293deac2eSMicah Weston } 47393deac2eSMicah Weston 47493deac2eSMicah Weston // Checks if the corresponding MOV immediate instruction is applicable for 47593deac2eSMicah Weston // this peephole optimization. 47693deac2eSMicah Weston bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI, 47793deac2eSMicah Weston MachineInstr *&MovMI, 47893deac2eSMicah Weston MachineInstr *&SubregToRegMI) { 47993deac2eSMicah Weston // Check whether current MBB is in loop and the AND is loop invariant. 48093deac2eSMicah Weston MachineBasicBlock *MBB = MI.getParent(); 48193deac2eSMicah Weston MachineLoop *L = MLI->getLoopFor(MBB); 48293deac2eSMicah Weston if (L && !L->isLoopInvariant(MI)) 48393deac2eSMicah Weston return false; 48493deac2eSMicah Weston 48593deac2eSMicah Weston // Check whether current MI's operand is MOV with immediate. 48693deac2eSMicah Weston MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 48793deac2eSMicah Weston if (!MovMI) 48893deac2eSMicah Weston return false; 48993deac2eSMicah Weston 49093deac2eSMicah Weston // If it is SUBREG_TO_REG, check its operand. 49193deac2eSMicah Weston SubregToRegMI = nullptr; 49293deac2eSMicah Weston if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { 49393deac2eSMicah Weston SubregToRegMI = MovMI; 49493deac2eSMicah Weston MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); 49593deac2eSMicah Weston if (!MovMI) 49693deac2eSMicah Weston return false; 49793deac2eSMicah Weston } 49893deac2eSMicah Weston 49993deac2eSMicah Weston if (MovMI->getOpcode() != AArch64::MOVi32imm && 50093deac2eSMicah Weston MovMI->getOpcode() != AArch64::MOVi64imm) 50193deac2eSMicah Weston return false; 50293deac2eSMicah Weston 50393deac2eSMicah Weston // If the MOV has multiple uses, do not split the immediate because it causes 50493deac2eSMicah Weston // more instructions. 
50593deac2eSMicah Weston if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) 50693deac2eSMicah Weston return false; 50793deac2eSMicah Weston if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) 50893deac2eSMicah Weston return false; 50993deac2eSMicah Weston 51093deac2eSMicah Weston // It is OK to perform this peephole optimization. 51193deac2eSMicah Weston return true; 51293deac2eSMicah Weston } 51393deac2eSMicah Weston 514f65651ccSMicah Weston template <typename T> 515f65651ccSMicah Weston bool AArch64MIPeepholeOpt::splitTwoPartImm( 516a1aef4f3SDavid Green MachineInstr &MI, 517f65651ccSMicah Weston SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) { 518f65651ccSMicah Weston unsigned RegSize = sizeof(T) * 8; 519f65651ccSMicah Weston assert((RegSize == 32 || RegSize == 64) && 520f65651ccSMicah Weston "Invalid RegSize for legal immediate peephole optimization"); 521f65651ccSMicah Weston 522f65651ccSMicah Weston // Perform several essential checks against current MI. 523f65651ccSMicah Weston MachineInstr *MovMI, *SubregToRegMI; 524f65651ccSMicah Weston if (!checkMovImmInstr(MI, MovMI, SubregToRegMI)) 525f65651ccSMicah Weston return false; 526f65651ccSMicah Weston 527f65651ccSMicah Weston // Split the immediate to Imm0 and Imm1, and calculate the Opcode. 528f65651ccSMicah Weston T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1; 529f65651ccSMicah Weston // For the 32 bit form of instruction, the upper 32 bits of the destination 530f65651ccSMicah Weston // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits 531f65651ccSMicah Weston // of Imm to zero. This is essential if the Immediate value was a negative 532f65651ccSMicah Weston // number since it was sign extended when we assign to the 64-bit Imm. 
533f65651ccSMicah Weston if (SubregToRegMI) 534f65651ccSMicah Weston Imm &= 0xFFFFFFFF; 535c69af70fSMicah Weston OpcodePair Opcode; 536f65651ccSMicah Weston if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1)) 5377a47ee51SKazu Hirata Opcode = *R; 538f65651ccSMicah Weston else 539f65651ccSMicah Weston return false; 540f65651ccSMicah Weston 541c69af70fSMicah Weston // Create new MIs using the first and second opcodes. Opcodes might differ for 542c69af70fSMicah Weston // flag setting operations that should only set flags on second instruction. 543c69af70fSMicah Weston // NewTmpReg = Opcode.first SrcReg Imm0 544c69af70fSMicah Weston // NewDstReg = Opcode.second NewTmpReg Imm1 545c69af70fSMicah Weston 546c69af70fSMicah Weston // Determine register classes for destinations and register operands 547f65651ccSMicah Weston MachineFunction *MF = MI.getMF(); 548c69af70fSMicah Weston const TargetRegisterClass *FirstInstrDstRC = 549c69af70fSMicah Weston TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF); 550c69af70fSMicah Weston const TargetRegisterClass *FirstInstrOperandRC = 551c69af70fSMicah Weston TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF); 552c69af70fSMicah Weston const TargetRegisterClass *SecondInstrDstRC = 553c69af70fSMicah Weston (Opcode.first == Opcode.second) 554c69af70fSMicah Weston ? FirstInstrDstRC 555c69af70fSMicah Weston : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF); 556c69af70fSMicah Weston const TargetRegisterClass *SecondInstrOperandRC = 557c69af70fSMicah Weston (Opcode.first == Opcode.second) 558c69af70fSMicah Weston ? 
FirstInstrOperandRC 559c69af70fSMicah Weston : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF); 560c69af70fSMicah Weston 561c69af70fSMicah Weston // Get old registers destinations and new register destinations 562f65651ccSMicah Weston Register DstReg = MI.getOperand(0).getReg(); 563f65651ccSMicah Weston Register SrcReg = MI.getOperand(1).getReg(); 564c69af70fSMicah Weston Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC); 565c69af70fSMicah Weston // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to 566c69af70fSMicah Weston // reuse that same destination register. 567c69af70fSMicah Weston Register NewDstReg = DstReg.isVirtual() 568c69af70fSMicah Weston ? MRI->createVirtualRegister(SecondInstrDstRC) 569c69af70fSMicah Weston : DstReg; 570f65651ccSMicah Weston 571c69af70fSMicah Weston // Constrain registers based on their new uses 572c69af70fSMicah Weston MRI->constrainRegClass(SrcReg, FirstInstrOperandRC); 573c69af70fSMicah Weston MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC); 574c69af70fSMicah Weston if (DstReg != NewDstReg) 575f65651ccSMicah Weston MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); 576f65651ccSMicah Weston 577c69af70fSMicah Weston // Call the delegating operation to build the instruction 578f65651ccSMicah Weston BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg); 579f65651ccSMicah Weston 580f65651ccSMicah Weston // replaceRegWith changes MI's definition register. Keep it for SSA form until 581c69af70fSMicah Weston // deleting MI. Only if we made a new destination register. 582c69af70fSMicah Weston if (DstReg != NewDstReg) { 583c69af70fSMicah Weston MRI->replaceRegWith(DstReg, NewDstReg); 584f65651ccSMicah Weston MI.getOperand(0).setReg(DstReg); 585c69af70fSMicah Weston } 586f65651ccSMicah Weston 587f65651ccSMicah Weston // Record the MIs need to be removed. 
588a1aef4f3SDavid Green MI.eraseFromParent(); 589f65651ccSMicah Weston if (SubregToRegMI) 590a1aef4f3SDavid Green SubregToRegMI->eraseFromParent(); 591a1aef4f3SDavid Green MovMI->eraseFromParent(); 592f65651ccSMicah Weston 593f65651ccSMicah Weston return true; 594f65651ccSMicah Weston } 595f65651ccSMicah Weston 59672105d10SNilanjana Basu bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) { 59772105d10SNilanjana Basu // Check if this INSvi[X]gpr comes from COPY of a source FPR128 59872105d10SNilanjana Basu // 59972105d10SNilanjana Basu // From 60072105d10SNilanjana Basu // %intermediate1:gpr64 = COPY %src:fpr128 60172105d10SNilanjana Basu // %intermediate2:gpr32 = COPY %intermediate1:gpr64 60272105d10SNilanjana Basu // %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32 60372105d10SNilanjana Basu // To 60472105d10SNilanjana Basu // %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128, 60572105d10SNilanjana Basu // src_index 60672105d10SNilanjana Basu // where src_index = 0, X = [8|16|32|64] 60772105d10SNilanjana Basu 60872105d10SNilanjana Basu MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg()); 60972105d10SNilanjana Basu 61072105d10SNilanjana Basu // For a chain of COPY instructions, find the initial source register 61172105d10SNilanjana Basu // and check if it's an FPR128 61272105d10SNilanjana Basu while (true) { 61372105d10SNilanjana Basu if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY) 61472105d10SNilanjana Basu return false; 61572105d10SNilanjana Basu 61672105d10SNilanjana Basu if (!SrcMI->getOperand(1).getReg().isVirtual()) 61772105d10SNilanjana Basu return false; 61872105d10SNilanjana Basu 61972105d10SNilanjana Basu if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) == 62072105d10SNilanjana Basu &AArch64::FPR128RegClass) { 62172105d10SNilanjana Basu break; 62272105d10SNilanjana Basu } 62372105d10SNilanjana Basu SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg()); 
62472105d10SNilanjana Basu } 62572105d10SNilanjana Basu 62672105d10SNilanjana Basu Register DstReg = MI.getOperand(0).getReg(); 62772105d10SNilanjana Basu Register SrcReg = SrcMI->getOperand(1).getReg(); 62872105d10SNilanjana Basu MachineInstr *INSvilaneMI = 62972105d10SNilanjana Basu BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg) 63072105d10SNilanjana Basu .add(MI.getOperand(1)) 63172105d10SNilanjana Basu .add(MI.getOperand(2)) 63272105d10SNilanjana Basu .addUse(SrcReg, getRegState(SrcMI->getOperand(1))) 63372105d10SNilanjana Basu .addImm(0); 63472105d10SNilanjana Basu 63572105d10SNilanjana Basu LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n"); 636fa66e4bdSKazu Hirata (void)INSvilaneMI; 63772105d10SNilanjana Basu MI.eraseFromParent(); 63872105d10SNilanjana Basu return true; 63972105d10SNilanjana Basu } 64072105d10SNilanjana Basu 6416e7840ddSDavid Green // All instructions that set a FPR64 will implicitly zero the top bits of the 6426e7840ddSDavid Green // register. 6436e7840ddSDavid Green static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI, 6446e7840ddSDavid Green MachineRegisterInfo *MRI) { 64586780f49SDavid Green if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef()) 646932911d6SJingu Kang return false; 6476e7840ddSDavid Green const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg()); 6486e7840ddSDavid Green if (RC != &AArch64::FPR64RegClass) 6496e7840ddSDavid Green return false; 6506e7840ddSDavid Green return MI->getOpcode() > TargetOpcode::GENERIC_OP_END; 651932911d6SJingu Kang } 652932911d6SJingu Kang 653932911d6SJingu Kang bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) { 654932911d6SJingu Kang // Check the MI for low 64-bits sets zero for high 64-bits implicitly. 655932911d6SJingu Kang // We are expecting below case. 
656932911d6SJingu Kang // 657932911d6SJingu Kang // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr 658932911d6SJingu Kang // %6:fpr128 = IMPLICIT_DEF 659932911d6SJingu Kang // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub 660932911d6SJingu Kang // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0 661932911d6SJingu Kang MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 662932911d6SJingu Kang if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG) 663932911d6SJingu Kang return false; 664932911d6SJingu Kang Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg()); 6656e7840ddSDavid Green if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI)) 666932911d6SJingu Kang return false; 667932911d6SJingu Kang 668932911d6SJingu Kang // Check there is `mov 0` MI for high 64-bits. 669932911d6SJingu Kang // We are expecting below cases. 670932911d6SJingu Kang // 671932911d6SJingu Kang // %2:fpr64 = MOVID 0 672932911d6SJingu Kang // %4:fpr128 = IMPLICIT_DEF 673932911d6SJingu Kang // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub 674932911d6SJingu Kang // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0 675932911d6SJingu Kang // or 676932911d6SJingu Kang // %5:fpr128 = MOVIv2d_ns 0 677932911d6SJingu Kang // %6:fpr64 = COPY %5.dsub:fpr128 678932911d6SJingu Kang // %8:fpr128 = IMPLICIT_DEF 679932911d6SJingu Kang // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub 680932911d6SJingu Kang // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0 681932911d6SJingu Kang MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg()); 6826e7840ddSDavid Green if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG) 683932911d6SJingu Kang return false; 684932911d6SJingu Kang High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg()); 6856e7840ddSDavid Green if (High64MI && 
High64MI->getOpcode() == TargetOpcode::COPY) 686932911d6SJingu Kang High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg()); 6876e7840ddSDavid Green if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID && 6886e7840ddSDavid Green High64MI->getOpcode() != AArch64::MOVIv2d_ns)) 689932911d6SJingu Kang return false; 690932911d6SJingu Kang if (High64MI->getOperand(1).getImm() != 0) 691932911d6SJingu Kang return false; 692932911d6SJingu Kang 693932911d6SJingu Kang // Let's remove MIs for high 64-bits. 694932911d6SJingu Kang Register OldDef = MI.getOperand(0).getReg(); 695932911d6SJingu Kang Register NewDef = MI.getOperand(1).getReg(); 69644479b80SDavid Green MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef)); 697932911d6SJingu Kang MRI->replaceRegWith(OldDef, NewDef); 698932911d6SJingu Kang MI.eraseFromParent(); 699932911d6SJingu Kang 700932911d6SJingu Kang return true; 701932911d6SJingu Kang } 702932911d6SJingu Kang 703f42e321bSDavid Green bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) { 704f42e321bSDavid Green // An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR. 705f42e321bSDavid Green MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg()); 706f42e321bSDavid Green if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI)) 707f42e321bSDavid Green return false; 708f42e321bSDavid Green 709f42e321bSDavid Green // Let's remove MIs for high 64-bits. 
710f42e321bSDavid Green Register OldDef = MI.getOperand(0).getReg(); 711f42e321bSDavid Green Register NewDef = MI.getOperand(1).getReg(); 71236e74cfdSDavid Green LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n"); 71336e74cfdSDavid Green MRI->clearKillFlags(OldDef); 71436e74cfdSDavid Green MRI->clearKillFlags(NewDef); 715f42e321bSDavid Green MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef)); 716f42e321bSDavid Green MRI->replaceRegWith(OldDef, NewDef); 717f42e321bSDavid Green MI.eraseFromParent(); 718f42e321bSDavid Green 719f42e321bSDavid Green return true; 720f42e321bSDavid Green } 721f42e321bSDavid Green 72272901fe1SCsanád Hajdú bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) { 72372901fe1SCsanád Hajdú // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of 72472901fe1SCsanád Hajdú // UBFM, and replace the UBFMXri instruction with its 32 bit variant, UBFMWri. 72572901fe1SCsanád Hajdú int64_t Immr = MI.getOperand(2).getImm(); 72672901fe1SCsanád Hajdú int64_t Imms = MI.getOperand(3).getImm(); 72772901fe1SCsanád Hajdú 72872901fe1SCsanád Hajdú bool IsLSR = Imms == 31 && Immr <= Imms; 72972901fe1SCsanád Hajdú bool IsLSL = Immr == Imms + 33; 73072901fe1SCsanád Hajdú if (!IsLSR && !IsLSL) 73172901fe1SCsanád Hajdú return false; 73272901fe1SCsanád Hajdú 73372901fe1SCsanád Hajdú if (IsLSL) { 73472901fe1SCsanád Hajdú Immr -= 32; 73572901fe1SCsanád Hajdú } 73672901fe1SCsanád Hajdú 73772901fe1SCsanád Hajdú const TargetRegisterClass *DstRC64 = 73872901fe1SCsanád Hajdú TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF()); 73972901fe1SCsanád Hajdú const TargetRegisterClass *DstRC32 = 74072901fe1SCsanád Hajdú TRI->getSubRegisterClass(DstRC64, AArch64::sub_32); 74172901fe1SCsanád Hajdú assert(DstRC32 && "Destination register class of UBFMXri doesn't have a " 74272901fe1SCsanád Hajdú "sub_32 subregister class"); 74372901fe1SCsanád Hajdú 74472901fe1SCsanád Hajdú const TargetRegisterClass *SrcRC64 = 74572901fe1SCsanád Hajdú 
TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF()); 74672901fe1SCsanád Hajdú const TargetRegisterClass *SrcRC32 = 74772901fe1SCsanád Hajdú TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32); 74872901fe1SCsanád Hajdú assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 " 74972901fe1SCsanád Hajdú "subregister class"); 75072901fe1SCsanád Hajdú 75172901fe1SCsanád Hajdú Register DstReg64 = MI.getOperand(0).getReg(); 75272901fe1SCsanád Hajdú Register DstReg32 = MRI->createVirtualRegister(DstRC32); 75372901fe1SCsanád Hajdú Register SrcReg64 = MI.getOperand(1).getReg(); 75472901fe1SCsanád Hajdú Register SrcReg32 = MRI->createVirtualRegister(SrcRC32); 75572901fe1SCsanád Hajdú 75672901fe1SCsanád Hajdú BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY), 75772901fe1SCsanád Hajdú SrcReg32) 75872901fe1SCsanád Hajdú .addReg(SrcReg64, 0, AArch64::sub_32); 75972901fe1SCsanád Hajdú BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri), 76072901fe1SCsanád Hajdú DstReg32) 76172901fe1SCsanád Hajdú .addReg(SrcReg32) 76272901fe1SCsanád Hajdú .addImm(Immr) 76372901fe1SCsanád Hajdú .addImm(Imms); 76472901fe1SCsanád Hajdú BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), 76572901fe1SCsanád Hajdú TII->get(AArch64::SUBREG_TO_REG), DstReg64) 76672901fe1SCsanád Hajdú .addImm(0) 76772901fe1SCsanád Hajdú .addReg(DstReg32) 76872901fe1SCsanád Hajdú .addImm(AArch64::sub_32); 76972901fe1SCsanád Hajdú MI.eraseFromParent(); 77072901fe1SCsanád Hajdú return true; 77172901fe1SCsanád Hajdú } 77272901fe1SCsanád Hajdú 773600d4937SDavid Green // Across a basic-block we might have in i32 extract from a value that only 774600d4937SDavid Green // operates on upper bits (for example a sxtw). We can replace the COPY with a 775600d4937SDavid Green // new version skipping the sxtw. 
776600d4937SDavid Green bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) { 777600d4937SDavid Green Register InputReg = MI.getOperand(1).getReg(); 778600d4937SDavid Green if (MI.getOperand(1).getSubReg() != AArch64::sub_32 || 779600d4937SDavid Green !MRI->hasOneNonDBGUse(InputReg)) 780600d4937SDavid Green return false; 781600d4937SDavid Green 782600d4937SDavid Green MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg); 783600d4937SDavid Green SmallPtrSet<MachineInstr *, 4> DeadInstrs; 784600d4937SDavid Green DeadInstrs.insert(SrcMI); 785600d4937SDavid Green while (SrcMI && SrcMI->isFullCopy() && 786600d4937SDavid Green MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) { 787600d4937SDavid Green SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg()); 788600d4937SDavid Green DeadInstrs.insert(SrcMI); 789600d4937SDavid Green } 790600d4937SDavid Green 791fe946bfbSDavid Green if (!SrcMI) 792600d4937SDavid Green return false; 793600d4937SDavid Green 794fe946bfbSDavid Green // Look for SXTW(X) and return Reg. 
795fe946bfbSDavid Green auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register { 796fe946bfbSDavid Green if (SrcMI->getOpcode() != AArch64::SBFMXri || 797fe946bfbSDavid Green SrcMI->getOperand(2).getImm() != 0 || 798fe946bfbSDavid Green SrcMI->getOperand(3).getImm() != 31) 799fe946bfbSDavid Green return AArch64::NoRegister; 800fe946bfbSDavid Green return SrcMI->getOperand(1).getReg(); 801fe946bfbSDavid Green }; 802fe946bfbSDavid Green // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32))) 803fe946bfbSDavid Green auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register { 804fe946bfbSDavid Green if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG || 805fe946bfbSDavid Green SrcMI->getOperand(3).getImm() != AArch64::sub_32 || 806fe946bfbSDavid Green !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg())) 807fe946bfbSDavid Green return AArch64::NoRegister; 808fe946bfbSDavid Green MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg()); 809fe946bfbSDavid Green if (!Orr || Orr->getOpcode() != AArch64::ORRWrr || 810fe946bfbSDavid Green Orr->getOperand(1).getReg() != AArch64::WZR || 811fe946bfbSDavid Green !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg())) 812fe946bfbSDavid Green return AArch64::NoRegister; 813fe946bfbSDavid Green MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg()); 814fe946bfbSDavid Green if (!Cpy || Cpy->getOpcode() != AArch64::COPY || 815fe946bfbSDavid Green Cpy->getOperand(1).getSubReg() != AArch64::sub_32) 816fe946bfbSDavid Green return AArch64::NoRegister; 817fe946bfbSDavid Green DeadInstrs.insert(Orr); 818fe946bfbSDavid Green return Cpy->getOperand(1).getReg(); 819fe946bfbSDavid Green }; 820fe946bfbSDavid Green 821fe946bfbSDavid Green Register SrcReg = getSXTWSrcReg(SrcMI); 822fe946bfbSDavid Green if (!SrcReg) 823fe946bfbSDavid Green SrcReg = getUXTWSrcReg(SrcMI); 824fe946bfbSDavid Green if (!SrcReg) 825fe946bfbSDavid Green return false; 826fe946bfbSDavid Green 827600d4937SDavid Green MRI->constrainRegClass(SrcReg, 
MRI->getRegClass(InputReg)); 828600d4937SDavid Green LLVM_DEBUG(dbgs() << "Optimizing: " << MI); 829600d4937SDavid Green MI.getOperand(1).setReg(SrcReg); 830600d4937SDavid Green LLVM_DEBUG(dbgs() << " to: " << MI); 831600d4937SDavid Green for (auto *DeadMI : DeadInstrs) { 832600d4937SDavid Green LLVM_DEBUG(dbgs() << " Removing: " << *DeadMI); 833600d4937SDavid Green DeadMI->eraseFromParent(); 834600d4937SDavid Green } 835600d4937SDavid Green return true; 836600d4937SDavid Green } 837600d4937SDavid Green 83830caca39SJingu Kang bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { 83930caca39SJingu Kang if (skipFunction(MF.getFunction())) 84030caca39SJingu Kang return false; 84130caca39SJingu Kang 84230caca39SJingu Kang TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 843f65651ccSMicah Weston TRI = static_cast<const AArch64RegisterInfo *>( 844f65651ccSMicah Weston MF.getSubtarget().getRegisterInfo()); 84579d0de2aSpaperchalice MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 84630caca39SJingu Kang MRI = &MF.getRegInfo(); 84730caca39SJingu Kang 84843e500d7SDavid Green assert(MRI->isSSA() && "Expected to be run on SSA form!"); 84930caca39SJingu Kang 85030caca39SJingu Kang bool Changed = false; 85130caca39SJingu Kang 85230caca39SJingu Kang for (MachineBasicBlock &MBB : MF) { 853a1aef4f3SDavid Green for (MachineInstr &MI : make_early_inc_range(MBB)) { 85430caca39SJingu Kang switch (MI.getOpcode()) { 85530caca39SJingu Kang default: 85630caca39SJingu Kang break; 857b6655333Szhongyunde case AArch64::INSERT_SUBREG: 85804e94bb1SDavid Green Changed |= visitINSERT(MI); 859b6655333Szhongyunde break; 86030caca39SJingu Kang case AArch64::ANDWrr: 86104e94bb1SDavid Green Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI); 86230caca39SJingu Kang break; 86330caca39SJingu Kang case AArch64::ANDXrr: 86404e94bb1SDavid Green Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI); 86530caca39SJingu Kang break; 866a5024362SJingu Kang case 
AArch64::ORRWrs: 86704e94bb1SDavid Green Changed |= visitORR(MI); 86843e500d7SDavid Green break; 86993deac2eSMicah Weston case AArch64::ADDWrr: 87004e94bb1SDavid Green Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI); 87193deac2eSMicah Weston break; 87293deac2eSMicah Weston case AArch64::SUBWrr: 87304e94bb1SDavid Green Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI); 87493deac2eSMicah Weston break; 87593deac2eSMicah Weston case AArch64::ADDXrr: 87604e94bb1SDavid Green Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI); 87793deac2eSMicah Weston break; 87893deac2eSMicah Weston case AArch64::SUBXrr: 87904e94bb1SDavid Green Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI); 88093deac2eSMicah Weston break; 881c69af70fSMicah Weston case AArch64::ADDSWrr: 88204e94bb1SDavid Green Changed |= 88304e94bb1SDavid Green visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri}, 88404e94bb1SDavid Green {AArch64::SUBWri, AArch64::SUBSWri}, MI); 885c69af70fSMicah Weston break; 886c69af70fSMicah Weston case AArch64::SUBSWrr: 88704e94bb1SDavid Green Changed |= 88804e94bb1SDavid Green visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri}, 88904e94bb1SDavid Green {AArch64::ADDWri, AArch64::ADDSWri}, MI); 890c69af70fSMicah Weston break; 891c69af70fSMicah Weston case AArch64::ADDSXrr: 89204e94bb1SDavid Green Changed |= 89304e94bb1SDavid Green visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri}, 89404e94bb1SDavid Green {AArch64::SUBXri, AArch64::SUBSXri}, MI); 895c69af70fSMicah Weston break; 896c69af70fSMicah Weston case AArch64::SUBSXrr: 89704e94bb1SDavid Green Changed |= 89804e94bb1SDavid Green visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri}, 89904e94bb1SDavid Green {AArch64::ADDXri, AArch64::ADDSXri}, MI); 900c69af70fSMicah Weston break; 90129763aa4SMarina Taylor case AArch64::CSELWr: 90229763aa4SMarina Taylor case AArch64::CSELXr: 90329763aa4SMarina Taylor Changed |= 
visitCSEL(MI); 90429763aa4SMarina Taylor break; 90572105d10SNilanjana Basu case AArch64::INSvi64gpr: 90604e94bb1SDavid Green Changed |= visitINSviGPR(MI, AArch64::INSvi64lane); 90772105d10SNilanjana Basu break; 90872105d10SNilanjana Basu case AArch64::INSvi32gpr: 90904e94bb1SDavid Green Changed |= visitINSviGPR(MI, AArch64::INSvi32lane); 91072105d10SNilanjana Basu break; 91172105d10SNilanjana Basu case AArch64::INSvi16gpr: 91204e94bb1SDavid Green Changed |= visitINSviGPR(MI, AArch64::INSvi16lane); 91372105d10SNilanjana Basu break; 91472105d10SNilanjana Basu case AArch64::INSvi8gpr: 91504e94bb1SDavid Green Changed |= visitINSviGPR(MI, AArch64::INSvi8lane); 91672105d10SNilanjana Basu break; 917932911d6SJingu Kang case AArch64::INSvi64lane: 91804e94bb1SDavid Green Changed |= visitINSvi64lane(MI); 919932911d6SJingu Kang break; 920f42e321bSDavid Green case AArch64::FMOVDr: 921f42e321bSDavid Green Changed |= visitFMOVDr(MI); 922f42e321bSDavid Green break; 92372901fe1SCsanád Hajdú case AArch64::UBFMXri: 92472901fe1SCsanád Hajdú Changed |= visitUBFMXri(MI); 92572901fe1SCsanád Hajdú break; 926600d4937SDavid Green case AArch64::COPY: 927600d4937SDavid Green Changed |= visitCopy(MI); 928600d4937SDavid Green break; 92930caca39SJingu Kang } 93030caca39SJingu Kang } 93130caca39SJingu Kang } 93230caca39SJingu Kang 93330caca39SJingu Kang return Changed; 93430caca39SJingu Kang } 93530caca39SJingu Kang 93630caca39SJingu Kang FunctionPass *llvm::createAArch64MIPeepholeOptPass() { 93730caca39SJingu Kang return new AArch64MIPeepholeOpt(); 93830caca39SJingu Kang } 939