xref: /llvm-project/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (revision d6fc7d3ab186fee1c95c00992206e0914cb25f42)
130caca39SJingu Kang //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
230caca39SJingu Kang //
330caca39SJingu Kang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
430caca39SJingu Kang // See https://llvm.org/LICENSE.txt for license information.
530caca39SJingu Kang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
630caca39SJingu Kang //
730caca39SJingu Kang //===----------------------------------------------------------------------===//
830caca39SJingu Kang //
930caca39SJingu Kang // This pass performs below peephole optimizations on MIR level.
1030caca39SJingu Kang //
1130caca39SJingu Kang // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
1230caca39SJingu Kang //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
1330caca39SJingu Kang //
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
1993deac2eSMicah Weston //
2030caca39SJingu Kang //    The mov pseudo instruction could be expanded to multiple mov instructions
2130caca39SJingu Kang //    later. In this case, we could try to split the constant  operand of mov
2293deac2eSMicah Weston //    instruction into two immediates which can be directly encoded into
2393deac2eSMicah Weston //    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
2493deac2eSMicah Weston //    multiple `mov` + `and/add/sub` instructions.
25a5024362SJingu Kang //
2693deac2eSMicah Weston // 4. Remove redundant ORRWrs which is generated by zero-extend.
27a5024362SJingu Kang //
28a5024362SJingu Kang //    %3:gpr32 = ORRWrs $wzr, %2, 0
29a5024362SJingu Kang //    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
30a5024362SJingu Kang //
31a5024362SJingu Kang //    If AArch64's 32-bit form of instruction defines the source operand of
32a5024362SJingu Kang //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33a5024362SJingu Kang //    operand are set to zero.
34a5024362SJingu Kang //
35b6655333Szhongyunde // 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36b6655333Szhongyunde //     ==> %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
37b6655333Szhongyunde //
3872105d10SNilanjana Basu // 6. %intermediate:gpr32 = COPY %src:fpr128
3972105d10SNilanjana Basu //    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
4072105d10SNilanjana Basu //     ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
4172105d10SNilanjana Basu //
4272105d10SNilanjana Basu //    In cases where a source FPR is copied to a GPR in order to be copied
4372105d10SNilanjana Basu //    to a destination FPR, we can directly copy the values between the FPRs,
4472105d10SNilanjana Basu //    eliminating the use of the Integer unit. When we match a pattern of
4572105d10SNilanjana Basu //    INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
4672105d10SNilanjana Basu //    source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
4772105d10SNilanjana Basu //    instructions.
4872105d10SNilanjana Basu //
49932911d6SJingu Kang // 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
50932911d6SJingu Kang //    64-bits. For example,
51932911d6SJingu Kang //
52932911d6SJingu Kang //   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
53932911d6SJingu Kang //   %2:fpr64 = MOVID 0
54932911d6SJingu Kang //   %4:fpr128 = IMPLICIT_DEF
55c7c5e053SDavid Green //   %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
56932911d6SJingu Kang //   %6:fpr128 = IMPLICIT_DEF
57c7c5e053SDavid Green //   %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
58c7c5e053SDavid Green //   %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
59932911d6SJingu Kang //   ==>
60932911d6SJingu Kang //   %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
61932911d6SJingu Kang //   %6:fpr128 = IMPLICIT_DEF
62c7c5e053SDavid Green //   %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
63932911d6SJingu Kang //
6429763aa4SMarina Taylor // 8. Remove redundant CSELs that select between identical registers, by
6529763aa4SMarina Taylor //    replacing them with unconditional moves.
6629763aa4SMarina Taylor //
6772901fe1SCsanád Hajdú // 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
6872901fe1SCsanád Hajdú //    LSR or LSL alias of UBFM.
6972901fe1SCsanád Hajdú //
7030caca39SJingu Kang //===----------------------------------------------------------------------===//
7130caca39SJingu Kang 
7230caca39SJingu Kang #include "AArch64ExpandImm.h"
7330caca39SJingu Kang #include "AArch64InstrInfo.h"
7430caca39SJingu Kang #include "MCTargetDesc/AArch64AddressingModes.h"
7530caca39SJingu Kang #include "llvm/CodeGen/MachineDominators.h"
7630caca39SJingu Kang #include "llvm/CodeGen/MachineLoopInfo.h"
7730caca39SJingu Kang 
7830caca39SJingu Kang using namespace llvm;
7930caca39SJingu Kang 
8030caca39SJingu Kang #define DEBUG_TYPE "aarch64-mi-peephole-opt"
8130caca39SJingu Kang 
8230caca39SJingu Kang namespace {
8330caca39SJingu Kang 
8430caca39SJingu Kang struct AArch64MIPeepholeOpt : public MachineFunctionPass {
8530caca39SJingu Kang   static char ID;
8630caca39SJingu Kang 
8730caca39SJingu Kang   AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
8830caca39SJingu Kang     initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
8930caca39SJingu Kang   }
9030caca39SJingu Kang 
9130caca39SJingu Kang   const AArch64InstrInfo *TII;
92f65651ccSMicah Weston   const AArch64RegisterInfo *TRI;
9330caca39SJingu Kang   MachineLoopInfo *MLI;
9430caca39SJingu Kang   MachineRegisterInfo *MRI;
9530caca39SJingu Kang 
96c69af70fSMicah Weston   using OpcodePair = std::pair<unsigned, unsigned>;
97f65651ccSMicah Weston   template <typename T>
98f65651ccSMicah Weston   using SplitAndOpcFunc =
99b0df7040SFangrui Song       std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
100f65651ccSMicah Weston   using BuildMIFunc =
101c69af70fSMicah Weston       std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
102c69af70fSMicah Weston                          Register, Register, Register)>;
103f65651ccSMicah Weston 
104f65651ccSMicah Weston   /// For instructions where an immediate operand could be split into two
105f65651ccSMicah Weston   /// separate immediate instructions, use the splitTwoPartImm two handle the
106f65651ccSMicah Weston   /// optimization.
107f65651ccSMicah Weston   ///
108f65651ccSMicah Weston   /// To implement, the following function types must be passed to
109f65651ccSMicah Weston   /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
110f65651ccSMicah Weston   /// splitting the immediate is valid and returns the associated new opcode. A
111f65651ccSMicah Weston   /// BuildMIFunc must be implemented to build the two immediate instructions.
112f65651ccSMicah Weston   ///
113f65651ccSMicah Weston   /// Example Pattern (where IMM would require 2+ MOV instructions):
114f65651ccSMicah Weston   ///     %dst = <Instr>rr %src IMM [...]
115f65651ccSMicah Weston   /// becomes:
116f65651ccSMicah Weston   ///     %tmp = <Instr>ri %src (encode half IMM) [...]
117f65651ccSMicah Weston   ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
118f65651ccSMicah Weston   template <typename T>
119f65651ccSMicah Weston   bool splitTwoPartImm(MachineInstr &MI,
120f65651ccSMicah Weston                        SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
121f65651ccSMicah Weston 
12293deac2eSMicah Weston   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
12393deac2eSMicah Weston                         MachineInstr *&SubregToRegMI);
12493deac2eSMicah Weston 
12593deac2eSMicah Weston   template <typename T>
126a1aef4f3SDavid Green   bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
12730caca39SJingu Kang   template <typename T>
128a1aef4f3SDavid Green   bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
129c69af70fSMicah Weston 
130c69af70fSMicah Weston   template <typename T>
131a1aef4f3SDavid Green   bool visitAND(unsigned Opc, MachineInstr &MI);
132a1aef4f3SDavid Green   bool visitORR(MachineInstr &MI);
13329763aa4SMarina Taylor   bool visitCSEL(MachineInstr &MI);
134b6655333Szhongyunde   bool visitINSERT(MachineInstr &MI);
13572105d10SNilanjana Basu   bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
136932911d6SJingu Kang   bool visitINSvi64lane(MachineInstr &MI);
137f42e321bSDavid Green   bool visitFMOVDr(MachineInstr &MI);
13872901fe1SCsanád Hajdú   bool visitUBFMXri(MachineInstr &MI);
139600d4937SDavid Green   bool visitCopy(MachineInstr &MI);
14030caca39SJingu Kang   bool runOnMachineFunction(MachineFunction &MF) override;
14130caca39SJingu Kang 
14230caca39SJingu Kang   StringRef getPassName() const override {
14330caca39SJingu Kang     return "AArch64 MI Peephole Optimization pass";
14430caca39SJingu Kang   }
14530caca39SJingu Kang 
14630caca39SJingu Kang   void getAnalysisUsage(AnalysisUsage &AU) const override {
14730caca39SJingu Kang     AU.setPreservesCFG();
14879d0de2aSpaperchalice     AU.addRequired<MachineLoopInfoWrapperPass>();
14930caca39SJingu Kang     MachineFunctionPass::getAnalysisUsage(AU);
15030caca39SJingu Kang   }
15130caca39SJingu Kang };
15230caca39SJingu Kang 
15330caca39SJingu Kang char AArch64MIPeepholeOpt::ID = 0;
15430caca39SJingu Kang 
15530caca39SJingu Kang } // end anonymous namespace
15630caca39SJingu Kang 
15730caca39SJingu Kang INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
15830caca39SJingu Kang                 "AArch64 MI Peephole Optimization", false, false)
15930caca39SJingu Kang 
16030caca39SJingu Kang template <typename T>
16130caca39SJingu Kang static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
16230caca39SJingu Kang   T UImm = static_cast<T>(Imm);
16330caca39SJingu Kang   if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
16430caca39SJingu Kang     return false;
16530caca39SJingu Kang 
16630caca39SJingu Kang   // If this immediate can be handled by one instruction, do not split it.
16730caca39SJingu Kang   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
16830caca39SJingu Kang   AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
16930caca39SJingu Kang   if (Insn.size() == 1)
17030caca39SJingu Kang     return false;
17130caca39SJingu Kang 
17230caca39SJingu Kang   // The bitmask immediate consists of consecutive ones.  Let's say there is
17330caca39SJingu Kang   // constant 0b00000000001000000000010000000000 which does not consist of
17430caca39SJingu Kang   // consecutive ones. We can split it in to two bitmask immediate like
17530caca39SJingu Kang   // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
17630caca39SJingu Kang   // If we do AND with these two bitmask immediate, we can see original one.
177e0782018SKazu Hirata   unsigned LowestBitSet = llvm::countr_zero(UImm);
17830caca39SJingu Kang   unsigned HighestBitSet = Log2_64(UImm);
17930caca39SJingu Kang 
18030caca39SJingu Kang   // Create a mask which is filled with one from the position of lowest bit set
18130caca39SJingu Kang   // to the position of highest bit set.
18230caca39SJingu Kang   T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
18330caca39SJingu Kang               (static_cast<T>(1) << LowestBitSet);
18430caca39SJingu Kang   // Create a mask which is filled with one outside the position of lowest bit
18530caca39SJingu Kang   // set and the position of highest bit set.
18630caca39SJingu Kang   T NewImm2 = UImm | ~NewImm1;
18730caca39SJingu Kang 
18830caca39SJingu Kang   // If the split value is not valid bitmask immediate, do not split this
18930caca39SJingu Kang   // constant.
19030caca39SJingu Kang   if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
19130caca39SJingu Kang     return false;
19230caca39SJingu Kang 
19330caca39SJingu Kang   Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
19430caca39SJingu Kang   Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
19530caca39SJingu Kang   return true;
19630caca39SJingu Kang }
19730caca39SJingu Kang 
19830caca39SJingu Kang template <typename T>
19930caca39SJingu Kang bool AArch64MIPeepholeOpt::visitAND(
200a1aef4f3SDavid Green     unsigned Opc, MachineInstr &MI) {
20130caca39SJingu Kang   // Try below transformation.
20230caca39SJingu Kang   //
20330caca39SJingu Kang   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
20430caca39SJingu Kang   // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
20530caca39SJingu Kang   //
20630caca39SJingu Kang   // The mov pseudo instruction could be expanded to multiple mov instructions
20730caca39SJingu Kang   // later. Let's try to split the constant operand of mov instruction into two
208*d6fc7d3aSJay Foad   // bitmask immediates. It makes only two AND instructions instead of multiple
20930caca39SJingu Kang   // mov + and instructions.
21030caca39SJingu Kang 
211f65651ccSMicah Weston   return splitTwoPartImm<T>(
212a1aef4f3SDavid Green       MI,
213b0df7040SFangrui Song       [Opc](T Imm, unsigned RegSize, T &Imm0,
214b0df7040SFangrui Song             T &Imm1) -> std::optional<OpcodePair> {
215f65651ccSMicah Weston         if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
216c69af70fSMicah Weston           return std::make_pair(Opc, Opc);
21720cde154SKazu Hirata         return std::nullopt;
218f65651ccSMicah Weston       },
219c69af70fSMicah Weston       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
220f65651ccSMicah Weston                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
221f65651ccSMicah Weston                    Register NewDstReg) {
22230caca39SJingu Kang         DebugLoc DL = MI.getDebugLoc();
22393deac2eSMicah Weston         MachineBasicBlock *MBB = MI.getParent();
224c69af70fSMicah Weston         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
22530caca39SJingu Kang             .addReg(SrcReg)
226f65651ccSMicah Weston             .addImm(Imm0);
227c69af70fSMicah Weston         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
22830caca39SJingu Kang             .addReg(NewTmpReg)
229f65651ccSMicah Weston             .addImm(Imm1);
230f65651ccSMicah Weston       });
23130caca39SJingu Kang }
23230caca39SJingu Kang 
233a1aef4f3SDavid Green bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
234a5024362SJingu Kang   // Check this ORR comes from below zero-extend pattern.
235a5024362SJingu Kang   //
236a5024362SJingu Kang   // def : Pat<(i64 (zext GPR32:$src)),
237a5024362SJingu Kang   //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
238a5024362SJingu Kang   if (MI.getOperand(3).getImm() != 0)
239a5024362SJingu Kang     return false;
240a5024362SJingu Kang 
241a5024362SJingu Kang   if (MI.getOperand(1).getReg() != AArch64::WZR)
242a5024362SJingu Kang     return false;
243a5024362SJingu Kang 
244a5024362SJingu Kang   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
245a5024362SJingu Kang   if (!SrcMI)
246a5024362SJingu Kang     return false;
247a5024362SJingu Kang 
248a5024362SJingu Kang   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
249a5024362SJingu Kang   //
250a5024362SJingu Kang   // When you use the 32-bit form of an instruction, the upper 32 bits of the
251a5024362SJingu Kang   // source registers are ignored and the upper 32 bits of the destination
252a5024362SJingu Kang   // register are set to zero.
253a5024362SJingu Kang   //
254a5024362SJingu Kang   // If AArch64's 32-bit form of instruction defines the source operand of
255a5024362SJingu Kang   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
256a5024362SJingu Kang   // real AArch64 instruction and if it is not, do not process the opcode
257a5024362SJingu Kang   // conservatively.
258bccbf527SDavid Green   if (SrcMI->getOpcode() == TargetOpcode::COPY &&
259bccbf527SDavid Green       SrcMI->getOperand(1).getReg().isVirtual()) {
260bccbf527SDavid Green     const TargetRegisterClass *RC =
261bccbf527SDavid Green         MRI->getRegClass(SrcMI->getOperand(1).getReg());
262bccbf527SDavid Green 
263bccbf527SDavid Green     // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
264bccbf527SDavid Green     // that the upper bits are zero.
265bccbf527SDavid Green     if (RC != &AArch64::FPR32RegClass &&
266bccbf527SDavid Green         ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
267bccbf527SDavid Green          SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
268bccbf527SDavid Green       return false;
269bccbf527SDavid Green     Register CpySrc = SrcMI->getOperand(1).getReg();
270bccbf527SDavid Green     if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
271bccbf527SDavid Green       CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
272bccbf527SDavid Green       BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
273bccbf527SDavid Green               TII->get(TargetOpcode::COPY), CpySrc)
274bccbf527SDavid Green           .add(SrcMI->getOperand(1));
275bccbf527SDavid Green     }
276bccbf527SDavid Green     BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
277bccbf527SDavid Green             TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
278bccbf527SDavid Green         .addReg(CpySrc);
279a1aef4f3SDavid Green     SrcMI->eraseFromParent();
280bccbf527SDavid Green   }
281bccbf527SDavid Green   else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
282a5024362SJingu Kang     return false;
283a5024362SJingu Kang 
284a5024362SJingu Kang   Register DefReg = MI.getOperand(0).getReg();
285a5024362SJingu Kang   Register SrcReg = MI.getOperand(2).getReg();
286a5024362SJingu Kang   MRI->replaceRegWith(DefReg, SrcReg);
287a5024362SJingu Kang   MRI->clearKillFlags(SrcReg);
28843e500d7SDavid Green   LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
289a1aef4f3SDavid Green   MI.eraseFromParent();
290a5024362SJingu Kang 
291a5024362SJingu Kang   return true;
292a5024362SJingu Kang }
293a5024362SJingu Kang 
29429763aa4SMarina Taylor bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
29529763aa4SMarina Taylor   // Replace CSEL with MOV when both inputs are the same register.
29629763aa4SMarina Taylor   if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
29729763aa4SMarina Taylor     return false;
29829763aa4SMarina Taylor 
29929763aa4SMarina Taylor   auto ZeroReg =
30029763aa4SMarina Taylor       MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
30129763aa4SMarina Taylor   auto OrOpcode =
30229763aa4SMarina Taylor       MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
30329763aa4SMarina Taylor 
30429763aa4SMarina Taylor   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
30529763aa4SMarina Taylor       .addReg(MI.getOperand(0).getReg(), RegState::Define)
30629763aa4SMarina Taylor       .addReg(ZeroReg)
30729763aa4SMarina Taylor       .addReg(MI.getOperand(1).getReg())
30829763aa4SMarina Taylor       .addImm(0);
30929763aa4SMarina Taylor 
31029763aa4SMarina Taylor   MI.eraseFromParent();
31129763aa4SMarina Taylor   return true;
31229763aa4SMarina Taylor }
31329763aa4SMarina Taylor 
314b6655333Szhongyunde bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
315b6655333Szhongyunde   // Check this INSERT_SUBREG comes from below zero-extend pattern.
316b6655333Szhongyunde   //
317b6655333Szhongyunde   // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
318b6655333Szhongyunde   // To   %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
319b6655333Szhongyunde   //
320b6655333Szhongyunde   // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
321b6655333Szhongyunde   // COPY would destroy the upper part of the register anyway
322b6655333Szhongyunde   if (!MI.isRegTiedToDefOperand(1))
323b6655333Szhongyunde     return false;
324b6655333Szhongyunde 
325b6655333Szhongyunde   Register DstReg = MI.getOperand(0).getReg();
326b6655333Szhongyunde   const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
327b6655333Szhongyunde   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
328b6655333Szhongyunde   if (!SrcMI)
329b6655333Szhongyunde     return false;
330b6655333Szhongyunde 
331b6655333Szhongyunde   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
332b6655333Szhongyunde   //
333b6655333Szhongyunde   // When you use the 32-bit form of an instruction, the upper 32 bits of the
334b6655333Szhongyunde   // source registers are ignored and the upper 32 bits of the destination
335b6655333Szhongyunde   // register are set to zero.
336b6655333Szhongyunde   //
337b6655333Szhongyunde   // If AArch64's 32-bit form of instruction defines the source operand of
338b6655333Szhongyunde   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
339b6655333Szhongyunde   // real AArch64 instruction and if it is not, do not process the opcode
340b6655333Szhongyunde   // conservatively.
341b6655333Szhongyunde   if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
342b6655333Szhongyunde       !AArch64::GPR64allRegClass.hasSubClassEq(RC))
343b6655333Szhongyunde     return false;
344b6655333Szhongyunde 
345b6655333Szhongyunde   // Build a SUBREG_TO_REG instruction
346b6655333Szhongyunde   MachineInstr *SubregMI =
347b6655333Szhongyunde       BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
348b6655333Szhongyunde               TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
349b6655333Szhongyunde           .addImm(0)
350b6655333Szhongyunde           .add(MI.getOperand(2))
351b6655333Szhongyunde           .add(MI.getOperand(3));
352b6655333Szhongyunde   LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *SubregMI << "\n");
353f9b59249SFangrui Song   (void)SubregMI;
354b6655333Szhongyunde   MI.eraseFromParent();
355b6655333Szhongyunde 
356b6655333Szhongyunde   return true;
357b6655333Szhongyunde }
358b6655333Szhongyunde 
35993deac2eSMicah Weston template <typename T>
36093deac2eSMicah Weston static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
36193deac2eSMicah Weston   // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
36293deac2eSMicah Weston   // imm0 and imm1 are non-zero 12-bit unsigned int.
36393deac2eSMicah Weston   if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
36493deac2eSMicah Weston       (Imm & ~static_cast<T>(0xffffff)) != 0)
36593deac2eSMicah Weston     return false;
36693deac2eSMicah Weston 
36793deac2eSMicah Weston   // The immediate can not be composed via a single instruction.
36893deac2eSMicah Weston   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
36993deac2eSMicah Weston   AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
37093deac2eSMicah Weston   if (Insn.size() == 1)
37193deac2eSMicah Weston     return false;
37293deac2eSMicah Weston 
37393deac2eSMicah Weston   // Split Imm into (Imm0 << 12) + Imm1;
37493deac2eSMicah Weston   Imm0 = (Imm >> 12) & 0xfff;
37593deac2eSMicah Weston   Imm1 = Imm & 0xfff;
37693deac2eSMicah Weston   return true;
37793deac2eSMicah Weston }
37893deac2eSMicah Weston 
37993deac2eSMicah Weston template <typename T>
38093deac2eSMicah Weston bool AArch64MIPeepholeOpt::visitADDSUB(
381a1aef4f3SDavid Green     unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
38293deac2eSMicah Weston   // Try below transformation.
38393deac2eSMicah Weston   //
384b134c62fSDavid Green   // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
385b134c62fSDavid Green   // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
38693deac2eSMicah Weston   //
387b134c62fSDavid Green   // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
388b134c62fSDavid Green   // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
38993deac2eSMicah Weston   //
39093deac2eSMicah Weston   // The mov pseudo instruction could be expanded to multiple mov instructions
39193deac2eSMicah Weston   // later. Let's try to split the constant operand of mov instruction into two
392*d6fc7d3aSJay Foad   // legal add/sub immediates. It makes only two ADD/SUB instructions instead of
39393deac2eSMicah Weston   // multiple `mov` + `and/sub` instructions.
39493deac2eSMicah Weston 
395b134c62fSDavid Green   // We can sometimes have ADDWrr WZR, MULi32imm that have not been constant
396b134c62fSDavid Green   // folded. Make sure that we don't generate invalid instructions that use XZR
397b134c62fSDavid Green   // in those cases.
398b134c62fSDavid Green   if (MI.getOperand(1).getReg() == AArch64::XZR ||
399b134c62fSDavid Green       MI.getOperand(1).getReg() == AArch64::WZR)
400b134c62fSDavid Green     return false;
401b134c62fSDavid Green 
402f65651ccSMicah Weston   return splitTwoPartImm<T>(
403a1aef4f3SDavid Green       MI,
404f65651ccSMicah Weston       [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
405b0df7040SFangrui Song                        T &Imm1) -> std::optional<OpcodePair> {
406f65651ccSMicah Weston         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
407c69af70fSMicah Weston           return std::make_pair(PosOpc, PosOpc);
408f65651ccSMicah Weston         if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
409c69af70fSMicah Weston           return std::make_pair(NegOpc, NegOpc);
41020cde154SKazu Hirata         return std::nullopt;
411f65651ccSMicah Weston       },
412c69af70fSMicah Weston       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
413f65651ccSMicah Weston                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
414f65651ccSMicah Weston                    Register NewDstReg) {
41593deac2eSMicah Weston         DebugLoc DL = MI.getDebugLoc();
41693deac2eSMicah Weston         MachineBasicBlock *MBB = MI.getParent();
417c69af70fSMicah Weston         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
41893deac2eSMicah Weston             .addReg(SrcReg)
41993deac2eSMicah Weston             .addImm(Imm0)
42093deac2eSMicah Weston             .addImm(12);
421c69af70fSMicah Weston         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
422c69af70fSMicah Weston             .addReg(NewTmpReg)
423c69af70fSMicah Weston             .addImm(Imm1)
424c69af70fSMicah Weston             .addImm(0);
425c69af70fSMicah Weston       });
426c69af70fSMicah Weston }
427c69af70fSMicah Weston 
428c69af70fSMicah Weston template <typename T>
429c69af70fSMicah Weston bool AArch64MIPeepholeOpt::visitADDSSUBS(
430a1aef4f3SDavid Green     OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
431c69af70fSMicah Weston   // Try the same transformation as ADDSUB but with additional requirement
432c69af70fSMicah Weston   // that the condition code usages are only for Equal and Not Equal
433b134c62fSDavid Green 
434b134c62fSDavid Green   if (MI.getOperand(1).getReg() == AArch64::XZR ||
435b134c62fSDavid Green       MI.getOperand(1).getReg() == AArch64::WZR)
436b134c62fSDavid Green     return false;
437b134c62fSDavid Green 
438c69af70fSMicah Weston   return splitTwoPartImm<T>(
439a1aef4f3SDavid Green       MI,
440b0df7040SFangrui Song       [PosOpcs, NegOpcs, &MI, &TRI = TRI,
441b0df7040SFangrui Song        &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
442b0df7040SFangrui Song                    T &Imm1) -> std::optional<OpcodePair> {
443c69af70fSMicah Weston         OpcodePair OP;
444c69af70fSMicah Weston         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
445c69af70fSMicah Weston           OP = PosOpcs;
446c69af70fSMicah Weston         else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
447c69af70fSMicah Weston           OP = NegOpcs;
448c69af70fSMicah Weston         else
44920cde154SKazu Hirata           return std::nullopt;
450c69af70fSMicah Weston         // Check conditional uses last since it is expensive for scanning
451c69af70fSMicah Weston         // proceeding instructions
452c69af70fSMicah Weston         MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
453b0df7040SFangrui Song         std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
454c69af70fSMicah Weston         if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
45520cde154SKazu Hirata           return std::nullopt;
456c69af70fSMicah Weston         return OP;
457c69af70fSMicah Weston       },
458c69af70fSMicah Weston       [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
459c69af70fSMicah Weston                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
460c69af70fSMicah Weston                    Register NewDstReg) {
461c69af70fSMicah Weston         DebugLoc DL = MI.getDebugLoc();
462c69af70fSMicah Weston         MachineBasicBlock *MBB = MI.getParent();
463c69af70fSMicah Weston         BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
464c69af70fSMicah Weston             .addReg(SrcReg)
465c69af70fSMicah Weston             .addImm(Imm0)
466c69af70fSMicah Weston             .addImm(12);
467c69af70fSMicah Weston         BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
46893deac2eSMicah Weston             .addReg(NewTmpReg)
46993deac2eSMicah Weston             .addImm(Imm1)
47093deac2eSMicah Weston             .addImm(0);
471f65651ccSMicah Weston       });
47293deac2eSMicah Weston }
47393deac2eSMicah Weston 
47493deac2eSMicah Weston // Checks if the corresponding MOV immediate instruction is applicable for
47593deac2eSMicah Weston // this peephole optimization.
47693deac2eSMicah Weston bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
47793deac2eSMicah Weston                                             MachineInstr *&MovMI,
47893deac2eSMicah Weston                                             MachineInstr *&SubregToRegMI) {
47993deac2eSMicah Weston   // Check whether current MBB is in loop and the AND is loop invariant.
48093deac2eSMicah Weston   MachineBasicBlock *MBB = MI.getParent();
48193deac2eSMicah Weston   MachineLoop *L = MLI->getLoopFor(MBB);
48293deac2eSMicah Weston   if (L && !L->isLoopInvariant(MI))
48393deac2eSMicah Weston     return false;
48493deac2eSMicah Weston 
48593deac2eSMicah Weston   // Check whether current MI's operand is MOV with immediate.
48693deac2eSMicah Weston   MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
48793deac2eSMicah Weston   if (!MovMI)
48893deac2eSMicah Weston     return false;
48993deac2eSMicah Weston 
49093deac2eSMicah Weston   // If it is SUBREG_TO_REG, check its operand.
49193deac2eSMicah Weston   SubregToRegMI = nullptr;
49293deac2eSMicah Weston   if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
49393deac2eSMicah Weston     SubregToRegMI = MovMI;
49493deac2eSMicah Weston     MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
49593deac2eSMicah Weston     if (!MovMI)
49693deac2eSMicah Weston       return false;
49793deac2eSMicah Weston   }
49893deac2eSMicah Weston 
49993deac2eSMicah Weston   if (MovMI->getOpcode() != AArch64::MOVi32imm &&
50093deac2eSMicah Weston       MovMI->getOpcode() != AArch64::MOVi64imm)
50193deac2eSMicah Weston     return false;
50293deac2eSMicah Weston 
50393deac2eSMicah Weston   // If the MOV has multiple uses, do not split the immediate because it causes
50493deac2eSMicah Weston   // more instructions.
50593deac2eSMicah Weston   if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
50693deac2eSMicah Weston     return false;
50793deac2eSMicah Weston   if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
50893deac2eSMicah Weston     return false;
50993deac2eSMicah Weston 
51093deac2eSMicah Weston   // It is OK to perform this peephole optimization.
51193deac2eSMicah Weston   return true;
51293deac2eSMicah Weston }
51393deac2eSMicah Weston 
514f65651ccSMicah Weston template <typename T>
515f65651ccSMicah Weston bool AArch64MIPeepholeOpt::splitTwoPartImm(
516a1aef4f3SDavid Green     MachineInstr &MI,
517f65651ccSMicah Weston     SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
518f65651ccSMicah Weston   unsigned RegSize = sizeof(T) * 8;
519f65651ccSMicah Weston   assert((RegSize == 32 || RegSize == 64) &&
520f65651ccSMicah Weston          "Invalid RegSize for legal immediate peephole optimization");
521f65651ccSMicah Weston 
522f65651ccSMicah Weston   // Perform several essential checks against current MI.
523f65651ccSMicah Weston   MachineInstr *MovMI, *SubregToRegMI;
524f65651ccSMicah Weston   if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
525f65651ccSMicah Weston     return false;
526f65651ccSMicah Weston 
527f65651ccSMicah Weston   // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
528f65651ccSMicah Weston   T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
529f65651ccSMicah Weston   // For the 32 bit form of instruction, the upper 32 bits of the destination
530f65651ccSMicah Weston   // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
531f65651ccSMicah Weston   // of Imm to zero. This is essential if the Immediate value was a negative
532f65651ccSMicah Weston   // number since it was sign extended when we assign to the 64-bit Imm.
533f65651ccSMicah Weston   if (SubregToRegMI)
534f65651ccSMicah Weston     Imm &= 0xFFFFFFFF;
535c69af70fSMicah Weston   OpcodePair Opcode;
536f65651ccSMicah Weston   if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
5377a47ee51SKazu Hirata     Opcode = *R;
538f65651ccSMicah Weston   else
539f65651ccSMicah Weston     return false;
540f65651ccSMicah Weston 
541c69af70fSMicah Weston   // Create new MIs using the first and second opcodes. Opcodes might differ for
542c69af70fSMicah Weston   // flag setting operations that should only set flags on second instruction.
543c69af70fSMicah Weston   // NewTmpReg = Opcode.first SrcReg Imm0
544c69af70fSMicah Weston   // NewDstReg = Opcode.second NewTmpReg Imm1
545c69af70fSMicah Weston 
546c69af70fSMicah Weston   // Determine register classes for destinations and register operands
547f65651ccSMicah Weston   MachineFunction *MF = MI.getMF();
548c69af70fSMicah Weston   const TargetRegisterClass *FirstInstrDstRC =
549c69af70fSMicah Weston       TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
550c69af70fSMicah Weston   const TargetRegisterClass *FirstInstrOperandRC =
551c69af70fSMicah Weston       TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
552c69af70fSMicah Weston   const TargetRegisterClass *SecondInstrDstRC =
553c69af70fSMicah Weston       (Opcode.first == Opcode.second)
554c69af70fSMicah Weston           ? FirstInstrDstRC
555c69af70fSMicah Weston           : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
556c69af70fSMicah Weston   const TargetRegisterClass *SecondInstrOperandRC =
557c69af70fSMicah Weston       (Opcode.first == Opcode.second)
558c69af70fSMicah Weston           ? FirstInstrOperandRC
559c69af70fSMicah Weston           : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
560c69af70fSMicah Weston 
561c69af70fSMicah Weston   // Get old registers destinations and new register destinations
562f65651ccSMicah Weston   Register DstReg = MI.getOperand(0).getReg();
563f65651ccSMicah Weston   Register SrcReg = MI.getOperand(1).getReg();
564c69af70fSMicah Weston   Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
565c69af70fSMicah Weston   // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
566c69af70fSMicah Weston   // reuse that same destination register.
567c69af70fSMicah Weston   Register NewDstReg = DstReg.isVirtual()
568c69af70fSMicah Weston                            ? MRI->createVirtualRegister(SecondInstrDstRC)
569c69af70fSMicah Weston                            : DstReg;
570f65651ccSMicah Weston 
571c69af70fSMicah Weston   // Constrain registers based on their new uses
572c69af70fSMicah Weston   MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
573c69af70fSMicah Weston   MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
574c69af70fSMicah Weston   if (DstReg != NewDstReg)
575f65651ccSMicah Weston     MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
576f65651ccSMicah Weston 
577c69af70fSMicah Weston   // Call the delegating operation to build the instruction
578f65651ccSMicah Weston   BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
579f65651ccSMicah Weston 
580f65651ccSMicah Weston   // replaceRegWith changes MI's definition register. Keep it for SSA form until
581c69af70fSMicah Weston   // deleting MI. Only if we made a new destination register.
582c69af70fSMicah Weston   if (DstReg != NewDstReg) {
583c69af70fSMicah Weston     MRI->replaceRegWith(DstReg, NewDstReg);
584f65651ccSMicah Weston     MI.getOperand(0).setReg(DstReg);
585c69af70fSMicah Weston   }
586f65651ccSMicah Weston 
587f65651ccSMicah Weston   // Record the MIs need to be removed.
588a1aef4f3SDavid Green   MI.eraseFromParent();
589f65651ccSMicah Weston   if (SubregToRegMI)
590a1aef4f3SDavid Green     SubregToRegMI->eraseFromParent();
591a1aef4f3SDavid Green   MovMI->eraseFromParent();
592f65651ccSMicah Weston 
593f65651ccSMicah Weston   return true;
594f65651ccSMicah Weston }
595f65651ccSMicah Weston 
59672105d10SNilanjana Basu bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
59772105d10SNilanjana Basu   // Check if this INSvi[X]gpr comes from COPY of a source FPR128
59872105d10SNilanjana Basu   //
59972105d10SNilanjana Basu   // From
60072105d10SNilanjana Basu   //  %intermediate1:gpr64 = COPY %src:fpr128
60172105d10SNilanjana Basu   //  %intermediate2:gpr32 = COPY %intermediate1:gpr64
60272105d10SNilanjana Basu   //  %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
60372105d10SNilanjana Basu   // To
60472105d10SNilanjana Basu   //  %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
60572105d10SNilanjana Basu   //  src_index
60672105d10SNilanjana Basu   // where src_index = 0, X = [8|16|32|64]
60772105d10SNilanjana Basu 
60872105d10SNilanjana Basu   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
60972105d10SNilanjana Basu 
61072105d10SNilanjana Basu   // For a chain of COPY instructions, find the initial source register
61172105d10SNilanjana Basu   // and check if it's an FPR128
61272105d10SNilanjana Basu   while (true) {
61372105d10SNilanjana Basu     if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
61472105d10SNilanjana Basu       return false;
61572105d10SNilanjana Basu 
61672105d10SNilanjana Basu     if (!SrcMI->getOperand(1).getReg().isVirtual())
61772105d10SNilanjana Basu       return false;
61872105d10SNilanjana Basu 
61972105d10SNilanjana Basu     if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
62072105d10SNilanjana Basu         &AArch64::FPR128RegClass) {
62172105d10SNilanjana Basu       break;
62272105d10SNilanjana Basu     }
62372105d10SNilanjana Basu     SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
62472105d10SNilanjana Basu   }
62572105d10SNilanjana Basu 
62672105d10SNilanjana Basu   Register DstReg = MI.getOperand(0).getReg();
62772105d10SNilanjana Basu   Register SrcReg = SrcMI->getOperand(1).getReg();
62872105d10SNilanjana Basu   MachineInstr *INSvilaneMI =
62972105d10SNilanjana Basu       BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
63072105d10SNilanjana Basu           .add(MI.getOperand(1))
63172105d10SNilanjana Basu           .add(MI.getOperand(2))
63272105d10SNilanjana Basu           .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
63372105d10SNilanjana Basu           .addImm(0);
63472105d10SNilanjana Basu 
63572105d10SNilanjana Basu   LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *INSvilaneMI << "\n");
636fa66e4bdSKazu Hirata   (void)INSvilaneMI;
63772105d10SNilanjana Basu   MI.eraseFromParent();
63872105d10SNilanjana Basu   return true;
63972105d10SNilanjana Basu }
64072105d10SNilanjana Basu 
6416e7840ddSDavid Green // All instructions that set a FPR64 will implicitly zero the top bits of the
6426e7840ddSDavid Green // register.
6436e7840ddSDavid Green static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
6446e7840ddSDavid Green                                         MachineRegisterInfo *MRI) {
64586780f49SDavid Green   if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
646932911d6SJingu Kang     return false;
6476e7840ddSDavid Green   const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
6486e7840ddSDavid Green   if (RC != &AArch64::FPR64RegClass)
6496e7840ddSDavid Green     return false;
6506e7840ddSDavid Green   return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
651932911d6SJingu Kang }
652932911d6SJingu Kang 
653932911d6SJingu Kang bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
654932911d6SJingu Kang   // Check the MI for low 64-bits sets zero for high 64-bits implicitly.
655932911d6SJingu Kang   // We are expecting below case.
656932911d6SJingu Kang   //
657932911d6SJingu Kang   //  %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
658932911d6SJingu Kang   //  %6:fpr128 = IMPLICIT_DEF
659932911d6SJingu Kang   //  %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
660932911d6SJingu Kang   //  %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
661932911d6SJingu Kang   MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
662932911d6SJingu Kang   if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
663932911d6SJingu Kang     return false;
664932911d6SJingu Kang   Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
6656e7840ddSDavid Green   if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
666932911d6SJingu Kang     return false;
667932911d6SJingu Kang 
668932911d6SJingu Kang   // Check there is `mov 0` MI for high 64-bits.
669932911d6SJingu Kang   // We are expecting below cases.
670932911d6SJingu Kang   //
671932911d6SJingu Kang   //  %2:fpr64 = MOVID 0
672932911d6SJingu Kang   //  %4:fpr128 = IMPLICIT_DEF
673932911d6SJingu Kang   //  %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
674932911d6SJingu Kang   //  %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
675932911d6SJingu Kang   // or
676932911d6SJingu Kang   //  %5:fpr128 = MOVIv2d_ns 0
677932911d6SJingu Kang   //  %6:fpr64 = COPY %5.dsub:fpr128
678932911d6SJingu Kang   //  %8:fpr128 = IMPLICIT_DEF
679932911d6SJingu Kang   //  %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
680932911d6SJingu Kang   //  %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
681932911d6SJingu Kang   MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
6826e7840ddSDavid Green   if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
683932911d6SJingu Kang     return false;
684932911d6SJingu Kang   High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
6856e7840ddSDavid Green   if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
686932911d6SJingu Kang     High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
6876e7840ddSDavid Green   if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
6886e7840ddSDavid Green                     High64MI->getOpcode() != AArch64::MOVIv2d_ns))
689932911d6SJingu Kang     return false;
690932911d6SJingu Kang   if (High64MI->getOperand(1).getImm() != 0)
691932911d6SJingu Kang     return false;
692932911d6SJingu Kang 
693932911d6SJingu Kang   // Let's remove MIs for high 64-bits.
694932911d6SJingu Kang   Register OldDef = MI.getOperand(0).getReg();
695932911d6SJingu Kang   Register NewDef = MI.getOperand(1).getReg();
69644479b80SDavid Green   MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
697932911d6SJingu Kang   MRI->replaceRegWith(OldDef, NewDef);
698932911d6SJingu Kang   MI.eraseFromParent();
699932911d6SJingu Kang 
700932911d6SJingu Kang   return true;
701932911d6SJingu Kang }
702932911d6SJingu Kang 
703f42e321bSDavid Green bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
704f42e321bSDavid Green   // An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR.
705f42e321bSDavid Green   MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
706f42e321bSDavid Green   if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
707f42e321bSDavid Green     return false;
708f42e321bSDavid Green 
709f42e321bSDavid Green   // Let's remove MIs for high 64-bits.
710f42e321bSDavid Green   Register OldDef = MI.getOperand(0).getReg();
711f42e321bSDavid Green   Register NewDef = MI.getOperand(1).getReg();
71236e74cfdSDavid Green   LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
71336e74cfdSDavid Green   MRI->clearKillFlags(OldDef);
71436e74cfdSDavid Green   MRI->clearKillFlags(NewDef);
715f42e321bSDavid Green   MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
716f42e321bSDavid Green   MRI->replaceRegWith(OldDef, NewDef);
717f42e321bSDavid Green   MI.eraseFromParent();
718f42e321bSDavid Green 
719f42e321bSDavid Green   return true;
720f42e321bSDavid Green }
721f42e321bSDavid Green 
72272901fe1SCsanád Hajdú bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
72372901fe1SCsanád Hajdú   // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of
72472901fe1SCsanád Hajdú   // UBFM, and replace the UBFMXri instruction with its 32 bit variant, UBFMWri.
72572901fe1SCsanád Hajdú   int64_t Immr = MI.getOperand(2).getImm();
72672901fe1SCsanád Hajdú   int64_t Imms = MI.getOperand(3).getImm();
72772901fe1SCsanád Hajdú 
72872901fe1SCsanád Hajdú   bool IsLSR = Imms == 31 && Immr <= Imms;
72972901fe1SCsanád Hajdú   bool IsLSL = Immr == Imms + 33;
73072901fe1SCsanád Hajdú   if (!IsLSR && !IsLSL)
73172901fe1SCsanád Hajdú     return false;
73272901fe1SCsanád Hajdú 
73372901fe1SCsanád Hajdú   if (IsLSL) {
73472901fe1SCsanád Hajdú     Immr -= 32;
73572901fe1SCsanád Hajdú   }
73672901fe1SCsanád Hajdú 
73772901fe1SCsanád Hajdú   const TargetRegisterClass *DstRC64 =
73872901fe1SCsanád Hajdú       TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF());
73972901fe1SCsanád Hajdú   const TargetRegisterClass *DstRC32 =
74072901fe1SCsanád Hajdú       TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
74172901fe1SCsanád Hajdú   assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
74272901fe1SCsanád Hajdú                     "sub_32 subregister class");
74372901fe1SCsanád Hajdú 
74472901fe1SCsanád Hajdú   const TargetRegisterClass *SrcRC64 =
74572901fe1SCsanád Hajdú       TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF());
74672901fe1SCsanád Hajdú   const TargetRegisterClass *SrcRC32 =
74772901fe1SCsanád Hajdú       TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
74872901fe1SCsanád Hajdú   assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
74972901fe1SCsanád Hajdú                     "subregister class");
75072901fe1SCsanád Hajdú 
75172901fe1SCsanád Hajdú   Register DstReg64 = MI.getOperand(0).getReg();
75272901fe1SCsanád Hajdú   Register DstReg32 = MRI->createVirtualRegister(DstRC32);
75372901fe1SCsanád Hajdú   Register SrcReg64 = MI.getOperand(1).getReg();
75472901fe1SCsanád Hajdú   Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);
75572901fe1SCsanád Hajdú 
75672901fe1SCsanád Hajdú   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
75772901fe1SCsanád Hajdú           SrcReg32)
75872901fe1SCsanád Hajdú       .addReg(SrcReg64, 0, AArch64::sub_32);
75972901fe1SCsanád Hajdú   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
76072901fe1SCsanád Hajdú           DstReg32)
76172901fe1SCsanád Hajdú       .addReg(SrcReg32)
76272901fe1SCsanád Hajdú       .addImm(Immr)
76372901fe1SCsanád Hajdú       .addImm(Imms);
76472901fe1SCsanád Hajdú   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
76572901fe1SCsanád Hajdú           TII->get(AArch64::SUBREG_TO_REG), DstReg64)
76672901fe1SCsanád Hajdú       .addImm(0)
76772901fe1SCsanád Hajdú       .addReg(DstReg32)
76872901fe1SCsanád Hajdú       .addImm(AArch64::sub_32);
76972901fe1SCsanád Hajdú   MI.eraseFromParent();
77072901fe1SCsanád Hajdú   return true;
77172901fe1SCsanád Hajdú }
77272901fe1SCsanád Hajdú 
773600d4937SDavid Green // Across a basic-block we might have in i32 extract from a value that only
774600d4937SDavid Green // operates on upper bits (for example a sxtw). We can replace the COPY with a
775600d4937SDavid Green // new version skipping the sxtw.
776600d4937SDavid Green bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
777600d4937SDavid Green   Register InputReg = MI.getOperand(1).getReg();
778600d4937SDavid Green   if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
779600d4937SDavid Green       !MRI->hasOneNonDBGUse(InputReg))
780600d4937SDavid Green     return false;
781600d4937SDavid Green 
782600d4937SDavid Green   MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
783600d4937SDavid Green   SmallPtrSet<MachineInstr *, 4> DeadInstrs;
784600d4937SDavid Green   DeadInstrs.insert(SrcMI);
785600d4937SDavid Green   while (SrcMI && SrcMI->isFullCopy() &&
786600d4937SDavid Green          MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
787600d4937SDavid Green     SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
788600d4937SDavid Green     DeadInstrs.insert(SrcMI);
789600d4937SDavid Green   }
790600d4937SDavid Green 
791fe946bfbSDavid Green   if (!SrcMI)
792600d4937SDavid Green     return false;
793600d4937SDavid Green 
794fe946bfbSDavid Green   // Look for SXTW(X) and return Reg.
795fe946bfbSDavid Green   auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
796fe946bfbSDavid Green     if (SrcMI->getOpcode() != AArch64::SBFMXri ||
797fe946bfbSDavid Green         SrcMI->getOperand(2).getImm() != 0 ||
798fe946bfbSDavid Green         SrcMI->getOperand(3).getImm() != 31)
799fe946bfbSDavid Green       return AArch64::NoRegister;
800fe946bfbSDavid Green     return SrcMI->getOperand(1).getReg();
801fe946bfbSDavid Green   };
802fe946bfbSDavid Green   // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32)))
803fe946bfbSDavid Green   auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
804fe946bfbSDavid Green     if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
805fe946bfbSDavid Green         SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
806fe946bfbSDavid Green         !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
807fe946bfbSDavid Green       return AArch64::NoRegister;
808fe946bfbSDavid Green     MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
809fe946bfbSDavid Green     if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
810fe946bfbSDavid Green         Orr->getOperand(1).getReg() != AArch64::WZR ||
811fe946bfbSDavid Green         !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
812fe946bfbSDavid Green       return AArch64::NoRegister;
813fe946bfbSDavid Green     MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
814fe946bfbSDavid Green     if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
815fe946bfbSDavid Green         Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
816fe946bfbSDavid Green       return AArch64::NoRegister;
817fe946bfbSDavid Green     DeadInstrs.insert(Orr);
818fe946bfbSDavid Green     return Cpy->getOperand(1).getReg();
819fe946bfbSDavid Green   };
820fe946bfbSDavid Green 
821fe946bfbSDavid Green   Register SrcReg = getSXTWSrcReg(SrcMI);
822fe946bfbSDavid Green   if (!SrcReg)
823fe946bfbSDavid Green     SrcReg = getUXTWSrcReg(SrcMI);
824fe946bfbSDavid Green   if (!SrcReg)
825fe946bfbSDavid Green     return false;
826fe946bfbSDavid Green 
827600d4937SDavid Green   MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
828600d4937SDavid Green   LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
829600d4937SDavid Green   MI.getOperand(1).setReg(SrcReg);
830600d4937SDavid Green   LLVM_DEBUG(dbgs() << "        to: " << MI);
831600d4937SDavid Green   for (auto *DeadMI : DeadInstrs) {
832600d4937SDavid Green     LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
833600d4937SDavid Green     DeadMI->eraseFromParent();
834600d4937SDavid Green   }
835600d4937SDavid Green   return true;
836600d4937SDavid Green }
837600d4937SDavid Green 
83830caca39SJingu Kang bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
83930caca39SJingu Kang   if (skipFunction(MF.getFunction()))
84030caca39SJingu Kang     return false;
84130caca39SJingu Kang 
84230caca39SJingu Kang   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
843f65651ccSMicah Weston   TRI = static_cast<const AArch64RegisterInfo *>(
844f65651ccSMicah Weston       MF.getSubtarget().getRegisterInfo());
84579d0de2aSpaperchalice   MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
84630caca39SJingu Kang   MRI = &MF.getRegInfo();
84730caca39SJingu Kang 
84843e500d7SDavid Green   assert(MRI->isSSA() && "Expected to be run on SSA form!");
84930caca39SJingu Kang 
85030caca39SJingu Kang   bool Changed = false;
85130caca39SJingu Kang 
85230caca39SJingu Kang   for (MachineBasicBlock &MBB : MF) {
853a1aef4f3SDavid Green     for (MachineInstr &MI : make_early_inc_range(MBB)) {
85430caca39SJingu Kang       switch (MI.getOpcode()) {
85530caca39SJingu Kang       default:
85630caca39SJingu Kang         break;
857b6655333Szhongyunde       case AArch64::INSERT_SUBREG:
85804e94bb1SDavid Green         Changed |= visitINSERT(MI);
859b6655333Szhongyunde         break;
86030caca39SJingu Kang       case AArch64::ANDWrr:
86104e94bb1SDavid Green         Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
86230caca39SJingu Kang         break;
86330caca39SJingu Kang       case AArch64::ANDXrr:
86404e94bb1SDavid Green         Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
86530caca39SJingu Kang         break;
866a5024362SJingu Kang       case AArch64::ORRWrs:
86704e94bb1SDavid Green         Changed |= visitORR(MI);
86843e500d7SDavid Green         break;
86993deac2eSMicah Weston       case AArch64::ADDWrr:
87004e94bb1SDavid Green         Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
87193deac2eSMicah Weston         break;
87293deac2eSMicah Weston       case AArch64::SUBWrr:
87304e94bb1SDavid Green         Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
87493deac2eSMicah Weston         break;
87593deac2eSMicah Weston       case AArch64::ADDXrr:
87604e94bb1SDavid Green         Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
87793deac2eSMicah Weston         break;
87893deac2eSMicah Weston       case AArch64::SUBXrr:
87904e94bb1SDavid Green         Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
88093deac2eSMicah Weston         break;
881c69af70fSMicah Weston       case AArch64::ADDSWrr:
88204e94bb1SDavid Green         Changed |=
88304e94bb1SDavid Green             visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
88404e94bb1SDavid Green                                     {AArch64::SUBWri, AArch64::SUBSWri}, MI);
885c69af70fSMicah Weston         break;
886c69af70fSMicah Weston       case AArch64::SUBSWrr:
88704e94bb1SDavid Green         Changed |=
88804e94bb1SDavid Green             visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
88904e94bb1SDavid Green                                     {AArch64::ADDWri, AArch64::ADDSWri}, MI);
890c69af70fSMicah Weston         break;
891c69af70fSMicah Weston       case AArch64::ADDSXrr:
89204e94bb1SDavid Green         Changed |=
89304e94bb1SDavid Green             visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
89404e94bb1SDavid Green                                     {AArch64::SUBXri, AArch64::SUBSXri}, MI);
895c69af70fSMicah Weston         break;
896c69af70fSMicah Weston       case AArch64::SUBSXrr:
89704e94bb1SDavid Green         Changed |=
89804e94bb1SDavid Green             visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
89904e94bb1SDavid Green                                     {AArch64::ADDXri, AArch64::ADDSXri}, MI);
900c69af70fSMicah Weston         break;
90129763aa4SMarina Taylor       case AArch64::CSELWr:
90229763aa4SMarina Taylor       case AArch64::CSELXr:
90329763aa4SMarina Taylor         Changed |= visitCSEL(MI);
90429763aa4SMarina Taylor         break;
90572105d10SNilanjana Basu       case AArch64::INSvi64gpr:
90604e94bb1SDavid Green         Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
90772105d10SNilanjana Basu         break;
90872105d10SNilanjana Basu       case AArch64::INSvi32gpr:
90904e94bb1SDavid Green         Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
91072105d10SNilanjana Basu         break;
91172105d10SNilanjana Basu       case AArch64::INSvi16gpr:
91204e94bb1SDavid Green         Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
91372105d10SNilanjana Basu         break;
91472105d10SNilanjana Basu       case AArch64::INSvi8gpr:
91504e94bb1SDavid Green         Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
91672105d10SNilanjana Basu         break;
917932911d6SJingu Kang       case AArch64::INSvi64lane:
91804e94bb1SDavid Green         Changed |= visitINSvi64lane(MI);
919932911d6SJingu Kang         break;
920f42e321bSDavid Green       case AArch64::FMOVDr:
921f42e321bSDavid Green         Changed |= visitFMOVDr(MI);
922f42e321bSDavid Green         break;
92372901fe1SCsanád Hajdú       case AArch64::UBFMXri:
92472901fe1SCsanád Hajdú         Changed |= visitUBFMXri(MI);
92572901fe1SCsanád Hajdú         break;
926600d4937SDavid Green       case AArch64::COPY:
927600d4937SDavid Green         Changed |= visitCopy(MI);
928600d4937SDavid Green         break;
92930caca39SJingu Kang       }
93030caca39SJingu Kang     }
93130caca39SJingu Kang   }
93230caca39SJingu Kang 
93330caca39SJingu Kang   return Changed;
93430caca39SJingu Kang }
93530caca39SJingu Kang 
93630caca39SJingu Kang FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
93730caca39SJingu Kang   return new AArch64MIPeepholeOpt();
93830caca39SJingu Kang }
939