xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // Perform peephole optimizations on the machine code:
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric // - Optimize Extensions
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //     Optimization of sign / zero extension instructions. It may be extended to
140b57cec5SDimitry Andric //     handle other instructions with similar properties.
150b57cec5SDimitry Andric //
160b57cec5SDimitry Andric //     On some targets, some instructions, e.g. X86 sign / zero extension, may
170b57cec5SDimitry Andric //     leave the source value in the lower part of the result. This optimization
180b57cec5SDimitry Andric //     will replace some uses of the pre-extension value with uses of the
190b57cec5SDimitry Andric //     sub-register of the results.
200b57cec5SDimitry Andric //
210b57cec5SDimitry Andric // - Optimize Comparisons
220b57cec5SDimitry Andric //
230b57cec5SDimitry Andric //     Optimization of comparison instructions. For instance, in this code:
240b57cec5SDimitry Andric //
250b57cec5SDimitry Andric //       sub r1, 1
260b57cec5SDimitry Andric //       cmp r1, 0
270b57cec5SDimitry Andric //       bz  L1
280b57cec5SDimitry Andric //
290b57cec5SDimitry Andric //     If the "sub" instruction all ready sets (or could be modified to set) the
300b57cec5SDimitry Andric //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
310b57cec5SDimitry Andric //     eliminate the "cmp" instruction.
320b57cec5SDimitry Andric //
330b57cec5SDimitry Andric //     Another instance, in this code:
340b57cec5SDimitry Andric //
350b57cec5SDimitry Andric //       sub r1, r3 | sub r1, imm
360b57cec5SDimitry Andric //       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
370b57cec5SDimitry Andric //       bge L1
380b57cec5SDimitry Andric //
390b57cec5SDimitry Andric //     If the branch instruction can use flag from "sub", then we can replace
400b57cec5SDimitry Andric //     "sub" with "subs" and eliminate the "cmp" instruction.
410b57cec5SDimitry Andric //
420b57cec5SDimitry Andric // - Optimize Loads:
430b57cec5SDimitry Andric //
440b57cec5SDimitry Andric //     Loads that can be folded into a later instruction. A load is foldable
450b57cec5SDimitry Andric //     if it loads to virtual registers and the virtual register defined has
460b57cec5SDimitry Andric //     a single use.
470b57cec5SDimitry Andric //
480b57cec5SDimitry Andric // - Optimize Copies and Bitcast (more generally, target specific copies):
490b57cec5SDimitry Andric //
500b57cec5SDimitry Andric //     Rewrite copies and bitcasts to avoid cross register bank copies
510b57cec5SDimitry Andric //     when possible.
520b57cec5SDimitry Andric //     E.g., Consider the following example, where capital and lower
530b57cec5SDimitry Andric //     letters denote different register file:
540b57cec5SDimitry Andric //     b = copy A <-- cross-bank copy
550b57cec5SDimitry Andric //     C = copy b <-- cross-bank copy
560b57cec5SDimitry Andric //   =>
570b57cec5SDimitry Andric //     b = copy A <-- cross-bank copy
580b57cec5SDimitry Andric //     C = copy A <-- same-bank copy
590b57cec5SDimitry Andric //
600b57cec5SDimitry Andric //     E.g., for bitcast:
610b57cec5SDimitry Andric //     b = bitcast A <-- cross-bank copy
620b57cec5SDimitry Andric //     C = bitcast b <-- cross-bank copy
630b57cec5SDimitry Andric //   =>
640b57cec5SDimitry Andric //     b = bitcast A <-- cross-bank copy
650b57cec5SDimitry Andric //     C = copy A    <-- same-bank copy
660b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
690b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
700b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h"
710b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
720b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
730b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
740b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
750b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
760b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
770b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
780b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
790b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
800b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
810b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
820b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
830b57cec5SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
840b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
850b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
86480093f4SDimitry Andric #include "llvm/InitializePasses.h"
870b57cec5SDimitry Andric #include "llvm/MC/LaneBitmask.h"
880b57cec5SDimitry Andric #include "llvm/MC/MCInstrDesc.h"
890b57cec5SDimitry Andric #include "llvm/Pass.h"
900b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
910b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
920b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
930b57cec5SDimitry Andric #include <cassert>
940b57cec5SDimitry Andric #include <cstdint>
950b57cec5SDimitry Andric #include <memory>
960b57cec5SDimitry Andric #include <utility>
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric using namespace llvm;
990b57cec5SDimitry Andric using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
1000b57cec5SDimitry Andric using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx;
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric #define DEBUG_TYPE "peephole-opt"
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric // Optimize Extensions
1050b57cec5SDimitry Andric static cl::opt<bool>
1060b57cec5SDimitry Andric Aggressive("aggressive-ext-opt", cl::Hidden,
1070b57cec5SDimitry Andric            cl::desc("Aggressive extension optimization"));
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric static cl::opt<bool>
1100b57cec5SDimitry Andric DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
1110b57cec5SDimitry Andric                 cl::desc("Disable the peephole optimizer"));
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric /// Specifiy whether or not the value tracking looks through
1140b57cec5SDimitry Andric /// complex instructions. When this is true, the value tracker
1150b57cec5SDimitry Andric /// bails on everything that is not a copy or a bitcast.
1160b57cec5SDimitry Andric static cl::opt<bool>
1170b57cec5SDimitry Andric DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
1180b57cec5SDimitry Andric                   cl::desc("Disable advanced copy optimization"));
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric static cl::opt<bool> DisableNAPhysCopyOpt(
1210b57cec5SDimitry Andric     "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
1220b57cec5SDimitry Andric     cl::desc("Disable non-allocatable physical register copy optimization"));
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric // Limit the number of PHI instructions to process
1250b57cec5SDimitry Andric // in PeepholeOptimizer::getNextSource.
1260b57cec5SDimitry Andric static cl::opt<unsigned> RewritePHILimit(
1270b57cec5SDimitry Andric     "rewrite-phi-limit", cl::Hidden, cl::init(10),
1280b57cec5SDimitry Andric     cl::desc("Limit the length of PHI chains to lookup"));
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric // Limit the length of recurrence chain when evaluating the benefit of
1310b57cec5SDimitry Andric // commuting operands.
1320b57cec5SDimitry Andric static cl::opt<unsigned> MaxRecurrenceChain(
1330b57cec5SDimitry Andric     "recurrence-chain-limit", cl::Hidden, cl::init(3),
1340b57cec5SDimitry Andric     cl::desc("Maximum length of recurrence chain when evaluating the benefit "
1350b57cec5SDimitry Andric              "of commuting operands"));
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric STATISTIC(NumReuse, "Number of extension results reused");
1390b57cec5SDimitry Andric STATISTIC(NumCmps, "Number of compares eliminated");
1400b57cec5SDimitry Andric STATISTIC(NumImmFold, "Number of move immediate folded");
1410b57cec5SDimitry Andric STATISTIC(NumLoadFold, "Number of loads folded");
1420b57cec5SDimitry Andric STATISTIC(NumSelects, "Number of selects optimized");
1430b57cec5SDimitry Andric STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
1440b57cec5SDimitry Andric STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
1450b57cec5SDimitry Andric STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric namespace {
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   class ValueTrackerResult;
1500b57cec5SDimitry Andric   class RecurrenceInstr;
1510b57cec5SDimitry Andric 
1525f757f3fSDimitry Andric   class PeepholeOptimizer : public MachineFunctionPass,
1535f757f3fSDimitry Andric                             private MachineFunction::Delegate {
15406c3fb27SDimitry Andric     const TargetInstrInfo *TII = nullptr;
15506c3fb27SDimitry Andric     const TargetRegisterInfo *TRI = nullptr;
15606c3fb27SDimitry Andric     MachineRegisterInfo *MRI = nullptr;
15706c3fb27SDimitry Andric     MachineDominatorTree *DT = nullptr; // Machine dominator tree
15806c3fb27SDimitry Andric     MachineLoopInfo *MLI = nullptr;
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric   public:
1610b57cec5SDimitry Andric     static char ID; // Pass identification
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric     PeepholeOptimizer() : MachineFunctionPass(ID) {
1640b57cec5SDimitry Andric       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
1650b57cec5SDimitry Andric     }
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric     bool runOnMachineFunction(MachineFunction &MF) override;
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric     void getAnalysisUsage(AnalysisUsage &AU) const override {
1700b57cec5SDimitry Andric       AU.setPreservesCFG();
1710b57cec5SDimitry Andric       MachineFunctionPass::getAnalysisUsage(AU);
172*0fca6ea1SDimitry Andric       AU.addRequired<MachineLoopInfoWrapperPass>();
173*0fca6ea1SDimitry Andric       AU.addPreserved<MachineLoopInfoWrapperPass>();
1740b57cec5SDimitry Andric       if (Aggressive) {
175*0fca6ea1SDimitry Andric         AU.addRequired<MachineDominatorTreeWrapperPass>();
176*0fca6ea1SDimitry Andric         AU.addPreserved<MachineDominatorTreeWrapperPass>();
1770b57cec5SDimitry Andric       }
1780b57cec5SDimitry Andric     }
1790b57cec5SDimitry Andric 
180e8d8bef9SDimitry Andric     MachineFunctionProperties getRequiredProperties() const override {
181e8d8bef9SDimitry Andric       return MachineFunctionProperties()
182e8d8bef9SDimitry Andric         .set(MachineFunctionProperties::Property::IsSSA);
183e8d8bef9SDimitry Andric     }
184e8d8bef9SDimitry Andric 
1850b57cec5SDimitry Andric     /// Track Def -> Use info used for rewriting copies.
1860b57cec5SDimitry Andric     using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric     /// Sequence of instructions that formulate recurrence cycle.
1890b57cec5SDimitry Andric     using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric   private:
1920b57cec5SDimitry Andric     bool optimizeCmpInstr(MachineInstr &MI);
1930b57cec5SDimitry Andric     bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
1940b57cec5SDimitry Andric                           SmallPtrSetImpl<MachineInstr*> &LocalMIs);
1950b57cec5SDimitry Andric     bool optimizeSelect(MachineInstr &MI,
1960b57cec5SDimitry Andric                         SmallPtrSetImpl<MachineInstr *> &LocalMIs);
1970b57cec5SDimitry Andric     bool optimizeCondBranch(MachineInstr &MI);
1980b57cec5SDimitry Andric     bool optimizeCoalescableCopy(MachineInstr &MI);
1990b57cec5SDimitry Andric     bool optimizeUncoalescableCopy(MachineInstr &MI,
2000b57cec5SDimitry Andric                                    SmallPtrSetImpl<MachineInstr *> &LocalMIs);
2010b57cec5SDimitry Andric     bool optimizeRecurrence(MachineInstr &PHI);
2020b57cec5SDimitry Andric     bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
203e8d8bef9SDimitry Andric     bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
204e8d8bef9SDimitry Andric                          DenseMap<Register, MachineInstr *> &ImmDefMIs);
205e8d8bef9SDimitry Andric     bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
2065f757f3fSDimitry Andric                        DenseMap<Register, MachineInstr *> &ImmDefMIs,
2075f757f3fSDimitry Andric                        bool &Deleted);
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric     /// Finds recurrence cycles, but only ones that formulated around
2100b57cec5SDimitry Andric     /// a def operand and a use operand that are tied. If there is a use
2110b57cec5SDimitry Andric     /// operand commutable with the tied use operand, find recurrence cycle
2120b57cec5SDimitry Andric     /// along that operand as well.
213e8d8bef9SDimitry Andric     bool findTargetRecurrence(Register Reg,
214e8d8bef9SDimitry Andric                               const SmallSet<Register, 2> &TargetReg,
2150b57cec5SDimitry Andric                               RecurrenceCycle &RC);
2160b57cec5SDimitry Andric 
21781ad6265SDimitry Andric     /// If copy instruction \p MI is a virtual register copy or a copy of a
21881ad6265SDimitry Andric     /// constant physical register to a virtual register, track it in the
2195f757f3fSDimitry Andric     /// set CopySrcMIs. If this virtual register was previously seen as a
220e8d8bef9SDimitry Andric     /// copy, replace the uses of this copy with the previously seen copy's
221e8d8bef9SDimitry Andric     /// destination register.
2225f757f3fSDimitry Andric     bool foldRedundantCopy(MachineInstr &MI);
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric     /// Is the register \p Reg a non-allocatable physical register?
225e8d8bef9SDimitry Andric     bool isNAPhysCopy(Register Reg);
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric     /// If copy instruction \p MI is a non-allocatable virtual<->physical
2280b57cec5SDimitry Andric     /// register copy, track it in the \p NAPhysToVirtMIs map. If this
2290b57cec5SDimitry Andric     /// non-allocatable physical register was previously copied to a virtual
2300b57cec5SDimitry Andric     /// registered and hasn't been clobbered, the virt->phys copy can be
2310b57cec5SDimitry Andric     /// deleted.
232e8d8bef9SDimitry Andric     bool foldRedundantNAPhysCopy(
233e8d8bef9SDimitry Andric         MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric     bool isLoadFoldable(MachineInstr &MI,
236e8d8bef9SDimitry Andric                         SmallSet<Register, 16> &FoldAsLoadDefCandidates);
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric     /// Check whether \p MI is understood by the register coalescer
2390b57cec5SDimitry Andric     /// but may require some rewriting.
2400b57cec5SDimitry Andric     bool isCoalescableCopy(const MachineInstr &MI) {
2410b57cec5SDimitry Andric       // SubregToRegs are not interesting, because they are already register
2420b57cec5SDimitry Andric       // coalescer friendly.
2430b57cec5SDimitry Andric       return MI.isCopy() || (!DisableAdvCopyOpt &&
2440b57cec5SDimitry Andric                              (MI.isRegSequence() || MI.isInsertSubreg() ||
2450b57cec5SDimitry Andric                               MI.isExtractSubreg()));
2460b57cec5SDimitry Andric     }
2470b57cec5SDimitry Andric 
2480b57cec5SDimitry Andric     /// Check whether \p MI is a copy like instruction that is
2490b57cec5SDimitry Andric     /// not recognized by the register coalescer.
2500b57cec5SDimitry Andric     bool isUncoalescableCopy(const MachineInstr &MI) {
2510b57cec5SDimitry Andric       return MI.isBitcast() ||
2520b57cec5SDimitry Andric              (!DisableAdvCopyOpt &&
2530b57cec5SDimitry Andric               (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
2540b57cec5SDimitry Andric                MI.isExtractSubregLike()));
2550b57cec5SDimitry Andric     }
2560b57cec5SDimitry Andric 
2570b57cec5SDimitry Andric     MachineInstr &rewriteSource(MachineInstr &CopyLike,
2580b57cec5SDimitry Andric                                 RegSubRegPair Def, RewriteMapTy &RewriteMap);
2595f757f3fSDimitry Andric 
2605f757f3fSDimitry Andric     // Set of copies to virtual registers keyed by source register.  Never
2615f757f3fSDimitry Andric     // holds any physreg which requires def tracking.
2625f757f3fSDimitry Andric     DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
2635f757f3fSDimitry Andric 
2645f757f3fSDimitry Andric     // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs.
2655f757f3fSDimitry Andric     void MF_HandleInsertion(MachineInstr &MI) override {
2665f757f3fSDimitry Andric       return;
2675f757f3fSDimitry Andric     }
2685f757f3fSDimitry Andric 
2695f757f3fSDimitry Andric     bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
2705f757f3fSDimitry Andric       if (!MI.isCopy())
2715f757f3fSDimitry Andric         return false;
2725f757f3fSDimitry Andric 
2735f757f3fSDimitry Andric       Register SrcReg = MI.getOperand(1).getReg();
2745f757f3fSDimitry Andric       unsigned SrcSubReg = MI.getOperand(1).getSubReg();
2755f757f3fSDimitry Andric       if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
2765f757f3fSDimitry Andric         return false;
2775f757f3fSDimitry Andric 
2785f757f3fSDimitry Andric       SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
2795f757f3fSDimitry Andric       return true;
2805f757f3fSDimitry Andric     }
2815f757f3fSDimitry Andric 
2825f757f3fSDimitry Andric     // If a COPY instruction is to be deleted or changed, we should also remove
2835f757f3fSDimitry Andric     // it from CopySrcMIs.
2845f757f3fSDimitry Andric     void deleteChangedCopy(MachineInstr &MI) {
2855f757f3fSDimitry Andric       RegSubRegPair SrcPair;
2865f757f3fSDimitry Andric       if (!getCopySrc(MI, SrcPair))
2875f757f3fSDimitry Andric         return;
2885f757f3fSDimitry Andric 
2895f757f3fSDimitry Andric       auto It = CopySrcMIs.find(SrcPair);
2905f757f3fSDimitry Andric       if (It != CopySrcMIs.end() && It->second == &MI)
2915f757f3fSDimitry Andric         CopySrcMIs.erase(It);
2925f757f3fSDimitry Andric     }
2935f757f3fSDimitry Andric 
2945f757f3fSDimitry Andric     void MF_HandleRemoval(MachineInstr &MI) override {
2955f757f3fSDimitry Andric       deleteChangedCopy(MI);
2965f757f3fSDimitry Andric     }
2975f757f3fSDimitry Andric 
2985f757f3fSDimitry Andric     void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override
2995f757f3fSDimitry Andric     {
3005f757f3fSDimitry Andric       deleteChangedCopy(MI);
3015f757f3fSDimitry Andric     }
3020b57cec5SDimitry Andric   };
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric   /// Helper class to hold instructions that are inside recurrence cycles.
3050b57cec5SDimitry Andric   /// The recurrence cycle is formulated around 1) a def operand and its
3060b57cec5SDimitry Andric   /// tied use operand, or 2) a def operand and a use operand that is commutable
3070b57cec5SDimitry Andric   /// with another use operand which is tied to the def operand. In the latter
3080b57cec5SDimitry Andric   /// case, index of the tied use operand and the commutable use operand are
3090b57cec5SDimitry Andric   /// maintained with CommutePair.
3100b57cec5SDimitry Andric   class RecurrenceInstr {
3110b57cec5SDimitry Andric   public:
3120b57cec5SDimitry Andric     using IndexPair = std::pair<unsigned, unsigned>;
3130b57cec5SDimitry Andric 
3140b57cec5SDimitry Andric     RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
3150b57cec5SDimitry Andric     RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
3160b57cec5SDimitry Andric       : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric     MachineInstr *getMI() const { return MI; }
319bdd1243dSDimitry Andric     std::optional<IndexPair> getCommutePair() const { return CommutePair; }
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   private:
3220b57cec5SDimitry Andric     MachineInstr *MI;
323bdd1243dSDimitry Andric     std::optional<IndexPair> CommutePair;
3240b57cec5SDimitry Andric   };
3250b57cec5SDimitry Andric 
3260b57cec5SDimitry Andric   /// Helper class to hold a reply for ValueTracker queries.
3270b57cec5SDimitry Andric   /// Contains the returned sources for a given search and the instructions
3280b57cec5SDimitry Andric   /// where the sources were tracked from.
3290b57cec5SDimitry Andric   class ValueTrackerResult {
3300b57cec5SDimitry Andric   private:
3310b57cec5SDimitry Andric     /// Track all sources found by one ValueTracker query.
3320b57cec5SDimitry Andric     SmallVector<RegSubRegPair, 2> RegSrcs;
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric     /// Instruction using the sources in 'RegSrcs'.
3350b57cec5SDimitry Andric     const MachineInstr *Inst = nullptr;
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric   public:
3380b57cec5SDimitry Andric     ValueTrackerResult() = default;
3390b57cec5SDimitry Andric 
340e8d8bef9SDimitry Andric     ValueTrackerResult(Register Reg, unsigned SubReg) {
3410b57cec5SDimitry Andric       addSource(Reg, SubReg);
3420b57cec5SDimitry Andric     }
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric     bool isValid() const { return getNumSources() > 0; }
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric     void setInst(const MachineInstr *I) { Inst = I; }
3470b57cec5SDimitry Andric     const MachineInstr *getInst() const { return Inst; }
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric     void clear() {
3500b57cec5SDimitry Andric       RegSrcs.clear();
3510b57cec5SDimitry Andric       Inst = nullptr;
3520b57cec5SDimitry Andric     }
3530b57cec5SDimitry Andric 
354e8d8bef9SDimitry Andric     void addSource(Register SrcReg, unsigned SrcSubReg) {
3550b57cec5SDimitry Andric       RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
3560b57cec5SDimitry Andric     }
3570b57cec5SDimitry Andric 
358e8d8bef9SDimitry Andric     void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
3590b57cec5SDimitry Andric       assert(Idx < getNumSources() && "Reg pair source out of index");
3600b57cec5SDimitry Andric       RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
3610b57cec5SDimitry Andric     }
3620b57cec5SDimitry Andric 
3630b57cec5SDimitry Andric     int getNumSources() const { return RegSrcs.size(); }
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric     RegSubRegPair getSrc(int Idx) const {
3660b57cec5SDimitry Andric       return RegSrcs[Idx];
3670b57cec5SDimitry Andric     }
3680b57cec5SDimitry Andric 
369e8d8bef9SDimitry Andric     Register getSrcReg(int Idx) const {
3700b57cec5SDimitry Andric       assert(Idx < getNumSources() && "Reg source out of index");
3710b57cec5SDimitry Andric       return RegSrcs[Idx].Reg;
3720b57cec5SDimitry Andric     }
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric     unsigned getSrcSubReg(int Idx) const {
3750b57cec5SDimitry Andric       assert(Idx < getNumSources() && "SubReg source out of index");
3760b57cec5SDimitry Andric       return RegSrcs[Idx].SubReg;
3770b57cec5SDimitry Andric     }
3780b57cec5SDimitry Andric 
379e8d8bef9SDimitry Andric     bool operator==(const ValueTrackerResult &Other) const {
3800b57cec5SDimitry Andric       if (Other.getInst() != getInst())
3810b57cec5SDimitry Andric         return false;
3820b57cec5SDimitry Andric 
3830b57cec5SDimitry Andric       if (Other.getNumSources() != getNumSources())
3840b57cec5SDimitry Andric         return false;
3850b57cec5SDimitry Andric 
3860b57cec5SDimitry Andric       for (int i = 0, e = Other.getNumSources(); i != e; ++i)
3870b57cec5SDimitry Andric         if (Other.getSrcReg(i) != getSrcReg(i) ||
3880b57cec5SDimitry Andric             Other.getSrcSubReg(i) != getSrcSubReg(i))
3890b57cec5SDimitry Andric           return false;
3900b57cec5SDimitry Andric       return true;
3910b57cec5SDimitry Andric     }
3920b57cec5SDimitry Andric   };
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric   /// Helper class to track the possible sources of a value defined by
3950b57cec5SDimitry Andric   /// a (chain of) copy related instructions.
3960b57cec5SDimitry Andric   /// Given a definition (instruction and definition index), this class
3970b57cec5SDimitry Andric   /// follows the use-def chain to find successive suitable sources.
3980b57cec5SDimitry Andric   /// The given source can be used to rewrite the definition into
3990b57cec5SDimitry Andric   /// def = COPY src.
4000b57cec5SDimitry Andric   ///
4010b57cec5SDimitry Andric   /// For instance, let us consider the following snippet:
4020b57cec5SDimitry Andric   /// v0 =
4030b57cec5SDimitry Andric   /// v2 = INSERT_SUBREG v1, v0, sub0
4040b57cec5SDimitry Andric   /// def = COPY v2.sub0
4050b57cec5SDimitry Andric   ///
4060b57cec5SDimitry Andric   /// Using a ValueTracker for def = COPY v2.sub0 will give the following
4070b57cec5SDimitry Andric   /// suitable sources:
4080b57cec5SDimitry Andric   /// v2.sub0 and v0.
4090b57cec5SDimitry Andric   /// Then, def can be rewritten into def = COPY v0.
4100b57cec5SDimitry Andric   class ValueTracker {
4110b57cec5SDimitry Andric   private:
4120b57cec5SDimitry Andric     /// The current point into the use-def chain.
4130b57cec5SDimitry Andric     const MachineInstr *Def = nullptr;
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric     /// The index of the definition in Def.
4160b57cec5SDimitry Andric     unsigned DefIdx = 0;
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric     /// The sub register index of the definition.
4190b57cec5SDimitry Andric     unsigned DefSubReg;
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric     /// The register where the value can be found.
422e8d8bef9SDimitry Andric     Register Reg;
4230b57cec5SDimitry Andric 
4240b57cec5SDimitry Andric     /// MachineRegisterInfo used to perform tracking.
4250b57cec5SDimitry Andric     const MachineRegisterInfo &MRI;
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric     /// Optional TargetInstrInfo used to perform some complex tracking.
4280b57cec5SDimitry Andric     const TargetInstrInfo *TII;
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric     /// Dispatcher to the right underlying implementation of getNextSource.
4310b57cec5SDimitry Andric     ValueTrackerResult getNextSourceImpl();
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric     /// Specialized version of getNextSource for Copy instructions.
4340b57cec5SDimitry Andric     ValueTrackerResult getNextSourceFromCopy();
4350b57cec5SDimitry Andric 
4360b57cec5SDimitry Andric     /// Specialized version of getNextSource for Bitcast instructions.
4370b57cec5SDimitry Andric     ValueTrackerResult getNextSourceFromBitcast();
4380b57cec5SDimitry Andric 
4390b57cec5SDimitry Andric     /// Specialized version of getNextSource for RegSequence instructions.
4400b57cec5SDimitry Andric     ValueTrackerResult getNextSourceFromRegSequence();
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric     /// Specialized version of getNextSource for InsertSubreg instructions.
4430b57cec5SDimitry Andric     ValueTrackerResult getNextSourceFromInsertSubreg();
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric     /// Specialized version of getNextSource for ExtractSubreg instructions.
4460b57cec5SDimitry Andric     ValueTrackerResult getNextSourceFromExtractSubreg();
4470b57cec5SDimitry Andric 
4480b57cec5SDimitry Andric     /// Specialized version of getNextSource for SubregToReg instructions.
4490b57cec5SDimitry Andric     ValueTrackerResult getNextSourceFromSubregToReg();
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric     /// Specialized version of getNextSource for PHI instructions.
4520b57cec5SDimitry Andric     ValueTrackerResult getNextSourceFromPHI();
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric   public:
4550b57cec5SDimitry Andric     /// Create a ValueTracker instance for the value defined by \p Reg.
4560b57cec5SDimitry Andric     /// \p DefSubReg represents the sub register index the value tracker will
4570b57cec5SDimitry Andric     /// track. It does not need to match the sub register index used in the
4580b57cec5SDimitry Andric     /// definition of \p Reg.
4590b57cec5SDimitry Andric     /// If \p Reg is a physical register, a value tracker constructed with
4600b57cec5SDimitry Andric     /// this constructor will not find any alternative source.
4610b57cec5SDimitry Andric     /// Indeed, when \p Reg is a physical register that constructor does not
4620b57cec5SDimitry Andric     /// know which definition of \p Reg it should track.
4630b57cec5SDimitry Andric     /// Use the next constructor to track a physical register.
464e8d8bef9SDimitry Andric     ValueTracker(Register Reg, unsigned DefSubReg,
4650b57cec5SDimitry Andric                  const MachineRegisterInfo &MRI,
4660b57cec5SDimitry Andric                  const TargetInstrInfo *TII = nullptr)
4670b57cec5SDimitry Andric         : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
468e8d8bef9SDimitry Andric       if (!Reg.isPhysical()) {
4690b57cec5SDimitry Andric         Def = MRI.getVRegDef(Reg);
4700b57cec5SDimitry Andric         DefIdx = MRI.def_begin(Reg).getOperandNo();
4710b57cec5SDimitry Andric       }
4720b57cec5SDimitry Andric     }
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric     /// Following the use-def chain, get the next available source
4750b57cec5SDimitry Andric     /// for the tracked value.
4760b57cec5SDimitry Andric     /// \return A ValueTrackerResult containing a set of registers
4770b57cec5SDimitry Andric     /// and sub registers with tracked values. A ValueTrackerResult with
4780b57cec5SDimitry Andric     /// an empty set of registers means no source was found.
4790b57cec5SDimitry Andric     ValueTrackerResult getNextSource();
4800b57cec5SDimitry Andric   };
4810b57cec5SDimitry Andric 
4820b57cec5SDimitry Andric } // end anonymous namespace
4830b57cec5SDimitry Andric 
4840b57cec5SDimitry Andric char PeepholeOptimizer::ID = 0;
4850b57cec5SDimitry Andric 
4860b57cec5SDimitry Andric char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
4870b57cec5SDimitry Andric 
4880b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
4890b57cec5SDimitry Andric                       "Peephole Optimizations", false, false)
490*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
491*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
4920b57cec5SDimitry Andric INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
4930b57cec5SDimitry Andric                     "Peephole Optimizations", false, false)
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric /// If instruction is a copy-like instruction, i.e. it reads a single register
4960b57cec5SDimitry Andric /// and writes a single register and it does not modify the source, and if the
4970b57cec5SDimitry Andric /// source value is preserved as a sub-register of the result, then replace all
4980b57cec5SDimitry Andric /// reachable uses of the source with the subreg of the result.
4990b57cec5SDimitry Andric ///
5000b57cec5SDimitry Andric /// Do not generate an EXTRACT that is used only in a debug use, as this changes
5010b57cec5SDimitry Andric /// the code. Since this code does not currently share EXTRACTs, just ignore all
5020b57cec5SDimitry Andric /// debug uses.
5030b57cec5SDimitry Andric bool PeepholeOptimizer::
5040b57cec5SDimitry Andric optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
5050b57cec5SDimitry Andric                  SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
5065ffd83dbSDimitry Andric   Register SrcReg, DstReg;
5075ffd83dbSDimitry Andric   unsigned SubIdx;
5080b57cec5SDimitry Andric   if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
5090b57cec5SDimitry Andric     return false;
5100b57cec5SDimitry Andric 
5115ffd83dbSDimitry Andric   if (DstReg.isPhysical() || SrcReg.isPhysical())
5120b57cec5SDimitry Andric     return false;
5130b57cec5SDimitry Andric 
5140b57cec5SDimitry Andric   if (MRI->hasOneNonDBGUse(SrcReg))
5150b57cec5SDimitry Andric     // No other uses.
5160b57cec5SDimitry Andric     return false;
5170b57cec5SDimitry Andric 
5180b57cec5SDimitry Andric   // Ensure DstReg can get a register class that actually supports
5190b57cec5SDimitry Andric   // sub-registers. Don't change the class until we commit.
5200b57cec5SDimitry Andric   const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
5210b57cec5SDimitry Andric   DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
5220b57cec5SDimitry Andric   if (!DstRC)
5230b57cec5SDimitry Andric     return false;
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric   // The ext instr may be operating on a sub-register of SrcReg as well.
5260b57cec5SDimitry Andric   // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
5270b57cec5SDimitry Andric   // register.
5280b57cec5SDimitry Andric   // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
5290b57cec5SDimitry Andric   // SrcReg:SubIdx should be replaced.
5300b57cec5SDimitry Andric   bool UseSrcSubIdx =
5310b57cec5SDimitry Andric       TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric   // The source has other uses. See if we can replace the other uses with use of
5340b57cec5SDimitry Andric   // the result of the extension.
5350b57cec5SDimitry Andric   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
5360b57cec5SDimitry Andric   for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
5370b57cec5SDimitry Andric     ReachedBBs.insert(UI.getParent());
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric   // Uses that are in the same BB of uses of the result of the instruction.
5400b57cec5SDimitry Andric   SmallVector<MachineOperand*, 8> Uses;
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric   // Uses that the result of the instruction can reach.
5430b57cec5SDimitry Andric   SmallVector<MachineOperand*, 8> ExtendedUses;
5440b57cec5SDimitry Andric 
5450b57cec5SDimitry Andric   bool ExtendLife = true;
5460b57cec5SDimitry Andric   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
5470b57cec5SDimitry Andric     MachineInstr *UseMI = UseMO.getParent();
5480b57cec5SDimitry Andric     if (UseMI == &MI)
5490b57cec5SDimitry Andric       continue;
5500b57cec5SDimitry Andric 
5510b57cec5SDimitry Andric     if (UseMI->isPHI()) {
5520b57cec5SDimitry Andric       ExtendLife = false;
5530b57cec5SDimitry Andric       continue;
5540b57cec5SDimitry Andric     }
5550b57cec5SDimitry Andric 
5560b57cec5SDimitry Andric     // Only accept uses of SrcReg:SubIdx.
5570b57cec5SDimitry Andric     if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
5580b57cec5SDimitry Andric       continue;
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric     // It's an error to translate this:
5610b57cec5SDimitry Andric     //
5620b57cec5SDimitry Andric     //    %reg1025 = <sext> %reg1024
5630b57cec5SDimitry Andric     //     ...
5640b57cec5SDimitry Andric     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
5650b57cec5SDimitry Andric     //
5660b57cec5SDimitry Andric     // into this:
5670b57cec5SDimitry Andric     //
5680b57cec5SDimitry Andric     //    %reg1025 = <sext> %reg1024
5690b57cec5SDimitry Andric     //     ...
5700b57cec5SDimitry Andric     //    %reg1027 = COPY %reg1025:4
5710b57cec5SDimitry Andric     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
5720b57cec5SDimitry Andric     //
5730b57cec5SDimitry Andric     // The problem here is that SUBREG_TO_REG is there to assert that an
5740b57cec5SDimitry Andric     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
5750b57cec5SDimitry Andric     // the COPY here, it will give us the value after the <sext>, not the
5760b57cec5SDimitry Andric     // original value of %reg1024 before <sext>.
5770b57cec5SDimitry Andric     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
5780b57cec5SDimitry Andric       continue;
5790b57cec5SDimitry Andric 
5800b57cec5SDimitry Andric     MachineBasicBlock *UseMBB = UseMI->getParent();
5810b57cec5SDimitry Andric     if (UseMBB == &MBB) {
5820b57cec5SDimitry Andric       // Local uses that come after the extension.
5830b57cec5SDimitry Andric       if (!LocalMIs.count(UseMI))
5840b57cec5SDimitry Andric         Uses.push_back(&UseMO);
5850b57cec5SDimitry Andric     } else if (ReachedBBs.count(UseMBB)) {
5860b57cec5SDimitry Andric       // Non-local uses where the result of the extension is used. Always
5870b57cec5SDimitry Andric       // replace these unless it's a PHI.
5880b57cec5SDimitry Andric       Uses.push_back(&UseMO);
5890b57cec5SDimitry Andric     } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
5900b57cec5SDimitry Andric       // We may want to extend the live range of the extension result in order
5910b57cec5SDimitry Andric       // to replace these uses.
5920b57cec5SDimitry Andric       ExtendedUses.push_back(&UseMO);
5930b57cec5SDimitry Andric     } else {
5940b57cec5SDimitry Andric       // Both will be live out of the def MBB anyway. Don't extend live range of
5950b57cec5SDimitry Andric       // the extension result.
5960b57cec5SDimitry Andric       ExtendLife = false;
5970b57cec5SDimitry Andric       break;
5980b57cec5SDimitry Andric     }
5990b57cec5SDimitry Andric   }
6000b57cec5SDimitry Andric 
6010b57cec5SDimitry Andric   if (ExtendLife && !ExtendedUses.empty())
6020b57cec5SDimitry Andric     // Extend the liveness of the extension result.
6030b57cec5SDimitry Andric     Uses.append(ExtendedUses.begin(), ExtendedUses.end());
6040b57cec5SDimitry Andric 
6050b57cec5SDimitry Andric   // Now replace all uses.
6060b57cec5SDimitry Andric   bool Changed = false;
6070b57cec5SDimitry Andric   if (!Uses.empty()) {
6080b57cec5SDimitry Andric     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
6090b57cec5SDimitry Andric 
6100b57cec5SDimitry Andric     // Look for PHI uses of the extended result, we don't want to extend the
6110b57cec5SDimitry Andric     // liveness of a PHI input. It breaks all kinds of assumptions down
6120b57cec5SDimitry Andric     // stream. A PHI use is expected to be the kill of its source values.
6130b57cec5SDimitry Andric     for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
6140b57cec5SDimitry Andric       if (UI.isPHI())
6150b57cec5SDimitry Andric         PHIBBs.insert(UI.getParent());
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
618*0fca6ea1SDimitry Andric     for (MachineOperand *UseMO : Uses) {
6190b57cec5SDimitry Andric       MachineInstr *UseMI = UseMO->getParent();
6200b57cec5SDimitry Andric       MachineBasicBlock *UseMBB = UseMI->getParent();
6210b57cec5SDimitry Andric       if (PHIBBs.count(UseMBB))
6220b57cec5SDimitry Andric         continue;
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric       // About to add uses of DstReg, clear DstReg's kill flags.
6250b57cec5SDimitry Andric       if (!Changed) {
6260b57cec5SDimitry Andric         MRI->clearKillFlags(DstReg);
6270b57cec5SDimitry Andric         MRI->constrainRegClass(DstReg, DstRC);
6280b57cec5SDimitry Andric       }
6290b57cec5SDimitry Andric 
630fe6060f1SDimitry Andric       // SubReg defs are illegal in machine SSA phase,
631fe6060f1SDimitry Andric       // we should not generate SubReg defs.
632fe6060f1SDimitry Andric       //
633fe6060f1SDimitry Andric       // For example, for the instructions:
634fe6060f1SDimitry Andric       //
635fe6060f1SDimitry Andric       // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
636fe6060f1SDimitry Andric       // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc
637fe6060f1SDimitry Andric       //
638fe6060f1SDimitry Andric       // We should generate:
639fe6060f1SDimitry Andric       //
640fe6060f1SDimitry Andric       // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc
641fe6060f1SDimitry Andric       // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0
642fe6060f1SDimitry Andric       // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0
643fe6060f1SDimitry Andric       //
644fe6060f1SDimitry Andric       if (UseSrcSubIdx)
645fe6060f1SDimitry Andric         RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
646fe6060f1SDimitry Andric 
6478bcb0991SDimitry Andric       Register NewVR = MRI->createVirtualRegister(RC);
648fe6060f1SDimitry Andric       BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
6490b57cec5SDimitry Andric               TII->get(TargetOpcode::COPY), NewVR)
6500b57cec5SDimitry Andric         .addReg(DstReg, 0, SubIdx);
651fe6060f1SDimitry Andric       if (UseSrcSubIdx)
652fe6060f1SDimitry Andric         UseMO->setSubReg(0);
653fe6060f1SDimitry Andric 
6540b57cec5SDimitry Andric       UseMO->setReg(NewVR);
6550b57cec5SDimitry Andric       ++NumReuse;
6560b57cec5SDimitry Andric       Changed = true;
6570b57cec5SDimitry Andric     }
6580b57cec5SDimitry Andric   }
6590b57cec5SDimitry Andric 
6600b57cec5SDimitry Andric   return Changed;
6610b57cec5SDimitry Andric }
6620b57cec5SDimitry Andric 
6630b57cec5SDimitry Andric /// If the instruction is a compare and the previous instruction it's comparing
6640b57cec5SDimitry Andric /// against already sets (or could be modified to set) the same flag as the
6650b57cec5SDimitry Andric /// compare, then we can remove the comparison and use the flag from the
6660b57cec5SDimitry Andric /// previous instruction.
6670b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
6680b57cec5SDimitry Andric   // If this instruction is a comparison against zero and isn't comparing a
6690b57cec5SDimitry Andric   // physical register, we can try to optimize it.
6705ffd83dbSDimitry Andric   Register SrcReg, SrcReg2;
671349cc55cSDimitry Andric   int64_t CmpMask, CmpValue;
6720b57cec5SDimitry Andric   if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
6735ffd83dbSDimitry Andric       SrcReg.isPhysical() || SrcReg2.isPhysical())
6740b57cec5SDimitry Andric     return false;
6750b57cec5SDimitry Andric 
6760b57cec5SDimitry Andric   // Attempt to optimize the comparison instruction.
6775ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
6780b57cec5SDimitry Andric   if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
6795ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "  -> Successfully optimized compare!\n");
6800b57cec5SDimitry Andric     ++NumCmps;
6810b57cec5SDimitry Andric     return true;
6820b57cec5SDimitry Andric   }
6830b57cec5SDimitry Andric 
6840b57cec5SDimitry Andric   return false;
6850b57cec5SDimitry Andric }
6860b57cec5SDimitry Andric 
6870b57cec5SDimitry Andric /// Optimize a select instruction.
6880b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI,
6890b57cec5SDimitry Andric                             SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
6900b57cec5SDimitry Andric   unsigned TrueOp = 0;
6910b57cec5SDimitry Andric   unsigned FalseOp = 0;
6920b57cec5SDimitry Andric   bool Optimizable = false;
6930b57cec5SDimitry Andric   SmallVector<MachineOperand, 4> Cond;
6940b57cec5SDimitry Andric   if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
6950b57cec5SDimitry Andric     return false;
6960b57cec5SDimitry Andric   if (!Optimizable)
6970b57cec5SDimitry Andric     return false;
6980b57cec5SDimitry Andric   if (!TII->optimizeSelect(MI, LocalMIs))
6990b57cec5SDimitry Andric     return false;
7005ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
7010b57cec5SDimitry Andric   MI.eraseFromParent();
7020b57cec5SDimitry Andric   ++NumSelects;
7030b57cec5SDimitry Andric   return true;
7040b57cec5SDimitry Andric }
7050b57cec5SDimitry Andric 
7060b57cec5SDimitry Andric /// Check if a simpler conditional branch can be generated.
7070b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
7080b57cec5SDimitry Andric   return TII->optimizeCondBranch(MI);
7090b57cec5SDimitry Andric }
7100b57cec5SDimitry Andric 
7110b57cec5SDimitry Andric /// Try to find the next source that share the same register file
7120b57cec5SDimitry Andric /// for the value defined by \p Reg and \p SubReg.
7130b57cec5SDimitry Andric /// When true is returned, the \p RewriteMap can be used by the client to
7140b57cec5SDimitry Andric /// retrieve all Def -> Use along the way up to the next source. Any found
7150b57cec5SDimitry Andric /// Use that is not itself a key for another entry, is the next source to
7160b57cec5SDimitry Andric /// use. During the search for the next source, multiple sources can be found
7170b57cec5SDimitry Andric /// given multiple incoming sources of a PHI instruction. In this case, we
7180b57cec5SDimitry Andric /// look in each PHI source for the next source; all found next sources must
7190b57cec5SDimitry Andric /// share the same register file as \p Reg and \p SubReg. The client should
7200b57cec5SDimitry Andric /// then be capable to rewrite all intermediate PHIs to get the next source.
7210b57cec5SDimitry Andric /// \return False if no alternative sources are available. True otherwise.
7220b57cec5SDimitry Andric bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
7230b57cec5SDimitry Andric                                        RewriteMapTy &RewriteMap) {
7240b57cec5SDimitry Andric   // Do not try to find a new source for a physical register.
7250b57cec5SDimitry Andric   // So far we do not have any motivating example for doing that.
7260b57cec5SDimitry Andric   // Thus, instead of maintaining untested code, we will revisit that if
7270b57cec5SDimitry Andric   // that changes at some point.
7285ffd83dbSDimitry Andric   Register Reg = RegSubReg.Reg;
7295ffd83dbSDimitry Andric   if (Reg.isPhysical())
7300b57cec5SDimitry Andric     return false;
7310b57cec5SDimitry Andric   const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
7320b57cec5SDimitry Andric 
7330b57cec5SDimitry Andric   SmallVector<RegSubRegPair, 4> SrcToLook;
7340b57cec5SDimitry Andric   RegSubRegPair CurSrcPair = RegSubReg;
7350b57cec5SDimitry Andric   SrcToLook.push_back(CurSrcPair);
7360b57cec5SDimitry Andric 
7370b57cec5SDimitry Andric   unsigned PHICount = 0;
7380b57cec5SDimitry Andric   do {
7390b57cec5SDimitry Andric     CurSrcPair = SrcToLook.pop_back_val();
7400b57cec5SDimitry Andric     // As explained above, do not handle physical registers
741bdd1243dSDimitry Andric     if (CurSrcPair.Reg.isPhysical())
7420b57cec5SDimitry Andric       return false;
7430b57cec5SDimitry Andric 
7440b57cec5SDimitry Andric     ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
7450b57cec5SDimitry Andric 
7460b57cec5SDimitry Andric     // Follow the chain of copies until we find a more suitable source, a phi
7470b57cec5SDimitry Andric     // or have to abort.
7480b57cec5SDimitry Andric     while (true) {
7490b57cec5SDimitry Andric       ValueTrackerResult Res = ValTracker.getNextSource();
7500b57cec5SDimitry Andric       // Abort at the end of a chain (without finding a suitable source).
7510b57cec5SDimitry Andric       if (!Res.isValid())
7520b57cec5SDimitry Andric         return false;
7530b57cec5SDimitry Andric 
7540b57cec5SDimitry Andric       // Insert the Def -> Use entry for the recently found source.
7550b57cec5SDimitry Andric       ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
7560b57cec5SDimitry Andric       if (CurSrcRes.isValid()) {
7570b57cec5SDimitry Andric         assert(CurSrcRes == Res && "ValueTrackerResult found must match");
7580b57cec5SDimitry Andric         // An existent entry with multiple sources is a PHI cycle we must avoid.
7590b57cec5SDimitry Andric         // Otherwise it's an entry with a valid next source we already found.
7600b57cec5SDimitry Andric         if (CurSrcRes.getNumSources() > 1) {
7610b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
7620b57cec5SDimitry Andric                      << "findNextSource: found PHI cycle, aborting...\n");
7630b57cec5SDimitry Andric           return false;
7640b57cec5SDimitry Andric         }
7650b57cec5SDimitry Andric         break;
7660b57cec5SDimitry Andric       }
7670b57cec5SDimitry Andric       RewriteMap.insert(std::make_pair(CurSrcPair, Res));
7680b57cec5SDimitry Andric 
7690b57cec5SDimitry Andric       // ValueTrackerResult usually have one source unless it's the result from
7700b57cec5SDimitry Andric       // a PHI instruction. Add the found PHI edges to be looked up further.
7710b57cec5SDimitry Andric       unsigned NumSrcs = Res.getNumSources();
7720b57cec5SDimitry Andric       if (NumSrcs > 1) {
7730b57cec5SDimitry Andric         PHICount++;
7740b57cec5SDimitry Andric         if (PHICount >= RewritePHILimit) {
7750b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
7760b57cec5SDimitry Andric           return false;
7770b57cec5SDimitry Andric         }
7780b57cec5SDimitry Andric 
7790b57cec5SDimitry Andric         for (unsigned i = 0; i < NumSrcs; ++i)
7800b57cec5SDimitry Andric           SrcToLook.push_back(Res.getSrc(i));
7810b57cec5SDimitry Andric         break;
7820b57cec5SDimitry Andric       }
7830b57cec5SDimitry Andric 
7840b57cec5SDimitry Andric       CurSrcPair = Res.getSrc(0);
7850b57cec5SDimitry Andric       // Do not extend the live-ranges of physical registers as they add
7860b57cec5SDimitry Andric       // constraints to the register allocator. Moreover, if we want to extend
7870b57cec5SDimitry Andric       // the live-range of a physical register, unlike SSA virtual register,
7880b57cec5SDimitry Andric       // we will have to check that they aren't redefine before the related use.
789bdd1243dSDimitry Andric       if (CurSrcPair.Reg.isPhysical())
7900b57cec5SDimitry Andric         return false;
7910b57cec5SDimitry Andric 
7920b57cec5SDimitry Andric       // Keep following the chain if the value isn't any better yet.
7930b57cec5SDimitry Andric       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
7940b57cec5SDimitry Andric       if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
7950b57cec5SDimitry Andric                                      CurSrcPair.SubReg))
7960b57cec5SDimitry Andric         continue;
7970b57cec5SDimitry Andric 
7980b57cec5SDimitry Andric       // We currently cannot deal with subreg operands on PHI instructions
7990b57cec5SDimitry Andric       // (see insertPHI()).
8000b57cec5SDimitry Andric       if (PHICount > 0 && CurSrcPair.SubReg != 0)
8010b57cec5SDimitry Andric         continue;
8020b57cec5SDimitry Andric 
8030b57cec5SDimitry Andric       // We found a suitable source, and are done with this chain.
8040b57cec5SDimitry Andric       break;
8050b57cec5SDimitry Andric     }
8060b57cec5SDimitry Andric   } while (!SrcToLook.empty());
8070b57cec5SDimitry Andric 
8080b57cec5SDimitry Andric   // If we did not find a more suitable source, there is nothing to optimize.
8090b57cec5SDimitry Andric   return CurSrcPair.Reg != Reg;
8100b57cec5SDimitry Andric }
8110b57cec5SDimitry Andric 
8120b57cec5SDimitry Andric /// Insert a PHI instruction with incoming edges \p SrcRegs that are
8130b57cec5SDimitry Andric /// guaranteed to have the same register class. This is necessary whenever we
8140b57cec5SDimitry Andric /// successfully traverse a PHI instruction and find suitable sources coming
8150b57cec5SDimitry Andric /// from its edges. By inserting a new PHI, we provide a rewritten PHI def
8160b57cec5SDimitry Andric /// suitable to be used in a new COPY instruction.
8170b57cec5SDimitry Andric static MachineInstr &
8180b57cec5SDimitry Andric insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
8190b57cec5SDimitry Andric           const SmallVectorImpl<RegSubRegPair> &SrcRegs,
8200b57cec5SDimitry Andric           MachineInstr &OrigPHI) {
8210b57cec5SDimitry Andric   assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
8220b57cec5SDimitry Andric 
8230b57cec5SDimitry Andric   const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
8240b57cec5SDimitry Andric   // NewRC is only correct if no subregisters are involved. findNextSource()
8250b57cec5SDimitry Andric   // should have rejected those cases already.
8260b57cec5SDimitry Andric   assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
8278bcb0991SDimitry Andric   Register NewVR = MRI.createVirtualRegister(NewRC);
8280b57cec5SDimitry Andric   MachineBasicBlock *MBB = OrigPHI.getParent();
8290b57cec5SDimitry Andric   MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
8300b57cec5SDimitry Andric                                     TII.get(TargetOpcode::PHI), NewVR);
8310b57cec5SDimitry Andric 
8320b57cec5SDimitry Andric   unsigned MBBOpIdx = 2;
8330b57cec5SDimitry Andric   for (const RegSubRegPair &RegPair : SrcRegs) {
8340b57cec5SDimitry Andric     MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
8350b57cec5SDimitry Andric     MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
8360b57cec5SDimitry Andric     // Since we're extended the lifetime of RegPair.Reg, clear the
8370b57cec5SDimitry Andric     // kill flags to account for that and make RegPair.Reg reaches
8380b57cec5SDimitry Andric     // the new PHI.
8390b57cec5SDimitry Andric     MRI.clearKillFlags(RegPair.Reg);
8400b57cec5SDimitry Andric     MBBOpIdx += 2;
8410b57cec5SDimitry Andric   }
8420b57cec5SDimitry Andric 
8430b57cec5SDimitry Andric   return *MIB;
8440b57cec5SDimitry Andric }
8450b57cec5SDimitry Andric 
8460b57cec5SDimitry Andric namespace {
8470b57cec5SDimitry Andric 
8480b57cec5SDimitry Andric /// Interface to query instructions amenable to copy rewriting.
8490b57cec5SDimitry Andric class Rewriter {
8500b57cec5SDimitry Andric protected:
8510b57cec5SDimitry Andric   MachineInstr &CopyLike;
8520b57cec5SDimitry Andric   unsigned CurrentSrcIdx = 0;   ///< The index of the source being rewritten.
8530b57cec5SDimitry Andric public:
8540b57cec5SDimitry Andric   Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
85581ad6265SDimitry Andric   virtual ~Rewriter() = default;
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric   /// Get the next rewritable source (SrcReg, SrcSubReg) and
8580b57cec5SDimitry Andric   /// the related value that it affects (DstReg, DstSubReg).
8590b57cec5SDimitry Andric   /// A source is considered rewritable if its register class and the
8600b57cec5SDimitry Andric   /// register class of the related DstReg may not be register
8610b57cec5SDimitry Andric   /// coalescer friendly. In other words, given a copy-like instruction
8620b57cec5SDimitry Andric   /// not all the arguments may be returned at rewritable source, since
8630b57cec5SDimitry Andric   /// some arguments are none to be register coalescer friendly.
8640b57cec5SDimitry Andric   ///
8650b57cec5SDimitry Andric   /// Each call of this method moves the current source to the next
8660b57cec5SDimitry Andric   /// rewritable source.
8670b57cec5SDimitry Andric   /// For instance, let CopyLike be the instruction to rewrite.
8680b57cec5SDimitry Andric   /// CopyLike has one definition and one source:
8690b57cec5SDimitry Andric   /// dst.dstSubIdx = CopyLike src.srcSubIdx.
8700b57cec5SDimitry Andric   ///
8710b57cec5SDimitry Andric   /// The first call will give the first rewritable source, i.e.,
8720b57cec5SDimitry Andric   /// the only source this instruction has:
8730b57cec5SDimitry Andric   /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
8740b57cec5SDimitry Andric   /// This source defines the whole definition, i.e.,
8750b57cec5SDimitry Andric   /// (DstReg, DstSubReg) = (dst, dstSubIdx).
8760b57cec5SDimitry Andric   ///
8770b57cec5SDimitry Andric   /// The second and subsequent calls will return false, as there is only one
8780b57cec5SDimitry Andric   /// rewritable source.
8790b57cec5SDimitry Andric   ///
8800b57cec5SDimitry Andric   /// \return True if a rewritable source has been found, false otherwise.
8810b57cec5SDimitry Andric   /// The output arguments are valid if and only if true is returned.
8820b57cec5SDimitry Andric   virtual bool getNextRewritableSource(RegSubRegPair &Src,
8830b57cec5SDimitry Andric                                        RegSubRegPair &Dst) = 0;
8840b57cec5SDimitry Andric 
8850b57cec5SDimitry Andric   /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
8860b57cec5SDimitry Andric   /// \return True if the rewriting was possible, false otherwise.
887e8d8bef9SDimitry Andric   virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
8880b57cec5SDimitry Andric };
8890b57cec5SDimitry Andric 
8900b57cec5SDimitry Andric /// Rewriter for COPY instructions.
8910b57cec5SDimitry Andric class CopyRewriter : public Rewriter {
8920b57cec5SDimitry Andric public:
8930b57cec5SDimitry Andric   CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
8940b57cec5SDimitry Andric     assert(MI.isCopy() && "Expected copy instruction");
8950b57cec5SDimitry Andric   }
8960b57cec5SDimitry Andric   virtual ~CopyRewriter() = default;
8970b57cec5SDimitry Andric 
8980b57cec5SDimitry Andric   bool getNextRewritableSource(RegSubRegPair &Src,
8990b57cec5SDimitry Andric                                RegSubRegPair &Dst) override {
9000b57cec5SDimitry Andric     // CurrentSrcIdx > 0 means this function has already been called.
9010b57cec5SDimitry Andric     if (CurrentSrcIdx > 0)
9020b57cec5SDimitry Andric       return false;
9030b57cec5SDimitry Andric     // This is the first call to getNextRewritableSource.
9040b57cec5SDimitry Andric     // Move the CurrentSrcIdx to remember that we made that call.
9050b57cec5SDimitry Andric     CurrentSrcIdx = 1;
9060b57cec5SDimitry Andric     // The rewritable source is the argument.
9070b57cec5SDimitry Andric     const MachineOperand &MOSrc = CopyLike.getOperand(1);
9080b57cec5SDimitry Andric     Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
9090b57cec5SDimitry Andric     // What we track are the alternative sources of the definition.
9100b57cec5SDimitry Andric     const MachineOperand &MODef = CopyLike.getOperand(0);
9110b57cec5SDimitry Andric     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
9120b57cec5SDimitry Andric     return true;
9130b57cec5SDimitry Andric   }
9140b57cec5SDimitry Andric 
915e8d8bef9SDimitry Andric   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
9160b57cec5SDimitry Andric     if (CurrentSrcIdx != 1)
9170b57cec5SDimitry Andric       return false;
9180b57cec5SDimitry Andric     MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
9190b57cec5SDimitry Andric     MOSrc.setReg(NewReg);
9200b57cec5SDimitry Andric     MOSrc.setSubReg(NewSubReg);
9210b57cec5SDimitry Andric     return true;
9220b57cec5SDimitry Andric   }
9230b57cec5SDimitry Andric };
9240b57cec5SDimitry Andric 
9250b57cec5SDimitry Andric /// Helper class to rewrite uncoalescable copy like instructions
9260b57cec5SDimitry Andric /// into new COPY (coalescable friendly) instructions.
9270b57cec5SDimitry Andric class UncoalescableRewriter : public Rewriter {
9280b57cec5SDimitry Andric   unsigned NumDefs;  ///< Number of defs in the bitcast.
9290b57cec5SDimitry Andric 
9300b57cec5SDimitry Andric public:
9310b57cec5SDimitry Andric   UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
9320b57cec5SDimitry Andric     NumDefs = MI.getDesc().getNumDefs();
9330b57cec5SDimitry Andric   }
9340b57cec5SDimitry Andric 
9350b57cec5SDimitry Andric   /// \see See Rewriter::getNextRewritableSource()
9360b57cec5SDimitry Andric   /// All such sources need to be considered rewritable in order to
9370b57cec5SDimitry Andric   /// rewrite a uncoalescable copy-like instruction. This method return
9380b57cec5SDimitry Andric   /// each definition that must be checked if rewritable.
9390b57cec5SDimitry Andric   bool getNextRewritableSource(RegSubRegPair &Src,
9400b57cec5SDimitry Andric                                RegSubRegPair &Dst) override {
9410b57cec5SDimitry Andric     // Find the next non-dead definition and continue from there.
9420b57cec5SDimitry Andric     if (CurrentSrcIdx == NumDefs)
9430b57cec5SDimitry Andric       return false;
9440b57cec5SDimitry Andric 
9450b57cec5SDimitry Andric     while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
9460b57cec5SDimitry Andric       ++CurrentSrcIdx;
9470b57cec5SDimitry Andric       if (CurrentSrcIdx == NumDefs)
9480b57cec5SDimitry Andric         return false;
9490b57cec5SDimitry Andric     }
9500b57cec5SDimitry Andric 
9510b57cec5SDimitry Andric     // What we track are the alternative sources of the definition.
9520b57cec5SDimitry Andric     Src = RegSubRegPair(0, 0);
9530b57cec5SDimitry Andric     const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
9540b57cec5SDimitry Andric     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
9550b57cec5SDimitry Andric 
9560b57cec5SDimitry Andric     CurrentSrcIdx++;
9570b57cec5SDimitry Andric     return true;
9580b57cec5SDimitry Andric   }
9590b57cec5SDimitry Andric 
960e8d8bef9SDimitry Andric   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
9610b57cec5SDimitry Andric     return false;
9620b57cec5SDimitry Andric   }
9630b57cec5SDimitry Andric };
9640b57cec5SDimitry Andric 
9650b57cec5SDimitry Andric /// Specialized rewriter for INSERT_SUBREG instruction.
9660b57cec5SDimitry Andric class InsertSubregRewriter : public Rewriter {
9670b57cec5SDimitry Andric public:
9680b57cec5SDimitry Andric   InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
9690b57cec5SDimitry Andric     assert(MI.isInsertSubreg() && "Invalid instruction");
9700b57cec5SDimitry Andric   }
9710b57cec5SDimitry Andric 
9720b57cec5SDimitry Andric   /// \see See Rewriter::getNextRewritableSource()
9730b57cec5SDimitry Andric   /// Here CopyLike has the following form:
9740b57cec5SDimitry Andric   /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
9750b57cec5SDimitry Andric   /// Src1 has the same register class has dst, hence, there is
9760b57cec5SDimitry Andric   /// nothing to rewrite.
9770b57cec5SDimitry Andric   /// Src2.src2SubIdx, may not be register coalescer friendly.
9780b57cec5SDimitry Andric   /// Therefore, the first call to this method returns:
9790b57cec5SDimitry Andric   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
9800b57cec5SDimitry Andric   /// (DstReg, DstSubReg) = (dst, subIdx).
9810b57cec5SDimitry Andric   ///
9820b57cec5SDimitry Andric   /// Subsequence calls will return false.
9830b57cec5SDimitry Andric   bool getNextRewritableSource(RegSubRegPair &Src,
9840b57cec5SDimitry Andric                                RegSubRegPair &Dst) override {
9850b57cec5SDimitry Andric     // If we already get the only source we can rewrite, return false.
9860b57cec5SDimitry Andric     if (CurrentSrcIdx == 2)
9870b57cec5SDimitry Andric       return false;
9880b57cec5SDimitry Andric     // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
9890b57cec5SDimitry Andric     CurrentSrcIdx = 2;
9900b57cec5SDimitry Andric     const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
9910b57cec5SDimitry Andric     Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
9920b57cec5SDimitry Andric     const MachineOperand &MODef = CopyLike.getOperand(0);
9930b57cec5SDimitry Andric 
9940b57cec5SDimitry Andric     // We want to track something that is compatible with the
9950b57cec5SDimitry Andric     // partial definition.
9960b57cec5SDimitry Andric     if (MODef.getSubReg())
9970b57cec5SDimitry Andric       // Bail if we have to compose sub-register indices.
9980b57cec5SDimitry Andric       return false;
9990b57cec5SDimitry Andric     Dst = RegSubRegPair(MODef.getReg(),
10000b57cec5SDimitry Andric                         (unsigned)CopyLike.getOperand(3).getImm());
10010b57cec5SDimitry Andric     return true;
10020b57cec5SDimitry Andric   }
10030b57cec5SDimitry Andric 
1004e8d8bef9SDimitry Andric   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
10050b57cec5SDimitry Andric     if (CurrentSrcIdx != 2)
10060b57cec5SDimitry Andric       return false;
10070b57cec5SDimitry Andric     // We are rewriting the inserted reg.
10080b57cec5SDimitry Andric     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
10090b57cec5SDimitry Andric     MO.setReg(NewReg);
10100b57cec5SDimitry Andric     MO.setSubReg(NewSubReg);
10110b57cec5SDimitry Andric     return true;
10120b57cec5SDimitry Andric   }
10130b57cec5SDimitry Andric };
10140b57cec5SDimitry Andric 
10150b57cec5SDimitry Andric /// Specialized rewriter for EXTRACT_SUBREG instruction.
10160b57cec5SDimitry Andric class ExtractSubregRewriter : public Rewriter {
10170b57cec5SDimitry Andric   const TargetInstrInfo &TII;
10180b57cec5SDimitry Andric 
10190b57cec5SDimitry Andric public:
10200b57cec5SDimitry Andric   ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
10210b57cec5SDimitry Andric       : Rewriter(MI), TII(TII) {
10220b57cec5SDimitry Andric     assert(MI.isExtractSubreg() && "Invalid instruction");
10230b57cec5SDimitry Andric   }
10240b57cec5SDimitry Andric 
10250b57cec5SDimitry Andric   /// \see Rewriter::getNextRewritableSource()
10260b57cec5SDimitry Andric   /// Here CopyLike has the following form:
10270b57cec5SDimitry Andric   /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
10280b57cec5SDimitry Andric   /// There is only one rewritable source: Src.subIdx,
10290b57cec5SDimitry Andric   /// which defines dst.dstSubIdx.
10300b57cec5SDimitry Andric   bool getNextRewritableSource(RegSubRegPair &Src,
10310b57cec5SDimitry Andric                                RegSubRegPair &Dst) override {
10320b57cec5SDimitry Andric     // If we already get the only source we can rewrite, return false.
10330b57cec5SDimitry Andric     if (CurrentSrcIdx == 1)
10340b57cec5SDimitry Andric       return false;
10350b57cec5SDimitry Andric     // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
10360b57cec5SDimitry Andric     CurrentSrcIdx = 1;
10370b57cec5SDimitry Andric     const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
10380b57cec5SDimitry Andric     // If we have to compose sub-register indices, bail out.
10390b57cec5SDimitry Andric     if (MOExtractedReg.getSubReg())
10400b57cec5SDimitry Andric       return false;
10410b57cec5SDimitry Andric 
10420b57cec5SDimitry Andric     Src = RegSubRegPair(MOExtractedReg.getReg(),
10430b57cec5SDimitry Andric                         CopyLike.getOperand(2).getImm());
10440b57cec5SDimitry Andric 
10450b57cec5SDimitry Andric     // We want to track something that is compatible with the definition.
10460b57cec5SDimitry Andric     const MachineOperand &MODef = CopyLike.getOperand(0);
10470b57cec5SDimitry Andric     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
10480b57cec5SDimitry Andric     return true;
10490b57cec5SDimitry Andric   }
10500b57cec5SDimitry Andric 
1051e8d8bef9SDimitry Andric   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
10520b57cec5SDimitry Andric     // The only source we can rewrite is the input register.
10530b57cec5SDimitry Andric     if (CurrentSrcIdx != 1)
10540b57cec5SDimitry Andric       return false;
10550b57cec5SDimitry Andric 
10560b57cec5SDimitry Andric     CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric     // If we find a source that does not require to extract something,
10590b57cec5SDimitry Andric     // rewrite the operation with a copy.
10600b57cec5SDimitry Andric     if (!NewSubReg) {
10610b57cec5SDimitry Andric       // Move the current index to an invalid position.
10620b57cec5SDimitry Andric       // We do not want another call to this method to be able
10630b57cec5SDimitry Andric       // to do any change.
10640b57cec5SDimitry Andric       CurrentSrcIdx = -1;
10650b57cec5SDimitry Andric       // Rewrite the operation as a COPY.
10660b57cec5SDimitry Andric       // Get rid of the sub-register index.
106781ad6265SDimitry Andric       CopyLike.removeOperand(2);
10680b57cec5SDimitry Andric       // Morph the operation into a COPY.
10690b57cec5SDimitry Andric       CopyLike.setDesc(TII.get(TargetOpcode::COPY));
10700b57cec5SDimitry Andric       return true;
10710b57cec5SDimitry Andric     }
10720b57cec5SDimitry Andric     CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
10730b57cec5SDimitry Andric     return true;
10740b57cec5SDimitry Andric   }
10750b57cec5SDimitry Andric };
10760b57cec5SDimitry Andric 
10770b57cec5SDimitry Andric /// Specialized rewriter for REG_SEQUENCE instruction.
10780b57cec5SDimitry Andric class RegSequenceRewriter : public Rewriter {
10790b57cec5SDimitry Andric public:
10800b57cec5SDimitry Andric   RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
10810b57cec5SDimitry Andric     assert(MI.isRegSequence() && "Invalid instruction");
10820b57cec5SDimitry Andric   }
10830b57cec5SDimitry Andric 
10840b57cec5SDimitry Andric   /// \see Rewriter::getNextRewritableSource()
10850b57cec5SDimitry Andric   /// Here CopyLike has the following form:
10860b57cec5SDimitry Andric   /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
10870b57cec5SDimitry Andric   /// Each call will return a different source, walking all the available
10880b57cec5SDimitry Andric   /// source.
10890b57cec5SDimitry Andric   ///
10900b57cec5SDimitry Andric   /// The first call returns:
10910b57cec5SDimitry Andric   /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
10920b57cec5SDimitry Andric   /// (DstReg, DstSubReg) = (dst, subIdx1).
10930b57cec5SDimitry Andric   ///
10940b57cec5SDimitry Andric   /// The second call returns:
10950b57cec5SDimitry Andric   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
10960b57cec5SDimitry Andric   /// (DstReg, DstSubReg) = (dst, subIdx2).
10970b57cec5SDimitry Andric   ///
10980b57cec5SDimitry Andric   /// And so on, until all the sources have been traversed, then
10990b57cec5SDimitry Andric   /// it returns false.
11000b57cec5SDimitry Andric   bool getNextRewritableSource(RegSubRegPair &Src,
11010b57cec5SDimitry Andric                                RegSubRegPair &Dst) override {
11020b57cec5SDimitry Andric     // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
11030b57cec5SDimitry Andric 
11040b57cec5SDimitry Andric     // If this is the first call, move to the first argument.
11050b57cec5SDimitry Andric     if (CurrentSrcIdx == 0) {
11060b57cec5SDimitry Andric       CurrentSrcIdx = 1;
11070b57cec5SDimitry Andric     } else {
11080b57cec5SDimitry Andric       // Otherwise, move to the next argument and check that it is valid.
11090b57cec5SDimitry Andric       CurrentSrcIdx += 2;
11100b57cec5SDimitry Andric       if (CurrentSrcIdx >= CopyLike.getNumOperands())
11110b57cec5SDimitry Andric         return false;
11120b57cec5SDimitry Andric     }
11130b57cec5SDimitry Andric     const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
11140b57cec5SDimitry Andric     Src.Reg = MOInsertedReg.getReg();
11150b57cec5SDimitry Andric     // If we have to compose sub-register indices, bail out.
11160b57cec5SDimitry Andric     if ((Src.SubReg = MOInsertedReg.getSubReg()))
11170b57cec5SDimitry Andric       return false;
11180b57cec5SDimitry Andric 
11190b57cec5SDimitry Andric     // We want to track something that is compatible with the related
11200b57cec5SDimitry Andric     // partial definition.
11210b57cec5SDimitry Andric     Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
11220b57cec5SDimitry Andric 
11230b57cec5SDimitry Andric     const MachineOperand &MODef = CopyLike.getOperand(0);
11240b57cec5SDimitry Andric     Dst.Reg = MODef.getReg();
11250b57cec5SDimitry Andric     // If we have to compose sub-registers, bail.
11260b57cec5SDimitry Andric     return MODef.getSubReg() == 0;
11270b57cec5SDimitry Andric   }
11280b57cec5SDimitry Andric 
1129e8d8bef9SDimitry Andric   bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
11300b57cec5SDimitry Andric     // We cannot rewrite out of bound operands.
11310b57cec5SDimitry Andric     // Moreover, rewritable sources are at odd positions.
11320b57cec5SDimitry Andric     if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
11330b57cec5SDimitry Andric       return false;
11340b57cec5SDimitry Andric 
11350b57cec5SDimitry Andric     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
11360b57cec5SDimitry Andric     MO.setReg(NewReg);
11370b57cec5SDimitry Andric     MO.setSubReg(NewSubReg);
11380b57cec5SDimitry Andric     return true;
11390b57cec5SDimitry Andric   }
11400b57cec5SDimitry Andric };
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric } // end anonymous namespace
11430b57cec5SDimitry Andric 
11440b57cec5SDimitry Andric /// Get the appropriated Rewriter for \p MI.
11450b57cec5SDimitry Andric /// \return A pointer to a dynamically allocated Rewriter or nullptr if no
11460b57cec5SDimitry Andric /// rewriter works for \p MI.
11470b57cec5SDimitry Andric static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) {
11480b57cec5SDimitry Andric   // Handle uncoalescable copy-like instructions.
11490b57cec5SDimitry Andric   if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
11500b57cec5SDimitry Andric       MI.isExtractSubregLike())
11510b57cec5SDimitry Andric     return new UncoalescableRewriter(MI);
11520b57cec5SDimitry Andric 
11530b57cec5SDimitry Andric   switch (MI.getOpcode()) {
11540b57cec5SDimitry Andric   default:
11550b57cec5SDimitry Andric     return nullptr;
11560b57cec5SDimitry Andric   case TargetOpcode::COPY:
11570b57cec5SDimitry Andric     return new CopyRewriter(MI);
11580b57cec5SDimitry Andric   case TargetOpcode::INSERT_SUBREG:
11590b57cec5SDimitry Andric     return new InsertSubregRewriter(MI);
11600b57cec5SDimitry Andric   case TargetOpcode::EXTRACT_SUBREG:
11610b57cec5SDimitry Andric     return new ExtractSubregRewriter(MI, TII);
11620b57cec5SDimitry Andric   case TargetOpcode::REG_SEQUENCE:
11630b57cec5SDimitry Andric     return new RegSequenceRewriter(MI);
11640b57cec5SDimitry Andric   }
11650b57cec5SDimitry Andric }
11660b57cec5SDimitry Andric 
11670b57cec5SDimitry Andric /// Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find
11680b57cec5SDimitry Andric /// the new source to use for rewrite. If \p HandleMultipleSources is true and
11690b57cec5SDimitry Andric /// multiple sources for a given \p Def are found along the way, we found a
11700b57cec5SDimitry Andric /// PHI instructions that needs to be rewritten.
11710b57cec5SDimitry Andric /// TODO: HandleMultipleSources should be removed once we test PHI handling
11720b57cec5SDimitry Andric /// with coalescable copies.
11730b57cec5SDimitry Andric static RegSubRegPair
11740b57cec5SDimitry Andric getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
11750b57cec5SDimitry Andric              RegSubRegPair Def,
11760b57cec5SDimitry Andric              const PeepholeOptimizer::RewriteMapTy &RewriteMap,
11770b57cec5SDimitry Andric              bool HandleMultipleSources = true) {
11780b57cec5SDimitry Andric   RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
11790b57cec5SDimitry Andric   while (true) {
11800b57cec5SDimitry Andric     ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
11810b57cec5SDimitry Andric     // If there are no entries on the map, LookupSrc is the new source.
11820b57cec5SDimitry Andric     if (!Res.isValid())
11830b57cec5SDimitry Andric       return LookupSrc;
11840b57cec5SDimitry Andric 
11850b57cec5SDimitry Andric     // There's only one source for this definition, keep searching...
11860b57cec5SDimitry Andric     unsigned NumSrcs = Res.getNumSources();
11870b57cec5SDimitry Andric     if (NumSrcs == 1) {
11880b57cec5SDimitry Andric       LookupSrc.Reg = Res.getSrcReg(0);
11890b57cec5SDimitry Andric       LookupSrc.SubReg = Res.getSrcSubReg(0);
11900b57cec5SDimitry Andric       continue;
11910b57cec5SDimitry Andric     }
11920b57cec5SDimitry Andric 
11930b57cec5SDimitry Andric     // TODO: Remove once multiple srcs w/ coalescable copies are supported.
11940b57cec5SDimitry Andric     if (!HandleMultipleSources)
11950b57cec5SDimitry Andric       break;
11960b57cec5SDimitry Andric 
11970b57cec5SDimitry Andric     // Multiple sources, recurse into each source to find a new source
11980b57cec5SDimitry Andric     // for it. Then, rewrite the PHI accordingly to its new edges.
11990b57cec5SDimitry Andric     SmallVector<RegSubRegPair, 4> NewPHISrcs;
12000b57cec5SDimitry Andric     for (unsigned i = 0; i < NumSrcs; ++i) {
12010b57cec5SDimitry Andric       RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
12020b57cec5SDimitry Andric       NewPHISrcs.push_back(
12030b57cec5SDimitry Andric           getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
12040b57cec5SDimitry Andric     }
12050b57cec5SDimitry Andric 
12060b57cec5SDimitry Andric     // Build the new PHI node and return its def register as the new source.
12070b57cec5SDimitry Andric     MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
12080b57cec5SDimitry Andric     MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
12090b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "-- getNewSource\n");
12100b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "   Replacing: " << OrigPHI);
12110b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "        With: " << NewPHI);
12120b57cec5SDimitry Andric     const MachineOperand &MODef = NewPHI.getOperand(0);
12130b57cec5SDimitry Andric     return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
12140b57cec5SDimitry Andric   }
12150b57cec5SDimitry Andric 
12160b57cec5SDimitry Andric   return RegSubRegPair(0, 0);
12170b57cec5SDimitry Andric }
12180b57cec5SDimitry Andric 
12190b57cec5SDimitry Andric /// Optimize generic copy instructions to avoid cross register bank copy.
12200b57cec5SDimitry Andric /// The optimization looks through a chain of copies and tries to find a source
12210b57cec5SDimitry Andric /// that has a compatible register class.
12220b57cec5SDimitry Andric /// Two register classes are considered to be compatible if they share the same
12230b57cec5SDimitry Andric /// register bank.
12240b57cec5SDimitry Andric /// New copies issued by this optimization are register allocator
12250b57cec5SDimitry Andric /// friendly. This optimization does not remove any copy as it may
12260b57cec5SDimitry Andric /// overconstrain the register allocator, but replaces some operands
12270b57cec5SDimitry Andric /// when possible.
12280b57cec5SDimitry Andric /// \pre isCoalescableCopy(*MI) is true.
12290b57cec5SDimitry Andric /// \return True, when \p MI has been rewritten. False otherwise.
12300b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
12310b57cec5SDimitry Andric   assert(isCoalescableCopy(MI) && "Invalid argument");
12320b57cec5SDimitry Andric   assert(MI.getDesc().getNumDefs() == 1 &&
12330b57cec5SDimitry Andric          "Coalescer can understand multiple defs?!");
12340b57cec5SDimitry Andric   const MachineOperand &MODef = MI.getOperand(0);
12350b57cec5SDimitry Andric   // Do not rewrite physical definitions.
1236bdd1243dSDimitry Andric   if (MODef.getReg().isPhysical())
12370b57cec5SDimitry Andric     return false;
12380b57cec5SDimitry Andric 
12390b57cec5SDimitry Andric   bool Changed = false;
12400b57cec5SDimitry Andric   // Get the right rewriter for the current copy.
12410b57cec5SDimitry Andric   std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII));
12420b57cec5SDimitry Andric   // If none exists, bail out.
12430b57cec5SDimitry Andric   if (!CpyRewriter)
12440b57cec5SDimitry Andric     return false;
12450b57cec5SDimitry Andric   // Rewrite each rewritable source.
12460b57cec5SDimitry Andric   RegSubRegPair Src;
12470b57cec5SDimitry Andric   RegSubRegPair TrackPair;
12480b57cec5SDimitry Andric   while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) {
12490b57cec5SDimitry Andric     // Keep track of PHI nodes and its incoming edges when looking for sources.
12500b57cec5SDimitry Andric     RewriteMapTy RewriteMap;
12510b57cec5SDimitry Andric     // Try to find a more suitable source. If we failed to do so, or get the
12520b57cec5SDimitry Andric     // actual source, move to the next source.
12530b57cec5SDimitry Andric     if (!findNextSource(TrackPair, RewriteMap))
12540b57cec5SDimitry Andric       continue;
12550b57cec5SDimitry Andric 
12560b57cec5SDimitry Andric     // Get the new source to rewrite. TODO: Only enable handling of multiple
12570b57cec5SDimitry Andric     // sources (PHIs) once we have a motivating example and testcases for it.
12580b57cec5SDimitry Andric     RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
12590b57cec5SDimitry Andric                                         /*HandleMultipleSources=*/false);
12600b57cec5SDimitry Andric     if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
12610b57cec5SDimitry Andric       continue;
12620b57cec5SDimitry Andric 
12630b57cec5SDimitry Andric     // Rewrite source.
12640b57cec5SDimitry Andric     if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
12650b57cec5SDimitry Andric       // We may have extended the live-range of NewSrc, account for that.
12660b57cec5SDimitry Andric       MRI->clearKillFlags(NewSrc.Reg);
12670b57cec5SDimitry Andric       Changed = true;
12680b57cec5SDimitry Andric     }
12690b57cec5SDimitry Andric   }
12700b57cec5SDimitry Andric   // TODO: We could have a clean-up method to tidy the instruction.
12710b57cec5SDimitry Andric   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
12720b57cec5SDimitry Andric   // => v0 = COPY v1
12730b57cec5SDimitry Andric   // Currently we haven't seen motivating example for that and we
12740b57cec5SDimitry Andric   // want to avoid untested code.
12750b57cec5SDimitry Andric   NumRewrittenCopies += Changed;
12760b57cec5SDimitry Andric   return Changed;
12770b57cec5SDimitry Andric }
12780b57cec5SDimitry Andric 
12790b57cec5SDimitry Andric /// Rewrite the source found through \p Def, by using the \p RewriteMap
12800b57cec5SDimitry Andric /// and create a new COPY instruction. More info about RewriteMap in
12810b57cec5SDimitry Andric /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
12820b57cec5SDimitry Andric /// Uncoalescable copies, since they are copy like instructions that aren't
12830b57cec5SDimitry Andric /// recognized by the register allocator.
12840b57cec5SDimitry Andric MachineInstr &
12850b57cec5SDimitry Andric PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
12860b57cec5SDimitry Andric                                  RegSubRegPair Def, RewriteMapTy &RewriteMap) {
1287bdd1243dSDimitry Andric   assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");
12880b57cec5SDimitry Andric 
12890b57cec5SDimitry Andric   // Find the new source to use in the COPY rewrite.
12900b57cec5SDimitry Andric   RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
12910b57cec5SDimitry Andric 
12920b57cec5SDimitry Andric   // Insert the COPY.
12930b57cec5SDimitry Andric   const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
12948bcb0991SDimitry Andric   Register NewVReg = MRI->createVirtualRegister(DefRC);
12950b57cec5SDimitry Andric 
12960b57cec5SDimitry Andric   MachineInstr *NewCopy =
12970b57cec5SDimitry Andric       BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
12980b57cec5SDimitry Andric               TII->get(TargetOpcode::COPY), NewVReg)
12990b57cec5SDimitry Andric           .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
13000b57cec5SDimitry Andric 
13010b57cec5SDimitry Andric   if (Def.SubReg) {
13020b57cec5SDimitry Andric     NewCopy->getOperand(0).setSubReg(Def.SubReg);
13030b57cec5SDimitry Andric     NewCopy->getOperand(0).setIsUndef();
13040b57cec5SDimitry Andric   }
13050b57cec5SDimitry Andric 
13060b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
13070b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "   Replacing: " << CopyLike);
13080b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "        With: " << *NewCopy);
13090b57cec5SDimitry Andric   MRI->replaceRegWith(Def.Reg, NewVReg);
13100b57cec5SDimitry Andric   MRI->clearKillFlags(NewVReg);
13110b57cec5SDimitry Andric 
13120b57cec5SDimitry Andric   // We extended the lifetime of NewSrc.Reg, clear the kill flags to
13130b57cec5SDimitry Andric   // account for that.
13140b57cec5SDimitry Andric   MRI->clearKillFlags(NewSrc.Reg);
13150b57cec5SDimitry Andric 
13160b57cec5SDimitry Andric   return *NewCopy;
13170b57cec5SDimitry Andric }
13180b57cec5SDimitry Andric 
13190b57cec5SDimitry Andric /// Optimize copy-like instructions to create
13200b57cec5SDimitry Andric /// register coalescer friendly instruction.
13210b57cec5SDimitry Andric /// The optimization tries to kill-off the \p MI by looking
13220b57cec5SDimitry Andric /// through a chain of copies to find a source that has a compatible
13230b57cec5SDimitry Andric /// register class.
13240b57cec5SDimitry Andric /// If such a source is found, it replace \p MI by a generic COPY
13250b57cec5SDimitry Andric /// operation.
13260b57cec5SDimitry Andric /// \pre isUncoalescableCopy(*MI) is true.
13270b57cec5SDimitry Andric /// \return True, when \p MI has been optimized. In that case, \p MI has
13280b57cec5SDimitry Andric /// been removed from its parent.
13290b57cec5SDimitry Andric /// All COPY instructions created, are inserted in \p LocalMIs.
13300b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeUncoalescableCopy(
13310b57cec5SDimitry Andric     MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
13320b57cec5SDimitry Andric   assert(isUncoalescableCopy(MI) && "Invalid argument");
13330b57cec5SDimitry Andric   UncoalescableRewriter CpyRewriter(MI);
13340b57cec5SDimitry Andric 
13350b57cec5SDimitry Andric   // Rewrite each rewritable source by generating new COPYs. This works
13360b57cec5SDimitry Andric   // differently from optimizeCoalescableCopy since it first makes sure that all
13370b57cec5SDimitry Andric   // definitions can be rewritten.
13380b57cec5SDimitry Andric   RewriteMapTy RewriteMap;
13390b57cec5SDimitry Andric   RegSubRegPair Src;
13400b57cec5SDimitry Andric   RegSubRegPair Def;
13410b57cec5SDimitry Andric   SmallVector<RegSubRegPair, 4> RewritePairs;
13420b57cec5SDimitry Andric   while (CpyRewriter.getNextRewritableSource(Src, Def)) {
13430b57cec5SDimitry Andric     // If a physical register is here, this is probably for a good reason.
13440b57cec5SDimitry Andric     // Do not rewrite that.
1345bdd1243dSDimitry Andric     if (Def.Reg.isPhysical())
13460b57cec5SDimitry Andric       return false;
13470b57cec5SDimitry Andric 
13480b57cec5SDimitry Andric     // If we do not know how to rewrite this definition, there is no point
13490b57cec5SDimitry Andric     // in trying to kill this instruction.
13500b57cec5SDimitry Andric     if (!findNextSource(Def, RewriteMap))
13510b57cec5SDimitry Andric       return false;
13520b57cec5SDimitry Andric 
13530b57cec5SDimitry Andric     RewritePairs.push_back(Def);
13540b57cec5SDimitry Andric   }
13550b57cec5SDimitry Andric 
13560b57cec5SDimitry Andric   // The change is possible for all defs, do it.
13570b57cec5SDimitry Andric   for (const RegSubRegPair &Def : RewritePairs) {
13580b57cec5SDimitry Andric     // Rewrite the "copy" in a way the register coalescer understands.
13590b57cec5SDimitry Andric     MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
13600b57cec5SDimitry Andric     LocalMIs.insert(&NewCopy);
13610b57cec5SDimitry Andric   }
13620b57cec5SDimitry Andric 
13630b57cec5SDimitry Andric   // MI is now dead.
13645ffd83dbSDimitry Andric   LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
13650b57cec5SDimitry Andric   MI.eraseFromParent();
13660b57cec5SDimitry Andric   ++NumUncoalescableCopies;
13670b57cec5SDimitry Andric   return true;
13680b57cec5SDimitry Andric }
13690b57cec5SDimitry Andric 
13700b57cec5SDimitry Andric /// Check whether MI is a candidate for folding into a later instruction.
13710b57cec5SDimitry Andric /// We only fold loads to virtual registers and the virtual register defined
13720b57cec5SDimitry Andric /// has a single user.
13730b57cec5SDimitry Andric bool PeepholeOptimizer::isLoadFoldable(
1374e8d8bef9SDimitry Andric     MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
13750b57cec5SDimitry Andric   if (!MI.canFoldAsLoad() || !MI.mayLoad())
13760b57cec5SDimitry Andric     return false;
13770b57cec5SDimitry Andric   const MCInstrDesc &MCID = MI.getDesc();
13780b57cec5SDimitry Andric   if (MCID.getNumDefs() != 1)
13790b57cec5SDimitry Andric     return false;
13800b57cec5SDimitry Andric 
13818bcb0991SDimitry Andric   Register Reg = MI.getOperand(0).getReg();
13820b57cec5SDimitry Andric   // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
13830b57cec5SDimitry Andric   // loads. It should be checked when processing uses of the load, since
13840b57cec5SDimitry Andric   // uses can be removed during peephole.
1385e8d8bef9SDimitry Andric   if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
13860b57cec5SDimitry Andric       MRI->hasOneNonDBGUser(Reg)) {
13870b57cec5SDimitry Andric     FoldAsLoadDefCandidates.insert(Reg);
13880b57cec5SDimitry Andric     return true;
13890b57cec5SDimitry Andric   }
13900b57cec5SDimitry Andric   return false;
13910b57cec5SDimitry Andric }
13920b57cec5SDimitry Andric 
13930b57cec5SDimitry Andric bool PeepholeOptimizer::isMoveImmediate(
1394e8d8bef9SDimitry Andric     MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
1395e8d8bef9SDimitry Andric     DenseMap<Register, MachineInstr *> &ImmDefMIs) {
13960b57cec5SDimitry Andric   const MCInstrDesc &MCID = MI.getDesc();
13975f757f3fSDimitry Andric   if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
13980b57cec5SDimitry Andric     return false;
13998bcb0991SDimitry Andric   Register Reg = MI.getOperand(0).getReg();
14005f757f3fSDimitry Andric   if (!Reg.isVirtual())
14015f757f3fSDimitry Andric     return false;
14025f757f3fSDimitry Andric 
14035f757f3fSDimitry Andric   int64_t ImmVal;
14045f757f3fSDimitry Andric   if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
14055f757f3fSDimitry Andric     return false;
14065f757f3fSDimitry Andric 
14070b57cec5SDimitry Andric   ImmDefMIs.insert(std::make_pair(Reg, &MI));
14080b57cec5SDimitry Andric   ImmDefRegs.insert(Reg);
14090b57cec5SDimitry Andric   return true;
14100b57cec5SDimitry Andric }
14110b57cec5SDimitry Andric 
14120b57cec5SDimitry Andric /// Try folding register operands that are defined by move immediate
14130b57cec5SDimitry Andric /// instructions, i.e. a trivial constant folding optimization, if
14140b57cec5SDimitry Andric /// and only if the def and use are in the same BB.
1415e8d8bef9SDimitry Andric bool PeepholeOptimizer::foldImmediate(
1416e8d8bef9SDimitry Andric     MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
14175f757f3fSDimitry Andric     DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
14185f757f3fSDimitry Andric   Deleted = false;
14190b57cec5SDimitry Andric   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
14200b57cec5SDimitry Andric     MachineOperand &MO = MI.getOperand(i);
14210b57cec5SDimitry Andric     if (!MO.isReg() || MO.isDef())
14220b57cec5SDimitry Andric       continue;
14238bcb0991SDimitry Andric     Register Reg = MO.getReg();
1424e8d8bef9SDimitry Andric     if (!Reg.isVirtual())
14250b57cec5SDimitry Andric       continue;
14260b57cec5SDimitry Andric     if (ImmDefRegs.count(Reg) == 0)
14270b57cec5SDimitry Andric       continue;
1428e8d8bef9SDimitry Andric     DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
14290b57cec5SDimitry Andric     assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
1430*0fca6ea1SDimitry Andric     if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
14310b57cec5SDimitry Andric       ++NumImmFold;
1432*0fca6ea1SDimitry Andric       // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
14335f757f3fSDimitry Andric       // is not deleted, and we happened to get a same MI, we can delete MI and
14345f757f3fSDimitry Andric       // replace its users.
14355f757f3fSDimitry Andric       if (MRI->getVRegDef(Reg) &&
14365f757f3fSDimitry Andric           MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
14375f757f3fSDimitry Andric         Register DstReg = MI.getOperand(0).getReg();
14385f757f3fSDimitry Andric         if (DstReg.isVirtual() &&
14395f757f3fSDimitry Andric             MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
14405f757f3fSDimitry Andric           MRI->replaceRegWith(DstReg, Reg);
14415f757f3fSDimitry Andric           MI.eraseFromParent();
14425f757f3fSDimitry Andric           Deleted = true;
14435f757f3fSDimitry Andric         }
14445f757f3fSDimitry Andric       }
14450b57cec5SDimitry Andric       return true;
14460b57cec5SDimitry Andric     }
14470b57cec5SDimitry Andric   }
14480b57cec5SDimitry Andric   return false;
14490b57cec5SDimitry Andric }
14500b57cec5SDimitry Andric 
14510b57cec5SDimitry Andric // FIXME: This is very simple and misses some cases which should be handled when
14520b57cec5SDimitry Andric // motivating examples are found.
14530b57cec5SDimitry Andric //
14540b57cec5SDimitry Andric // The copy rewriting logic should look at uses as well as defs and be able to
14550b57cec5SDimitry Andric // eliminate copies across blocks.
14560b57cec5SDimitry Andric //
14570b57cec5SDimitry Andric // Later copies that are subregister extracts will also not be eliminated since
14580b57cec5SDimitry Andric // only the first copy is considered.
14590b57cec5SDimitry Andric //
14600b57cec5SDimitry Andric // e.g.
14610b57cec5SDimitry Andric // %1 = COPY %0
14620b57cec5SDimitry Andric // %2 = COPY %0:sub1
14630b57cec5SDimitry Andric //
14640b57cec5SDimitry Andric // Should replace %2 uses with %1:sub1
14655f757f3fSDimitry Andric bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
14660b57cec5SDimitry Andric   assert(MI.isCopy() && "expected a COPY machine instruction");
14670b57cec5SDimitry Andric 
14685f757f3fSDimitry Andric   RegSubRegPair SrcPair;
14695f757f3fSDimitry Andric   if (!getCopySrc(MI, SrcPair))
14700b57cec5SDimitry Andric     return false;
14710b57cec5SDimitry Andric 
14728bcb0991SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1473e8d8bef9SDimitry Andric   if (!DstReg.isVirtual())
14740b57cec5SDimitry Andric     return false;
14750b57cec5SDimitry Andric 
14765f757f3fSDimitry Andric   if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
14770b57cec5SDimitry Andric     // First copy of this reg seen.
14780b57cec5SDimitry Andric     return false;
14790b57cec5SDimitry Andric   }
14800b57cec5SDimitry Andric 
14815f757f3fSDimitry Andric   MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;
14820b57cec5SDimitry Andric 
14835f757f3fSDimitry Andric   assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
1484e8d8bef9SDimitry Andric          "Unexpected mismatching subreg!");
14850b57cec5SDimitry Andric 
14868bcb0991SDimitry Andric   Register PrevDstReg = PrevCopy->getOperand(0).getReg();
14870b57cec5SDimitry Andric 
14880b57cec5SDimitry Andric   // Only replace if the copy register class is the same.
14890b57cec5SDimitry Andric   //
14900b57cec5SDimitry Andric   // TODO: If we have multiple copies to different register classes, we may want
14910b57cec5SDimitry Andric   // to track multiple copies of the same source register.
14920b57cec5SDimitry Andric   if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
14930b57cec5SDimitry Andric     return false;
14940b57cec5SDimitry Andric 
14950b57cec5SDimitry Andric   MRI->replaceRegWith(DstReg, PrevDstReg);
14960b57cec5SDimitry Andric 
14970b57cec5SDimitry Andric   // Lifetime of the previous copy has been extended.
14980b57cec5SDimitry Andric   MRI->clearKillFlags(PrevDstReg);
14990b57cec5SDimitry Andric   return true;
15000b57cec5SDimitry Andric }
15010b57cec5SDimitry Andric 
1502e8d8bef9SDimitry Andric bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
1503e8d8bef9SDimitry Andric   return Reg.isPhysical() && !MRI->isAllocatable(Reg);
15040b57cec5SDimitry Andric }
15050b57cec5SDimitry Andric 
15060b57cec5SDimitry Andric bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1507e8d8bef9SDimitry Andric     MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
15080b57cec5SDimitry Andric   assert(MI.isCopy() && "expected a COPY machine instruction");
15090b57cec5SDimitry Andric 
15100b57cec5SDimitry Andric   if (DisableNAPhysCopyOpt)
15110b57cec5SDimitry Andric     return false;
15120b57cec5SDimitry Andric 
15138bcb0991SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
15148bcb0991SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
1515bdd1243dSDimitry Andric   if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
1516e8d8bef9SDimitry Andric     // %vreg = COPY $physreg
15170b57cec5SDimitry Andric     // Avoid using a datastructure which can track multiple live non-allocatable
15180b57cec5SDimitry Andric     // phys->virt copies since LLVM doesn't seem to do this.
15190b57cec5SDimitry Andric     NAPhysToVirtMIs.insert({SrcReg, &MI});
15200b57cec5SDimitry Andric     return false;
15210b57cec5SDimitry Andric   }
15220b57cec5SDimitry Andric 
1523e8d8bef9SDimitry Andric   if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
15240b57cec5SDimitry Andric     return false;
15250b57cec5SDimitry Andric 
1526e8d8bef9SDimitry Andric   // $physreg = COPY %vreg
15270b57cec5SDimitry Andric   auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
15280b57cec5SDimitry Andric   if (PrevCopy == NAPhysToVirtMIs.end()) {
15290b57cec5SDimitry Andric     // We can't remove the copy: there was an intervening clobber of the
15300b57cec5SDimitry Andric     // non-allocatable physical register after the copy to virtual.
15310b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
15320b57cec5SDimitry Andric                       << MI);
15330b57cec5SDimitry Andric     return false;
15340b57cec5SDimitry Andric   }
15350b57cec5SDimitry Andric 
15368bcb0991SDimitry Andric   Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
15370b57cec5SDimitry Andric   if (PrevDstReg == SrcReg) {
15380b57cec5SDimitry Andric     // Remove the virt->phys copy: we saw the virtual register definition, and
15390b57cec5SDimitry Andric     // the non-allocatable physical register's state hasn't changed since then.
15400b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
15410b57cec5SDimitry Andric     ++NumNAPhysCopies;
15420b57cec5SDimitry Andric     return true;
15430b57cec5SDimitry Andric   }
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric   // Potential missed optimization opportunity: we saw a different virtual
15460b57cec5SDimitry Andric   // register get a copy of the non-allocatable physical register, and we only
15470b57cec5SDimitry Andric   // track one such copy. Avoid getting confused by this new non-allocatable
15480b57cec5SDimitry Andric   // physical register definition, and remove it from the tracked copies.
15490b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
15500b57cec5SDimitry Andric   NAPhysToVirtMIs.erase(PrevCopy);
15510b57cec5SDimitry Andric   return false;
15520b57cec5SDimitry Andric }
15530b57cec5SDimitry Andric 
15540b57cec5SDimitry Andric /// \bried Returns true if \p MO is a virtual register operand.
15550b57cec5SDimitry Andric static bool isVirtualRegisterOperand(MachineOperand &MO) {
1556e8d8bef9SDimitry Andric   return MO.isReg() && MO.getReg().isVirtual();
15570b57cec5SDimitry Andric }
15580b57cec5SDimitry Andric 
15590b57cec5SDimitry Andric bool PeepholeOptimizer::findTargetRecurrence(
1560e8d8bef9SDimitry Andric     Register Reg, const SmallSet<Register, 2> &TargetRegs,
15610b57cec5SDimitry Andric     RecurrenceCycle &RC) {
15620b57cec5SDimitry Andric   // Recurrence found if Reg is in TargetRegs.
15630b57cec5SDimitry Andric   if (TargetRegs.count(Reg))
15640b57cec5SDimitry Andric     return true;
15650b57cec5SDimitry Andric 
15660b57cec5SDimitry Andric   // TODO: Curerntly, we only allow the last instruction of the recurrence
15670b57cec5SDimitry Andric   // cycle (the instruction that feeds the PHI instruction) to have more than
15680b57cec5SDimitry Andric   // one uses to guarantee that commuting operands does not tie registers
15690b57cec5SDimitry Andric   // with overlapping live range. Once we have actual live range info of
15700b57cec5SDimitry Andric   // each register, this constraint can be relaxed.
15710b57cec5SDimitry Andric   if (!MRI->hasOneNonDBGUse(Reg))
15720b57cec5SDimitry Andric     return false;
15730b57cec5SDimitry Andric 
15740b57cec5SDimitry Andric   // Give up if the reccurrence chain length is longer than the limit.
15750b57cec5SDimitry Andric   if (RC.size() >= MaxRecurrenceChain)
15760b57cec5SDimitry Andric     return false;
15770b57cec5SDimitry Andric 
15780b57cec5SDimitry Andric   MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
1579*0fca6ea1SDimitry Andric   unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
15800b57cec5SDimitry Andric 
15810b57cec5SDimitry Andric   // Only interested in recurrences whose instructions have only one def, which
15820b57cec5SDimitry Andric   // is a virtual register.
15830b57cec5SDimitry Andric   if (MI.getDesc().getNumDefs() != 1)
15840b57cec5SDimitry Andric     return false;
15850b57cec5SDimitry Andric 
15860b57cec5SDimitry Andric   MachineOperand &DefOp = MI.getOperand(0);
15870b57cec5SDimitry Andric   if (!isVirtualRegisterOperand(DefOp))
15880b57cec5SDimitry Andric     return false;
15890b57cec5SDimitry Andric 
15900b57cec5SDimitry Andric   // Check if def operand of MI is tied to any use operand. We are only
15910b57cec5SDimitry Andric   // interested in the case that all the instructions in the recurrence chain
15920b57cec5SDimitry Andric   // have there def operand tied with one of the use operand.
15930b57cec5SDimitry Andric   unsigned TiedUseIdx;
15940b57cec5SDimitry Andric   if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
15950b57cec5SDimitry Andric     return false;
15960b57cec5SDimitry Andric 
15970b57cec5SDimitry Andric   if (Idx == TiedUseIdx) {
15980b57cec5SDimitry Andric     RC.push_back(RecurrenceInstr(&MI));
15990b57cec5SDimitry Andric     return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
16000b57cec5SDimitry Andric   } else {
16010b57cec5SDimitry Andric     // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
16020b57cec5SDimitry Andric     unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
16030b57cec5SDimitry Andric     if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
16040b57cec5SDimitry Andric       RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
16050b57cec5SDimitry Andric       return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
16060b57cec5SDimitry Andric     }
16070b57cec5SDimitry Andric   }
16080b57cec5SDimitry Andric 
16090b57cec5SDimitry Andric   return false;
16100b57cec5SDimitry Andric }
16110b57cec5SDimitry Andric 
16120b57cec5SDimitry Andric /// Phi instructions will eventually be lowered to copy instructions.
16130b57cec5SDimitry Andric /// If phi is in a loop header, a recurrence may formulated around the source
16140b57cec5SDimitry Andric /// and destination of the phi. For such case commuting operands of the
16150b57cec5SDimitry Andric /// instructions in the recurrence may enable coalescing of the copy instruction
16160b57cec5SDimitry Andric /// generated from the phi. For example, if there is a recurrence of
16170b57cec5SDimitry Andric ///
16180b57cec5SDimitry Andric /// LoopHeader:
16190b57cec5SDimitry Andric ///   %1 = phi(%0, %100)
16200b57cec5SDimitry Andric /// LoopLatch:
16210b57cec5SDimitry Andric ///   %0<def, tied1> = ADD %2<def, tied0>, %1
16220b57cec5SDimitry Andric ///
16230b57cec5SDimitry Andric /// , the fact that %0 and %2 are in the same tied operands set makes
16240b57cec5SDimitry Andric /// the coalescing of copy instruction generated from the phi in
16250b57cec5SDimitry Andric /// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
16260b57cec5SDimitry Andric /// %2 have overlapping live range. This introduces additional move
16270b57cec5SDimitry Andric /// instruction to the final assembly. However, if we commute %2 and
16280b57cec5SDimitry Andric /// %1 of ADD instruction, the redundant move instruction can be
16290b57cec5SDimitry Andric /// avoided.
16300b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
1631e8d8bef9SDimitry Andric   SmallSet<Register, 2> TargetRegs;
16320b57cec5SDimitry Andric   for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
16330b57cec5SDimitry Andric     MachineOperand &MO = PHI.getOperand(Idx);
16340b57cec5SDimitry Andric     assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
16350b57cec5SDimitry Andric     TargetRegs.insert(MO.getReg());
16360b57cec5SDimitry Andric   }
16370b57cec5SDimitry Andric 
16380b57cec5SDimitry Andric   bool Changed = false;
16390b57cec5SDimitry Andric   RecurrenceCycle RC;
16400b57cec5SDimitry Andric   if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
16410b57cec5SDimitry Andric     // Commutes operands of instructions in RC if necessary so that the copy to
16420b57cec5SDimitry Andric     // be generated from PHI can be coalesced.
16430b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
16440b57cec5SDimitry Andric     for (auto &RI : RC) {
16450b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
16460b57cec5SDimitry Andric       auto CP = RI.getCommutePair();
16470b57cec5SDimitry Andric       if (CP) {
16480b57cec5SDimitry Andric         Changed = true;
16490b57cec5SDimitry Andric         TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
16500b57cec5SDimitry Andric                                 (*CP).second);
16510b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
16520b57cec5SDimitry Andric       }
16530b57cec5SDimitry Andric     }
16540b57cec5SDimitry Andric   }
16550b57cec5SDimitry Andric 
16560b57cec5SDimitry Andric   return Changed;
16570b57cec5SDimitry Andric }
16580b57cec5SDimitry Andric 
16590b57cec5SDimitry Andric bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
16600b57cec5SDimitry Andric   if (skipFunction(MF.getFunction()))
16610b57cec5SDimitry Andric     return false;
16620b57cec5SDimitry Andric 
16630b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
16640b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
16650b57cec5SDimitry Andric 
16660b57cec5SDimitry Andric   if (DisablePeephole)
16670b57cec5SDimitry Andric     return false;
16680b57cec5SDimitry Andric 
16690b57cec5SDimitry Andric   TII = MF.getSubtarget().getInstrInfo();
16700b57cec5SDimitry Andric   TRI = MF.getSubtarget().getRegisterInfo();
16710b57cec5SDimitry Andric   MRI = &MF.getRegInfo();
1672*0fca6ea1SDimitry Andric   DT = Aggressive ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1673*0fca6ea1SDimitry Andric                   : nullptr;
1674*0fca6ea1SDimitry Andric   MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
16755f757f3fSDimitry Andric   MF.setDelegate(this);
16760b57cec5SDimitry Andric 
16770b57cec5SDimitry Andric   bool Changed = false;
16780b57cec5SDimitry Andric 
16790b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
16800b57cec5SDimitry Andric     bool SeenMoveImm = false;
16810b57cec5SDimitry Andric 
16820b57cec5SDimitry Andric     // During this forward scan, at some point it needs to answer the question
16830b57cec5SDimitry Andric     // "given a pointer to an MI in the current BB, is it located before or
16840b57cec5SDimitry Andric     // after the current instruction".
16850b57cec5SDimitry Andric     // To perform this, the following set keeps track of the MIs already seen
16860b57cec5SDimitry Andric     // during the scan, if a MI is not in the set, it is assumed to be located
16870b57cec5SDimitry Andric     // after. Newly created MIs have to be inserted in the set as well.
16880b57cec5SDimitry Andric     SmallPtrSet<MachineInstr*, 16> LocalMIs;
1689e8d8bef9SDimitry Andric     SmallSet<Register, 4> ImmDefRegs;
1690e8d8bef9SDimitry Andric     DenseMap<Register, MachineInstr *> ImmDefMIs;
1691e8d8bef9SDimitry Andric     SmallSet<Register, 16> FoldAsLoadDefCandidates;
16920b57cec5SDimitry Andric 
16930b57cec5SDimitry Andric     // Track when a non-allocatable physical register is copied to a virtual
16940b57cec5SDimitry Andric     // register so that useless moves can be removed.
16950b57cec5SDimitry Andric     //
1696e8d8bef9SDimitry Andric     // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
1697e8d8bef9SDimitry Andric     // without any intervening re-definition of $physreg.
1698e8d8bef9SDimitry Andric     DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
16990b57cec5SDimitry Andric 
17005f757f3fSDimitry Andric     CopySrcMIs.clear();
17010b57cec5SDimitry Andric 
17020b57cec5SDimitry Andric     bool IsLoopHeader = MLI->isLoopHeader(&MBB);
17030b57cec5SDimitry Andric 
17040b57cec5SDimitry Andric     for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
17050b57cec5SDimitry Andric          MII != MIE; ) {
17060b57cec5SDimitry Andric       MachineInstr *MI = &*MII;
17070b57cec5SDimitry Andric       // We may be erasing MI below, increment MII now.
17080b57cec5SDimitry Andric       ++MII;
17090b57cec5SDimitry Andric       LocalMIs.insert(MI);
17100b57cec5SDimitry Andric 
1711e8d8bef9SDimitry Andric       // Skip debug instructions. They should not affect this peephole
1712e8d8bef9SDimitry Andric       // optimization.
17130b57cec5SDimitry Andric       if (MI->isDebugInstr())
17140b57cec5SDimitry Andric         continue;
17150b57cec5SDimitry Andric 
17160b57cec5SDimitry Andric       if (MI->isPosition())
17170b57cec5SDimitry Andric         continue;
17180b57cec5SDimitry Andric 
17190b57cec5SDimitry Andric       if (IsLoopHeader && MI->isPHI()) {
17200b57cec5SDimitry Andric         if (optimizeRecurrence(*MI)) {
17210b57cec5SDimitry Andric           Changed = true;
17220b57cec5SDimitry Andric           continue;
17230b57cec5SDimitry Andric         }
17240b57cec5SDimitry Andric       }
17250b57cec5SDimitry Andric 
17260b57cec5SDimitry Andric       if (!MI->isCopy()) {
17270b57cec5SDimitry Andric         for (const MachineOperand &MO : MI->operands()) {
17280b57cec5SDimitry Andric           // Visit all operands: definitions can be implicit or explicit.
17290b57cec5SDimitry Andric           if (MO.isReg()) {
17308bcb0991SDimitry Andric             Register Reg = MO.getReg();
17310b57cec5SDimitry Andric             if (MO.isDef() && isNAPhysCopy(Reg)) {
17320b57cec5SDimitry Andric               const auto &Def = NAPhysToVirtMIs.find(Reg);
17330b57cec5SDimitry Andric               if (Def != NAPhysToVirtMIs.end()) {
17340b57cec5SDimitry Andric                 // A new definition of the non-allocatable physical register
17350b57cec5SDimitry Andric                 // invalidates previous copies.
17360b57cec5SDimitry Andric                 LLVM_DEBUG(dbgs()
17370b57cec5SDimitry Andric                            << "NAPhysCopy: invalidating because of " << *MI);
17380b57cec5SDimitry Andric                 NAPhysToVirtMIs.erase(Def);
17390b57cec5SDimitry Andric               }
17400b57cec5SDimitry Andric             }
17410b57cec5SDimitry Andric           } else if (MO.isRegMask()) {
17420b57cec5SDimitry Andric             const uint32_t *RegMask = MO.getRegMask();
17430b57cec5SDimitry Andric             for (auto &RegMI : NAPhysToVirtMIs) {
1744e8d8bef9SDimitry Andric               Register Def = RegMI.first;
17450b57cec5SDimitry Andric               if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
17460b57cec5SDimitry Andric                 LLVM_DEBUG(dbgs()
17470b57cec5SDimitry Andric                            << "NAPhysCopy: invalidating because of " << *MI);
17480b57cec5SDimitry Andric                 NAPhysToVirtMIs.erase(Def);
17490b57cec5SDimitry Andric               }
17500b57cec5SDimitry Andric             }
17510b57cec5SDimitry Andric           }
17520b57cec5SDimitry Andric         }
17530b57cec5SDimitry Andric       }
17540b57cec5SDimitry Andric 
17550b57cec5SDimitry Andric       if (MI->isImplicitDef() || MI->isKill())
17560b57cec5SDimitry Andric         continue;
17570b57cec5SDimitry Andric 
17580b57cec5SDimitry Andric       if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
17590b57cec5SDimitry Andric         // Blow away all non-allocatable physical registers knowledge since we
17600b57cec5SDimitry Andric         // don't know what's correct anymore.
17610b57cec5SDimitry Andric         //
17620b57cec5SDimitry Andric         // FIXME: handle explicit asm clobbers.
17630b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
17640b57cec5SDimitry Andric                           << *MI);
17650b57cec5SDimitry Andric         NAPhysToVirtMIs.clear();
17660b57cec5SDimitry Andric       }
17670b57cec5SDimitry Andric 
17680b57cec5SDimitry Andric       if ((isUncoalescableCopy(*MI) &&
17690b57cec5SDimitry Andric            optimizeUncoalescableCopy(*MI, LocalMIs)) ||
17700b57cec5SDimitry Andric           (MI->isCompare() && optimizeCmpInstr(*MI)) ||
17710b57cec5SDimitry Andric           (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
17720b57cec5SDimitry Andric         // MI is deleted.
17730b57cec5SDimitry Andric         LocalMIs.erase(MI);
17740b57cec5SDimitry Andric         Changed = true;
17750b57cec5SDimitry Andric         continue;
17760b57cec5SDimitry Andric       }
17770b57cec5SDimitry Andric 
17780b57cec5SDimitry Andric       if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
17790b57cec5SDimitry Andric         Changed = true;
17800b57cec5SDimitry Andric         continue;
17810b57cec5SDimitry Andric       }
17820b57cec5SDimitry Andric 
17830b57cec5SDimitry Andric       if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
17840b57cec5SDimitry Andric         // MI is just rewritten.
17850b57cec5SDimitry Andric         Changed = true;
17860b57cec5SDimitry Andric         continue;
17870b57cec5SDimitry Andric       }
17880b57cec5SDimitry Andric 
17895f757f3fSDimitry Andric       if (MI->isCopy() && (foldRedundantCopy(*MI) ||
17900b57cec5SDimitry Andric                            foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
17910b57cec5SDimitry Andric         LocalMIs.erase(MI);
17925ffd83dbSDimitry Andric         LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
17930b57cec5SDimitry Andric         MI->eraseFromParent();
17940b57cec5SDimitry Andric         Changed = true;
17950b57cec5SDimitry Andric         continue;
17960b57cec5SDimitry Andric       }
17970b57cec5SDimitry Andric 
17980b57cec5SDimitry Andric       if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
17990b57cec5SDimitry Andric         SeenMoveImm = true;
18000b57cec5SDimitry Andric       } else {
18010b57cec5SDimitry Andric         Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
18020b57cec5SDimitry Andric         // optimizeExtInstr might have created new instructions after MI
18030b57cec5SDimitry Andric         // and before the already incremented MII. Adjust MII so that the
18040b57cec5SDimitry Andric         // next iteration sees the new instructions.
18050b57cec5SDimitry Andric         MII = MI;
18060b57cec5SDimitry Andric         ++MII;
18075f757f3fSDimitry Andric         if (SeenMoveImm) {
18085f757f3fSDimitry Andric           bool Deleted;
18095f757f3fSDimitry Andric           Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
18105f757f3fSDimitry Andric           if (Deleted) {
18115f757f3fSDimitry Andric             LocalMIs.erase(MI);
18125f757f3fSDimitry Andric             continue;
18135f757f3fSDimitry Andric           }
18145f757f3fSDimitry Andric         }
18150b57cec5SDimitry Andric       }
18160b57cec5SDimitry Andric 
18170b57cec5SDimitry Andric       // Check whether MI is a load candidate for folding into a later
18180b57cec5SDimitry Andric       // instruction. If MI is not a candidate, check whether we can fold an
18190b57cec5SDimitry Andric       // earlier load into MI.
18200b57cec5SDimitry Andric       if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
18210b57cec5SDimitry Andric           !FoldAsLoadDefCandidates.empty()) {
18220b57cec5SDimitry Andric 
18230b57cec5SDimitry Andric         // We visit each operand even after successfully folding a previous
18240b57cec5SDimitry Andric         // one.  This allows us to fold multiple loads into a single
18250b57cec5SDimitry Andric         // instruction.  We do assume that optimizeLoadInstr doesn't insert
18260b57cec5SDimitry Andric         // foldable uses earlier in the argument list.  Since we don't restart
18270b57cec5SDimitry Andric         // iteration, we'd miss such cases.
18280b57cec5SDimitry Andric         const MCInstrDesc &MIDesc = MI->getDesc();
18290b57cec5SDimitry Andric         for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
18300b57cec5SDimitry Andric              ++i) {
18310b57cec5SDimitry Andric           const MachineOperand &MOp = MI->getOperand(i);
18320b57cec5SDimitry Andric           if (!MOp.isReg())
18330b57cec5SDimitry Andric             continue;
1834e8d8bef9SDimitry Andric           Register FoldAsLoadDefReg = MOp.getReg();
18350b57cec5SDimitry Andric           if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
18360b57cec5SDimitry Andric             // We need to fold load after optimizeCmpInstr, since
18370b57cec5SDimitry Andric             // optimizeCmpInstr can enable folding by converting SUB to CMP.
18380b57cec5SDimitry Andric             // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
18390b57cec5SDimitry Andric             // we need it for markUsesInDebugValueAsUndef().
1840e8d8bef9SDimitry Andric             Register FoldedReg = FoldAsLoadDefReg;
18410b57cec5SDimitry Andric             MachineInstr *DefMI = nullptr;
18420b57cec5SDimitry Andric             if (MachineInstr *FoldMI =
18430b57cec5SDimitry Andric                     TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
18440b57cec5SDimitry Andric               // Update LocalMIs since we replaced MI with FoldMI and deleted
18450b57cec5SDimitry Andric               // DefMI.
18460b57cec5SDimitry Andric               LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
18470b57cec5SDimitry Andric               LLVM_DEBUG(dbgs() << "     With: " << *FoldMI);
18480b57cec5SDimitry Andric               LocalMIs.erase(MI);
18490b57cec5SDimitry Andric               LocalMIs.erase(DefMI);
18500b57cec5SDimitry Andric               LocalMIs.insert(FoldMI);
18515ffd83dbSDimitry Andric               // Update the call site info.
18525ffd83dbSDimitry Andric               if (MI->shouldUpdateCallSiteInfo())
18538bcb0991SDimitry Andric                 MI->getMF()->moveCallSiteInfo(MI, FoldMI);
18540b57cec5SDimitry Andric               MI->eraseFromParent();
18550b57cec5SDimitry Andric               DefMI->eraseFromParent();
18560b57cec5SDimitry Andric               MRI->markUsesInDebugValueAsUndef(FoldedReg);
18570b57cec5SDimitry Andric               FoldAsLoadDefCandidates.erase(FoldedReg);
18580b57cec5SDimitry Andric               ++NumLoadFold;
18590b57cec5SDimitry Andric 
18600b57cec5SDimitry Andric               // MI is replaced with FoldMI so we can continue trying to fold
18610b57cec5SDimitry Andric               Changed = true;
18620b57cec5SDimitry Andric               MI = FoldMI;
18630b57cec5SDimitry Andric             }
18640b57cec5SDimitry Andric           }
18650b57cec5SDimitry Andric         }
18660b57cec5SDimitry Andric       }
18670b57cec5SDimitry Andric 
18680b57cec5SDimitry Andric       // If we run into an instruction we can't fold across, discard
18690b57cec5SDimitry Andric       // the load candidates.  Note: We might be able to fold *into* this
18700b57cec5SDimitry Andric       // instruction, so this needs to be after the folding logic.
18710b57cec5SDimitry Andric       if (MI->isLoadFoldBarrier()) {
18720b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
18730b57cec5SDimitry Andric         FoldAsLoadDefCandidates.clear();
18740b57cec5SDimitry Andric       }
18750b57cec5SDimitry Andric     }
18760b57cec5SDimitry Andric   }
18770b57cec5SDimitry Andric 
18785f757f3fSDimitry Andric   MF.resetDelegate(this);
18790b57cec5SDimitry Andric   return Changed;
18800b57cec5SDimitry Andric }
18810b57cec5SDimitry Andric 
18820b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
18830b57cec5SDimitry Andric   assert(Def->isCopy() && "Invalid definition");
18840b57cec5SDimitry Andric   // Copy instruction are supposed to be: Def = Src.
18850b57cec5SDimitry Andric   // If someone breaks this assumption, bad things will happen everywhere.
18868bcb0991SDimitry Andric   // There may be implicit uses preventing the copy to be moved across
18878bcb0991SDimitry Andric   // some target specific register definitions
18888bcb0991SDimitry Andric   assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
18898bcb0991SDimitry Andric          "Invalid number of operands");
18908bcb0991SDimitry Andric   assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
18910b57cec5SDimitry Andric 
18920b57cec5SDimitry Andric   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
18930b57cec5SDimitry Andric     // If we look for a different subreg, it means we want a subreg of src.
18940b57cec5SDimitry Andric     // Bails as we do not support composing subregs yet.
18950b57cec5SDimitry Andric     return ValueTrackerResult();
18960b57cec5SDimitry Andric   // Otherwise, we want the whole source.
18970b57cec5SDimitry Andric   const MachineOperand &Src = Def->getOperand(1);
18980b57cec5SDimitry Andric   if (Src.isUndef())
18990b57cec5SDimitry Andric     return ValueTrackerResult();
19000b57cec5SDimitry Andric   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
19010b57cec5SDimitry Andric }
19020b57cec5SDimitry Andric 
19030b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
19040b57cec5SDimitry Andric   assert(Def->isBitcast() && "Invalid definition");
19050b57cec5SDimitry Andric 
19060b57cec5SDimitry Andric   // Bail if there are effects that a plain copy will not expose.
19070b57cec5SDimitry Andric   if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
19080b57cec5SDimitry Andric     return ValueTrackerResult();
19090b57cec5SDimitry Andric 
19100b57cec5SDimitry Andric   // Bitcasts with more than one def are not supported.
19110b57cec5SDimitry Andric   if (Def->getDesc().getNumDefs() != 1)
19120b57cec5SDimitry Andric     return ValueTrackerResult();
19130b57cec5SDimitry Andric   const MachineOperand DefOp = Def->getOperand(DefIdx);
19140b57cec5SDimitry Andric   if (DefOp.getSubReg() != DefSubReg)
19150b57cec5SDimitry Andric     // If we look for a different subreg, it means we want a subreg of the src.
19160b57cec5SDimitry Andric     // Bails as we do not support composing subregs yet.
19170b57cec5SDimitry Andric     return ValueTrackerResult();
19180b57cec5SDimitry Andric 
19190b57cec5SDimitry Andric   unsigned SrcIdx = Def->getNumOperands();
19200b57cec5SDimitry Andric   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
19210b57cec5SDimitry Andric        ++OpIdx) {
19220b57cec5SDimitry Andric     const MachineOperand &MO = Def->getOperand(OpIdx);
19230b57cec5SDimitry Andric     if (!MO.isReg() || !MO.getReg())
19240b57cec5SDimitry Andric       continue;
19250b57cec5SDimitry Andric     // Ignore dead implicit defs.
19260b57cec5SDimitry Andric     if (MO.isImplicit() && MO.isDead())
19270b57cec5SDimitry Andric       continue;
19280b57cec5SDimitry Andric     assert(!MO.isDef() && "We should have skipped all the definitions by now");
19290b57cec5SDimitry Andric     if (SrcIdx != EndOpIdx)
19300b57cec5SDimitry Andric       // Multiple sources?
19310b57cec5SDimitry Andric       return ValueTrackerResult();
19320b57cec5SDimitry Andric     SrcIdx = OpIdx;
19330b57cec5SDimitry Andric   }
19340b57cec5SDimitry Andric 
19358bcb0991SDimitry Andric   // In some rare case, Def has no input, SrcIdx is out of bound,
19368bcb0991SDimitry Andric   // getOperand(SrcIdx) will fail below.
19378bcb0991SDimitry Andric   if (SrcIdx >= Def->getNumOperands())
19388bcb0991SDimitry Andric     return ValueTrackerResult();
19398bcb0991SDimitry Andric 
19400b57cec5SDimitry Andric   // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
19410b57cec5SDimitry Andric   // will break the assumed guarantees for the upper bits.
19420b57cec5SDimitry Andric   for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
19430b57cec5SDimitry Andric     if (UseMI.isSubregToReg())
19440b57cec5SDimitry Andric       return ValueTrackerResult();
19450b57cec5SDimitry Andric   }
19460b57cec5SDimitry Andric 
19470b57cec5SDimitry Andric   const MachineOperand &Src = Def->getOperand(SrcIdx);
19480b57cec5SDimitry Andric   if (Src.isUndef())
19490b57cec5SDimitry Andric     return ValueTrackerResult();
19500b57cec5SDimitry Andric   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
19510b57cec5SDimitry Andric }
19520b57cec5SDimitry Andric 
19530b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
19540b57cec5SDimitry Andric   assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
19550b57cec5SDimitry Andric          "Invalid definition");
19560b57cec5SDimitry Andric 
19570b57cec5SDimitry Andric   if (Def->getOperand(DefIdx).getSubReg())
19580b57cec5SDimitry Andric     // If we are composing subregs, bail out.
19590b57cec5SDimitry Andric     // The case we are checking is Def.<subreg> = REG_SEQUENCE.
19600b57cec5SDimitry Andric     // This should almost never happen as the SSA property is tracked at
19610b57cec5SDimitry Andric     // the register level (as opposed to the subreg level).
19620b57cec5SDimitry Andric     // I.e.,
19630b57cec5SDimitry Andric     // Def.sub0 =
19640b57cec5SDimitry Andric     // Def.sub1 =
19650b57cec5SDimitry Andric     // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
19660b57cec5SDimitry Andric     // Def. Thus, it must not be generated.
19670b57cec5SDimitry Andric     // However, some code could theoretically generates a single
19680b57cec5SDimitry Andric     // Def.sub0 (i.e, not defining the other subregs) and we would
19690b57cec5SDimitry Andric     // have this case.
19700b57cec5SDimitry Andric     // If we can ascertain (or force) that this never happens, we could
19710b57cec5SDimitry Andric     // turn that into an assertion.
19720b57cec5SDimitry Andric     return ValueTrackerResult();
19730b57cec5SDimitry Andric 
19740b57cec5SDimitry Andric   if (!TII)
19750b57cec5SDimitry Andric     // We could handle the REG_SEQUENCE here, but we do not want to
19760b57cec5SDimitry Andric     // duplicate the code from the generic TII.
19770b57cec5SDimitry Andric     return ValueTrackerResult();
19780b57cec5SDimitry Andric 
19790b57cec5SDimitry Andric   SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
19800b57cec5SDimitry Andric   if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
19810b57cec5SDimitry Andric     return ValueTrackerResult();
19820b57cec5SDimitry Andric 
19830b57cec5SDimitry Andric   // We are looking at:
19840b57cec5SDimitry Andric   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
19850b57cec5SDimitry Andric   // Check if one of the operand defines the subreg we are interested in.
19860b57cec5SDimitry Andric   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
19870b57cec5SDimitry Andric     if (RegSeqInput.SubIdx == DefSubReg)
19880b57cec5SDimitry Andric       return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
19890b57cec5SDimitry Andric   }
19900b57cec5SDimitry Andric 
19910b57cec5SDimitry Andric   // If the subreg we are tracking is super-defined by another subreg,
19920b57cec5SDimitry Andric   // we could follow this value. However, this would require to compose
19930b57cec5SDimitry Andric   // the subreg and we do not do that for now.
19940b57cec5SDimitry Andric   return ValueTrackerResult();
19950b57cec5SDimitry Andric }
19960b57cec5SDimitry Andric 
19970b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
19980b57cec5SDimitry Andric   assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
19990b57cec5SDimitry Andric          "Invalid definition");
20000b57cec5SDimitry Andric 
20010b57cec5SDimitry Andric   if (Def->getOperand(DefIdx).getSubReg())
20020b57cec5SDimitry Andric     // If we are composing subreg, bail out.
20030b57cec5SDimitry Andric     // Same remark as getNextSourceFromRegSequence.
20040b57cec5SDimitry Andric     // I.e., this may be turned into an assert.
20050b57cec5SDimitry Andric     return ValueTrackerResult();
20060b57cec5SDimitry Andric 
20070b57cec5SDimitry Andric   if (!TII)
20080b57cec5SDimitry Andric     // We could handle the REG_SEQUENCE here, but we do not want to
20090b57cec5SDimitry Andric     // duplicate the code from the generic TII.
20100b57cec5SDimitry Andric     return ValueTrackerResult();
20110b57cec5SDimitry Andric 
20120b57cec5SDimitry Andric   RegSubRegPair BaseReg;
20130b57cec5SDimitry Andric   RegSubRegPairAndIdx InsertedReg;
20140b57cec5SDimitry Andric   if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
20150b57cec5SDimitry Andric     return ValueTrackerResult();
20160b57cec5SDimitry Andric 
20170b57cec5SDimitry Andric   // We are looking at:
20180b57cec5SDimitry Andric   // Def = INSERT_SUBREG v0, v1, sub1
20190b57cec5SDimitry Andric   // There are two cases:
20200b57cec5SDimitry Andric   // 1. DefSubReg == sub1, get v1.
20210b57cec5SDimitry Andric   // 2. DefSubReg != sub1, the value may be available through v0.
20220b57cec5SDimitry Andric 
20230b57cec5SDimitry Andric   // #1 Check if the inserted register matches the required sub index.
20240b57cec5SDimitry Andric   if (InsertedReg.SubIdx == DefSubReg) {
20250b57cec5SDimitry Andric     return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
20260b57cec5SDimitry Andric   }
20270b57cec5SDimitry Andric   // #2 Otherwise, if the sub register we are looking for is not partial
20280b57cec5SDimitry Andric   // defined by the inserted element, we can look through the main
20290b57cec5SDimitry Andric   // register (v0).
20300b57cec5SDimitry Andric   const MachineOperand &MODef = Def->getOperand(DefIdx);
20310b57cec5SDimitry Andric   // If the result register (Def) and the base register (v0) do not
20320b57cec5SDimitry Andric   // have the same register class or if we have to compose
20330b57cec5SDimitry Andric   // subregisters, bail out.
20340b57cec5SDimitry Andric   if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
20350b57cec5SDimitry Andric       BaseReg.SubReg)
20360b57cec5SDimitry Andric     return ValueTrackerResult();
20370b57cec5SDimitry Andric 
20380b57cec5SDimitry Andric   // Get the TRI and check if the inserted sub-register overlaps with the
20390b57cec5SDimitry Andric   // sub-register we are tracking.
20400b57cec5SDimitry Andric   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
20410b57cec5SDimitry Andric   if (!TRI ||
20420b57cec5SDimitry Andric       !(TRI->getSubRegIndexLaneMask(DefSubReg) &
20430b57cec5SDimitry Andric         TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
20440b57cec5SDimitry Andric     return ValueTrackerResult();
20450b57cec5SDimitry Andric   // At this point, the value is available in v0 via the same subreg
20460b57cec5SDimitry Andric   // we used for Def.
20470b57cec5SDimitry Andric   return ValueTrackerResult(BaseReg.Reg, DefSubReg);
20480b57cec5SDimitry Andric }
20490b57cec5SDimitry Andric 
20500b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
20510b57cec5SDimitry Andric   assert((Def->isExtractSubreg() ||
20520b57cec5SDimitry Andric           Def->isExtractSubregLike()) && "Invalid definition");
20530b57cec5SDimitry Andric   // We are looking at:
20540b57cec5SDimitry Andric   // Def = EXTRACT_SUBREG v0, sub0
20550b57cec5SDimitry Andric 
20560b57cec5SDimitry Andric   // Bail if we have to compose sub registers.
20570b57cec5SDimitry Andric   // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
20580b57cec5SDimitry Andric   if (DefSubReg)
20590b57cec5SDimitry Andric     return ValueTrackerResult();
20600b57cec5SDimitry Andric 
20610b57cec5SDimitry Andric   if (!TII)
20620b57cec5SDimitry Andric     // We could handle the EXTRACT_SUBREG here, but we do not want to
20630b57cec5SDimitry Andric     // duplicate the code from the generic TII.
20640b57cec5SDimitry Andric     return ValueTrackerResult();
20650b57cec5SDimitry Andric 
20660b57cec5SDimitry Andric   RegSubRegPairAndIdx ExtractSubregInputReg;
20670b57cec5SDimitry Andric   if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
20680b57cec5SDimitry Andric     return ValueTrackerResult();
20690b57cec5SDimitry Andric 
20700b57cec5SDimitry Andric   // Bail if we have to compose sub registers.
20710b57cec5SDimitry Andric   // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
20720b57cec5SDimitry Andric   if (ExtractSubregInputReg.SubReg)
20730b57cec5SDimitry Andric     return ValueTrackerResult();
20740b57cec5SDimitry Andric   // Otherwise, the value is available in the v0.sub0.
20750b57cec5SDimitry Andric   return ValueTrackerResult(ExtractSubregInputReg.Reg,
20760b57cec5SDimitry Andric                             ExtractSubregInputReg.SubIdx);
20770b57cec5SDimitry Andric }
20780b57cec5SDimitry Andric 
20790b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
20800b57cec5SDimitry Andric   assert(Def->isSubregToReg() && "Invalid definition");
20810b57cec5SDimitry Andric   // We are looking at:
20820b57cec5SDimitry Andric   // Def = SUBREG_TO_REG Imm, v0, sub0
20830b57cec5SDimitry Andric 
20840b57cec5SDimitry Andric   // Bail if we have to compose sub registers.
20850b57cec5SDimitry Andric   // If DefSubReg != sub0, we would have to check that all the bits
20860b57cec5SDimitry Andric   // we track are included in sub0 and if yes, we would have to
20870b57cec5SDimitry Andric   // determine the right subreg in v0.
20880b57cec5SDimitry Andric   if (DefSubReg != Def->getOperand(3).getImm())
20890b57cec5SDimitry Andric     return ValueTrackerResult();
20900b57cec5SDimitry Andric   // Bail if we have to compose sub registers.
20910b57cec5SDimitry Andric   // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
20920b57cec5SDimitry Andric   if (Def->getOperand(2).getSubReg())
20930b57cec5SDimitry Andric     return ValueTrackerResult();
20940b57cec5SDimitry Andric 
20950b57cec5SDimitry Andric   return ValueTrackerResult(Def->getOperand(2).getReg(),
20960b57cec5SDimitry Andric                             Def->getOperand(3).getImm());
20970b57cec5SDimitry Andric }
20980b57cec5SDimitry Andric 
20990b57cec5SDimitry Andric /// Explore each PHI incoming operand and return its sources.
21000b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
21010b57cec5SDimitry Andric   assert(Def->isPHI() && "Invalid definition");
21020b57cec5SDimitry Andric   ValueTrackerResult Res;
21030b57cec5SDimitry Andric 
21040b57cec5SDimitry Andric   // If we look for a different subreg, bail as we do not support composing
21050b57cec5SDimitry Andric   // subregs yet.
21060b57cec5SDimitry Andric   if (Def->getOperand(0).getSubReg() != DefSubReg)
21070b57cec5SDimitry Andric     return ValueTrackerResult();
21080b57cec5SDimitry Andric 
21090b57cec5SDimitry Andric   // Return all register sources for PHI instructions.
21100b57cec5SDimitry Andric   for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
21110b57cec5SDimitry Andric     const MachineOperand &MO = Def->getOperand(i);
21120b57cec5SDimitry Andric     assert(MO.isReg() && "Invalid PHI instruction");
21130b57cec5SDimitry Andric     // We have no code to deal with undef operands. They shouldn't happen in
21140b57cec5SDimitry Andric     // normal programs anyway.
21150b57cec5SDimitry Andric     if (MO.isUndef())
21160b57cec5SDimitry Andric       return ValueTrackerResult();
21170b57cec5SDimitry Andric     Res.addSource(MO.getReg(), MO.getSubReg());
21180b57cec5SDimitry Andric   }
21190b57cec5SDimitry Andric 
21200b57cec5SDimitry Andric   return Res;
21210b57cec5SDimitry Andric }
21220b57cec5SDimitry Andric 
21230b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceImpl() {
21240b57cec5SDimitry Andric   assert(Def && "This method needs a valid definition");
21250b57cec5SDimitry Andric 
21260b57cec5SDimitry Andric   assert(((Def->getOperand(DefIdx).isDef() &&
21270b57cec5SDimitry Andric            (DefIdx < Def->getDesc().getNumDefs() ||
21280b57cec5SDimitry Andric             Def->getDesc().isVariadic())) ||
21290b57cec5SDimitry Andric           Def->getOperand(DefIdx).isImplicit()) &&
21300b57cec5SDimitry Andric          "Invalid DefIdx");
21310b57cec5SDimitry Andric   if (Def->isCopy())
21320b57cec5SDimitry Andric     return getNextSourceFromCopy();
21330b57cec5SDimitry Andric   if (Def->isBitcast())
21340b57cec5SDimitry Andric     return getNextSourceFromBitcast();
21350b57cec5SDimitry Andric   // All the remaining cases involve "complex" instructions.
21360b57cec5SDimitry Andric   // Bail if we did not ask for the advanced tracking.
21370b57cec5SDimitry Andric   if (DisableAdvCopyOpt)
21380b57cec5SDimitry Andric     return ValueTrackerResult();
21390b57cec5SDimitry Andric   if (Def->isRegSequence() || Def->isRegSequenceLike())
21400b57cec5SDimitry Andric     return getNextSourceFromRegSequence();
21410b57cec5SDimitry Andric   if (Def->isInsertSubreg() || Def->isInsertSubregLike())
21420b57cec5SDimitry Andric     return getNextSourceFromInsertSubreg();
21430b57cec5SDimitry Andric   if (Def->isExtractSubreg() || Def->isExtractSubregLike())
21440b57cec5SDimitry Andric     return getNextSourceFromExtractSubreg();
21450b57cec5SDimitry Andric   if (Def->isSubregToReg())
21460b57cec5SDimitry Andric     return getNextSourceFromSubregToReg();
21470b57cec5SDimitry Andric   if (Def->isPHI())
21480b57cec5SDimitry Andric     return getNextSourceFromPHI();
21490b57cec5SDimitry Andric   return ValueTrackerResult();
21500b57cec5SDimitry Andric }
21510b57cec5SDimitry Andric 
21520b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSource() {
21530b57cec5SDimitry Andric   // If we reach a point where we cannot move up in the use-def chain,
21540b57cec5SDimitry Andric   // there is nothing we can get.
21550b57cec5SDimitry Andric   if (!Def)
21560b57cec5SDimitry Andric     return ValueTrackerResult();
21570b57cec5SDimitry Andric 
21580b57cec5SDimitry Andric   ValueTrackerResult Res = getNextSourceImpl();
21590b57cec5SDimitry Andric   if (Res.isValid()) {
21600b57cec5SDimitry Andric     // Update definition, definition index, and subregister for the
21610b57cec5SDimitry Andric     // next call of getNextSource.
21620b57cec5SDimitry Andric     // Update the current register.
21630b57cec5SDimitry Andric     bool OneRegSrc = Res.getNumSources() == 1;
21640b57cec5SDimitry Andric     if (OneRegSrc)
21650b57cec5SDimitry Andric       Reg = Res.getSrcReg(0);
21660b57cec5SDimitry Andric     // Update the result before moving up in the use-def chain
21670b57cec5SDimitry Andric     // with the instruction containing the last found sources.
21680b57cec5SDimitry Andric     Res.setInst(Def);
21690b57cec5SDimitry Andric 
21700b57cec5SDimitry Andric     // If we can still move up in the use-def chain, move to the next
21710b57cec5SDimitry Andric     // definition.
2172bdd1243dSDimitry Andric     if (!Reg.isPhysical() && OneRegSrc) {
21730b57cec5SDimitry Andric       MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
21740b57cec5SDimitry Andric       if (DI != MRI.def_end()) {
21750b57cec5SDimitry Andric         Def = DI->getParent();
21760b57cec5SDimitry Andric         DefIdx = DI.getOperandNo();
21770b57cec5SDimitry Andric         DefSubReg = Res.getSrcSubReg(0);
21780b57cec5SDimitry Andric       } else {
21790b57cec5SDimitry Andric         Def = nullptr;
21800b57cec5SDimitry Andric       }
21810b57cec5SDimitry Andric       return Res;
21820b57cec5SDimitry Andric     }
21830b57cec5SDimitry Andric   }
21840b57cec5SDimitry Andric   // If we end up here, this means we will not be able to find another source
21850b57cec5SDimitry Andric   // for the next iteration. Make sure any new call to getNextSource bails out
21860b57cec5SDimitry Andric   // early by cutting the use-def chain.
21870b57cec5SDimitry Andric   Def = nullptr;
21880b57cec5SDimitry Andric   return Res;
21890b57cec5SDimitry Andric }
2190