10b57cec5SDimitry Andric //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Perform peephole optimizations on the machine code: 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // - Optimize Extensions 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // Optimization of sign / zero extension instructions. It may be extended to 140b57cec5SDimitry Andric // handle other instructions with similar properties. 150b57cec5SDimitry Andric // 160b57cec5SDimitry Andric // On some targets, some instructions, e.g. X86 sign / zero extension, may 170b57cec5SDimitry Andric // leave the source value in the lower part of the result. This optimization 180b57cec5SDimitry Andric // will replace some uses of the pre-extension value with uses of the 190b57cec5SDimitry Andric // sub-register of the results. 200b57cec5SDimitry Andric // 210b57cec5SDimitry Andric // - Optimize Comparisons 220b57cec5SDimitry Andric // 230b57cec5SDimitry Andric // Optimization of comparison instructions. For instance, in this code: 240b57cec5SDimitry Andric // 250b57cec5SDimitry Andric // sub r1, 1 260b57cec5SDimitry Andric // cmp r1, 0 270b57cec5SDimitry Andric // bz L1 280b57cec5SDimitry Andric // 290b57cec5SDimitry Andric // If the "sub" instruction all ready sets (or could be modified to set) the 300b57cec5SDimitry Andric // same flag that the "cmp" instruction sets and that "bz" uses, then we can 310b57cec5SDimitry Andric // eliminate the "cmp" instruction. 320b57cec5SDimitry Andric // 330b57cec5SDimitry Andric // Another instance, in this code: 340b57cec5SDimitry Andric // 350b57cec5SDimitry Andric // sub r1, r3 | sub r1, imm 360b57cec5SDimitry Andric // cmp r3, r1 or cmp r1, r3 | cmp r1, imm 370b57cec5SDimitry Andric // bge L1 380b57cec5SDimitry Andric // 390b57cec5SDimitry Andric // If the branch instruction can use flag from "sub", then we can replace 400b57cec5SDimitry Andric // "sub" with "subs" and eliminate the "cmp" instruction. 410b57cec5SDimitry Andric // 420b57cec5SDimitry Andric // - Optimize Loads: 430b57cec5SDimitry Andric // 440b57cec5SDimitry Andric // Loads that can be folded into a later instruction. A load is foldable 450b57cec5SDimitry Andric // if it loads to virtual registers and the virtual register defined has 460b57cec5SDimitry Andric // a single use. 470b57cec5SDimitry Andric // 480b57cec5SDimitry Andric // - Optimize Copies and Bitcast (more generally, target specific copies): 490b57cec5SDimitry Andric // 500b57cec5SDimitry Andric // Rewrite copies and bitcasts to avoid cross register bank copies 510b57cec5SDimitry Andric // when possible. 520b57cec5SDimitry Andric // E.g., Consider the following example, where capital and lower 530b57cec5SDimitry Andric // letters denote different register file: 540b57cec5SDimitry Andric // b = copy A <-- cross-bank copy 550b57cec5SDimitry Andric // C = copy b <-- cross-bank copy 560b57cec5SDimitry Andric // => 570b57cec5SDimitry Andric // b = copy A <-- cross-bank copy 580b57cec5SDimitry Andric // C = copy A <-- same-bank copy 590b57cec5SDimitry Andric // 600b57cec5SDimitry Andric // E.g., for bitcast: 610b57cec5SDimitry Andric // b = bitcast A <-- cross-bank copy 620b57cec5SDimitry Andric // C = bitcast b <-- cross-bank copy 630b57cec5SDimitry Andric // => 640b57cec5SDimitry Andric // b = bitcast A <-- cross-bank copy 650b57cec5SDimitry Andric // C = copy A <-- same-bank copy 660b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 690b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 700b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h" 710b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 720b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 730b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 740b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 750b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 760b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 770b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 780b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 790b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 800b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 810b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 820b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 830b57cec5SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h" 840b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 850b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 86480093f4SDimitry Andric #include "llvm/InitializePasses.h" 870b57cec5SDimitry Andric #include "llvm/MC/LaneBitmask.h" 880b57cec5SDimitry Andric #include "llvm/MC/MCInstrDesc.h" 890b57cec5SDimitry Andric #include "llvm/Pass.h" 900b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 910b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 920b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 930b57cec5SDimitry Andric #include <cassert> 940b57cec5SDimitry Andric #include <cstdint> 950b57cec5SDimitry Andric #include <memory> 960b57cec5SDimitry Andric #include <utility> 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric using namespace llvm; 990b57cec5SDimitry Andric using RegSubRegPair = TargetInstrInfo::RegSubRegPair; 1000b57cec5SDimitry Andric using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx; 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric #define DEBUG_TYPE "peephole-opt" 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric // Optimize Extensions 1050b57cec5SDimitry Andric static cl::opt<bool> 1060b57cec5SDimitry Andric Aggressive("aggressive-ext-opt", cl::Hidden, 1070b57cec5SDimitry Andric cl::desc("Aggressive extension optimization")); 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric static cl::opt<bool> 1100b57cec5SDimitry Andric DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 1110b57cec5SDimitry Andric cl::desc("Disable the peephole optimizer")); 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric /// Specifiy whether or not the value tracking looks through 1140b57cec5SDimitry Andric /// complex instructions. When this is true, the value tracker 1150b57cec5SDimitry Andric /// bails on everything that is not a copy or a bitcast. 1160b57cec5SDimitry Andric static cl::opt<bool> 1170b57cec5SDimitry Andric DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 1180b57cec5SDimitry Andric cl::desc("Disable advanced copy optimization")); 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric static cl::opt<bool> DisableNAPhysCopyOpt( 1210b57cec5SDimitry Andric "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), 1220b57cec5SDimitry Andric cl::desc("Disable non-allocatable physical register copy optimization")); 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric // Limit the number of PHI instructions to process 1250b57cec5SDimitry Andric // in PeepholeOptimizer::getNextSource. 1260b57cec5SDimitry Andric static cl::opt<unsigned> RewritePHILimit( 1270b57cec5SDimitry Andric "rewrite-phi-limit", cl::Hidden, cl::init(10), 1280b57cec5SDimitry Andric cl::desc("Limit the length of PHI chains to lookup")); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric // Limit the length of recurrence chain when evaluating the benefit of 1310b57cec5SDimitry Andric // commuting operands. 1320b57cec5SDimitry Andric static cl::opt<unsigned> MaxRecurrenceChain( 1330b57cec5SDimitry Andric "recurrence-chain-limit", cl::Hidden, cl::init(3), 1340b57cec5SDimitry Andric cl::desc("Maximum length of recurrence chain when evaluating the benefit " 1350b57cec5SDimitry Andric "of commuting operands")); 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric STATISTIC(NumReuse, "Number of extension results reused"); 1390b57cec5SDimitry Andric STATISTIC(NumCmps, "Number of compares eliminated"); 1400b57cec5SDimitry Andric STATISTIC(NumImmFold, "Number of move immediate folded"); 1410b57cec5SDimitry Andric STATISTIC(NumLoadFold, "Number of loads folded"); 1420b57cec5SDimitry Andric STATISTIC(NumSelects, "Number of selects optimized"); 1430b57cec5SDimitry Andric STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 1440b57cec5SDimitry Andric STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 1450b57cec5SDimitry Andric STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric namespace { 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric class ValueTrackerResult; 1500b57cec5SDimitry Andric class RecurrenceInstr; 1510b57cec5SDimitry Andric 1525f757f3fSDimitry Andric class PeepholeOptimizer : public MachineFunctionPass, 1535f757f3fSDimitry Andric private MachineFunction::Delegate { 15406c3fb27SDimitry Andric const TargetInstrInfo *TII = nullptr; 15506c3fb27SDimitry Andric const TargetRegisterInfo *TRI = nullptr; 15606c3fb27SDimitry Andric MachineRegisterInfo *MRI = nullptr; 15706c3fb27SDimitry Andric MachineDominatorTree *DT = nullptr; // Machine dominator tree 15806c3fb27SDimitry Andric MachineLoopInfo *MLI = nullptr; 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric public: 1610b57cec5SDimitry Andric static char ID; // Pass identification 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric PeepholeOptimizer() : MachineFunctionPass(ID) { 1640b57cec5SDimitry Andric initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1700b57cec5SDimitry Andric AU.setPreservesCFG(); 1710b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 172*0fca6ea1SDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>(); 173*0fca6ea1SDimitry Andric AU.addPreserved<MachineLoopInfoWrapperPass>(); 1740b57cec5SDimitry Andric if (Aggressive) { 175*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>(); 176*0fca6ea1SDimitry Andric AU.addPreserved<MachineDominatorTreeWrapperPass>(); 1770b57cec5SDimitry Andric } 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric 180e8d8bef9SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 181e8d8bef9SDimitry Andric return MachineFunctionProperties() 182e8d8bef9SDimitry Andric .set(MachineFunctionProperties::Property::IsSSA); 183e8d8bef9SDimitry Andric } 184e8d8bef9SDimitry Andric 1850b57cec5SDimitry Andric /// Track Def -> Use info used for rewriting copies. 1860b57cec5SDimitry Andric using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>; 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric /// Sequence of instructions that formulate recurrence cycle. 1890b57cec5SDimitry Andric using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>; 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric private: 1920b57cec5SDimitry Andric bool optimizeCmpInstr(MachineInstr &MI); 1930b57cec5SDimitry Andric bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, 1940b57cec5SDimitry Andric SmallPtrSetImpl<MachineInstr*> &LocalMIs); 1950b57cec5SDimitry Andric bool optimizeSelect(MachineInstr &MI, 1960b57cec5SDimitry Andric SmallPtrSetImpl<MachineInstr *> &LocalMIs); 1970b57cec5SDimitry Andric bool optimizeCondBranch(MachineInstr &MI); 1980b57cec5SDimitry Andric bool optimizeCoalescableCopy(MachineInstr &MI); 1990b57cec5SDimitry Andric bool optimizeUncoalescableCopy(MachineInstr &MI, 2000b57cec5SDimitry Andric SmallPtrSetImpl<MachineInstr *> &LocalMIs); 2010b57cec5SDimitry Andric bool optimizeRecurrence(MachineInstr &PHI); 2020b57cec5SDimitry Andric bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap); 203e8d8bef9SDimitry Andric bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, 204e8d8bef9SDimitry Andric DenseMap<Register, MachineInstr *> &ImmDefMIs); 205e8d8bef9SDimitry Andric bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, 2065f757f3fSDimitry Andric DenseMap<Register, MachineInstr *> &ImmDefMIs, 2075f757f3fSDimitry Andric bool &Deleted); 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric /// Finds recurrence cycles, but only ones that formulated around 2100b57cec5SDimitry Andric /// a def operand and a use operand that are tied. If there is a use 2110b57cec5SDimitry Andric /// operand commutable with the tied use operand, find recurrence cycle 2120b57cec5SDimitry Andric /// along that operand as well. 213e8d8bef9SDimitry Andric bool findTargetRecurrence(Register Reg, 214e8d8bef9SDimitry Andric const SmallSet<Register, 2> &TargetReg, 2150b57cec5SDimitry Andric RecurrenceCycle &RC); 2160b57cec5SDimitry Andric 21781ad6265SDimitry Andric /// If copy instruction \p MI is a virtual register copy or a copy of a 21881ad6265SDimitry Andric /// constant physical register to a virtual register, track it in the 2195f757f3fSDimitry Andric /// set CopySrcMIs. If this virtual register was previously seen as a 220e8d8bef9SDimitry Andric /// copy, replace the uses of this copy with the previously seen copy's 221e8d8bef9SDimitry Andric /// destination register. 2225f757f3fSDimitry Andric bool foldRedundantCopy(MachineInstr &MI); 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric /// Is the register \p Reg a non-allocatable physical register? 225e8d8bef9SDimitry Andric bool isNAPhysCopy(Register Reg); 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric /// If copy instruction \p MI is a non-allocatable virtual<->physical 2280b57cec5SDimitry Andric /// register copy, track it in the \p NAPhysToVirtMIs map. If this 2290b57cec5SDimitry Andric /// non-allocatable physical register was previously copied to a virtual 2300b57cec5SDimitry Andric /// registered and hasn't been clobbered, the virt->phys copy can be 2310b57cec5SDimitry Andric /// deleted. 232e8d8bef9SDimitry Andric bool foldRedundantNAPhysCopy( 233e8d8bef9SDimitry Andric MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs); 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric bool isLoadFoldable(MachineInstr &MI, 236e8d8bef9SDimitry Andric SmallSet<Register, 16> &FoldAsLoadDefCandidates); 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric /// Check whether \p MI is understood by the register coalescer 2390b57cec5SDimitry Andric /// but may require some rewriting. 2400b57cec5SDimitry Andric bool isCoalescableCopy(const MachineInstr &MI) { 2410b57cec5SDimitry Andric // SubregToRegs are not interesting, because they are already register 2420b57cec5SDimitry Andric // coalescer friendly. 2430b57cec5SDimitry Andric return MI.isCopy() || (!DisableAdvCopyOpt && 2440b57cec5SDimitry Andric (MI.isRegSequence() || MI.isInsertSubreg() || 2450b57cec5SDimitry Andric MI.isExtractSubreg())); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric /// Check whether \p MI is a copy like instruction that is 2490b57cec5SDimitry Andric /// not recognized by the register coalescer. 2500b57cec5SDimitry Andric bool isUncoalescableCopy(const MachineInstr &MI) { 2510b57cec5SDimitry Andric return MI.isBitcast() || 2520b57cec5SDimitry Andric (!DisableAdvCopyOpt && 2530b57cec5SDimitry Andric (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 2540b57cec5SDimitry Andric MI.isExtractSubregLike())); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric MachineInstr &rewriteSource(MachineInstr &CopyLike, 2580b57cec5SDimitry Andric RegSubRegPair Def, RewriteMapTy &RewriteMap); 2595f757f3fSDimitry Andric 2605f757f3fSDimitry Andric // Set of copies to virtual registers keyed by source register. Never 2615f757f3fSDimitry Andric // holds any physreg which requires def tracking. 2625f757f3fSDimitry Andric DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs; 2635f757f3fSDimitry Andric 2645f757f3fSDimitry Andric // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs. 2655f757f3fSDimitry Andric void MF_HandleInsertion(MachineInstr &MI) override { 2665f757f3fSDimitry Andric return; 2675f757f3fSDimitry Andric } 2685f757f3fSDimitry Andric 2695f757f3fSDimitry Andric bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) { 2705f757f3fSDimitry Andric if (!MI.isCopy()) 2715f757f3fSDimitry Andric return false; 2725f757f3fSDimitry Andric 2735f757f3fSDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 2745f757f3fSDimitry Andric unsigned SrcSubReg = MI.getOperand(1).getSubReg(); 2755f757f3fSDimitry Andric if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg)) 2765f757f3fSDimitry Andric return false; 2775f757f3fSDimitry Andric 2785f757f3fSDimitry Andric SrcPair = RegSubRegPair(SrcReg, SrcSubReg); 2795f757f3fSDimitry Andric return true; 2805f757f3fSDimitry Andric } 2815f757f3fSDimitry Andric 2825f757f3fSDimitry Andric // If a COPY instruction is to be deleted or changed, we should also remove 2835f757f3fSDimitry Andric // it from CopySrcMIs. 2845f757f3fSDimitry Andric void deleteChangedCopy(MachineInstr &MI) { 2855f757f3fSDimitry Andric RegSubRegPair SrcPair; 2865f757f3fSDimitry Andric if (!getCopySrc(MI, SrcPair)) 2875f757f3fSDimitry Andric return; 2885f757f3fSDimitry Andric 2895f757f3fSDimitry Andric auto It = CopySrcMIs.find(SrcPair); 2905f757f3fSDimitry Andric if (It != CopySrcMIs.end() && It->second == &MI) 2915f757f3fSDimitry Andric CopySrcMIs.erase(It); 2925f757f3fSDimitry Andric } 2935f757f3fSDimitry Andric 2945f757f3fSDimitry Andric void MF_HandleRemoval(MachineInstr &MI) override { 2955f757f3fSDimitry Andric deleteChangedCopy(MI); 2965f757f3fSDimitry Andric } 2975f757f3fSDimitry Andric 2985f757f3fSDimitry Andric void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override 2995f757f3fSDimitry Andric { 3005f757f3fSDimitry Andric deleteChangedCopy(MI); 3015f757f3fSDimitry Andric } 3020b57cec5SDimitry Andric }; 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric /// Helper class to hold instructions that are inside recurrence cycles. 3050b57cec5SDimitry Andric /// The recurrence cycle is formulated around 1) a def operand and its 3060b57cec5SDimitry Andric /// tied use operand, or 2) a def operand and a use operand that is commutable 3070b57cec5SDimitry Andric /// with another use operand which is tied to the def operand. In the latter 3080b57cec5SDimitry Andric /// case, index of the tied use operand and the commutable use operand are 3090b57cec5SDimitry Andric /// maintained with CommutePair. 3100b57cec5SDimitry Andric class RecurrenceInstr { 3110b57cec5SDimitry Andric public: 3120b57cec5SDimitry Andric using IndexPair = std::pair<unsigned, unsigned>; 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric RecurrenceInstr(MachineInstr *MI) : MI(MI) {} 3150b57cec5SDimitry Andric RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2) 3160b57cec5SDimitry Andric : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {} 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric MachineInstr *getMI() const { return MI; } 319bdd1243dSDimitry Andric std::optional<IndexPair> getCommutePair() const { return CommutePair; } 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric private: 3220b57cec5SDimitry Andric MachineInstr *MI; 323bdd1243dSDimitry Andric std::optional<IndexPair> CommutePair; 3240b57cec5SDimitry Andric }; 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric /// Helper class to hold a reply for ValueTracker queries. 3270b57cec5SDimitry Andric /// Contains the returned sources for a given search and the instructions 3280b57cec5SDimitry Andric /// where the sources were tracked from. 3290b57cec5SDimitry Andric class ValueTrackerResult { 3300b57cec5SDimitry Andric private: 3310b57cec5SDimitry Andric /// Track all sources found by one ValueTracker query. 3320b57cec5SDimitry Andric SmallVector<RegSubRegPair, 2> RegSrcs; 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric /// Instruction using the sources in 'RegSrcs'. 3350b57cec5SDimitry Andric const MachineInstr *Inst = nullptr; 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric public: 3380b57cec5SDimitry Andric ValueTrackerResult() = default; 3390b57cec5SDimitry Andric 340e8d8bef9SDimitry Andric ValueTrackerResult(Register Reg, unsigned SubReg) { 3410b57cec5SDimitry Andric addSource(Reg, SubReg); 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric bool isValid() const { return getNumSources() > 0; } 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric void setInst(const MachineInstr *I) { Inst = I; } 3470b57cec5SDimitry Andric const MachineInstr *getInst() const { return Inst; } 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric void clear() { 3500b57cec5SDimitry Andric RegSrcs.clear(); 3510b57cec5SDimitry Andric Inst = nullptr; 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric 354e8d8bef9SDimitry Andric void addSource(Register SrcReg, unsigned SrcSubReg) { 3550b57cec5SDimitry Andric RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg)); 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 358e8d8bef9SDimitry Andric void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) { 3590b57cec5SDimitry Andric assert(Idx < getNumSources() && "Reg pair source out of index"); 3600b57cec5SDimitry Andric RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg); 3610b57cec5SDimitry Andric } 3620b57cec5SDimitry Andric 3630b57cec5SDimitry Andric int getNumSources() const { return RegSrcs.size(); } 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric RegSubRegPair getSrc(int Idx) const { 3660b57cec5SDimitry Andric return RegSrcs[Idx]; 3670b57cec5SDimitry Andric } 3680b57cec5SDimitry Andric 369e8d8bef9SDimitry Andric Register getSrcReg(int Idx) const { 3700b57cec5SDimitry Andric assert(Idx < getNumSources() && "Reg source out of index"); 3710b57cec5SDimitry Andric return RegSrcs[Idx].Reg; 3720b57cec5SDimitry Andric } 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric unsigned getSrcSubReg(int Idx) const { 3750b57cec5SDimitry Andric assert(Idx < getNumSources() && "SubReg source out of index"); 3760b57cec5SDimitry Andric return RegSrcs[Idx].SubReg; 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric 379e8d8bef9SDimitry Andric bool operator==(const ValueTrackerResult &Other) const { 3800b57cec5SDimitry Andric if (Other.getInst() != getInst()) 3810b57cec5SDimitry Andric return false; 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric if (Other.getNumSources() != getNumSources()) 3840b57cec5SDimitry Andric return false; 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric for (int i = 0, e = Other.getNumSources(); i != e; ++i) 3870b57cec5SDimitry Andric if (Other.getSrcReg(i) != getSrcReg(i) || 3880b57cec5SDimitry Andric Other.getSrcSubReg(i) != getSrcSubReg(i)) 3890b57cec5SDimitry Andric return false; 3900b57cec5SDimitry Andric return true; 3910b57cec5SDimitry Andric } 3920b57cec5SDimitry Andric }; 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric /// Helper class to track the possible sources of a value defined by 3950b57cec5SDimitry Andric /// a (chain of) copy related instructions. 3960b57cec5SDimitry Andric /// Given a definition (instruction and definition index), this class 3970b57cec5SDimitry Andric /// follows the use-def chain to find successive suitable sources. 3980b57cec5SDimitry Andric /// The given source can be used to rewrite the definition into 3990b57cec5SDimitry Andric /// def = COPY src. 4000b57cec5SDimitry Andric /// 4010b57cec5SDimitry Andric /// For instance, let us consider the following snippet: 4020b57cec5SDimitry Andric /// v0 = 4030b57cec5SDimitry Andric /// v2 = INSERT_SUBREG v1, v0, sub0 4040b57cec5SDimitry Andric /// def = COPY v2.sub0 4050b57cec5SDimitry Andric /// 4060b57cec5SDimitry Andric /// Using a ValueTracker for def = COPY v2.sub0 will give the following 4070b57cec5SDimitry Andric /// suitable sources: 4080b57cec5SDimitry Andric /// v2.sub0 and v0. 4090b57cec5SDimitry Andric /// Then, def can be rewritten into def = COPY v0. 4100b57cec5SDimitry Andric class ValueTracker { 4110b57cec5SDimitry Andric private: 4120b57cec5SDimitry Andric /// The current point into the use-def chain. 4130b57cec5SDimitry Andric const MachineInstr *Def = nullptr; 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric /// The index of the definition in Def. 4160b57cec5SDimitry Andric unsigned DefIdx = 0; 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric /// The sub register index of the definition. 4190b57cec5SDimitry Andric unsigned DefSubReg; 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric /// The register where the value can be found. 422e8d8bef9SDimitry Andric Register Reg; 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric /// MachineRegisterInfo used to perform tracking. 4250b57cec5SDimitry Andric const MachineRegisterInfo &MRI; 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric /// Optional TargetInstrInfo used to perform some complex tracking. 4280b57cec5SDimitry Andric const TargetInstrInfo *TII; 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric /// Dispatcher to the right underlying implementation of getNextSource. 4310b57cec5SDimitry Andric ValueTrackerResult getNextSourceImpl(); 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric /// Specialized version of getNextSource for Copy instructions. 4340b57cec5SDimitry Andric ValueTrackerResult getNextSourceFromCopy(); 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric /// Specialized version of getNextSource for Bitcast instructions. 4370b57cec5SDimitry Andric ValueTrackerResult getNextSourceFromBitcast(); 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric /// Specialized version of getNextSource for RegSequence instructions. 4400b57cec5SDimitry Andric ValueTrackerResult getNextSourceFromRegSequence(); 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric /// Specialized version of getNextSource for InsertSubreg instructions. 4430b57cec5SDimitry Andric ValueTrackerResult getNextSourceFromInsertSubreg(); 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric /// Specialized version of getNextSource for ExtractSubreg instructions. 4460b57cec5SDimitry Andric ValueTrackerResult getNextSourceFromExtractSubreg(); 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric /// Specialized version of getNextSource for SubregToReg instructions. 4490b57cec5SDimitry Andric ValueTrackerResult getNextSourceFromSubregToReg(); 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric /// Specialized version of getNextSource for PHI instructions. 4520b57cec5SDimitry Andric ValueTrackerResult getNextSourceFromPHI(); 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric public: 4550b57cec5SDimitry Andric /// Create a ValueTracker instance for the value defined by \p Reg. 4560b57cec5SDimitry Andric /// \p DefSubReg represents the sub register index the value tracker will 4570b57cec5SDimitry Andric /// track. It does not need to match the sub register index used in the 4580b57cec5SDimitry Andric /// definition of \p Reg. 4590b57cec5SDimitry Andric /// If \p Reg is a physical register, a value tracker constructed with 4600b57cec5SDimitry Andric /// this constructor will not find any alternative source. 4610b57cec5SDimitry Andric /// Indeed, when \p Reg is a physical register that constructor does not 4620b57cec5SDimitry Andric /// know which definition of \p Reg it should track. 4630b57cec5SDimitry Andric /// Use the next constructor to track a physical register. 464e8d8bef9SDimitry Andric ValueTracker(Register Reg, unsigned DefSubReg, 4650b57cec5SDimitry Andric const MachineRegisterInfo &MRI, 4660b57cec5SDimitry Andric const TargetInstrInfo *TII = nullptr) 4670b57cec5SDimitry Andric : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) { 468e8d8bef9SDimitry Andric if (!Reg.isPhysical()) { 4690b57cec5SDimitry Andric Def = MRI.getVRegDef(Reg); 4700b57cec5SDimitry Andric DefIdx = MRI.def_begin(Reg).getOperandNo(); 4710b57cec5SDimitry Andric } 4720b57cec5SDimitry Andric } 4730b57cec5SDimitry Andric 4740b57cec5SDimitry Andric /// Following the use-def chain, get the next available source 4750b57cec5SDimitry Andric /// for the tracked value. 4760b57cec5SDimitry Andric /// \return A ValueTrackerResult containing a set of registers 4770b57cec5SDimitry Andric /// and sub registers with tracked values. A ValueTrackerResult with 4780b57cec5SDimitry Andric /// an empty set of registers means no source was found. 4790b57cec5SDimitry Andric ValueTrackerResult getNextSource(); 4800b57cec5SDimitry Andric }; 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric } // end anonymous namespace 4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric char PeepholeOptimizer::ID = 0; 4850b57cec5SDimitry Andric 4860b57cec5SDimitry Andric char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, 4890b57cec5SDimitry Andric "Peephole Optimizations", false, false) 490*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 491*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 4920b57cec5SDimitry Andric INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, 4930b57cec5SDimitry Andric "Peephole Optimizations", false, false) 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric /// If instruction is a copy-like instruction, i.e. it reads a single register 4960b57cec5SDimitry Andric /// and writes a single register and it does not modify the source, and if the 4970b57cec5SDimitry Andric /// source value is preserved as a sub-register of the result, then replace all 4980b57cec5SDimitry Andric /// reachable uses of the source with the subreg of the result. 4990b57cec5SDimitry Andric /// 5000b57cec5SDimitry Andric /// Do not generate an EXTRACT that is used only in a debug use, as this changes 5010b57cec5SDimitry Andric /// the code. Since this code does not currently share EXTRACTs, just ignore all 5020b57cec5SDimitry Andric /// debug uses. 5030b57cec5SDimitry Andric bool PeepholeOptimizer:: 5040b57cec5SDimitry Andric optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, 5050b57cec5SDimitry Andric SmallPtrSetImpl<MachineInstr*> &LocalMIs) { 5065ffd83dbSDimitry Andric Register SrcReg, DstReg; 5075ffd83dbSDimitry Andric unsigned SubIdx; 5080b57cec5SDimitry Andric if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) 5090b57cec5SDimitry Andric return false; 5100b57cec5SDimitry Andric 5115ffd83dbSDimitry Andric if (DstReg.isPhysical() || SrcReg.isPhysical()) 5120b57cec5SDimitry Andric return false; 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric if (MRI->hasOneNonDBGUse(SrcReg)) 5150b57cec5SDimitry Andric // No other uses. 5160b57cec5SDimitry Andric return false; 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric // Ensure DstReg can get a register class that actually supports 5190b57cec5SDimitry Andric // sub-registers. Don't change the class until we commit. 5200b57cec5SDimitry Andric const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 5210b57cec5SDimitry Andric DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); 5220b57cec5SDimitry Andric if (!DstRC) 5230b57cec5SDimitry Andric return false; 5240b57cec5SDimitry Andric 5250b57cec5SDimitry Andric // The ext instr may be operating on a sub-register of SrcReg as well. 5260b57cec5SDimitry Andric // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit 5270b57cec5SDimitry Andric // register. 5280b57cec5SDimitry Andric // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of 5290b57cec5SDimitry Andric // SrcReg:SubIdx should be replaced. 5300b57cec5SDimitry Andric bool UseSrcSubIdx = 5310b57cec5SDimitry Andric TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric // The source has other uses. See if we can replace the other uses with use of 5340b57cec5SDimitry Andric // the result of the extension. 5350b57cec5SDimitry Andric SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 5360b57cec5SDimitry Andric for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 5370b57cec5SDimitry Andric ReachedBBs.insert(UI.getParent()); 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric // Uses that are in the same BB of uses of the result of the instruction. 5400b57cec5SDimitry Andric SmallVector<MachineOperand*, 8> Uses; 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric // Uses that the result of the instruction can reach. 5430b57cec5SDimitry Andric SmallVector<MachineOperand*, 8> ExtendedUses; 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric bool ExtendLife = true; 5460b57cec5SDimitry Andric for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 5470b57cec5SDimitry Andric MachineInstr *UseMI = UseMO.getParent(); 5480b57cec5SDimitry Andric if (UseMI == &MI) 5490b57cec5SDimitry Andric continue; 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric if (UseMI->isPHI()) { 5520b57cec5SDimitry Andric ExtendLife = false; 5530b57cec5SDimitry Andric continue; 5540b57cec5SDimitry Andric } 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric // Only accept uses of SrcReg:SubIdx. 5570b57cec5SDimitry Andric if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) 5580b57cec5SDimitry Andric continue; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric // It's an error to translate this: 5610b57cec5SDimitry Andric // 5620b57cec5SDimitry Andric // %reg1025 = <sext> %reg1024 5630b57cec5SDimitry Andric // ... 5640b57cec5SDimitry Andric // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 5650b57cec5SDimitry Andric // 5660b57cec5SDimitry Andric // into this: 5670b57cec5SDimitry Andric // 5680b57cec5SDimitry Andric // %reg1025 = <sext> %reg1024 5690b57cec5SDimitry Andric // ... 5700b57cec5SDimitry Andric // %reg1027 = COPY %reg1025:4 5710b57cec5SDimitry Andric // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 5720b57cec5SDimitry Andric // 5730b57cec5SDimitry Andric // The problem here is that SUBREG_TO_REG is there to assert that an 5740b57cec5SDimitry Andric // implicit zext occurs. It doesn't insert a zext instruction. If we allow 5750b57cec5SDimitry Andric // the COPY here, it will give us the value after the <sext>, not the 5760b57cec5SDimitry Andric // original value of %reg1024 before <sext>. 5770b57cec5SDimitry Andric if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 5780b57cec5SDimitry Andric continue; 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric MachineBasicBlock *UseMBB = UseMI->getParent(); 5810b57cec5SDimitry Andric if (UseMBB == &MBB) { 5820b57cec5SDimitry Andric // Local uses that come after the extension. 5830b57cec5SDimitry Andric if (!LocalMIs.count(UseMI)) 5840b57cec5SDimitry Andric Uses.push_back(&UseMO); 5850b57cec5SDimitry Andric } else if (ReachedBBs.count(UseMBB)) { 5860b57cec5SDimitry Andric // Non-local uses where the result of the extension is used. Always 5870b57cec5SDimitry Andric // replace these unless it's a PHI. 5880b57cec5SDimitry Andric Uses.push_back(&UseMO); 5890b57cec5SDimitry Andric } else if (Aggressive && DT->dominates(&MBB, UseMBB)) { 5900b57cec5SDimitry Andric // We may want to extend the live range of the extension result in order 5910b57cec5SDimitry Andric // to replace these uses. 5920b57cec5SDimitry Andric ExtendedUses.push_back(&UseMO); 5930b57cec5SDimitry Andric } else { 5940b57cec5SDimitry Andric // Both will be live out of the def MBB anyway. Don't extend live range of 5950b57cec5SDimitry Andric // the extension result. 5960b57cec5SDimitry Andric ExtendLife = false; 5970b57cec5SDimitry Andric break; 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric } 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric if (ExtendLife && !ExtendedUses.empty()) 6020b57cec5SDimitry Andric // Extend the liveness of the extension result. 6030b57cec5SDimitry Andric Uses.append(ExtendedUses.begin(), ExtendedUses.end()); 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric // Now replace all uses. 6060b57cec5SDimitry Andric bool Changed = false; 6070b57cec5SDimitry Andric if (!Uses.empty()) { 6080b57cec5SDimitry Andric SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 6090b57cec5SDimitry Andric 6100b57cec5SDimitry Andric // Look for PHI uses of the extended result, we don't want to extend the 6110b57cec5SDimitry Andric // liveness of a PHI input. It breaks all kinds of assumptions down 6120b57cec5SDimitry Andric // stream. A PHI use is expected to be the kill of its source values. 6130b57cec5SDimitry Andric for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 6140b57cec5SDimitry Andric if (UI.isPHI()) 6150b57cec5SDimitry Andric PHIBBs.insert(UI.getParent()); 6160b57cec5SDimitry Andric 6170b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 618*0fca6ea1SDimitry Andric for (MachineOperand *UseMO : Uses) { 6190b57cec5SDimitry Andric MachineInstr *UseMI = UseMO->getParent(); 6200b57cec5SDimitry Andric MachineBasicBlock *UseMBB = UseMI->getParent(); 6210b57cec5SDimitry Andric if (PHIBBs.count(UseMBB)) 6220b57cec5SDimitry Andric continue; 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric // About to add uses of DstReg, clear DstReg's kill flags. 6250b57cec5SDimitry Andric if (!Changed) { 6260b57cec5SDimitry Andric MRI->clearKillFlags(DstReg); 6270b57cec5SDimitry Andric MRI->constrainRegClass(DstReg, DstRC); 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric 630fe6060f1SDimitry Andric // SubReg defs are illegal in machine SSA phase, 631fe6060f1SDimitry Andric // we should not generate SubReg defs. 632fe6060f1SDimitry Andric // 633fe6060f1SDimitry Andric // For example, for the instructions: 634fe6060f1SDimitry Andric // 635fe6060f1SDimitry Andric // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc 636fe6060f1SDimitry Andric // %3:gprc_and_gprc_nor0 = COPY %0.sub_32:g8rc 637fe6060f1SDimitry Andric // 638fe6060f1SDimitry Andric // We should generate: 639fe6060f1SDimitry Andric // 640fe6060f1SDimitry Andric // %1:g8rc_and_g8rc_nox0 = EXTSW %0:g8rc 641fe6060f1SDimitry Andric // %6:gprc_and_gprc_nor0 = COPY %1.sub_32:g8rc_and_g8rc_nox0 642fe6060f1SDimitry Andric // %3:gprc_and_gprc_nor0 = COPY %6:gprc_and_gprc_nor0 643fe6060f1SDimitry Andric // 644fe6060f1SDimitry Andric if (UseSrcSubIdx) 645fe6060f1SDimitry Andric RC = MRI->getRegClass(UseMI->getOperand(0).getReg()); 646fe6060f1SDimitry Andric 6478bcb0991SDimitry Andric Register NewVR = MRI->createVirtualRegister(RC); 648fe6060f1SDimitry Andric BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 6490b57cec5SDimitry Andric TII->get(TargetOpcode::COPY), NewVR) 6500b57cec5SDimitry Andric .addReg(DstReg, 0, SubIdx); 651fe6060f1SDimitry Andric if (UseSrcSubIdx) 652fe6060f1SDimitry Andric UseMO->setSubReg(0); 653fe6060f1SDimitry Andric 6540b57cec5SDimitry Andric UseMO->setReg(NewVR); 6550b57cec5SDimitry Andric ++NumReuse; 6560b57cec5SDimitry Andric Changed = true; 6570b57cec5SDimitry Andric } 6580b57cec5SDimitry Andric } 6590b57cec5SDimitry Andric 6600b57cec5SDimitry Andric return Changed; 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric 6630b57cec5SDimitry Andric /// If the instruction is a compare and the previous instruction it's comparing 6640b57cec5SDimitry Andric /// against already sets (or could be modified to set) the same flag as the 6650b57cec5SDimitry Andric /// compare, then we can remove the comparison and use the flag from the 6660b57cec5SDimitry Andric /// previous instruction. 6670b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { 6680b57cec5SDimitry Andric // If this instruction is a comparison against zero and isn't comparing a 6690b57cec5SDimitry Andric // physical register, we can try to optimize it. 6705ffd83dbSDimitry Andric Register SrcReg, SrcReg2; 671349cc55cSDimitry Andric int64_t CmpMask, CmpValue; 6720b57cec5SDimitry Andric if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || 6735ffd83dbSDimitry Andric SrcReg.isPhysical() || SrcReg2.isPhysical()) 6740b57cec5SDimitry Andric return false; 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric // Attempt to optimize the comparison instruction. 6775ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI); 6780b57cec5SDimitry Andric if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { 6795ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " -> Successfully optimized compare!\n"); 6800b57cec5SDimitry Andric ++NumCmps; 6810b57cec5SDimitry Andric return true; 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric 6840b57cec5SDimitry Andric return false; 6850b57cec5SDimitry Andric } 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric /// Optimize a select instruction. 6880b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI, 6890b57cec5SDimitry Andric SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 6900b57cec5SDimitry Andric unsigned TrueOp = 0; 6910b57cec5SDimitry Andric unsigned FalseOp = 0; 6920b57cec5SDimitry Andric bool Optimizable = false; 6930b57cec5SDimitry Andric SmallVector<MachineOperand, 4> Cond; 6940b57cec5SDimitry Andric if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) 6950b57cec5SDimitry Andric return false; 6960b57cec5SDimitry Andric if (!Optimizable) 6970b57cec5SDimitry Andric return false; 6980b57cec5SDimitry Andric if (!TII->optimizeSelect(MI, LocalMIs)) 6990b57cec5SDimitry Andric return false; 7005ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Deleting select: " << MI); 7010b57cec5SDimitry Andric MI.eraseFromParent(); 7020b57cec5SDimitry Andric ++NumSelects; 7030b57cec5SDimitry Andric return true; 7040b57cec5SDimitry Andric } 7050b57cec5SDimitry Andric 7060b57cec5SDimitry Andric /// Check if a simpler conditional branch can be generated. 7070b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) { 7080b57cec5SDimitry Andric return TII->optimizeCondBranch(MI); 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric /// Try to find the next source that share the same register file 7120b57cec5SDimitry Andric /// for the value defined by \p Reg and \p SubReg. 7130b57cec5SDimitry Andric /// When true is returned, the \p RewriteMap can be used by the client to 7140b57cec5SDimitry Andric /// retrieve all Def -> Use along the way up to the next source. Any found 7150b57cec5SDimitry Andric /// Use that is not itself a key for another entry, is the next source to 7160b57cec5SDimitry Andric /// use. During the search for the next source, multiple sources can be found 7170b57cec5SDimitry Andric /// given multiple incoming sources of a PHI instruction. In this case, we 7180b57cec5SDimitry Andric /// look in each PHI source for the next source; all found next sources must 7190b57cec5SDimitry Andric /// share the same register file as \p Reg and \p SubReg. The client should 7200b57cec5SDimitry Andric /// then be capable to rewrite all intermediate PHIs to get the next source. 7210b57cec5SDimitry Andric /// \return False if no alternative sources are available. True otherwise. 7220b57cec5SDimitry Andric bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, 7230b57cec5SDimitry Andric RewriteMapTy &RewriteMap) { 7240b57cec5SDimitry Andric // Do not try to find a new source for a physical register. 7250b57cec5SDimitry Andric // So far we do not have any motivating example for doing that. 7260b57cec5SDimitry Andric // Thus, instead of maintaining untested code, we will revisit that if 7270b57cec5SDimitry Andric // that changes at some point. 7285ffd83dbSDimitry Andric Register Reg = RegSubReg.Reg; 7295ffd83dbSDimitry Andric if (Reg.isPhysical()) 7300b57cec5SDimitry Andric return false; 7310b57cec5SDimitry Andric const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 7320b57cec5SDimitry Andric 7330b57cec5SDimitry Andric SmallVector<RegSubRegPair, 4> SrcToLook; 7340b57cec5SDimitry Andric RegSubRegPair CurSrcPair = RegSubReg; 7350b57cec5SDimitry Andric SrcToLook.push_back(CurSrcPair); 7360b57cec5SDimitry Andric 7370b57cec5SDimitry Andric unsigned PHICount = 0; 7380b57cec5SDimitry Andric do { 7390b57cec5SDimitry Andric CurSrcPair = SrcToLook.pop_back_val(); 7400b57cec5SDimitry Andric // As explained above, do not handle physical registers 741bdd1243dSDimitry Andric if (CurSrcPair.Reg.isPhysical()) 7420b57cec5SDimitry Andric return false; 7430b57cec5SDimitry Andric 7440b57cec5SDimitry Andric ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); 7450b57cec5SDimitry Andric 7460b57cec5SDimitry Andric // Follow the chain of copies until we find a more suitable source, a phi 7470b57cec5SDimitry Andric // or have to abort. 7480b57cec5SDimitry Andric while (true) { 7490b57cec5SDimitry Andric ValueTrackerResult Res = ValTracker.getNextSource(); 7500b57cec5SDimitry Andric // Abort at the end of a chain (without finding a suitable source). 7510b57cec5SDimitry Andric if (!Res.isValid()) 7520b57cec5SDimitry Andric return false; 7530b57cec5SDimitry Andric 7540b57cec5SDimitry Andric // Insert the Def -> Use entry for the recently found source. 7550b57cec5SDimitry Andric ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); 7560b57cec5SDimitry Andric if (CurSrcRes.isValid()) { 7570b57cec5SDimitry Andric assert(CurSrcRes == Res && "ValueTrackerResult found must match"); 7580b57cec5SDimitry Andric // An existent entry with multiple sources is a PHI cycle we must avoid. 7590b57cec5SDimitry Andric // Otherwise it's an entry with a valid next source we already found. 7600b57cec5SDimitry Andric if (CurSrcRes.getNumSources() > 1) { 7610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 7620b57cec5SDimitry Andric << "findNextSource: found PHI cycle, aborting...\n"); 7630b57cec5SDimitry Andric return false; 7640b57cec5SDimitry Andric } 7650b57cec5SDimitry Andric break; 7660b57cec5SDimitry Andric } 7670b57cec5SDimitry Andric RewriteMap.insert(std::make_pair(CurSrcPair, Res)); 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric // ValueTrackerResult usually have one source unless it's the result from 7700b57cec5SDimitry Andric // a PHI instruction. Add the found PHI edges to be looked up further. 7710b57cec5SDimitry Andric unsigned NumSrcs = Res.getNumSources(); 7720b57cec5SDimitry Andric if (NumSrcs > 1) { 7730b57cec5SDimitry Andric PHICount++; 7740b57cec5SDimitry Andric if (PHICount >= RewritePHILimit) { 7750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); 7760b57cec5SDimitry Andric return false; 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric for (unsigned i = 0; i < NumSrcs; ++i) 7800b57cec5SDimitry Andric SrcToLook.push_back(Res.getSrc(i)); 7810b57cec5SDimitry Andric break; 7820b57cec5SDimitry Andric } 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric CurSrcPair = Res.getSrc(0); 7850b57cec5SDimitry Andric // Do not extend the live-ranges of physical registers as they add 7860b57cec5SDimitry Andric // constraints to the register allocator. Moreover, if we want to extend 7870b57cec5SDimitry Andric // the live-range of a physical register, unlike SSA virtual register, 7880b57cec5SDimitry Andric // we will have to check that they aren't redefine before the related use. 789bdd1243dSDimitry Andric if (CurSrcPair.Reg.isPhysical()) 7900b57cec5SDimitry Andric return false; 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric // Keep following the chain if the value isn't any better yet. 7930b57cec5SDimitry Andric const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); 7940b57cec5SDimitry Andric if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC, 7950b57cec5SDimitry Andric CurSrcPair.SubReg)) 7960b57cec5SDimitry Andric continue; 7970b57cec5SDimitry Andric 7980b57cec5SDimitry Andric // We currently cannot deal with subreg operands on PHI instructions 7990b57cec5SDimitry Andric // (see insertPHI()). 8000b57cec5SDimitry Andric if (PHICount > 0 && CurSrcPair.SubReg != 0) 8010b57cec5SDimitry Andric continue; 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric // We found a suitable source, and are done with this chain. 8040b57cec5SDimitry Andric break; 8050b57cec5SDimitry Andric } 8060b57cec5SDimitry Andric } while (!SrcToLook.empty()); 8070b57cec5SDimitry Andric 8080b57cec5SDimitry Andric // If we did not find a more suitable source, there is nothing to optimize. 8090b57cec5SDimitry Andric return CurSrcPair.Reg != Reg; 8100b57cec5SDimitry Andric } 8110b57cec5SDimitry Andric 8120b57cec5SDimitry Andric /// Insert a PHI instruction with incoming edges \p SrcRegs that are 8130b57cec5SDimitry Andric /// guaranteed to have the same register class. This is necessary whenever we 8140b57cec5SDimitry Andric /// successfully traverse a PHI instruction and find suitable sources coming 8150b57cec5SDimitry Andric /// from its edges. By inserting a new PHI, we provide a rewritten PHI def 8160b57cec5SDimitry Andric /// suitable to be used in a new COPY instruction. 8170b57cec5SDimitry Andric static MachineInstr & 8180b57cec5SDimitry Andric insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, 8190b57cec5SDimitry Andric const SmallVectorImpl<RegSubRegPair> &SrcRegs, 8200b57cec5SDimitry Andric MachineInstr &OrigPHI) { 8210b57cec5SDimitry Andric assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg); 8240b57cec5SDimitry Andric // NewRC is only correct if no subregisters are involved. findNextSource() 8250b57cec5SDimitry Andric // should have rejected those cases already. 8260b57cec5SDimitry Andric assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); 8278bcb0991SDimitry Andric Register NewVR = MRI.createVirtualRegister(NewRC); 8280b57cec5SDimitry Andric MachineBasicBlock *MBB = OrigPHI.getParent(); 8290b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), 8300b57cec5SDimitry Andric TII.get(TargetOpcode::PHI), NewVR); 8310b57cec5SDimitry Andric 8320b57cec5SDimitry Andric unsigned MBBOpIdx = 2; 8330b57cec5SDimitry Andric for (const RegSubRegPair &RegPair : SrcRegs) { 8340b57cec5SDimitry Andric MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); 8350b57cec5SDimitry Andric MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB()); 8360b57cec5SDimitry Andric // Since we're extended the lifetime of RegPair.Reg, clear the 8370b57cec5SDimitry Andric // kill flags to account for that and make RegPair.Reg reaches 8380b57cec5SDimitry Andric // the new PHI. 8390b57cec5SDimitry Andric MRI.clearKillFlags(RegPair.Reg); 8400b57cec5SDimitry Andric MBBOpIdx += 2; 8410b57cec5SDimitry Andric } 8420b57cec5SDimitry Andric 8430b57cec5SDimitry Andric return *MIB; 8440b57cec5SDimitry Andric } 8450b57cec5SDimitry Andric 8460b57cec5SDimitry Andric namespace { 8470b57cec5SDimitry Andric 8480b57cec5SDimitry Andric /// Interface to query instructions amenable to copy rewriting. 8490b57cec5SDimitry Andric class Rewriter { 8500b57cec5SDimitry Andric protected: 8510b57cec5SDimitry Andric MachineInstr &CopyLike; 8520b57cec5SDimitry Andric unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. 8530b57cec5SDimitry Andric public: 8540b57cec5SDimitry Andric Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} 85581ad6265SDimitry Andric virtual ~Rewriter() = default; 8560b57cec5SDimitry Andric 8570b57cec5SDimitry Andric /// Get the next rewritable source (SrcReg, SrcSubReg) and 8580b57cec5SDimitry Andric /// the related value that it affects (DstReg, DstSubReg). 8590b57cec5SDimitry Andric /// A source is considered rewritable if its register class and the 8600b57cec5SDimitry Andric /// register class of the related DstReg may not be register 8610b57cec5SDimitry Andric /// coalescer friendly. In other words, given a copy-like instruction 8620b57cec5SDimitry Andric /// not all the arguments may be returned at rewritable source, since 8630b57cec5SDimitry Andric /// some arguments are none to be register coalescer friendly. 8640b57cec5SDimitry Andric /// 8650b57cec5SDimitry Andric /// Each call of this method moves the current source to the next 8660b57cec5SDimitry Andric /// rewritable source. 8670b57cec5SDimitry Andric /// For instance, let CopyLike be the instruction to rewrite. 8680b57cec5SDimitry Andric /// CopyLike has one definition and one source: 8690b57cec5SDimitry Andric /// dst.dstSubIdx = CopyLike src.srcSubIdx. 8700b57cec5SDimitry Andric /// 8710b57cec5SDimitry Andric /// The first call will give the first rewritable source, i.e., 8720b57cec5SDimitry Andric /// the only source this instruction has: 8730b57cec5SDimitry Andric /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 8740b57cec5SDimitry Andric /// This source defines the whole definition, i.e., 8750b57cec5SDimitry Andric /// (DstReg, DstSubReg) = (dst, dstSubIdx). 8760b57cec5SDimitry Andric /// 8770b57cec5SDimitry Andric /// The second and subsequent calls will return false, as there is only one 8780b57cec5SDimitry Andric /// rewritable source. 8790b57cec5SDimitry Andric /// 8800b57cec5SDimitry Andric /// \return True if a rewritable source has been found, false otherwise. 8810b57cec5SDimitry Andric /// The output arguments are valid if and only if true is returned. 8820b57cec5SDimitry Andric virtual bool getNextRewritableSource(RegSubRegPair &Src, 8830b57cec5SDimitry Andric RegSubRegPair &Dst) = 0; 8840b57cec5SDimitry Andric 8850b57cec5SDimitry Andric /// Rewrite the current source with \p NewReg and \p NewSubReg if possible. 8860b57cec5SDimitry Andric /// \return True if the rewriting was possible, false otherwise. 887e8d8bef9SDimitry Andric virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0; 8880b57cec5SDimitry Andric }; 8890b57cec5SDimitry Andric 8900b57cec5SDimitry Andric /// Rewriter for COPY instructions. 8910b57cec5SDimitry Andric class CopyRewriter : public Rewriter { 8920b57cec5SDimitry Andric public: 8930b57cec5SDimitry Andric CopyRewriter(MachineInstr &MI) : Rewriter(MI) { 8940b57cec5SDimitry Andric assert(MI.isCopy() && "Expected copy instruction"); 8950b57cec5SDimitry Andric } 8960b57cec5SDimitry Andric virtual ~CopyRewriter() = default; 8970b57cec5SDimitry Andric 8980b57cec5SDimitry Andric bool getNextRewritableSource(RegSubRegPair &Src, 8990b57cec5SDimitry Andric RegSubRegPair &Dst) override { 9000b57cec5SDimitry Andric // CurrentSrcIdx > 0 means this function has already been called. 9010b57cec5SDimitry Andric if (CurrentSrcIdx > 0) 9020b57cec5SDimitry Andric return false; 9030b57cec5SDimitry Andric // This is the first call to getNextRewritableSource. 9040b57cec5SDimitry Andric // Move the CurrentSrcIdx to remember that we made that call. 9050b57cec5SDimitry Andric CurrentSrcIdx = 1; 9060b57cec5SDimitry Andric // The rewritable source is the argument. 9070b57cec5SDimitry Andric const MachineOperand &MOSrc = CopyLike.getOperand(1); 9080b57cec5SDimitry Andric Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg()); 9090b57cec5SDimitry Andric // What we track are the alternative sources of the definition. 9100b57cec5SDimitry Andric const MachineOperand &MODef = CopyLike.getOperand(0); 9110b57cec5SDimitry Andric Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); 9120b57cec5SDimitry Andric return true; 9130b57cec5SDimitry Andric } 9140b57cec5SDimitry Andric 915e8d8bef9SDimitry Andric bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { 9160b57cec5SDimitry Andric if (CurrentSrcIdx != 1) 9170b57cec5SDimitry Andric return false; 9180b57cec5SDimitry Andric MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 9190b57cec5SDimitry Andric MOSrc.setReg(NewReg); 9200b57cec5SDimitry Andric MOSrc.setSubReg(NewSubReg); 9210b57cec5SDimitry Andric return true; 9220b57cec5SDimitry Andric } 9230b57cec5SDimitry Andric }; 9240b57cec5SDimitry Andric 9250b57cec5SDimitry Andric /// Helper class to rewrite uncoalescable copy like instructions 9260b57cec5SDimitry Andric /// into new COPY (coalescable friendly) instructions. 9270b57cec5SDimitry Andric class UncoalescableRewriter : public Rewriter { 9280b57cec5SDimitry Andric unsigned NumDefs; ///< Number of defs in the bitcast. 9290b57cec5SDimitry Andric 9300b57cec5SDimitry Andric public: 9310b57cec5SDimitry Andric UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) { 9320b57cec5SDimitry Andric NumDefs = MI.getDesc().getNumDefs(); 9330b57cec5SDimitry Andric } 9340b57cec5SDimitry Andric 9350b57cec5SDimitry Andric /// \see See Rewriter::getNextRewritableSource() 9360b57cec5SDimitry Andric /// All such sources need to be considered rewritable in order to 9370b57cec5SDimitry Andric /// rewrite a uncoalescable copy-like instruction. This method return 9380b57cec5SDimitry Andric /// each definition that must be checked if rewritable. 9390b57cec5SDimitry Andric bool getNextRewritableSource(RegSubRegPair &Src, 9400b57cec5SDimitry Andric RegSubRegPair &Dst) override { 9410b57cec5SDimitry Andric // Find the next non-dead definition and continue from there. 9420b57cec5SDimitry Andric if (CurrentSrcIdx == NumDefs) 9430b57cec5SDimitry Andric return false; 9440b57cec5SDimitry Andric 9450b57cec5SDimitry Andric while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { 9460b57cec5SDimitry Andric ++CurrentSrcIdx; 9470b57cec5SDimitry Andric if (CurrentSrcIdx == NumDefs) 9480b57cec5SDimitry Andric return false; 9490b57cec5SDimitry Andric } 9500b57cec5SDimitry Andric 9510b57cec5SDimitry Andric // What we track are the alternative sources of the definition. 9520b57cec5SDimitry Andric Src = RegSubRegPair(0, 0); 9530b57cec5SDimitry Andric const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); 9540b57cec5SDimitry Andric Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric CurrentSrcIdx++; 9570b57cec5SDimitry Andric return true; 9580b57cec5SDimitry Andric } 9590b57cec5SDimitry Andric 960e8d8bef9SDimitry Andric bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { 9610b57cec5SDimitry Andric return false; 9620b57cec5SDimitry Andric } 9630b57cec5SDimitry Andric }; 9640b57cec5SDimitry Andric 9650b57cec5SDimitry Andric /// Specialized rewriter for INSERT_SUBREG instruction. 9660b57cec5SDimitry Andric class InsertSubregRewriter : public Rewriter { 9670b57cec5SDimitry Andric public: 9680b57cec5SDimitry Andric InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) { 9690b57cec5SDimitry Andric assert(MI.isInsertSubreg() && "Invalid instruction"); 9700b57cec5SDimitry Andric } 9710b57cec5SDimitry Andric 9720b57cec5SDimitry Andric /// \see See Rewriter::getNextRewritableSource() 9730b57cec5SDimitry Andric /// Here CopyLike has the following form: 9740b57cec5SDimitry Andric /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 9750b57cec5SDimitry Andric /// Src1 has the same register class has dst, hence, there is 9760b57cec5SDimitry Andric /// nothing to rewrite. 9770b57cec5SDimitry Andric /// Src2.src2SubIdx, may not be register coalescer friendly. 9780b57cec5SDimitry Andric /// Therefore, the first call to this method returns: 9790b57cec5SDimitry Andric /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 9800b57cec5SDimitry Andric /// (DstReg, DstSubReg) = (dst, subIdx). 9810b57cec5SDimitry Andric /// 9820b57cec5SDimitry Andric /// Subsequence calls will return false. 9830b57cec5SDimitry Andric bool getNextRewritableSource(RegSubRegPair &Src, 9840b57cec5SDimitry Andric RegSubRegPair &Dst) override { 9850b57cec5SDimitry Andric // If we already get the only source we can rewrite, return false. 9860b57cec5SDimitry Andric if (CurrentSrcIdx == 2) 9870b57cec5SDimitry Andric return false; 9880b57cec5SDimitry Andric // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. 9890b57cec5SDimitry Andric CurrentSrcIdx = 2; 9900b57cec5SDimitry Andric const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); 9910b57cec5SDimitry Andric Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg()); 9920b57cec5SDimitry Andric const MachineOperand &MODef = CopyLike.getOperand(0); 9930b57cec5SDimitry Andric 9940b57cec5SDimitry Andric // We want to track something that is compatible with the 9950b57cec5SDimitry Andric // partial definition. 9960b57cec5SDimitry Andric if (MODef.getSubReg()) 9970b57cec5SDimitry Andric // Bail if we have to compose sub-register indices. 9980b57cec5SDimitry Andric return false; 9990b57cec5SDimitry Andric Dst = RegSubRegPair(MODef.getReg(), 10000b57cec5SDimitry Andric (unsigned)CopyLike.getOperand(3).getImm()); 10010b57cec5SDimitry Andric return true; 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric 1004e8d8bef9SDimitry Andric bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { 10050b57cec5SDimitry Andric if (CurrentSrcIdx != 2) 10060b57cec5SDimitry Andric return false; 10070b57cec5SDimitry Andric // We are rewriting the inserted reg. 10080b57cec5SDimitry Andric MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 10090b57cec5SDimitry Andric MO.setReg(NewReg); 10100b57cec5SDimitry Andric MO.setSubReg(NewSubReg); 10110b57cec5SDimitry Andric return true; 10120b57cec5SDimitry Andric } 10130b57cec5SDimitry Andric }; 10140b57cec5SDimitry Andric 10150b57cec5SDimitry Andric /// Specialized rewriter for EXTRACT_SUBREG instruction. 10160b57cec5SDimitry Andric class ExtractSubregRewriter : public Rewriter { 10170b57cec5SDimitry Andric const TargetInstrInfo &TII; 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric public: 10200b57cec5SDimitry Andric ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) 10210b57cec5SDimitry Andric : Rewriter(MI), TII(TII) { 10220b57cec5SDimitry Andric assert(MI.isExtractSubreg() && "Invalid instruction"); 10230b57cec5SDimitry Andric } 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric /// \see Rewriter::getNextRewritableSource() 10260b57cec5SDimitry Andric /// Here CopyLike has the following form: 10270b57cec5SDimitry Andric /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. 10280b57cec5SDimitry Andric /// There is only one rewritable source: Src.subIdx, 10290b57cec5SDimitry Andric /// which defines dst.dstSubIdx. 10300b57cec5SDimitry Andric bool getNextRewritableSource(RegSubRegPair &Src, 10310b57cec5SDimitry Andric RegSubRegPair &Dst) override { 10320b57cec5SDimitry Andric // If we already get the only source we can rewrite, return false. 10330b57cec5SDimitry Andric if (CurrentSrcIdx == 1) 10340b57cec5SDimitry Andric return false; 10350b57cec5SDimitry Andric // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 10360b57cec5SDimitry Andric CurrentSrcIdx = 1; 10370b57cec5SDimitry Andric const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 10380b57cec5SDimitry Andric // If we have to compose sub-register indices, bail out. 10390b57cec5SDimitry Andric if (MOExtractedReg.getSubReg()) 10400b57cec5SDimitry Andric return false; 10410b57cec5SDimitry Andric 10420b57cec5SDimitry Andric Src = RegSubRegPair(MOExtractedReg.getReg(), 10430b57cec5SDimitry Andric CopyLike.getOperand(2).getImm()); 10440b57cec5SDimitry Andric 10450b57cec5SDimitry Andric // We want to track something that is compatible with the definition. 10460b57cec5SDimitry Andric const MachineOperand &MODef = CopyLike.getOperand(0); 10470b57cec5SDimitry Andric Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); 10480b57cec5SDimitry Andric return true; 10490b57cec5SDimitry Andric } 10500b57cec5SDimitry Andric 1051e8d8bef9SDimitry Andric bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { 10520b57cec5SDimitry Andric // The only source we can rewrite is the input register. 10530b57cec5SDimitry Andric if (CurrentSrcIdx != 1) 10540b57cec5SDimitry Andric return false; 10550b57cec5SDimitry Andric 10560b57cec5SDimitry Andric CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric // If we find a source that does not require to extract something, 10590b57cec5SDimitry Andric // rewrite the operation with a copy. 10600b57cec5SDimitry Andric if (!NewSubReg) { 10610b57cec5SDimitry Andric // Move the current index to an invalid position. 10620b57cec5SDimitry Andric // We do not want another call to this method to be able 10630b57cec5SDimitry Andric // to do any change. 10640b57cec5SDimitry Andric CurrentSrcIdx = -1; 10650b57cec5SDimitry Andric // Rewrite the operation as a COPY. 10660b57cec5SDimitry Andric // Get rid of the sub-register index. 106781ad6265SDimitry Andric CopyLike.removeOperand(2); 10680b57cec5SDimitry Andric // Morph the operation into a COPY. 10690b57cec5SDimitry Andric CopyLike.setDesc(TII.get(TargetOpcode::COPY)); 10700b57cec5SDimitry Andric return true; 10710b57cec5SDimitry Andric } 10720b57cec5SDimitry Andric CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); 10730b57cec5SDimitry Andric return true; 10740b57cec5SDimitry Andric } 10750b57cec5SDimitry Andric }; 10760b57cec5SDimitry Andric 10770b57cec5SDimitry Andric /// Specialized rewriter for REG_SEQUENCE instruction. 10780b57cec5SDimitry Andric class RegSequenceRewriter : public Rewriter { 10790b57cec5SDimitry Andric public: 10800b57cec5SDimitry Andric RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) { 10810b57cec5SDimitry Andric assert(MI.isRegSequence() && "Invalid instruction"); 10820b57cec5SDimitry Andric } 10830b57cec5SDimitry Andric 10840b57cec5SDimitry Andric /// \see Rewriter::getNextRewritableSource() 10850b57cec5SDimitry Andric /// Here CopyLike has the following form: 10860b57cec5SDimitry Andric /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. 10870b57cec5SDimitry Andric /// Each call will return a different source, walking all the available 10880b57cec5SDimitry Andric /// source. 10890b57cec5SDimitry Andric /// 10900b57cec5SDimitry Andric /// The first call returns: 10910b57cec5SDimitry Andric /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 10920b57cec5SDimitry Andric /// (DstReg, DstSubReg) = (dst, subIdx1). 10930b57cec5SDimitry Andric /// 10940b57cec5SDimitry Andric /// The second call returns: 10950b57cec5SDimitry Andric /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 10960b57cec5SDimitry Andric /// (DstReg, DstSubReg) = (dst, subIdx2). 10970b57cec5SDimitry Andric /// 10980b57cec5SDimitry Andric /// And so on, until all the sources have been traversed, then 10990b57cec5SDimitry Andric /// it returns false. 11000b57cec5SDimitry Andric bool getNextRewritableSource(RegSubRegPair &Src, 11010b57cec5SDimitry Andric RegSubRegPair &Dst) override { 11020b57cec5SDimitry Andric // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric // If this is the first call, move to the first argument. 11050b57cec5SDimitry Andric if (CurrentSrcIdx == 0) { 11060b57cec5SDimitry Andric CurrentSrcIdx = 1; 11070b57cec5SDimitry Andric } else { 11080b57cec5SDimitry Andric // Otherwise, move to the next argument and check that it is valid. 11090b57cec5SDimitry Andric CurrentSrcIdx += 2; 11100b57cec5SDimitry Andric if (CurrentSrcIdx >= CopyLike.getNumOperands()) 11110b57cec5SDimitry Andric return false; 11120b57cec5SDimitry Andric } 11130b57cec5SDimitry Andric const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 11140b57cec5SDimitry Andric Src.Reg = MOInsertedReg.getReg(); 11150b57cec5SDimitry Andric // If we have to compose sub-register indices, bail out. 11160b57cec5SDimitry Andric if ((Src.SubReg = MOInsertedReg.getSubReg())) 11170b57cec5SDimitry Andric return false; 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric // We want to track something that is compatible with the related 11200b57cec5SDimitry Andric // partial definition. 11210b57cec5SDimitry Andric Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 11220b57cec5SDimitry Andric 11230b57cec5SDimitry Andric const MachineOperand &MODef = CopyLike.getOperand(0); 11240b57cec5SDimitry Andric Dst.Reg = MODef.getReg(); 11250b57cec5SDimitry Andric // If we have to compose sub-registers, bail. 11260b57cec5SDimitry Andric return MODef.getSubReg() == 0; 11270b57cec5SDimitry Andric } 11280b57cec5SDimitry Andric 1129e8d8bef9SDimitry Andric bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { 11300b57cec5SDimitry Andric // We cannot rewrite out of bound operands. 11310b57cec5SDimitry Andric // Moreover, rewritable sources are at odd positions. 11320b57cec5SDimitry Andric if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) 11330b57cec5SDimitry Andric return false; 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 11360b57cec5SDimitry Andric MO.setReg(NewReg); 11370b57cec5SDimitry Andric MO.setSubReg(NewSubReg); 11380b57cec5SDimitry Andric return true; 11390b57cec5SDimitry Andric } 11400b57cec5SDimitry Andric }; 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric } // end anonymous namespace 11430b57cec5SDimitry Andric 11440b57cec5SDimitry Andric /// Get the appropriated Rewriter for \p MI. 11450b57cec5SDimitry Andric /// \return A pointer to a dynamically allocated Rewriter or nullptr if no 11460b57cec5SDimitry Andric /// rewriter works for \p MI. 11470b57cec5SDimitry Andric static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) { 11480b57cec5SDimitry Andric // Handle uncoalescable copy-like instructions. 11490b57cec5SDimitry Andric if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() || 11500b57cec5SDimitry Andric MI.isExtractSubregLike()) 11510b57cec5SDimitry Andric return new UncoalescableRewriter(MI); 11520b57cec5SDimitry Andric 11530b57cec5SDimitry Andric switch (MI.getOpcode()) { 11540b57cec5SDimitry Andric default: 11550b57cec5SDimitry Andric return nullptr; 11560b57cec5SDimitry Andric case TargetOpcode::COPY: 11570b57cec5SDimitry Andric return new CopyRewriter(MI); 11580b57cec5SDimitry Andric case TargetOpcode::INSERT_SUBREG: 11590b57cec5SDimitry Andric return new InsertSubregRewriter(MI); 11600b57cec5SDimitry Andric case TargetOpcode::EXTRACT_SUBREG: 11610b57cec5SDimitry Andric return new ExtractSubregRewriter(MI, TII); 11620b57cec5SDimitry Andric case TargetOpcode::REG_SEQUENCE: 11630b57cec5SDimitry Andric return new RegSequenceRewriter(MI); 11640b57cec5SDimitry Andric } 11650b57cec5SDimitry Andric } 11660b57cec5SDimitry Andric 11670b57cec5SDimitry Andric /// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find 11680b57cec5SDimitry Andric /// the new source to use for rewrite. If \p HandleMultipleSources is true and 11690b57cec5SDimitry Andric /// multiple sources for a given \p Def are found along the way, we found a 11700b57cec5SDimitry Andric /// PHI instructions that needs to be rewritten. 11710b57cec5SDimitry Andric /// TODO: HandleMultipleSources should be removed once we test PHI handling 11720b57cec5SDimitry Andric /// with coalescable copies. 11730b57cec5SDimitry Andric static RegSubRegPair 11740b57cec5SDimitry Andric getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, 11750b57cec5SDimitry Andric RegSubRegPair Def, 11760b57cec5SDimitry Andric const PeepholeOptimizer::RewriteMapTy &RewriteMap, 11770b57cec5SDimitry Andric bool HandleMultipleSources = true) { 11780b57cec5SDimitry Andric RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); 11790b57cec5SDimitry Andric while (true) { 11800b57cec5SDimitry Andric ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); 11810b57cec5SDimitry Andric // If there are no entries on the map, LookupSrc is the new source. 11820b57cec5SDimitry Andric if (!Res.isValid()) 11830b57cec5SDimitry Andric return LookupSrc; 11840b57cec5SDimitry Andric 11850b57cec5SDimitry Andric // There's only one source for this definition, keep searching... 11860b57cec5SDimitry Andric unsigned NumSrcs = Res.getNumSources(); 11870b57cec5SDimitry Andric if (NumSrcs == 1) { 11880b57cec5SDimitry Andric LookupSrc.Reg = Res.getSrcReg(0); 11890b57cec5SDimitry Andric LookupSrc.SubReg = Res.getSrcSubReg(0); 11900b57cec5SDimitry Andric continue; 11910b57cec5SDimitry Andric } 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric // TODO: Remove once multiple srcs w/ coalescable copies are supported. 11940b57cec5SDimitry Andric if (!HandleMultipleSources) 11950b57cec5SDimitry Andric break; 11960b57cec5SDimitry Andric 11970b57cec5SDimitry Andric // Multiple sources, recurse into each source to find a new source 11980b57cec5SDimitry Andric // for it. Then, rewrite the PHI accordingly to its new edges. 11990b57cec5SDimitry Andric SmallVector<RegSubRegPair, 4> NewPHISrcs; 12000b57cec5SDimitry Andric for (unsigned i = 0; i < NumSrcs; ++i) { 12010b57cec5SDimitry Andric RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i)); 12020b57cec5SDimitry Andric NewPHISrcs.push_back( 12030b57cec5SDimitry Andric getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); 12040b57cec5SDimitry Andric } 12050b57cec5SDimitry Andric 12060b57cec5SDimitry Andric // Build the new PHI node and return its def register as the new source. 12070b57cec5SDimitry Andric MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst()); 12080b57cec5SDimitry Andric MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI); 12090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "-- getNewSource\n"); 12100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Replacing: " << OrigPHI); 12110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " With: " << NewPHI); 12120b57cec5SDimitry Andric const MachineOperand &MODef = NewPHI.getOperand(0); 12130b57cec5SDimitry Andric return RegSubRegPair(MODef.getReg(), MODef.getSubReg()); 12140b57cec5SDimitry Andric } 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric return RegSubRegPair(0, 0); 12170b57cec5SDimitry Andric } 12180b57cec5SDimitry Andric 12190b57cec5SDimitry Andric /// Optimize generic copy instructions to avoid cross register bank copy. 12200b57cec5SDimitry Andric /// The optimization looks through a chain of copies and tries to find a source 12210b57cec5SDimitry Andric /// that has a compatible register class. 12220b57cec5SDimitry Andric /// Two register classes are considered to be compatible if they share the same 12230b57cec5SDimitry Andric /// register bank. 12240b57cec5SDimitry Andric /// New copies issued by this optimization are register allocator 12250b57cec5SDimitry Andric /// friendly. This optimization does not remove any copy as it may 12260b57cec5SDimitry Andric /// overconstrain the register allocator, but replaces some operands 12270b57cec5SDimitry Andric /// when possible. 12280b57cec5SDimitry Andric /// \pre isCoalescableCopy(*MI) is true. 12290b57cec5SDimitry Andric /// \return True, when \p MI has been rewritten. False otherwise. 12300b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { 12310b57cec5SDimitry Andric assert(isCoalescableCopy(MI) && "Invalid argument"); 12320b57cec5SDimitry Andric assert(MI.getDesc().getNumDefs() == 1 && 12330b57cec5SDimitry Andric "Coalescer can understand multiple defs?!"); 12340b57cec5SDimitry Andric const MachineOperand &MODef = MI.getOperand(0); 12350b57cec5SDimitry Andric // Do not rewrite physical definitions. 1236bdd1243dSDimitry Andric if (MODef.getReg().isPhysical()) 12370b57cec5SDimitry Andric return false; 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric bool Changed = false; 12400b57cec5SDimitry Andric // Get the right rewriter for the current copy. 12410b57cec5SDimitry Andric std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII)); 12420b57cec5SDimitry Andric // If none exists, bail out. 12430b57cec5SDimitry Andric if (!CpyRewriter) 12440b57cec5SDimitry Andric return false; 12450b57cec5SDimitry Andric // Rewrite each rewritable source. 12460b57cec5SDimitry Andric RegSubRegPair Src; 12470b57cec5SDimitry Andric RegSubRegPair TrackPair; 12480b57cec5SDimitry Andric while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) { 12490b57cec5SDimitry Andric // Keep track of PHI nodes and its incoming edges when looking for sources. 12500b57cec5SDimitry Andric RewriteMapTy RewriteMap; 12510b57cec5SDimitry Andric // Try to find a more suitable source. If we failed to do so, or get the 12520b57cec5SDimitry Andric // actual source, move to the next source. 12530b57cec5SDimitry Andric if (!findNextSource(TrackPair, RewriteMap)) 12540b57cec5SDimitry Andric continue; 12550b57cec5SDimitry Andric 12560b57cec5SDimitry Andric // Get the new source to rewrite. TODO: Only enable handling of multiple 12570b57cec5SDimitry Andric // sources (PHIs) once we have a motivating example and testcases for it. 12580b57cec5SDimitry Andric RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap, 12590b57cec5SDimitry Andric /*HandleMultipleSources=*/false); 12600b57cec5SDimitry Andric if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0) 12610b57cec5SDimitry Andric continue; 12620b57cec5SDimitry Andric 12630b57cec5SDimitry Andric // Rewrite source. 12640b57cec5SDimitry Andric if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { 12650b57cec5SDimitry Andric // We may have extended the live-range of NewSrc, account for that. 12660b57cec5SDimitry Andric MRI->clearKillFlags(NewSrc.Reg); 12670b57cec5SDimitry Andric Changed = true; 12680b57cec5SDimitry Andric } 12690b57cec5SDimitry Andric } 12700b57cec5SDimitry Andric // TODO: We could have a clean-up method to tidy the instruction. 12710b57cec5SDimitry Andric // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 12720b57cec5SDimitry Andric // => v0 = COPY v1 12730b57cec5SDimitry Andric // Currently we haven't seen motivating example for that and we 12740b57cec5SDimitry Andric // want to avoid untested code. 12750b57cec5SDimitry Andric NumRewrittenCopies += Changed; 12760b57cec5SDimitry Andric return Changed; 12770b57cec5SDimitry Andric } 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric /// Rewrite the source found through \p Def, by using the \p RewriteMap 12800b57cec5SDimitry Andric /// and create a new COPY instruction. More info about RewriteMap in 12810b57cec5SDimitry Andric /// PeepholeOptimizer::findNextSource. Right now this is only used to handle 12820b57cec5SDimitry Andric /// Uncoalescable copies, since they are copy like instructions that aren't 12830b57cec5SDimitry Andric /// recognized by the register allocator. 12840b57cec5SDimitry Andric MachineInstr & 12850b57cec5SDimitry Andric PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, 12860b57cec5SDimitry Andric RegSubRegPair Def, RewriteMapTy &RewriteMap) { 1287bdd1243dSDimitry Andric assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers"); 12880b57cec5SDimitry Andric 12890b57cec5SDimitry Andric // Find the new source to use in the COPY rewrite. 12900b57cec5SDimitry Andric RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap); 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric // Insert the COPY. 12930b57cec5SDimitry Andric const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); 12948bcb0991SDimitry Andric Register NewVReg = MRI->createVirtualRegister(DefRC); 12950b57cec5SDimitry Andric 12960b57cec5SDimitry Andric MachineInstr *NewCopy = 12970b57cec5SDimitry Andric BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), 12980b57cec5SDimitry Andric TII->get(TargetOpcode::COPY), NewVReg) 12990b57cec5SDimitry Andric .addReg(NewSrc.Reg, 0, NewSrc.SubReg); 13000b57cec5SDimitry Andric 13010b57cec5SDimitry Andric if (Def.SubReg) { 13020b57cec5SDimitry Andric NewCopy->getOperand(0).setSubReg(Def.SubReg); 13030b57cec5SDimitry Andric NewCopy->getOperand(0).setIsUndef(); 13040b57cec5SDimitry Andric } 13050b57cec5SDimitry Andric 13060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "-- RewriteSource\n"); 13070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Replacing: " << CopyLike); 13080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " With: " << *NewCopy); 13090b57cec5SDimitry Andric MRI->replaceRegWith(Def.Reg, NewVReg); 13100b57cec5SDimitry Andric MRI->clearKillFlags(NewVReg); 13110b57cec5SDimitry Andric 13120b57cec5SDimitry Andric // We extended the lifetime of NewSrc.Reg, clear the kill flags to 13130b57cec5SDimitry Andric // account for that. 13140b57cec5SDimitry Andric MRI->clearKillFlags(NewSrc.Reg); 13150b57cec5SDimitry Andric 13160b57cec5SDimitry Andric return *NewCopy; 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric 13190b57cec5SDimitry Andric /// Optimize copy-like instructions to create 13200b57cec5SDimitry Andric /// register coalescer friendly instruction. 13210b57cec5SDimitry Andric /// The optimization tries to kill-off the \p MI by looking 13220b57cec5SDimitry Andric /// through a chain of copies to find a source that has a compatible 13230b57cec5SDimitry Andric /// register class. 13240b57cec5SDimitry Andric /// If such a source is found, it replace \p MI by a generic COPY 13250b57cec5SDimitry Andric /// operation. 13260b57cec5SDimitry Andric /// \pre isUncoalescableCopy(*MI) is true. 13270b57cec5SDimitry Andric /// \return True, when \p MI has been optimized. In that case, \p MI has 13280b57cec5SDimitry Andric /// been removed from its parent. 13290b57cec5SDimitry Andric /// All COPY instructions created, are inserted in \p LocalMIs. 13300b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeUncoalescableCopy( 13310b57cec5SDimitry Andric MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 13320b57cec5SDimitry Andric assert(isUncoalescableCopy(MI) && "Invalid argument"); 13330b57cec5SDimitry Andric UncoalescableRewriter CpyRewriter(MI); 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric // Rewrite each rewritable source by generating new COPYs. This works 13360b57cec5SDimitry Andric // differently from optimizeCoalescableCopy since it first makes sure that all 13370b57cec5SDimitry Andric // definitions can be rewritten. 13380b57cec5SDimitry Andric RewriteMapTy RewriteMap; 13390b57cec5SDimitry Andric RegSubRegPair Src; 13400b57cec5SDimitry Andric RegSubRegPair Def; 13410b57cec5SDimitry Andric SmallVector<RegSubRegPair, 4> RewritePairs; 13420b57cec5SDimitry Andric while (CpyRewriter.getNextRewritableSource(Src, Def)) { 13430b57cec5SDimitry Andric // If a physical register is here, this is probably for a good reason. 13440b57cec5SDimitry Andric // Do not rewrite that. 1345bdd1243dSDimitry Andric if (Def.Reg.isPhysical()) 13460b57cec5SDimitry Andric return false; 13470b57cec5SDimitry Andric 13480b57cec5SDimitry Andric // If we do not know how to rewrite this definition, there is no point 13490b57cec5SDimitry Andric // in trying to kill this instruction. 13500b57cec5SDimitry Andric if (!findNextSource(Def, RewriteMap)) 13510b57cec5SDimitry Andric return false; 13520b57cec5SDimitry Andric 13530b57cec5SDimitry Andric RewritePairs.push_back(Def); 13540b57cec5SDimitry Andric } 13550b57cec5SDimitry Andric 13560b57cec5SDimitry Andric // The change is possible for all defs, do it. 13570b57cec5SDimitry Andric for (const RegSubRegPair &Def : RewritePairs) { 13580b57cec5SDimitry Andric // Rewrite the "copy" in a way the register coalescer understands. 13590b57cec5SDimitry Andric MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap); 13600b57cec5SDimitry Andric LocalMIs.insert(&NewCopy); 13610b57cec5SDimitry Andric } 13620b57cec5SDimitry Andric 13630b57cec5SDimitry Andric // MI is now dead. 13645ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI); 13650b57cec5SDimitry Andric MI.eraseFromParent(); 13660b57cec5SDimitry Andric ++NumUncoalescableCopies; 13670b57cec5SDimitry Andric return true; 13680b57cec5SDimitry Andric } 13690b57cec5SDimitry Andric 13700b57cec5SDimitry Andric /// Check whether MI is a candidate for folding into a later instruction. 13710b57cec5SDimitry Andric /// We only fold loads to virtual registers and the virtual register defined 13720b57cec5SDimitry Andric /// has a single user. 13730b57cec5SDimitry Andric bool PeepholeOptimizer::isLoadFoldable( 1374e8d8bef9SDimitry Andric MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) { 13750b57cec5SDimitry Andric if (!MI.canFoldAsLoad() || !MI.mayLoad()) 13760b57cec5SDimitry Andric return false; 13770b57cec5SDimitry Andric const MCInstrDesc &MCID = MI.getDesc(); 13780b57cec5SDimitry Andric if (MCID.getNumDefs() != 1) 13790b57cec5SDimitry Andric return false; 13800b57cec5SDimitry Andric 13818bcb0991SDimitry Andric Register Reg = MI.getOperand(0).getReg(); 13820b57cec5SDimitry Andric // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting 13830b57cec5SDimitry Andric // loads. It should be checked when processing uses of the load, since 13840b57cec5SDimitry Andric // uses can be removed during peephole. 1385e8d8bef9SDimitry Andric if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() && 13860b57cec5SDimitry Andric MRI->hasOneNonDBGUser(Reg)) { 13870b57cec5SDimitry Andric FoldAsLoadDefCandidates.insert(Reg); 13880b57cec5SDimitry Andric return true; 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric return false; 13910b57cec5SDimitry Andric } 13920b57cec5SDimitry Andric 13930b57cec5SDimitry Andric bool PeepholeOptimizer::isMoveImmediate( 1394e8d8bef9SDimitry Andric MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, 1395e8d8bef9SDimitry Andric DenseMap<Register, MachineInstr *> &ImmDefMIs) { 13960b57cec5SDimitry Andric const MCInstrDesc &MCID = MI.getDesc(); 13975f757f3fSDimitry Andric if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg()) 13980b57cec5SDimitry Andric return false; 13998bcb0991SDimitry Andric Register Reg = MI.getOperand(0).getReg(); 14005f757f3fSDimitry Andric if (!Reg.isVirtual()) 14015f757f3fSDimitry Andric return false; 14025f757f3fSDimitry Andric 14035f757f3fSDimitry Andric int64_t ImmVal; 14045f757f3fSDimitry Andric if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal)) 14055f757f3fSDimitry Andric return false; 14065f757f3fSDimitry Andric 14070b57cec5SDimitry Andric ImmDefMIs.insert(std::make_pair(Reg, &MI)); 14080b57cec5SDimitry Andric ImmDefRegs.insert(Reg); 14090b57cec5SDimitry Andric return true; 14100b57cec5SDimitry Andric } 14110b57cec5SDimitry Andric 14120b57cec5SDimitry Andric /// Try folding register operands that are defined by move immediate 14130b57cec5SDimitry Andric /// instructions, i.e. a trivial constant folding optimization, if 14140b57cec5SDimitry Andric /// and only if the def and use are in the same BB. 1415e8d8bef9SDimitry Andric bool PeepholeOptimizer::foldImmediate( 1416e8d8bef9SDimitry Andric MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, 14175f757f3fSDimitry Andric DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) { 14185f757f3fSDimitry Andric Deleted = false; 14190b57cec5SDimitry Andric for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 14200b57cec5SDimitry Andric MachineOperand &MO = MI.getOperand(i); 14210b57cec5SDimitry Andric if (!MO.isReg() || MO.isDef()) 14220b57cec5SDimitry Andric continue; 14238bcb0991SDimitry Andric Register Reg = MO.getReg(); 1424e8d8bef9SDimitry Andric if (!Reg.isVirtual()) 14250b57cec5SDimitry Andric continue; 14260b57cec5SDimitry Andric if (ImmDefRegs.count(Reg) == 0) 14270b57cec5SDimitry Andric continue; 1428e8d8bef9SDimitry Andric DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg); 14290b57cec5SDimitry Andric assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); 1430*0fca6ea1SDimitry Andric if (TII->foldImmediate(MI, *II->second, Reg, MRI)) { 14310b57cec5SDimitry Andric ++NumImmFold; 1432*0fca6ea1SDimitry Andric // foldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI 14335f757f3fSDimitry Andric // is not deleted, and we happened to get a same MI, we can delete MI and 14345f757f3fSDimitry Andric // replace its users. 14355f757f3fSDimitry Andric if (MRI->getVRegDef(Reg) && 14365f757f3fSDimitry Andric MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) { 14375f757f3fSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 14385f757f3fSDimitry Andric if (DstReg.isVirtual() && 14395f757f3fSDimitry Andric MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) { 14405f757f3fSDimitry Andric MRI->replaceRegWith(DstReg, Reg); 14415f757f3fSDimitry Andric MI.eraseFromParent(); 14425f757f3fSDimitry Andric Deleted = true; 14435f757f3fSDimitry Andric } 14445f757f3fSDimitry Andric } 14450b57cec5SDimitry Andric return true; 14460b57cec5SDimitry Andric } 14470b57cec5SDimitry Andric } 14480b57cec5SDimitry Andric return false; 14490b57cec5SDimitry Andric } 14500b57cec5SDimitry Andric 14510b57cec5SDimitry Andric // FIXME: This is very simple and misses some cases which should be handled when 14520b57cec5SDimitry Andric // motivating examples are found. 14530b57cec5SDimitry Andric // 14540b57cec5SDimitry Andric // The copy rewriting logic should look at uses as well as defs and be able to 14550b57cec5SDimitry Andric // eliminate copies across blocks. 14560b57cec5SDimitry Andric // 14570b57cec5SDimitry Andric // Later copies that are subregister extracts will also not be eliminated since 14580b57cec5SDimitry Andric // only the first copy is considered. 14590b57cec5SDimitry Andric // 14600b57cec5SDimitry Andric // e.g. 14610b57cec5SDimitry Andric // %1 = COPY %0 14620b57cec5SDimitry Andric // %2 = COPY %0:sub1 14630b57cec5SDimitry Andric // 14640b57cec5SDimitry Andric // Should replace %2 uses with %1:sub1 14655f757f3fSDimitry Andric bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) { 14660b57cec5SDimitry Andric assert(MI.isCopy() && "expected a COPY machine instruction"); 14670b57cec5SDimitry Andric 14685f757f3fSDimitry Andric RegSubRegPair SrcPair; 14695f757f3fSDimitry Andric if (!getCopySrc(MI, SrcPair)) 14700b57cec5SDimitry Andric return false; 14710b57cec5SDimitry Andric 14728bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1473e8d8bef9SDimitry Andric if (!DstReg.isVirtual()) 14740b57cec5SDimitry Andric return false; 14750b57cec5SDimitry Andric 14765f757f3fSDimitry Andric if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) { 14770b57cec5SDimitry Andric // First copy of this reg seen. 14780b57cec5SDimitry Andric return false; 14790b57cec5SDimitry Andric } 14800b57cec5SDimitry Andric 14815f757f3fSDimitry Andric MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second; 14820b57cec5SDimitry Andric 14835f757f3fSDimitry Andric assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() && 1484e8d8bef9SDimitry Andric "Unexpected mismatching subreg!"); 14850b57cec5SDimitry Andric 14868bcb0991SDimitry Andric Register PrevDstReg = PrevCopy->getOperand(0).getReg(); 14870b57cec5SDimitry Andric 14880b57cec5SDimitry Andric // Only replace if the copy register class is the same. 14890b57cec5SDimitry Andric // 14900b57cec5SDimitry Andric // TODO: If we have multiple copies to different register classes, we may want 14910b57cec5SDimitry Andric // to track multiple copies of the same source register. 14920b57cec5SDimitry Andric if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg)) 14930b57cec5SDimitry Andric return false; 14940b57cec5SDimitry Andric 14950b57cec5SDimitry Andric MRI->replaceRegWith(DstReg, PrevDstReg); 14960b57cec5SDimitry Andric 14970b57cec5SDimitry Andric // Lifetime of the previous copy has been extended. 14980b57cec5SDimitry Andric MRI->clearKillFlags(PrevDstReg); 14990b57cec5SDimitry Andric return true; 15000b57cec5SDimitry Andric } 15010b57cec5SDimitry Andric 1502e8d8bef9SDimitry Andric bool PeepholeOptimizer::isNAPhysCopy(Register Reg) { 1503e8d8bef9SDimitry Andric return Reg.isPhysical() && !MRI->isAllocatable(Reg); 15040b57cec5SDimitry Andric } 15050b57cec5SDimitry Andric 15060b57cec5SDimitry Andric bool PeepholeOptimizer::foldRedundantNAPhysCopy( 1507e8d8bef9SDimitry Andric MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) { 15080b57cec5SDimitry Andric assert(MI.isCopy() && "expected a COPY machine instruction"); 15090b57cec5SDimitry Andric 15100b57cec5SDimitry Andric if (DisableNAPhysCopyOpt) 15110b57cec5SDimitry Andric return false; 15120b57cec5SDimitry Andric 15138bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 15148bcb0991SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 1515bdd1243dSDimitry Andric if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) { 1516e8d8bef9SDimitry Andric // %vreg = COPY $physreg 15170b57cec5SDimitry Andric // Avoid using a datastructure which can track multiple live non-allocatable 15180b57cec5SDimitry Andric // phys->virt copies since LLVM doesn't seem to do this. 15190b57cec5SDimitry Andric NAPhysToVirtMIs.insert({SrcReg, &MI}); 15200b57cec5SDimitry Andric return false; 15210b57cec5SDimitry Andric } 15220b57cec5SDimitry Andric 1523e8d8bef9SDimitry Andric if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg))) 15240b57cec5SDimitry Andric return false; 15250b57cec5SDimitry Andric 1526e8d8bef9SDimitry Andric // $physreg = COPY %vreg 15270b57cec5SDimitry Andric auto PrevCopy = NAPhysToVirtMIs.find(DstReg); 15280b57cec5SDimitry Andric if (PrevCopy == NAPhysToVirtMIs.end()) { 15290b57cec5SDimitry Andric // We can't remove the copy: there was an intervening clobber of the 15300b57cec5SDimitry Andric // non-allocatable physical register after the copy to virtual. 15310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " 15320b57cec5SDimitry Andric << MI); 15330b57cec5SDimitry Andric return false; 15340b57cec5SDimitry Andric } 15350b57cec5SDimitry Andric 15368bcb0991SDimitry Andric Register PrevDstReg = PrevCopy->second->getOperand(0).getReg(); 15370b57cec5SDimitry Andric if (PrevDstReg == SrcReg) { 15380b57cec5SDimitry Andric // Remove the virt->phys copy: we saw the virtual register definition, and 15390b57cec5SDimitry Andric // the non-allocatable physical register's state hasn't changed since then. 15400b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI); 15410b57cec5SDimitry Andric ++NumNAPhysCopies; 15420b57cec5SDimitry Andric return true; 15430b57cec5SDimitry Andric } 15440b57cec5SDimitry Andric 15450b57cec5SDimitry Andric // Potential missed optimization opportunity: we saw a different virtual 15460b57cec5SDimitry Andric // register get a copy of the non-allocatable physical register, and we only 15470b57cec5SDimitry Andric // track one such copy. Avoid getting confused by this new non-allocatable 15480b57cec5SDimitry Andric // physical register definition, and remove it from the tracked copies. 15490b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI); 15500b57cec5SDimitry Andric NAPhysToVirtMIs.erase(PrevCopy); 15510b57cec5SDimitry Andric return false; 15520b57cec5SDimitry Andric } 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric /// \bried Returns true if \p MO is a virtual register operand. 15550b57cec5SDimitry Andric static bool isVirtualRegisterOperand(MachineOperand &MO) { 1556e8d8bef9SDimitry Andric return MO.isReg() && MO.getReg().isVirtual(); 15570b57cec5SDimitry Andric } 15580b57cec5SDimitry Andric 15590b57cec5SDimitry Andric bool PeepholeOptimizer::findTargetRecurrence( 1560e8d8bef9SDimitry Andric Register Reg, const SmallSet<Register, 2> &TargetRegs, 15610b57cec5SDimitry Andric RecurrenceCycle &RC) { 15620b57cec5SDimitry Andric // Recurrence found if Reg is in TargetRegs. 15630b57cec5SDimitry Andric if (TargetRegs.count(Reg)) 15640b57cec5SDimitry Andric return true; 15650b57cec5SDimitry Andric 15660b57cec5SDimitry Andric // TODO: Curerntly, we only allow the last instruction of the recurrence 15670b57cec5SDimitry Andric // cycle (the instruction that feeds the PHI instruction) to have more than 15680b57cec5SDimitry Andric // one uses to guarantee that commuting operands does not tie registers 15690b57cec5SDimitry Andric // with overlapping live range. Once we have actual live range info of 15700b57cec5SDimitry Andric // each register, this constraint can be relaxed. 15710b57cec5SDimitry Andric if (!MRI->hasOneNonDBGUse(Reg)) 15720b57cec5SDimitry Andric return false; 15730b57cec5SDimitry Andric 15740b57cec5SDimitry Andric // Give up if the reccurrence chain length is longer than the limit. 15750b57cec5SDimitry Andric if (RC.size() >= MaxRecurrenceChain) 15760b57cec5SDimitry Andric return false; 15770b57cec5SDimitry Andric 15780b57cec5SDimitry Andric MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg)); 1579*0fca6ea1SDimitry Andric unsigned Idx = MI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr); 15800b57cec5SDimitry Andric 15810b57cec5SDimitry Andric // Only interested in recurrences whose instructions have only one def, which 15820b57cec5SDimitry Andric // is a virtual register. 15830b57cec5SDimitry Andric if (MI.getDesc().getNumDefs() != 1) 15840b57cec5SDimitry Andric return false; 15850b57cec5SDimitry Andric 15860b57cec5SDimitry Andric MachineOperand &DefOp = MI.getOperand(0); 15870b57cec5SDimitry Andric if (!isVirtualRegisterOperand(DefOp)) 15880b57cec5SDimitry Andric return false; 15890b57cec5SDimitry Andric 15900b57cec5SDimitry Andric // Check if def operand of MI is tied to any use operand. We are only 15910b57cec5SDimitry Andric // interested in the case that all the instructions in the recurrence chain 15920b57cec5SDimitry Andric // have there def operand tied with one of the use operand. 15930b57cec5SDimitry Andric unsigned TiedUseIdx; 15940b57cec5SDimitry Andric if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx)) 15950b57cec5SDimitry Andric return false; 15960b57cec5SDimitry Andric 15970b57cec5SDimitry Andric if (Idx == TiedUseIdx) { 15980b57cec5SDimitry Andric RC.push_back(RecurrenceInstr(&MI)); 15990b57cec5SDimitry Andric return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC); 16000b57cec5SDimitry Andric } else { 16010b57cec5SDimitry Andric // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx. 16020b57cec5SDimitry Andric unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex; 16030b57cec5SDimitry Andric if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) { 16040b57cec5SDimitry Andric RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx)); 16050b57cec5SDimitry Andric return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC); 16060b57cec5SDimitry Andric } 16070b57cec5SDimitry Andric } 16080b57cec5SDimitry Andric 16090b57cec5SDimitry Andric return false; 16100b57cec5SDimitry Andric } 16110b57cec5SDimitry Andric 16120b57cec5SDimitry Andric /// Phi instructions will eventually be lowered to copy instructions. 16130b57cec5SDimitry Andric /// If phi is in a loop header, a recurrence may formulated around the source 16140b57cec5SDimitry Andric /// and destination of the phi. For such case commuting operands of the 16150b57cec5SDimitry Andric /// instructions in the recurrence may enable coalescing of the copy instruction 16160b57cec5SDimitry Andric /// generated from the phi. For example, if there is a recurrence of 16170b57cec5SDimitry Andric /// 16180b57cec5SDimitry Andric /// LoopHeader: 16190b57cec5SDimitry Andric /// %1 = phi(%0, %100) 16200b57cec5SDimitry Andric /// LoopLatch: 16210b57cec5SDimitry Andric /// %0<def, tied1> = ADD %2<def, tied0>, %1 16220b57cec5SDimitry Andric /// 16230b57cec5SDimitry Andric /// , the fact that %0 and %2 are in the same tied operands set makes 16240b57cec5SDimitry Andric /// the coalescing of copy instruction generated from the phi in 16250b57cec5SDimitry Andric /// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and 16260b57cec5SDimitry Andric /// %2 have overlapping live range. This introduces additional move 16270b57cec5SDimitry Andric /// instruction to the final assembly. However, if we commute %2 and 16280b57cec5SDimitry Andric /// %1 of ADD instruction, the redundant move instruction can be 16290b57cec5SDimitry Andric /// avoided. 16300b57cec5SDimitry Andric bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { 1631e8d8bef9SDimitry Andric SmallSet<Register, 2> TargetRegs; 16320b57cec5SDimitry Andric for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) { 16330b57cec5SDimitry Andric MachineOperand &MO = PHI.getOperand(Idx); 16340b57cec5SDimitry Andric assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction"); 16350b57cec5SDimitry Andric TargetRegs.insert(MO.getReg()); 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric 16380b57cec5SDimitry Andric bool Changed = false; 16390b57cec5SDimitry Andric RecurrenceCycle RC; 16400b57cec5SDimitry Andric if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) { 16410b57cec5SDimitry Andric // Commutes operands of instructions in RC if necessary so that the copy to 16420b57cec5SDimitry Andric // be generated from PHI can be coalesced. 16430b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI); 16440b57cec5SDimitry Andric for (auto &RI : RC) { 16450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI())); 16460b57cec5SDimitry Andric auto CP = RI.getCommutePair(); 16470b57cec5SDimitry Andric if (CP) { 16480b57cec5SDimitry Andric Changed = true; 16490b57cec5SDimitry Andric TII->commuteInstruction(*(RI.getMI()), false, (*CP).first, 16500b57cec5SDimitry Andric (*CP).second); 16510b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI())); 16520b57cec5SDimitry Andric } 16530b57cec5SDimitry Andric } 16540b57cec5SDimitry Andric } 16550b57cec5SDimitry Andric 16560b57cec5SDimitry Andric return Changed; 16570b57cec5SDimitry Andric } 16580b57cec5SDimitry Andric 16590b57cec5SDimitry Andric bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 16600b57cec5SDimitry Andric if (skipFunction(MF.getFunction())) 16610b57cec5SDimitry Andric return false; 16620b57cec5SDimitry Andric 16630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 16640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); 16650b57cec5SDimitry Andric 16660b57cec5SDimitry Andric if (DisablePeephole) 16670b57cec5SDimitry Andric return false; 16680b57cec5SDimitry Andric 16690b57cec5SDimitry Andric TII = MF.getSubtarget().getInstrInfo(); 16700b57cec5SDimitry Andric TRI = MF.getSubtarget().getRegisterInfo(); 16710b57cec5SDimitry Andric MRI = &MF.getRegInfo(); 1672*0fca6ea1SDimitry Andric DT = Aggressive ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree() 1673*0fca6ea1SDimitry Andric : nullptr; 1674*0fca6ea1SDimitry Andric MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 16755f757f3fSDimitry Andric MF.setDelegate(this); 16760b57cec5SDimitry Andric 16770b57cec5SDimitry Andric bool Changed = false; 16780b57cec5SDimitry Andric 16790b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 16800b57cec5SDimitry Andric bool SeenMoveImm = false; 16810b57cec5SDimitry Andric 16820b57cec5SDimitry Andric // During this forward scan, at some point it needs to answer the question 16830b57cec5SDimitry Andric // "given a pointer to an MI in the current BB, is it located before or 16840b57cec5SDimitry Andric // after the current instruction". 16850b57cec5SDimitry Andric // To perform this, the following set keeps track of the MIs already seen 16860b57cec5SDimitry Andric // during the scan, if a MI is not in the set, it is assumed to be located 16870b57cec5SDimitry Andric // after. Newly created MIs have to be inserted in the set as well. 16880b57cec5SDimitry Andric SmallPtrSet<MachineInstr*, 16> LocalMIs; 1689e8d8bef9SDimitry Andric SmallSet<Register, 4> ImmDefRegs; 1690e8d8bef9SDimitry Andric DenseMap<Register, MachineInstr *> ImmDefMIs; 1691e8d8bef9SDimitry Andric SmallSet<Register, 16> FoldAsLoadDefCandidates; 16920b57cec5SDimitry Andric 16930b57cec5SDimitry Andric // Track when a non-allocatable physical register is copied to a virtual 16940b57cec5SDimitry Andric // register so that useless moves can be removed. 16950b57cec5SDimitry Andric // 1696e8d8bef9SDimitry Andric // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg` 1697e8d8bef9SDimitry Andric // without any intervening re-definition of $physreg. 1698e8d8bef9SDimitry Andric DenseMap<Register, MachineInstr *> NAPhysToVirtMIs; 16990b57cec5SDimitry Andric 17005f757f3fSDimitry Andric CopySrcMIs.clear(); 17010b57cec5SDimitry Andric 17020b57cec5SDimitry Andric bool IsLoopHeader = MLI->isLoopHeader(&MBB); 17030b57cec5SDimitry Andric 17040b57cec5SDimitry Andric for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); 17050b57cec5SDimitry Andric MII != MIE; ) { 17060b57cec5SDimitry Andric MachineInstr *MI = &*MII; 17070b57cec5SDimitry Andric // We may be erasing MI below, increment MII now. 17080b57cec5SDimitry Andric ++MII; 17090b57cec5SDimitry Andric LocalMIs.insert(MI); 17100b57cec5SDimitry Andric 1711e8d8bef9SDimitry Andric // Skip debug instructions. They should not affect this peephole 1712e8d8bef9SDimitry Andric // optimization. 17130b57cec5SDimitry Andric if (MI->isDebugInstr()) 17140b57cec5SDimitry Andric continue; 17150b57cec5SDimitry Andric 17160b57cec5SDimitry Andric if (MI->isPosition()) 17170b57cec5SDimitry Andric continue; 17180b57cec5SDimitry Andric 17190b57cec5SDimitry Andric if (IsLoopHeader && MI->isPHI()) { 17200b57cec5SDimitry Andric if (optimizeRecurrence(*MI)) { 17210b57cec5SDimitry Andric Changed = true; 17220b57cec5SDimitry Andric continue; 17230b57cec5SDimitry Andric } 17240b57cec5SDimitry Andric } 17250b57cec5SDimitry Andric 17260b57cec5SDimitry Andric if (!MI->isCopy()) { 17270b57cec5SDimitry Andric for (const MachineOperand &MO : MI->operands()) { 17280b57cec5SDimitry Andric // Visit all operands: definitions can be implicit or explicit. 17290b57cec5SDimitry Andric if (MO.isReg()) { 17308bcb0991SDimitry Andric Register Reg = MO.getReg(); 17310b57cec5SDimitry Andric if (MO.isDef() && isNAPhysCopy(Reg)) { 17320b57cec5SDimitry Andric const auto &Def = NAPhysToVirtMIs.find(Reg); 17330b57cec5SDimitry Andric if (Def != NAPhysToVirtMIs.end()) { 17340b57cec5SDimitry Andric // A new definition of the non-allocatable physical register 17350b57cec5SDimitry Andric // invalidates previous copies. 17360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 17370b57cec5SDimitry Andric << "NAPhysCopy: invalidating because of " << *MI); 17380b57cec5SDimitry Andric NAPhysToVirtMIs.erase(Def); 17390b57cec5SDimitry Andric } 17400b57cec5SDimitry Andric } 17410b57cec5SDimitry Andric } else if (MO.isRegMask()) { 17420b57cec5SDimitry Andric const uint32_t *RegMask = MO.getRegMask(); 17430b57cec5SDimitry Andric for (auto &RegMI : NAPhysToVirtMIs) { 1744e8d8bef9SDimitry Andric Register Def = RegMI.first; 17450b57cec5SDimitry Andric if (MachineOperand::clobbersPhysReg(RegMask, Def)) { 17460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 17470b57cec5SDimitry Andric << "NAPhysCopy: invalidating because of " << *MI); 17480b57cec5SDimitry Andric NAPhysToVirtMIs.erase(Def); 17490b57cec5SDimitry Andric } 17500b57cec5SDimitry Andric } 17510b57cec5SDimitry Andric } 17520b57cec5SDimitry Andric } 17530b57cec5SDimitry Andric } 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric if (MI->isImplicitDef() || MI->isKill()) 17560b57cec5SDimitry Andric continue; 17570b57cec5SDimitry Andric 17580b57cec5SDimitry Andric if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { 17590b57cec5SDimitry Andric // Blow away all non-allocatable physical registers knowledge since we 17600b57cec5SDimitry Andric // don't know what's correct anymore. 17610b57cec5SDimitry Andric // 17620b57cec5SDimitry Andric // FIXME: handle explicit asm clobbers. 17630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " 17640b57cec5SDimitry Andric << *MI); 17650b57cec5SDimitry Andric NAPhysToVirtMIs.clear(); 17660b57cec5SDimitry Andric } 17670b57cec5SDimitry Andric 17680b57cec5SDimitry Andric if ((isUncoalescableCopy(*MI) && 17690b57cec5SDimitry Andric optimizeUncoalescableCopy(*MI, LocalMIs)) || 17700b57cec5SDimitry Andric (MI->isCompare() && optimizeCmpInstr(*MI)) || 17710b57cec5SDimitry Andric (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) { 17720b57cec5SDimitry Andric // MI is deleted. 17730b57cec5SDimitry Andric LocalMIs.erase(MI); 17740b57cec5SDimitry Andric Changed = true; 17750b57cec5SDimitry Andric continue; 17760b57cec5SDimitry Andric } 17770b57cec5SDimitry Andric 17780b57cec5SDimitry Andric if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) { 17790b57cec5SDimitry Andric Changed = true; 17800b57cec5SDimitry Andric continue; 17810b57cec5SDimitry Andric } 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) { 17840b57cec5SDimitry Andric // MI is just rewritten. 17850b57cec5SDimitry Andric Changed = true; 17860b57cec5SDimitry Andric continue; 17870b57cec5SDimitry Andric } 17880b57cec5SDimitry Andric 17895f757f3fSDimitry Andric if (MI->isCopy() && (foldRedundantCopy(*MI) || 17900b57cec5SDimitry Andric foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) { 17910b57cec5SDimitry Andric LocalMIs.erase(MI); 17925ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n"); 17930b57cec5SDimitry Andric MI->eraseFromParent(); 17940b57cec5SDimitry Andric Changed = true; 17950b57cec5SDimitry Andric continue; 17960b57cec5SDimitry Andric } 17970b57cec5SDimitry Andric 17980b57cec5SDimitry Andric if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) { 17990b57cec5SDimitry Andric SeenMoveImm = true; 18000b57cec5SDimitry Andric } else { 18010b57cec5SDimitry Andric Changed |= optimizeExtInstr(*MI, MBB, LocalMIs); 18020b57cec5SDimitry Andric // optimizeExtInstr might have created new instructions after MI 18030b57cec5SDimitry Andric // and before the already incremented MII. Adjust MII so that the 18040b57cec5SDimitry Andric // next iteration sees the new instructions. 18050b57cec5SDimitry Andric MII = MI; 18060b57cec5SDimitry Andric ++MII; 18075f757f3fSDimitry Andric if (SeenMoveImm) { 18085f757f3fSDimitry Andric bool Deleted; 18095f757f3fSDimitry Andric Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted); 18105f757f3fSDimitry Andric if (Deleted) { 18115f757f3fSDimitry Andric LocalMIs.erase(MI); 18125f757f3fSDimitry Andric continue; 18135f757f3fSDimitry Andric } 18145f757f3fSDimitry Andric } 18150b57cec5SDimitry Andric } 18160b57cec5SDimitry Andric 18170b57cec5SDimitry Andric // Check whether MI is a load candidate for folding into a later 18180b57cec5SDimitry Andric // instruction. If MI is not a candidate, check whether we can fold an 18190b57cec5SDimitry Andric // earlier load into MI. 18200b57cec5SDimitry Andric if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) && 18210b57cec5SDimitry Andric !FoldAsLoadDefCandidates.empty()) { 18220b57cec5SDimitry Andric 18230b57cec5SDimitry Andric // We visit each operand even after successfully folding a previous 18240b57cec5SDimitry Andric // one. This allows us to fold multiple loads into a single 18250b57cec5SDimitry Andric // instruction. We do assume that optimizeLoadInstr doesn't insert 18260b57cec5SDimitry Andric // foldable uses earlier in the argument list. Since we don't restart 18270b57cec5SDimitry Andric // iteration, we'd miss such cases. 18280b57cec5SDimitry Andric const MCInstrDesc &MIDesc = MI->getDesc(); 18290b57cec5SDimitry Andric for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands(); 18300b57cec5SDimitry Andric ++i) { 18310b57cec5SDimitry Andric const MachineOperand &MOp = MI->getOperand(i); 18320b57cec5SDimitry Andric if (!MOp.isReg()) 18330b57cec5SDimitry Andric continue; 1834e8d8bef9SDimitry Andric Register FoldAsLoadDefReg = MOp.getReg(); 18350b57cec5SDimitry Andric if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { 18360b57cec5SDimitry Andric // We need to fold load after optimizeCmpInstr, since 18370b57cec5SDimitry Andric // optimizeCmpInstr can enable folding by converting SUB to CMP. 18380b57cec5SDimitry Andric // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and 18390b57cec5SDimitry Andric // we need it for markUsesInDebugValueAsUndef(). 1840e8d8bef9SDimitry Andric Register FoldedReg = FoldAsLoadDefReg; 18410b57cec5SDimitry Andric MachineInstr *DefMI = nullptr; 18420b57cec5SDimitry Andric if (MachineInstr *FoldMI = 18430b57cec5SDimitry Andric TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) { 18440b57cec5SDimitry Andric // Update LocalMIs since we replaced MI with FoldMI and deleted 18450b57cec5SDimitry Andric // DefMI. 18460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing: " << *MI); 18470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " With: " << *FoldMI); 18480b57cec5SDimitry Andric LocalMIs.erase(MI); 18490b57cec5SDimitry Andric LocalMIs.erase(DefMI); 18500b57cec5SDimitry Andric LocalMIs.insert(FoldMI); 18515ffd83dbSDimitry Andric // Update the call site info. 18525ffd83dbSDimitry Andric if (MI->shouldUpdateCallSiteInfo()) 18538bcb0991SDimitry Andric MI->getMF()->moveCallSiteInfo(MI, FoldMI); 18540b57cec5SDimitry Andric MI->eraseFromParent(); 18550b57cec5SDimitry Andric DefMI->eraseFromParent(); 18560b57cec5SDimitry Andric MRI->markUsesInDebugValueAsUndef(FoldedReg); 18570b57cec5SDimitry Andric FoldAsLoadDefCandidates.erase(FoldedReg); 18580b57cec5SDimitry Andric ++NumLoadFold; 18590b57cec5SDimitry Andric 18600b57cec5SDimitry Andric // MI is replaced with FoldMI so we can continue trying to fold 18610b57cec5SDimitry Andric Changed = true; 18620b57cec5SDimitry Andric MI = FoldMI; 18630b57cec5SDimitry Andric } 18640b57cec5SDimitry Andric } 18650b57cec5SDimitry Andric } 18660b57cec5SDimitry Andric } 18670b57cec5SDimitry Andric 18680b57cec5SDimitry Andric // If we run into an instruction we can't fold across, discard 18690b57cec5SDimitry Andric // the load candidates. Note: We might be able to fold *into* this 18700b57cec5SDimitry Andric // instruction, so this needs to be after the folding logic. 18710b57cec5SDimitry Andric if (MI->isLoadFoldBarrier()) { 18720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI); 18730b57cec5SDimitry Andric FoldAsLoadDefCandidates.clear(); 18740b57cec5SDimitry Andric } 18750b57cec5SDimitry Andric } 18760b57cec5SDimitry Andric } 18770b57cec5SDimitry Andric 18785f757f3fSDimitry Andric MF.resetDelegate(this); 18790b57cec5SDimitry Andric return Changed; 18800b57cec5SDimitry Andric } 18810b57cec5SDimitry Andric 18820b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromCopy() { 18830b57cec5SDimitry Andric assert(Def->isCopy() && "Invalid definition"); 18840b57cec5SDimitry Andric // Copy instruction are supposed to be: Def = Src. 18850b57cec5SDimitry Andric // If someone breaks this assumption, bad things will happen everywhere. 18868bcb0991SDimitry Andric // There may be implicit uses preventing the copy to be moved across 18878bcb0991SDimitry Andric // some target specific register definitions 18888bcb0991SDimitry Andric assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 && 18898bcb0991SDimitry Andric "Invalid number of operands"); 18908bcb0991SDimitry Andric assert(!Def->hasImplicitDef() && "Only implicit uses are allowed"); 18910b57cec5SDimitry Andric 18920b57cec5SDimitry Andric if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 18930b57cec5SDimitry Andric // If we look for a different subreg, it means we want a subreg of src. 18940b57cec5SDimitry Andric // Bails as we do not support composing subregs yet. 18950b57cec5SDimitry Andric return ValueTrackerResult(); 18960b57cec5SDimitry Andric // Otherwise, we want the whole source. 18970b57cec5SDimitry Andric const MachineOperand &Src = Def->getOperand(1); 18980b57cec5SDimitry Andric if (Src.isUndef()) 18990b57cec5SDimitry Andric return ValueTrackerResult(); 19000b57cec5SDimitry Andric return ValueTrackerResult(Src.getReg(), Src.getSubReg()); 19010b57cec5SDimitry Andric } 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { 19040b57cec5SDimitry Andric assert(Def->isBitcast() && "Invalid definition"); 19050b57cec5SDimitry Andric 19060b57cec5SDimitry Andric // Bail if there are effects that a plain copy will not expose. 19070b57cec5SDimitry Andric if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects()) 19080b57cec5SDimitry Andric return ValueTrackerResult(); 19090b57cec5SDimitry Andric 19100b57cec5SDimitry Andric // Bitcasts with more than one def are not supported. 19110b57cec5SDimitry Andric if (Def->getDesc().getNumDefs() != 1) 19120b57cec5SDimitry Andric return ValueTrackerResult(); 19130b57cec5SDimitry Andric const MachineOperand DefOp = Def->getOperand(DefIdx); 19140b57cec5SDimitry Andric if (DefOp.getSubReg() != DefSubReg) 19150b57cec5SDimitry Andric // If we look for a different subreg, it means we want a subreg of the src. 19160b57cec5SDimitry Andric // Bails as we do not support composing subregs yet. 19170b57cec5SDimitry Andric return ValueTrackerResult(); 19180b57cec5SDimitry Andric 19190b57cec5SDimitry Andric unsigned SrcIdx = Def->getNumOperands(); 19200b57cec5SDimitry Andric for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 19210b57cec5SDimitry Andric ++OpIdx) { 19220b57cec5SDimitry Andric const MachineOperand &MO = Def->getOperand(OpIdx); 19230b57cec5SDimitry Andric if (!MO.isReg() || !MO.getReg()) 19240b57cec5SDimitry Andric continue; 19250b57cec5SDimitry Andric // Ignore dead implicit defs. 19260b57cec5SDimitry Andric if (MO.isImplicit() && MO.isDead()) 19270b57cec5SDimitry Andric continue; 19280b57cec5SDimitry Andric assert(!MO.isDef() && "We should have skipped all the definitions by now"); 19290b57cec5SDimitry Andric if (SrcIdx != EndOpIdx) 19300b57cec5SDimitry Andric // Multiple sources? 19310b57cec5SDimitry Andric return ValueTrackerResult(); 19320b57cec5SDimitry Andric SrcIdx = OpIdx; 19330b57cec5SDimitry Andric } 19340b57cec5SDimitry Andric 19358bcb0991SDimitry Andric // In some rare case, Def has no input, SrcIdx is out of bound, 19368bcb0991SDimitry Andric // getOperand(SrcIdx) will fail below. 19378bcb0991SDimitry Andric if (SrcIdx >= Def->getNumOperands()) 19388bcb0991SDimitry Andric return ValueTrackerResult(); 19398bcb0991SDimitry Andric 19400b57cec5SDimitry Andric // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY 19410b57cec5SDimitry Andric // will break the assumed guarantees for the upper bits. 19420b57cec5SDimitry Andric for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) { 19430b57cec5SDimitry Andric if (UseMI.isSubregToReg()) 19440b57cec5SDimitry Andric return ValueTrackerResult(); 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric const MachineOperand &Src = Def->getOperand(SrcIdx); 19480b57cec5SDimitry Andric if (Src.isUndef()) 19490b57cec5SDimitry Andric return ValueTrackerResult(); 19500b57cec5SDimitry Andric return ValueTrackerResult(Src.getReg(), Src.getSubReg()); 19510b57cec5SDimitry Andric } 19520b57cec5SDimitry Andric 19530b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { 19540b57cec5SDimitry Andric assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 19550b57cec5SDimitry Andric "Invalid definition"); 19560b57cec5SDimitry Andric 19570b57cec5SDimitry Andric if (Def->getOperand(DefIdx).getSubReg()) 19580b57cec5SDimitry Andric // If we are composing subregs, bail out. 19590b57cec5SDimitry Andric // The case we are checking is Def.<subreg> = REG_SEQUENCE. 19600b57cec5SDimitry Andric // This should almost never happen as the SSA property is tracked at 19610b57cec5SDimitry Andric // the register level (as opposed to the subreg level). 19620b57cec5SDimitry Andric // I.e., 19630b57cec5SDimitry Andric // Def.sub0 = 19640b57cec5SDimitry Andric // Def.sub1 = 19650b57cec5SDimitry Andric // is a valid SSA representation for Def.sub0 and Def.sub1, but not for 19660b57cec5SDimitry Andric // Def. Thus, it must not be generated. 19670b57cec5SDimitry Andric // However, some code could theoretically generates a single 19680b57cec5SDimitry Andric // Def.sub0 (i.e, not defining the other subregs) and we would 19690b57cec5SDimitry Andric // have this case. 19700b57cec5SDimitry Andric // If we can ascertain (or force) that this never happens, we could 19710b57cec5SDimitry Andric // turn that into an assertion. 19720b57cec5SDimitry Andric return ValueTrackerResult(); 19730b57cec5SDimitry Andric 19740b57cec5SDimitry Andric if (!TII) 19750b57cec5SDimitry Andric // We could handle the REG_SEQUENCE here, but we do not want to 19760b57cec5SDimitry Andric // duplicate the code from the generic TII. 19770b57cec5SDimitry Andric return ValueTrackerResult(); 19780b57cec5SDimitry Andric 19790b57cec5SDimitry Andric SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs; 19800b57cec5SDimitry Andric if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 19810b57cec5SDimitry Andric return ValueTrackerResult(); 19820b57cec5SDimitry Andric 19830b57cec5SDimitry Andric // We are looking at: 19840b57cec5SDimitry Andric // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 19850b57cec5SDimitry Andric // Check if one of the operand defines the subreg we are interested in. 19860b57cec5SDimitry Andric for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) { 19870b57cec5SDimitry Andric if (RegSeqInput.SubIdx == DefSubReg) 19880b57cec5SDimitry Andric return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); 19890b57cec5SDimitry Andric } 19900b57cec5SDimitry Andric 19910b57cec5SDimitry Andric // If the subreg we are tracking is super-defined by another subreg, 19920b57cec5SDimitry Andric // we could follow this value. However, this would require to compose 19930b57cec5SDimitry Andric // the subreg and we do not do that for now. 19940b57cec5SDimitry Andric return ValueTrackerResult(); 19950b57cec5SDimitry Andric } 19960b57cec5SDimitry Andric 19970b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { 19980b57cec5SDimitry Andric assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 19990b57cec5SDimitry Andric "Invalid definition"); 20000b57cec5SDimitry Andric 20010b57cec5SDimitry Andric if (Def->getOperand(DefIdx).getSubReg()) 20020b57cec5SDimitry Andric // If we are composing subreg, bail out. 20030b57cec5SDimitry Andric // Same remark as getNextSourceFromRegSequence. 20040b57cec5SDimitry Andric // I.e., this may be turned into an assert. 20050b57cec5SDimitry Andric return ValueTrackerResult(); 20060b57cec5SDimitry Andric 20070b57cec5SDimitry Andric if (!TII) 20080b57cec5SDimitry Andric // We could handle the REG_SEQUENCE here, but we do not want to 20090b57cec5SDimitry Andric // duplicate the code from the generic TII. 20100b57cec5SDimitry Andric return ValueTrackerResult(); 20110b57cec5SDimitry Andric 20120b57cec5SDimitry Andric RegSubRegPair BaseReg; 20130b57cec5SDimitry Andric RegSubRegPairAndIdx InsertedReg; 20140b57cec5SDimitry Andric if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 20150b57cec5SDimitry Andric return ValueTrackerResult(); 20160b57cec5SDimitry Andric 20170b57cec5SDimitry Andric // We are looking at: 20180b57cec5SDimitry Andric // Def = INSERT_SUBREG v0, v1, sub1 20190b57cec5SDimitry Andric // There are two cases: 20200b57cec5SDimitry Andric // 1. DefSubReg == sub1, get v1. 20210b57cec5SDimitry Andric // 2. DefSubReg != sub1, the value may be available through v0. 20220b57cec5SDimitry Andric 20230b57cec5SDimitry Andric // #1 Check if the inserted register matches the required sub index. 20240b57cec5SDimitry Andric if (InsertedReg.SubIdx == DefSubReg) { 20250b57cec5SDimitry Andric return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); 20260b57cec5SDimitry Andric } 20270b57cec5SDimitry Andric // #2 Otherwise, if the sub register we are looking for is not partial 20280b57cec5SDimitry Andric // defined by the inserted element, we can look through the main 20290b57cec5SDimitry Andric // register (v0). 20300b57cec5SDimitry Andric const MachineOperand &MODef = Def->getOperand(DefIdx); 20310b57cec5SDimitry Andric // If the result register (Def) and the base register (v0) do not 20320b57cec5SDimitry Andric // have the same register class or if we have to compose 20330b57cec5SDimitry Andric // subregisters, bail out. 20340b57cec5SDimitry Andric if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 20350b57cec5SDimitry Andric BaseReg.SubReg) 20360b57cec5SDimitry Andric return ValueTrackerResult(); 20370b57cec5SDimitry Andric 20380b57cec5SDimitry Andric // Get the TRI and check if the inserted sub-register overlaps with the 20390b57cec5SDimitry Andric // sub-register we are tracking. 20400b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 20410b57cec5SDimitry Andric if (!TRI || 20420b57cec5SDimitry Andric !(TRI->getSubRegIndexLaneMask(DefSubReg) & 20430b57cec5SDimitry Andric TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none()) 20440b57cec5SDimitry Andric return ValueTrackerResult(); 20450b57cec5SDimitry Andric // At this point, the value is available in v0 via the same subreg 20460b57cec5SDimitry Andric // we used for Def. 20470b57cec5SDimitry Andric return ValueTrackerResult(BaseReg.Reg, DefSubReg); 20480b57cec5SDimitry Andric } 20490b57cec5SDimitry Andric 20500b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { 20510b57cec5SDimitry Andric assert((Def->isExtractSubreg() || 20520b57cec5SDimitry Andric Def->isExtractSubregLike()) && "Invalid definition"); 20530b57cec5SDimitry Andric // We are looking at: 20540b57cec5SDimitry Andric // Def = EXTRACT_SUBREG v0, sub0 20550b57cec5SDimitry Andric 20560b57cec5SDimitry Andric // Bail if we have to compose sub registers. 20570b57cec5SDimitry Andric // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 20580b57cec5SDimitry Andric if (DefSubReg) 20590b57cec5SDimitry Andric return ValueTrackerResult(); 20600b57cec5SDimitry Andric 20610b57cec5SDimitry Andric if (!TII) 20620b57cec5SDimitry Andric // We could handle the EXTRACT_SUBREG here, but we do not want to 20630b57cec5SDimitry Andric // duplicate the code from the generic TII. 20640b57cec5SDimitry Andric return ValueTrackerResult(); 20650b57cec5SDimitry Andric 20660b57cec5SDimitry Andric RegSubRegPairAndIdx ExtractSubregInputReg; 20670b57cec5SDimitry Andric if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 20680b57cec5SDimitry Andric return ValueTrackerResult(); 20690b57cec5SDimitry Andric 20700b57cec5SDimitry Andric // Bail if we have to compose sub registers. 20710b57cec5SDimitry Andric // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 20720b57cec5SDimitry Andric if (ExtractSubregInputReg.SubReg) 20730b57cec5SDimitry Andric return ValueTrackerResult(); 20740b57cec5SDimitry Andric // Otherwise, the value is available in the v0.sub0. 20750b57cec5SDimitry Andric return ValueTrackerResult(ExtractSubregInputReg.Reg, 20760b57cec5SDimitry Andric ExtractSubregInputReg.SubIdx); 20770b57cec5SDimitry Andric } 20780b57cec5SDimitry Andric 20790b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { 20800b57cec5SDimitry Andric assert(Def->isSubregToReg() && "Invalid definition"); 20810b57cec5SDimitry Andric // We are looking at: 20820b57cec5SDimitry Andric // Def = SUBREG_TO_REG Imm, v0, sub0 20830b57cec5SDimitry Andric 20840b57cec5SDimitry Andric // Bail if we have to compose sub registers. 20850b57cec5SDimitry Andric // If DefSubReg != sub0, we would have to check that all the bits 20860b57cec5SDimitry Andric // we track are included in sub0 and if yes, we would have to 20870b57cec5SDimitry Andric // determine the right subreg in v0. 20880b57cec5SDimitry Andric if (DefSubReg != Def->getOperand(3).getImm()) 20890b57cec5SDimitry Andric return ValueTrackerResult(); 20900b57cec5SDimitry Andric // Bail if we have to compose sub registers. 20910b57cec5SDimitry Andric // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 20920b57cec5SDimitry Andric if (Def->getOperand(2).getSubReg()) 20930b57cec5SDimitry Andric return ValueTrackerResult(); 20940b57cec5SDimitry Andric 20950b57cec5SDimitry Andric return ValueTrackerResult(Def->getOperand(2).getReg(), 20960b57cec5SDimitry Andric Def->getOperand(3).getImm()); 20970b57cec5SDimitry Andric } 20980b57cec5SDimitry Andric 20990b57cec5SDimitry Andric /// Explore each PHI incoming operand and return its sources. 21000b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceFromPHI() { 21010b57cec5SDimitry Andric assert(Def->isPHI() && "Invalid definition"); 21020b57cec5SDimitry Andric ValueTrackerResult Res; 21030b57cec5SDimitry Andric 21040b57cec5SDimitry Andric // If we look for a different subreg, bail as we do not support composing 21050b57cec5SDimitry Andric // subregs yet. 21060b57cec5SDimitry Andric if (Def->getOperand(0).getSubReg() != DefSubReg) 21070b57cec5SDimitry Andric return ValueTrackerResult(); 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric // Return all register sources for PHI instructions. 21100b57cec5SDimitry Andric for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { 21110b57cec5SDimitry Andric const MachineOperand &MO = Def->getOperand(i); 21120b57cec5SDimitry Andric assert(MO.isReg() && "Invalid PHI instruction"); 21130b57cec5SDimitry Andric // We have no code to deal with undef operands. They shouldn't happen in 21140b57cec5SDimitry Andric // normal programs anyway. 21150b57cec5SDimitry Andric if (MO.isUndef()) 21160b57cec5SDimitry Andric return ValueTrackerResult(); 21170b57cec5SDimitry Andric Res.addSource(MO.getReg(), MO.getSubReg()); 21180b57cec5SDimitry Andric } 21190b57cec5SDimitry Andric 21200b57cec5SDimitry Andric return Res; 21210b57cec5SDimitry Andric } 21220b57cec5SDimitry Andric 21230b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSourceImpl() { 21240b57cec5SDimitry Andric assert(Def && "This method needs a valid definition"); 21250b57cec5SDimitry Andric 21260b57cec5SDimitry Andric assert(((Def->getOperand(DefIdx).isDef() && 21270b57cec5SDimitry Andric (DefIdx < Def->getDesc().getNumDefs() || 21280b57cec5SDimitry Andric Def->getDesc().isVariadic())) || 21290b57cec5SDimitry Andric Def->getOperand(DefIdx).isImplicit()) && 21300b57cec5SDimitry Andric "Invalid DefIdx"); 21310b57cec5SDimitry Andric if (Def->isCopy()) 21320b57cec5SDimitry Andric return getNextSourceFromCopy(); 21330b57cec5SDimitry Andric if (Def->isBitcast()) 21340b57cec5SDimitry Andric return getNextSourceFromBitcast(); 21350b57cec5SDimitry Andric // All the remaining cases involve "complex" instructions. 21360b57cec5SDimitry Andric // Bail if we did not ask for the advanced tracking. 21370b57cec5SDimitry Andric if (DisableAdvCopyOpt) 21380b57cec5SDimitry Andric return ValueTrackerResult(); 21390b57cec5SDimitry Andric if (Def->isRegSequence() || Def->isRegSequenceLike()) 21400b57cec5SDimitry Andric return getNextSourceFromRegSequence(); 21410b57cec5SDimitry Andric if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 21420b57cec5SDimitry Andric return getNextSourceFromInsertSubreg(); 21430b57cec5SDimitry Andric if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 21440b57cec5SDimitry Andric return getNextSourceFromExtractSubreg(); 21450b57cec5SDimitry Andric if (Def->isSubregToReg()) 21460b57cec5SDimitry Andric return getNextSourceFromSubregToReg(); 21470b57cec5SDimitry Andric if (Def->isPHI()) 21480b57cec5SDimitry Andric return getNextSourceFromPHI(); 21490b57cec5SDimitry Andric return ValueTrackerResult(); 21500b57cec5SDimitry Andric } 21510b57cec5SDimitry Andric 21520b57cec5SDimitry Andric ValueTrackerResult ValueTracker::getNextSource() { 21530b57cec5SDimitry Andric // If we reach a point where we cannot move up in the use-def chain, 21540b57cec5SDimitry Andric // there is nothing we can get. 21550b57cec5SDimitry Andric if (!Def) 21560b57cec5SDimitry Andric return ValueTrackerResult(); 21570b57cec5SDimitry Andric 21580b57cec5SDimitry Andric ValueTrackerResult Res = getNextSourceImpl(); 21590b57cec5SDimitry Andric if (Res.isValid()) { 21600b57cec5SDimitry Andric // Update definition, definition index, and subregister for the 21610b57cec5SDimitry Andric // next call of getNextSource. 21620b57cec5SDimitry Andric // Update the current register. 21630b57cec5SDimitry Andric bool OneRegSrc = Res.getNumSources() == 1; 21640b57cec5SDimitry Andric if (OneRegSrc) 21650b57cec5SDimitry Andric Reg = Res.getSrcReg(0); 21660b57cec5SDimitry Andric // Update the result before moving up in the use-def chain 21670b57cec5SDimitry Andric // with the instruction containing the last found sources. 21680b57cec5SDimitry Andric Res.setInst(Def); 21690b57cec5SDimitry Andric 21700b57cec5SDimitry Andric // If we can still move up in the use-def chain, move to the next 21710b57cec5SDimitry Andric // definition. 2172bdd1243dSDimitry Andric if (!Reg.isPhysical() && OneRegSrc) { 21730b57cec5SDimitry Andric MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); 21740b57cec5SDimitry Andric if (DI != MRI.def_end()) { 21750b57cec5SDimitry Andric Def = DI->getParent(); 21760b57cec5SDimitry Andric DefIdx = DI.getOperandNo(); 21770b57cec5SDimitry Andric DefSubReg = Res.getSrcSubReg(0); 21780b57cec5SDimitry Andric } else { 21790b57cec5SDimitry Andric Def = nullptr; 21800b57cec5SDimitry Andric } 21810b57cec5SDimitry Andric return Res; 21820b57cec5SDimitry Andric } 21830b57cec5SDimitry Andric } 21840b57cec5SDimitry Andric // If we end up here, this means we will not be able to find another source 21850b57cec5SDimitry Andric // for the next iteration. Make sure any new call to getNextSource bails out 21860b57cec5SDimitry Andric // early by cutting the use-def chain. 21870b57cec5SDimitry Andric Def = nullptr; 21880b57cec5SDimitry Andric return Res; 21890b57cec5SDimitry Andric } 2190