10b57cec5SDimitry Andric //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // The machine combiner pass uses machine trace metrics to ensure the combined 100b57cec5SDimitry Andric // instructions do not lengthen the critical path or the resource depth. 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 140b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 15480093f4SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 16480093f4SDimitry Andric #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" 17bdd1243dSDimitry Andric #include "llvm/CodeGen/MachineCombinerPattern.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 23480093f4SDimitry Andric #include "llvm/CodeGen/MachineSizeOpts.h" 240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineTraceMetrics.h" 25e8d8bef9SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h" 260b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 270b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h" 290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 30480093f4SDimitry Andric #include "llvm/InitializePasses.h" 310b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 320b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 330b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric using namespace llvm; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric #define DEBUG_TYPE "machine-combiner" 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric STATISTIC(NumInstCombined, "Number of machineinst combined"); 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric static cl::opt<unsigned> 420b57cec5SDimitry Andric inc_threshold("machine-combiner-inc-threshold", cl::Hidden, 430b57cec5SDimitry Andric cl::desc("Incremental depth computation will be used for basic " 440b57cec5SDimitry Andric "blocks with more instructions."), cl::init(500)); 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, 470b57cec5SDimitry Andric cl::desc("Dump all substituted intrs"), 480b57cec5SDimitry Andric cl::init(false)); 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS 510b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder( 520b57cec5SDimitry Andric "machine-combiner-verify-pattern-order", cl::Hidden, 530b57cec5SDimitry Andric cl::desc( 540b57cec5SDimitry Andric "Verify that the generated patterns are ordered by increasing latency"), 550b57cec5SDimitry Andric cl::init(true)); 560b57cec5SDimitry Andric #else 570b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder( 580b57cec5SDimitry Andric "machine-combiner-verify-pattern-order", cl::Hidden, 590b57cec5SDimitry Andric cl::desc( 600b57cec5SDimitry Andric "Verify that the generated patterns are ordered by increasing latency"), 610b57cec5SDimitry Andric cl::init(false)); 620b57cec5SDimitry Andric #endif 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric namespace { 650b57cec5SDimitry Andric class MachineCombiner : public MachineFunctionPass { 6606c3fb27SDimitry Andric const TargetSubtargetInfo *STI = nullptr; 6706c3fb27SDimitry Andric const TargetInstrInfo *TII = nullptr; 6806c3fb27SDimitry Andric const TargetRegisterInfo *TRI = nullptr; 690b57cec5SDimitry Andric MCSchedModel SchedModel; 7006c3fb27SDimitry Andric MachineRegisterInfo *MRI = nullptr; 7106c3fb27SDimitry Andric MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo 7206c3fb27SDimitry Andric MachineTraceMetrics *Traces = nullptr; 7306c3fb27SDimitry Andric MachineTraceMetrics::Ensemble *TraceEnsemble = nullptr; 7406c3fb27SDimitry Andric MachineBlockFrequencyInfo *MBFI = nullptr; 7506c3fb27SDimitry Andric ProfileSummaryInfo *PSI = nullptr; 76e8d8bef9SDimitry Andric RegisterClassInfo RegClassInfo; 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric TargetSchedModel TSchedModel; 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric /// True if optimizing for code size. 8106c3fb27SDimitry Andric bool OptSize = false; 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric public: 840b57cec5SDimitry Andric static char ID; 850b57cec5SDimitry Andric MachineCombiner() : MachineFunctionPass(ID) { 860b57cec5SDimitry Andric initializeMachineCombinerPass(*PassRegistry::getPassRegistry()); 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 890b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 900b57cec5SDimitry Andric StringRef getPassName() const override { return "Machine InstCombiner"; } 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric private: 930b57cec5SDimitry Andric bool combineInstructions(MachineBasicBlock *); 940b57cec5SDimitry Andric MachineInstr *getOperandDef(const MachineOperand &MO); 95fcaf7f86SDimitry Andric bool isTransientMI(const MachineInstr *MI); 960b57cec5SDimitry Andric unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, 970b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 9806c3fb27SDimitry Andric MachineTraceMetrics::Trace BlockTrace, 9906c3fb27SDimitry Andric const MachineBasicBlock &MBB); 1000b57cec5SDimitry Andric unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, 1010b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace); 102*0fca6ea1SDimitry Andric bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, 1030b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace, 1040b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 1050b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs, 1060b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 107*0fca6ea1SDimitry Andric unsigned Pattern, bool SlackIsAccurate); 108e8d8bef9SDimitry Andric bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB, 109e8d8bef9SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 110e8d8bef9SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs, 111*0fca6ea1SDimitry Andric unsigned Pattern); 1120b57cec5SDimitry Andric bool preservesResourceLen(MachineBasicBlock *MBB, 1130b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace, 1140b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 1150b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs); 1160b57cec5SDimitry Andric void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs, 1170b57cec5SDimitry Andric SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC); 1180b57cec5SDimitry Andric std::pair<unsigned, unsigned> 1190b57cec5SDimitry Andric getLatenciesForInstrSequences(MachineInstr &MI, 1200b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 1210b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs, 1220b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace); 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root, 125*0fca6ea1SDimitry Andric SmallVector<unsigned, 16> &Patterns); 126*0fca6ea1SDimitry Andric CombinerObjective getCombinerObjective(unsigned Pattern); 1270b57cec5SDimitry Andric }; 1280b57cec5SDimitry Andric } 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric char MachineCombiner::ID = 0; 1310b57cec5SDimitry Andric char &llvm::MachineCombinerID = MachineCombiner::ID; 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, 1340b57cec5SDimitry Andric "Machine InstCombiner", false, false) 135*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 1360b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) 1370b57cec5SDimitry Andric INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", 1380b57cec5SDimitry Andric false, false) 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 1410b57cec5SDimitry Andric AU.setPreservesCFG(); 142*0fca6ea1SDimitry Andric AU.addPreserved<MachineDominatorTreeWrapperPass>(); 143*0fca6ea1SDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>(); 144*0fca6ea1SDimitry Andric AU.addPreserved<MachineLoopInfoWrapperPass>(); 1450b57cec5SDimitry Andric AU.addRequired<MachineTraceMetrics>(); 1460b57cec5SDimitry Andric AU.addPreserved<MachineTraceMetrics>(); 147480093f4SDimitry Andric AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); 148480093f4SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>(); 1490b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric 15206c3fb27SDimitry Andric MachineInstr * 15306c3fb27SDimitry Andric MachineCombiner::getOperandDef(const MachineOperand &MO) { 1540b57cec5SDimitry Andric MachineInstr *DefInstr = nullptr; 1550b57cec5SDimitry Andric // We need a virtual register definition. 156bdd1243dSDimitry Andric if (MO.isReg() && MO.getReg().isVirtual()) 1570b57cec5SDimitry Andric DefInstr = MRI->getUniqueVRegDef(MO.getReg()); 1580b57cec5SDimitry Andric return DefInstr; 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 161fcaf7f86SDimitry Andric /// Return true if MI is unlikely to generate an actual target instruction. 162fcaf7f86SDimitry Andric bool MachineCombiner::isTransientMI(const MachineInstr *MI) { 163fcaf7f86SDimitry Andric if (!MI->isCopy()) 164fcaf7f86SDimitry Andric return MI->isTransient(); 165fcaf7f86SDimitry Andric 166fcaf7f86SDimitry Andric // If MI is a COPY, check if its src and dst registers can be coalesced. 167fcaf7f86SDimitry Andric Register Dst = MI->getOperand(0).getReg(); 168fcaf7f86SDimitry Andric Register Src = MI->getOperand(1).getReg(); 169fcaf7f86SDimitry Andric 170fcaf7f86SDimitry Andric if (!MI->isFullCopy()) { 171fcaf7f86SDimitry Andric // If src RC contains super registers of dst RC, it can also be coalesced. 172fcaf7f86SDimitry Andric if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical()) 173fcaf7f86SDimitry Andric return false; 174fcaf7f86SDimitry Andric 175fcaf7f86SDimitry Andric auto SrcSub = MI->getOperand(1).getSubReg(); 176fcaf7f86SDimitry Andric auto SrcRC = MRI->getRegClass(Src); 177fcaf7f86SDimitry Andric auto DstRC = MRI->getRegClass(Dst); 178fcaf7f86SDimitry Andric return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr; 179fcaf7f86SDimitry Andric } 180fcaf7f86SDimitry Andric 181fcaf7f86SDimitry Andric if (Src.isPhysical() && Dst.isPhysical()) 182fcaf7f86SDimitry Andric return Src == Dst; 183fcaf7f86SDimitry Andric 184fcaf7f86SDimitry Andric if (Src.isVirtual() && Dst.isVirtual()) { 185fcaf7f86SDimitry Andric auto SrcRC = MRI->getRegClass(Src); 186fcaf7f86SDimitry Andric auto DstRC = MRI->getRegClass(Dst); 187fcaf7f86SDimitry Andric return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC); 188fcaf7f86SDimitry Andric } 189fcaf7f86SDimitry Andric 190fcaf7f86SDimitry Andric if (Src.isVirtual()) 191fcaf7f86SDimitry Andric std::swap(Src, Dst); 192fcaf7f86SDimitry Andric 193fcaf7f86SDimitry Andric // Now Src is physical register, Dst is virtual register. 194fcaf7f86SDimitry Andric auto DstRC = MRI->getRegClass(Dst); 195fcaf7f86SDimitry Andric return DstRC->contains(Src); 196fcaf7f86SDimitry Andric } 197fcaf7f86SDimitry Andric 1980b57cec5SDimitry Andric /// Computes depth of instructions in vector \InsInstr. 1990b57cec5SDimitry Andric /// 2000b57cec5SDimitry Andric /// \param InsInstrs is a vector of machine instructions 2010b57cec5SDimitry Andric /// \param InstrIdxForVirtReg is a dense map of virtual register to index 2020b57cec5SDimitry Andric /// of defining machine instruction in \p InsInstrs 2030b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions 2040b57cec5SDimitry Andric /// 2050b57cec5SDimitry Andric /// \returns Depth of last instruction in \InsInstrs ("NewRoot") 2060b57cec5SDimitry Andric unsigned 2070b57cec5SDimitry Andric MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, 2080b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, 20906c3fb27SDimitry Andric MachineTraceMetrics::Trace BlockTrace, 21006c3fb27SDimitry Andric const MachineBasicBlock &MBB) { 2110b57cec5SDimitry Andric SmallVector<unsigned, 16> InstrDepth; 2120b57cec5SDimitry Andric // For each instruction in the new sequence compute the depth based on the 2130b57cec5SDimitry Andric // operands. Use the trace information when possible. For new operands which 2140b57cec5SDimitry Andric // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth 2150b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs) { // for each Use 2160b57cec5SDimitry Andric unsigned IDepth = 0; 21706c3fb27SDimitry Andric for (const MachineOperand &MO : InstrPtr->all_uses()) { 2180b57cec5SDimitry Andric // Check for virtual register operand. 21906c3fb27SDimitry Andric if (!MO.getReg().isVirtual()) 2200b57cec5SDimitry Andric continue; 2210b57cec5SDimitry Andric unsigned DepthOp = 0; 2220b57cec5SDimitry Andric unsigned LatencyOp = 0; 2230b57cec5SDimitry Andric DenseMap<unsigned, unsigned>::iterator II = 2240b57cec5SDimitry Andric InstrIdxForVirtReg.find(MO.getReg()); 2250b57cec5SDimitry Andric if (II != InstrIdxForVirtReg.end()) { 2260b57cec5SDimitry Andric // Operand is new virtual register not in trace 2270b57cec5SDimitry Andric assert(II->second < InstrDepth.size() && "Bad Index"); 2280b57cec5SDimitry Andric MachineInstr *DefInstr = InsInstrs[II->second]; 2290b57cec5SDimitry Andric assert(DefInstr && 2300b57cec5SDimitry Andric "There must be a definition for a new virtual register"); 2310b57cec5SDimitry Andric DepthOp = InstrDepth[II->second]; 232*0fca6ea1SDimitry Andric int DefIdx = 233*0fca6ea1SDimitry Andric DefInstr->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr); 234*0fca6ea1SDimitry Andric int UseIdx = 235*0fca6ea1SDimitry Andric InstrPtr->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr); 2360b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx, 2370b57cec5SDimitry Andric InstrPtr, UseIdx); 2380b57cec5SDimitry Andric } else { 2390b57cec5SDimitry Andric MachineInstr *DefInstr = getOperandDef(MO); 24006c3fb27SDimitry Andric if (DefInstr && (TII->getMachineCombinerTraceStrategy() != 24106c3fb27SDimitry Andric MachineTraceStrategy::TS_Local || 24206c3fb27SDimitry Andric DefInstr->getParent() == &MBB)) { 2430b57cec5SDimitry Andric DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth; 244fcaf7f86SDimitry Andric if (!isTransientMI(DefInstr)) 2450b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency( 246*0fca6ea1SDimitry Andric DefInstr, 247*0fca6ea1SDimitry Andric DefInstr->findRegisterDefOperandIdx(MO.getReg(), 248*0fca6ea1SDimitry Andric /*TRI=*/nullptr), 249*0fca6ea1SDimitry Andric InstrPtr, 250*0fca6ea1SDimitry Andric InstrPtr->findRegisterUseOperandIdx(MO.getReg(), 251*0fca6ea1SDimitry Andric /*TRI=*/nullptr)); 2520b57cec5SDimitry Andric } 2530b57cec5SDimitry Andric } 2540b57cec5SDimitry Andric IDepth = std::max(IDepth, DepthOp + LatencyOp); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric InstrDepth.push_back(IDepth); 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric unsigned NewRootIdx = InsInstrs.size() - 1; 2590b57cec5SDimitry Andric return InstrDepth[NewRootIdx]; 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric /// Computes instruction latency as max of latency of defined operands. 2630b57cec5SDimitry Andric /// 2640b57cec5SDimitry Andric /// \param Root is a machine instruction that could be replaced by NewRoot. 2650b57cec5SDimitry Andric /// It is used to compute a more accurate latency information for NewRoot in 2660b57cec5SDimitry Andric /// case there is a dependent instruction in the same trace (\p BlockTrace) 2670b57cec5SDimitry Andric /// \param NewRoot is the instruction for which the latency is computed 2680b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions 2690b57cec5SDimitry Andric /// 2700b57cec5SDimitry Andric /// \returns Latency of \p NewRoot 2710b57cec5SDimitry Andric unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, 2720b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace) { 2730b57cec5SDimitry Andric // Check each definition in NewRoot and compute the latency 2740b57cec5SDimitry Andric unsigned NewRootLatency = 0; 2750b57cec5SDimitry Andric 27606c3fb27SDimitry Andric for (const MachineOperand &MO : NewRoot->all_defs()) { 2770b57cec5SDimitry Andric // Check for virtual register operand. 27806c3fb27SDimitry Andric if (!MO.getReg().isVirtual()) 2790b57cec5SDimitry Andric continue; 2800b57cec5SDimitry Andric // Get the first instruction that uses MO 2810b57cec5SDimitry Andric MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); 2820b57cec5SDimitry Andric RI++; 2830b57cec5SDimitry Andric if (RI == MRI->reg_end()) 2840b57cec5SDimitry Andric continue; 2850b57cec5SDimitry Andric MachineInstr *UseMO = RI->getParent(); 2860b57cec5SDimitry Andric unsigned LatencyOp = 0; 2870b57cec5SDimitry Andric if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) { 2880b57cec5SDimitry Andric LatencyOp = TSchedModel.computeOperandLatency( 289*0fca6ea1SDimitry Andric NewRoot, 290*0fca6ea1SDimitry Andric NewRoot->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr), 291*0fca6ea1SDimitry Andric UseMO, 292*0fca6ea1SDimitry Andric UseMO->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr)); 2930b57cec5SDimitry Andric } else { 2940b57cec5SDimitry Andric LatencyOp = TSchedModel.computeInstrLatency(NewRoot); 2950b57cec5SDimitry Andric } 2960b57cec5SDimitry Andric NewRootLatency = std::max(NewRootLatency, LatencyOp); 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric return NewRootLatency; 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric 301*0fca6ea1SDimitry Andric CombinerObjective MachineCombiner::getCombinerObjective(unsigned Pattern) { 3020b57cec5SDimitry Andric // TODO: If C++ ever gets a real enum class, make this part of the 3030b57cec5SDimitry Andric // MachineCombinerPattern class. 304*0fca6ea1SDimitry Andric switch (Pattern) { 3050b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_AX_BY: 3060b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_AX_YB: 3070b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_XA_BY: 3080b57cec5SDimitry Andric case MachineCombinerPattern::REASSOC_XA_YB: 3090b57cec5SDimitry Andric return CombinerObjective::MustReduceDepth; 3100b57cec5SDimitry Andric default: 311*0fca6ea1SDimitry Andric return TII->getCombinerObjective(Pattern); 3120b57cec5SDimitry Andric } 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric /// Estimate the latency of the new and original instruction sequence by summing 3160b57cec5SDimitry Andric /// up the latencies of the inserted and deleted instructions. This assumes 3170b57cec5SDimitry Andric /// that the inserted and deleted instructions are dependent instruction chains, 3180b57cec5SDimitry Andric /// which might not hold in all cases. 3190b57cec5SDimitry Andric std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences( 3200b57cec5SDimitry Andric MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs, 3210b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs, 3220b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace) { 3230b57cec5SDimitry Andric assert(!InsInstrs.empty() && "Only support sequences that insert instrs."); 3240b57cec5SDimitry Andric unsigned NewRootLatency = 0; 3250b57cec5SDimitry Andric // NewRoot is the last instruction in the \p InsInstrs vector. 3260b57cec5SDimitry Andric MachineInstr *NewRoot = InsInstrs.back(); 3270b57cec5SDimitry Andric for (unsigned i = 0; i < InsInstrs.size() - 1; i++) 3280b57cec5SDimitry Andric NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]); 3290b57cec5SDimitry Andric NewRootLatency += getLatency(&MI, NewRoot, BlockTrace); 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric unsigned RootLatency = 0; 332fcaf7f86SDimitry Andric for (auto *I : DelInstrs) 3330b57cec5SDimitry Andric RootLatency += TSchedModel.computeInstrLatency(I); 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric return {NewRootLatency, RootLatency}; 3360b57cec5SDimitry Andric } 3370b57cec5SDimitry Andric 338e8d8bef9SDimitry Andric bool MachineCombiner::reduceRegisterPressure( 339e8d8bef9SDimitry Andric MachineInstr &Root, MachineBasicBlock *MBB, 340e8d8bef9SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 341*0fca6ea1SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs, unsigned Pattern) { 342e8d8bef9SDimitry Andric // FIXME: for now, we don't do any check for the register pressure patterns. 343e8d8bef9SDimitry Andric // We treat them as always profitable. But we can do better if we make 344e8d8bef9SDimitry Andric // RegPressureTracker class be aware of TIE attribute. Then we can get an 345e8d8bef9SDimitry Andric // accurate compare of register pressure with DelInstrs or InsInstrs. 346e8d8bef9SDimitry Andric return true; 347e8d8bef9SDimitry Andric } 348e8d8bef9SDimitry Andric 3490b57cec5SDimitry Andric /// The DAGCombine code sequence ends in MI (Machine Instruction) Root. 3500b57cec5SDimitry Andric /// The new code sequence ends in MI NewRoot. A necessary condition for the new 3510b57cec5SDimitry Andric /// sequence to replace the old sequence is that it cannot lengthen the critical 3520b57cec5SDimitry Andric /// path. The definition of "improve" may be restricted by specifying that the 3530b57cec5SDimitry Andric /// new path improves the data dependency chain (MustReduceDepth). 3540b57cec5SDimitry Andric bool MachineCombiner::improvesCriticalPathLen( 3550b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineInstr *Root, 3560b57cec5SDimitry Andric MachineTraceMetrics::Trace BlockTrace, 3570b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 3580b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs, 359*0fca6ea1SDimitry Andric DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned Pattern, 3600b57cec5SDimitry Andric bool SlackIsAccurate) { 3610b57cec5SDimitry Andric // Get depth and latency of NewRoot and Root. 36206c3fb27SDimitry Andric unsigned NewRootDepth = 36306c3fb27SDimitry Andric getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB); 3640b57cec5SDimitry Andric unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: " 3670b57cec5SDimitry Andric << NewRootDepth << "\tRootDepth: " << RootDepth); 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric // For a transform such as reassociation, the cost equation is 3700b57cec5SDimitry Andric // conservatively calculated so that we must improve the depth (data 3710b57cec5SDimitry Andric // dependency cycles) in the critical path to proceed with the transform. 3720b57cec5SDimitry Andric // Being conservative also protects against inaccuracies in the underlying 3730b57cec5SDimitry Andric // machine trace metrics and CPU models. 3740b57cec5SDimitry Andric if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) { 3750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth "); 3760b57cec5SDimitry Andric LLVM_DEBUG(NewRootDepth < RootDepth 3770b57cec5SDimitry Andric ? dbgs() << "\t and it does it\n" 3780b57cec5SDimitry Andric : dbgs() << "\t but it does NOT do it\n"); 3790b57cec5SDimitry Andric return NewRootDepth < RootDepth; 3800b57cec5SDimitry Andric } 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric // A more flexible cost calculation for the critical path includes the slack 3830b57cec5SDimitry Andric // of the original code sequence. This may allow the transform to proceed 3840b57cec5SDimitry Andric // even if the instruction depths (data dependency cycles) become worse. 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric // Account for the latency of the inserted and deleted instructions by 3870b57cec5SDimitry Andric unsigned NewRootLatency, RootLatency; 38806c3fb27SDimitry Andric if (TII->accumulateInstrSeqToRootLatency(*Root)) { 3890b57cec5SDimitry Andric std::tie(NewRootLatency, RootLatency) = 3900b57cec5SDimitry Andric getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace); 39106c3fb27SDimitry Andric } else { 39206c3fb27SDimitry Andric NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.back()); 39306c3fb27SDimitry Andric RootLatency = TSchedModel.computeInstrLatency(Root); 39406c3fb27SDimitry Andric } 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric unsigned RootSlack = BlockTrace.getInstrSlack(*Root); 3970b57cec5SDimitry Andric unsigned NewCycleCount = NewRootDepth + NewRootLatency; 3980b57cec5SDimitry Andric unsigned OldCycleCount = 3990b57cec5SDimitry Andric RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0); 4000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency 4010b57cec5SDimitry Andric << "\tRootLatency: " << RootLatency << "\n\tRootSlack: " 4020b57cec5SDimitry Andric << RootSlack << " SlackIsAccurate=" << SlackIsAccurate 4030b57cec5SDimitry Andric << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount 4040b57cec5SDimitry Andric << "\n\tRootDepth + RootLatency + RootSlack = " 4050b57cec5SDimitry Andric << OldCycleCount;); 4060b57cec5SDimitry Andric LLVM_DEBUG(NewCycleCount <= OldCycleCount 4070b57cec5SDimitry Andric ? dbgs() << "\n\t It IMPROVES PathLen because" 4080b57cec5SDimitry Andric : dbgs() << "\n\t It DOES NOT improve PathLen because"); 4090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount 4100b57cec5SDimitry Andric << ", OldCycleCount = " << OldCycleCount << "\n"); 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric return NewCycleCount <= OldCycleCount; 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric /// helper routine to convert instructions into SC 4160b57cec5SDimitry Andric void MachineCombiner::instr2instrSC( 4170b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &Instrs, 4180b57cec5SDimitry Andric SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) { 4190b57cec5SDimitry Andric for (auto *InstrPtr : Instrs) { 4200b57cec5SDimitry Andric unsigned Opc = InstrPtr->getOpcode(); 4210b57cec5SDimitry Andric unsigned Idx = TII->get(Opc).getSchedClass(); 4220b57cec5SDimitry Andric const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx); 4230b57cec5SDimitry Andric InstrsSC.push_back(SC); 4240b57cec5SDimitry Andric } 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric /// True when the new instructions do not increase resource length 4280b57cec5SDimitry Andric bool MachineCombiner::preservesResourceLen( 4290b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, 4300b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 4310b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs) { 4320b57cec5SDimitry Andric if (!TSchedModel.hasInstrSchedModel()) 4330b57cec5SDimitry Andric return true; 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric // Compute current resource length 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric //ArrayRef<const MachineBasicBlock *> MBBarr(MBB); 4380b57cec5SDimitry Andric SmallVector <const MachineBasicBlock *, 1> MBBarr; 4390b57cec5SDimitry Andric MBBarr.push_back(MBB); 4400b57cec5SDimitry Andric unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr); 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric // Deal with SC rather than Instructions. 4430b57cec5SDimitry Andric SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC; 4440b57cec5SDimitry Andric SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC; 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric instr2instrSC(InsInstrs, InsInstrsSC); 4470b57cec5SDimitry Andric instr2instrSC(DelInstrs, DelInstrsSC); 4480b57cec5SDimitry Andric 449bdd1243dSDimitry Andric ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC}; 450bdd1243dSDimitry Andric ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC}; 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric // Compute new resource length. 4530b57cec5SDimitry Andric unsigned ResLenAfterCombine = 4540b57cec5SDimitry Andric BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: " 4570b57cec5SDimitry Andric << ResLenBeforeCombine 4580b57cec5SDimitry Andric << " and after: " << ResLenAfterCombine << "\n";); 4590b57cec5SDimitry Andric LLVM_DEBUG( 4605ffd83dbSDimitry Andric ResLenAfterCombine <= 4615ffd83dbSDimitry Andric ResLenBeforeCombine + TII->getExtendResourceLenLimit() 4620b57cec5SDimitry Andric ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n" 4630b57cec5SDimitry Andric : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource " 4640b57cec5SDimitry Andric "Length\n"); 4650b57cec5SDimitry Andric 4665ffd83dbSDimitry Andric return ResLenAfterCombine <= 4675ffd83dbSDimitry Andric ResLenBeforeCombine + TII->getExtendResourceLenLimit(); 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction 4710b57cec5SDimitry Andric /// depths if requested. 4720b57cec5SDimitry Andric /// 4730b57cec5SDimitry Andric /// \param MBB basic block to insert instructions in 4740b57cec5SDimitry Andric /// \param MI current machine instruction 4750b57cec5SDimitry Andric /// \param InsInstrs new instructions to insert in \p MBB 4760b57cec5SDimitry Andric /// \param DelInstrs instruction to delete from \p MBB 47706c3fb27SDimitry Andric /// \param TraceEnsemble is a pointer to the machine trace information 4780b57cec5SDimitry Andric /// \param RegUnits set of live registers, needed to compute instruction depths 479e8d8bef9SDimitry Andric /// \param TII is target instruction info, used to call target hook 480e8d8bef9SDimitry Andric /// \param Pattern is used to call target hook finalizeInsInstrs 4810b57cec5SDimitry Andric /// \param IncrementalUpdate if true, compute instruction depths incrementally, 4820b57cec5SDimitry Andric /// otherwise invalidate the trace 483*0fca6ea1SDimitry Andric static void 484*0fca6ea1SDimitry Andric insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, 48506c3fb27SDimitry Andric SmallVectorImpl<MachineInstr *> &InsInstrs, 48606c3fb27SDimitry Andric SmallVectorImpl<MachineInstr *> &DelInstrs, 48706c3fb27SDimitry Andric MachineTraceMetrics::Ensemble *TraceEnsemble, 488*0fca6ea1SDimitry Andric SparseSet<LiveRegUnit> &RegUnits, 489*0fca6ea1SDimitry Andric const TargetInstrInfo *TII, unsigned Pattern, 490*0fca6ea1SDimitry Andric bool IncrementalUpdate) { 491e8d8bef9SDimitry Andric // If we want to fix up some placeholder for some target, do it now. 492e8d8bef9SDimitry Andric // We need this because in genAlternativeCodeSequence, we have not decided the 493e8d8bef9SDimitry Andric // better pattern InsInstrs or DelInstrs, so we don't want generate some 494e8d8bef9SDimitry Andric // sideeffect to the function. For example we need to delay the constant pool 495e8d8bef9SDimitry Andric // entry creation here after InsInstrs is selected as better pattern. 496e8d8bef9SDimitry Andric // Otherwise the constant pool entry created for InsInstrs will not be deleted 497e8d8bef9SDimitry Andric // even if InsInstrs is not the better pattern. 498e8d8bef9SDimitry Andric TII->finalizeInsInstrs(MI, Pattern, InsInstrs); 499e8d8bef9SDimitry Andric 5000b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs) 5010b57cec5SDimitry Andric MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr); 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric for (auto *InstrPtr : DelInstrs) { 5040eae32dcSDimitry Andric InstrPtr->eraseFromParent(); 5050b57cec5SDimitry Andric // Erase all LiveRegs defined by the removed instruction 506fcaf7f86SDimitry Andric for (auto *I = RegUnits.begin(); I != RegUnits.end();) { 5070b57cec5SDimitry Andric if (I->MI == InstrPtr) 5080b57cec5SDimitry Andric I = RegUnits.erase(I); 5090b57cec5SDimitry Andric else 5100b57cec5SDimitry Andric I++; 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric if (IncrementalUpdate) 5150b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs) 51606c3fb27SDimitry Andric TraceEnsemble->updateDepth(MBB, *InstrPtr, RegUnits); 5170b57cec5SDimitry Andric else 51806c3fb27SDimitry Andric TraceEnsemble->invalidate(MBB); 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric NumInstCombined++; 5210b57cec5SDimitry Andric } 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric // Check that the difference between original and new latency is decreasing for 5240b57cec5SDimitry Andric // later patterns. This helps to discover sub-optimal pattern orderings. 525*0fca6ea1SDimitry Andric void MachineCombiner::verifyPatternOrder(MachineBasicBlock *MBB, 526*0fca6ea1SDimitry Andric MachineInstr &Root, 527*0fca6ea1SDimitry Andric SmallVector<unsigned, 16> &Patterns) { 5280b57cec5SDimitry Andric long PrevLatencyDiff = std::numeric_limits<long>::max(); 5290b57cec5SDimitry Andric (void)PrevLatencyDiff; // Variable is used in assert only. 5300b57cec5SDimitry Andric for (auto P : Patterns) { 5310b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> InsInstrs; 5320b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> DelInstrs; 5330b57cec5SDimitry Andric DenseMap<unsigned, unsigned> InstrIdxForVirtReg; 5340b57cec5SDimitry Andric TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs, 5350b57cec5SDimitry Andric InstrIdxForVirtReg); 5360b57cec5SDimitry Andric // Found pattern, but did not generate alternative sequence. 5370b57cec5SDimitry Andric // This can happen e.g. when an immediate could not be materialized 5380b57cec5SDimitry Andric // in a single instruction. 5390b57cec5SDimitry Andric if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries()) 5400b57cec5SDimitry Andric continue; 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric unsigned NewRootLatency, RootLatency; 5430b57cec5SDimitry Andric std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences( 54406c3fb27SDimitry Andric Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(MBB)); 5450b57cec5SDimitry Andric long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency); 5460b57cec5SDimitry Andric assert(CurrentLatencyDiff <= PrevLatencyDiff && 5470b57cec5SDimitry Andric "Current pattern is better than previous pattern."); 5480b57cec5SDimitry Andric PrevLatencyDiff = CurrentLatencyDiff; 5490b57cec5SDimitry Andric } 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric /// Substitute a slow code sequence with a faster one by 5530b57cec5SDimitry Andric /// evaluating instruction combining pattern. 5540b57cec5SDimitry Andric /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction 5550b57cec5SDimitry Andric /// combining based on machine trace metrics. Only combine a sequence of 5560b57cec5SDimitry Andric /// instructions when this neither lengthens the critical path nor increases 5570b57cec5SDimitry Andric /// resource pressure. When optimizing for codesize always combine when the new 5580b57cec5SDimitry Andric /// sequence is shorter. 5590b57cec5SDimitry Andric bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { 5600b57cec5SDimitry Andric bool Changed = false; 5610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric bool IncrementalUpdate = false; 5640b57cec5SDimitry Andric auto BlockIter = MBB->begin(); 5650b57cec5SDimitry Andric decltype(BlockIter) LastUpdate; 5660b57cec5SDimitry Andric // Check if the block is in a loop. 5670b57cec5SDimitry Andric const MachineLoop *ML = MLI->getLoopFor(MBB); 56806c3fb27SDimitry Andric if (!TraceEnsemble) 56906c3fb27SDimitry Andric TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy()); 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric SparseSet<LiveRegUnit> RegUnits; 5720b57cec5SDimitry Andric RegUnits.setUniverse(TRI->getNumRegUnits()); 5730b57cec5SDimitry Andric 574480093f4SDimitry Andric bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI); 575480093f4SDimitry Andric 576e8d8bef9SDimitry Andric bool DoRegPressureReduce = 577e8d8bef9SDimitry Andric TII->shouldReduceRegisterPressure(MBB, &RegClassInfo); 578e8d8bef9SDimitry Andric 5790b57cec5SDimitry Andric while (BlockIter != MBB->end()) { 5800b57cec5SDimitry Andric auto &MI = *BlockIter++; 581*0fca6ea1SDimitry Andric SmallVector<unsigned, 16> Patterns; 5820b57cec5SDimitry Andric // The motivating example is: 5830b57cec5SDimitry Andric // 5840b57cec5SDimitry Andric // MUL Other MUL_op1 MUL_op2 Other 5850b57cec5SDimitry Andric // \ / \ | / 5860b57cec5SDimitry Andric // ADD/SUB => MADD/MSUB 5870b57cec5SDimitry Andric // (=Root) (=NewRoot) 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is 5900b57cec5SDimitry Andric // usually beneficial for code size it unfortunately can hurt performance 5910b57cec5SDimitry Andric // when the ADD is on the critical path, but the MUL is not. With the 5920b57cec5SDimitry Andric // substitution the MUL becomes part of the critical path (in form of the 5930b57cec5SDimitry Andric // MADD) and can lengthen it on architectures where the MADD latency is 5940b57cec5SDimitry Andric // longer than the ADD latency. 5950b57cec5SDimitry Andric // 5960b57cec5SDimitry Andric // For each instruction we check if it can be the root of a combiner 5970b57cec5SDimitry Andric // pattern. Then for each pattern the new code sequence in form of MI is 5980b57cec5SDimitry Andric // generated and evaluated. When the efficiency criteria (don't lengthen 5990b57cec5SDimitry Andric // critical path, don't use more resources) is met the new sequence gets 6000b57cec5SDimitry Andric // hooked up into the basic block before the old sequence is removed. 6010b57cec5SDimitry Andric // 6020b57cec5SDimitry Andric // The algorithm does not try to evaluate all patterns and pick the best. 6030b57cec5SDimitry Andric // This is only an artificial restriction though. In practice there is 6040b57cec5SDimitry Andric // mostly one pattern, and getMachineCombinerPatterns() can order patterns 6050b57cec5SDimitry Andric // based on an internal cost heuristic. If 6060b57cec5SDimitry Andric // machine-combiner-verify-pattern-order is enabled, all patterns are 6070b57cec5SDimitry Andric // checked to ensure later patterns do not provide better latency savings. 6080b57cec5SDimitry Andric 609e8d8bef9SDimitry Andric if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce)) 6100b57cec5SDimitry Andric continue; 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric if (VerifyPatternOrder) 6130b57cec5SDimitry Andric verifyPatternOrder(MBB, MI, Patterns); 6140b57cec5SDimitry Andric 615bdd1243dSDimitry Andric for (const auto P : Patterns) { 6160b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> InsInstrs; 6170b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> DelInstrs; 6180b57cec5SDimitry Andric DenseMap<unsigned, unsigned> InstrIdxForVirtReg; 6190b57cec5SDimitry Andric TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, 6200b57cec5SDimitry Andric InstrIdxForVirtReg); 6210b57cec5SDimitry Andric // Found pattern, but did not generate alternative sequence. 6220b57cec5SDimitry Andric // This can happen e.g. when an immediate could not be materialized 6230b57cec5SDimitry Andric // in a single instruction. 624bdd1243dSDimitry Andric if (InsInstrs.empty()) 6250b57cec5SDimitry Andric continue; 6260b57cec5SDimitry Andric 6270b57cec5SDimitry Andric LLVM_DEBUG(if (dump_intrs) { 6280b57cec5SDimitry Andric dbgs() << "\tFor the Pattern (" << (int)P 6290b57cec5SDimitry Andric << ") these instructions could be removed\n"; 6300b57cec5SDimitry Andric for (auto const *InstrPtr : DelInstrs) 6310b57cec5SDimitry Andric InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false, 6320b57cec5SDimitry Andric /*SkipDebugLoc*/false, /*AddNewLine*/true, TII); 6330b57cec5SDimitry Andric dbgs() << "\tThese instructions could replace the removed ones\n"; 6340b57cec5SDimitry Andric for (auto const *InstrPtr : InsInstrs) 6350b57cec5SDimitry Andric InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false, 6360b57cec5SDimitry Andric /*SkipDebugLoc*/false, /*AddNewLine*/true, TII); 6370b57cec5SDimitry Andric }); 6380b57cec5SDimitry Andric 639e8d8bef9SDimitry Andric if (IncrementalUpdate && LastUpdate != BlockIter) { 6400b57cec5SDimitry Andric // Update depths since the last incremental update. 64106c3fb27SDimitry Andric TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits); 6420b57cec5SDimitry Andric LastUpdate = BlockIter; 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric 645e8d8bef9SDimitry Andric if (DoRegPressureReduce && 646e8d8bef9SDimitry Andric getCombinerObjective(P) == 647e8d8bef9SDimitry Andric CombinerObjective::MustReduceRegisterPressure) { 648e8d8bef9SDimitry Andric if (MBB->size() > inc_threshold) { 649e8d8bef9SDimitry Andric // Use incremental depth updates for basic blocks above threshold 650e8d8bef9SDimitry Andric IncrementalUpdate = true; 651e8d8bef9SDimitry Andric LastUpdate = BlockIter; 652e8d8bef9SDimitry Andric } 653e8d8bef9SDimitry Andric if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) { 654e8d8bef9SDimitry Andric // Replace DelInstrs with InsInstrs. 65506c3fb27SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble, 656e8d8bef9SDimitry Andric RegUnits, TII, P, IncrementalUpdate); 657e8d8bef9SDimitry Andric Changed |= true; 658e8d8bef9SDimitry Andric 659e8d8bef9SDimitry Andric // Go back to previous instruction as it may have ILP reassociation 660e8d8bef9SDimitry Andric // opportunity. 661e8d8bef9SDimitry Andric BlockIter--; 662e8d8bef9SDimitry Andric break; 663e8d8bef9SDimitry Andric } 664e8d8bef9SDimitry Andric } 665e8d8bef9SDimitry Andric 666bdd1243dSDimitry Andric if (ML && TII->isThroughputPattern(P)) { 667bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n"); 66806c3fb27SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble, 669bdd1243dSDimitry Andric RegUnits, TII, P, IncrementalUpdate); 670bdd1243dSDimitry Andric // Eagerly stop after the first pattern fires. 671bdd1243dSDimitry Andric Changed = true; 672bdd1243dSDimitry Andric break; 673bdd1243dSDimitry Andric } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) { 674bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize (" 675bdd1243dSDimitry Andric << InsInstrs.size() << " < " 676bdd1243dSDimitry Andric << DelInstrs.size() << ")\n"); 67706c3fb27SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble, 678e8d8bef9SDimitry Andric RegUnits, TII, P, IncrementalUpdate); 6790b57cec5SDimitry Andric // Eagerly stop after the first pattern fires. 6800b57cec5SDimitry Andric Changed = true; 6810b57cec5SDimitry Andric break; 6820b57cec5SDimitry Andric } else { 6830b57cec5SDimitry Andric // For big basic blocks, we only compute the full trace the first time 6840b57cec5SDimitry Andric // we hit this. We do not invalidate the trace, but instead update the 6850b57cec5SDimitry Andric // instruction depths incrementally. 6860b57cec5SDimitry Andric // NOTE: Only the instruction depths up to MI are accurate. All other 6870b57cec5SDimitry Andric // trace information is not updated. 68806c3fb27SDimitry Andric MachineTraceMetrics::Trace BlockTrace = TraceEnsemble->getTrace(MBB); 6890b57cec5SDimitry Andric Traces->verifyAnalysis(); 6900b57cec5SDimitry Andric if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs, 6910b57cec5SDimitry Andric InstrIdxForVirtReg, P, 6920b57cec5SDimitry Andric !IncrementalUpdate) && 6930b57cec5SDimitry Andric preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) { 6940b57cec5SDimitry Andric if (MBB->size() > inc_threshold) { 6950b57cec5SDimitry Andric // Use incremental depth updates for basic blocks above treshold 6960b57cec5SDimitry Andric IncrementalUpdate = true; 6970b57cec5SDimitry Andric LastUpdate = BlockIter; 6980b57cec5SDimitry Andric } 6990b57cec5SDimitry Andric 70006c3fb27SDimitry Andric insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble, 701e8d8bef9SDimitry Andric RegUnits, TII, P, IncrementalUpdate); 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric // Eagerly stop after the first pattern fires. 7040b57cec5SDimitry Andric Changed = true; 7050b57cec5SDimitry Andric break; 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric // Cleanup instructions of the alternative code sequence. There is no 7080b57cec5SDimitry Andric // use for them. 7090b57cec5SDimitry Andric MachineFunction *MF = MBB->getParent(); 7100b57cec5SDimitry Andric for (auto *InstrPtr : InsInstrs) 7110eae32dcSDimitry Andric MF->deleteMachineInstr(InstrPtr); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric InstrIdxForVirtReg.clear(); 7140b57cec5SDimitry Andric } 7150b57cec5SDimitry Andric } 7160b57cec5SDimitry Andric 7170b57cec5SDimitry Andric if (Changed && IncrementalUpdate) 7180b57cec5SDimitry Andric Traces->invalidate(MBB); 7190b57cec5SDimitry Andric return Changed; 7200b57cec5SDimitry Andric } 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { 7230b57cec5SDimitry Andric STI = &MF.getSubtarget(); 7240b57cec5SDimitry Andric TII = STI->getInstrInfo(); 7250b57cec5SDimitry Andric TRI = STI->getRegisterInfo(); 7260b57cec5SDimitry Andric SchedModel = STI->getSchedModel(); 7270b57cec5SDimitry Andric TSchedModel.init(STI); 7280b57cec5SDimitry Andric MRI = &MF.getRegInfo(); 729*0fca6ea1SDimitry Andric MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 7300b57cec5SDimitry Andric Traces = &getAnalysis<MachineTraceMetrics>(); 731480093f4SDimitry Andric PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 732480093f4SDimitry Andric MBFI = (PSI && PSI->hasProfileSummary()) ? 733480093f4SDimitry Andric &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : 734480093f4SDimitry Andric nullptr; 73506c3fb27SDimitry Andric TraceEnsemble = nullptr; 7360b57cec5SDimitry Andric OptSize = MF.getFunction().hasOptSize(); 737e8d8bef9SDimitry Andric RegClassInfo.runOnMachineFunction(MF); 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); 7400b57cec5SDimitry Andric if (!TII->useMachineCombiner()) { 7410b57cec5SDimitry Andric LLVM_DEBUG( 7420b57cec5SDimitry Andric dbgs() 7430b57cec5SDimitry Andric << " Skipping pass: Target does not support machine combiner\n"); 7440b57cec5SDimitry Andric return false; 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric bool Changed = false; 7480b57cec5SDimitry Andric 7490b57cec5SDimitry Andric // Try to combine instructions. 7500b57cec5SDimitry Andric for (auto &MBB : MF) 7510b57cec5SDimitry Andric Changed |= combineInstructions(&MBB); 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric return Changed; 7540b57cec5SDimitry Andric } 755