xref: /openbsd-src/gnu/llvm/llvm/lib/CodeGen/MachineCombiner.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // The machine combiner pass uses machine trace metrics to ensure the combined
1009467b48Spatrick // instructions do not lengthen the critical path or the resource depth.
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick 
1309467b48Spatrick #include "llvm/ADT/DenseMap.h"
1409467b48Spatrick #include "llvm/ADT/Statistic.h"
1509467b48Spatrick #include "llvm/Analysis/ProfileSummaryInfo.h"
1609467b48Spatrick #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
17*d415bd75Srobert #include "llvm/CodeGen/MachineCombinerPattern.h"
1809467b48Spatrick #include "llvm/CodeGen/MachineDominators.h"
1909467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
2009467b48Spatrick #include "llvm/CodeGen/MachineFunctionPass.h"
2109467b48Spatrick #include "llvm/CodeGen/MachineLoopInfo.h"
2209467b48Spatrick #include "llvm/CodeGen/MachineRegisterInfo.h"
2309467b48Spatrick #include "llvm/CodeGen/MachineSizeOpts.h"
2409467b48Spatrick #include "llvm/CodeGen/MachineTraceMetrics.h"
2573471bf0Spatrick #include "llvm/CodeGen/RegisterClassInfo.h"
2609467b48Spatrick #include "llvm/CodeGen/TargetInstrInfo.h"
2709467b48Spatrick #include "llvm/CodeGen/TargetRegisterInfo.h"
2809467b48Spatrick #include "llvm/CodeGen/TargetSchedule.h"
2909467b48Spatrick #include "llvm/CodeGen/TargetSubtargetInfo.h"
3009467b48Spatrick #include "llvm/InitializePasses.h"
3109467b48Spatrick #include "llvm/Support/CommandLine.h"
3209467b48Spatrick #include "llvm/Support/Debug.h"
3309467b48Spatrick #include "llvm/Support/raw_ostream.h"
3409467b48Spatrick 
3509467b48Spatrick using namespace llvm;
3609467b48Spatrick 
3709467b48Spatrick #define DEBUG_TYPE "machine-combiner"
3809467b48Spatrick 
3909467b48Spatrick STATISTIC(NumInstCombined, "Number of machineinst combined");
4009467b48Spatrick 
4109467b48Spatrick static cl::opt<unsigned>
4209467b48Spatrick inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
4309467b48Spatrick               cl::desc("Incremental depth computation will be used for basic "
4409467b48Spatrick                        "blocks with more instructions."), cl::init(500));
4509467b48Spatrick 
4609467b48Spatrick static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
4709467b48Spatrick                                 cl::desc("Dump all substituted intrs"),
4809467b48Spatrick                                 cl::init(false));
4909467b48Spatrick 
5009467b48Spatrick #ifdef EXPENSIVE_CHECKS
5109467b48Spatrick static cl::opt<bool> VerifyPatternOrder(
5209467b48Spatrick     "machine-combiner-verify-pattern-order", cl::Hidden,
5309467b48Spatrick     cl::desc(
5409467b48Spatrick         "Verify that the generated patterns are ordered by increasing latency"),
5509467b48Spatrick     cl::init(true));
5609467b48Spatrick #else
5709467b48Spatrick static cl::opt<bool> VerifyPatternOrder(
5809467b48Spatrick     "machine-combiner-verify-pattern-order", cl::Hidden,
5909467b48Spatrick     cl::desc(
6009467b48Spatrick         "Verify that the generated patterns are ordered by increasing latency"),
6109467b48Spatrick     cl::init(false));
6209467b48Spatrick #endif
6309467b48Spatrick 
6409467b48Spatrick namespace {
6509467b48Spatrick class MachineCombiner : public MachineFunctionPass {
6609467b48Spatrick   const TargetSubtargetInfo *STI;
6709467b48Spatrick   const TargetInstrInfo *TII;
6809467b48Spatrick   const TargetRegisterInfo *TRI;
6909467b48Spatrick   MCSchedModel SchedModel;
7009467b48Spatrick   MachineRegisterInfo *MRI;
7109467b48Spatrick   MachineLoopInfo *MLI; // Current MachineLoopInfo
7209467b48Spatrick   MachineTraceMetrics *Traces;
7309467b48Spatrick   MachineTraceMetrics::Ensemble *MinInstr;
7409467b48Spatrick   MachineBlockFrequencyInfo *MBFI;
7509467b48Spatrick   ProfileSummaryInfo *PSI;
7673471bf0Spatrick   RegisterClassInfo RegClassInfo;
7709467b48Spatrick 
7809467b48Spatrick   TargetSchedModel TSchedModel;
7909467b48Spatrick 
8009467b48Spatrick   /// True if optimizing for code size.
8109467b48Spatrick   bool OptSize;
8209467b48Spatrick 
8309467b48Spatrick public:
8409467b48Spatrick   static char ID;
MachineCombiner()8509467b48Spatrick   MachineCombiner() : MachineFunctionPass(ID) {
8609467b48Spatrick     initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
8709467b48Spatrick   }
8809467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override;
8909467b48Spatrick   bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const9009467b48Spatrick   StringRef getPassName() const override { return "Machine InstCombiner"; }
9109467b48Spatrick 
9209467b48Spatrick private:
9309467b48Spatrick   bool combineInstructions(MachineBasicBlock *);
9409467b48Spatrick   MachineInstr *getOperandDef(const MachineOperand &MO);
95*d415bd75Srobert   bool isTransientMI(const MachineInstr *MI);
9609467b48Spatrick   unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
9709467b48Spatrick                     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
9809467b48Spatrick                     MachineTraceMetrics::Trace BlockTrace);
9909467b48Spatrick   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
10009467b48Spatrick                       MachineTraceMetrics::Trace BlockTrace);
10109467b48Spatrick   bool
10209467b48Spatrick   improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
10309467b48Spatrick                           MachineTraceMetrics::Trace BlockTrace,
10409467b48Spatrick                           SmallVectorImpl<MachineInstr *> &InsInstrs,
10509467b48Spatrick                           SmallVectorImpl<MachineInstr *> &DelInstrs,
10609467b48Spatrick                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
10709467b48Spatrick                           MachineCombinerPattern Pattern, bool SlackIsAccurate);
10873471bf0Spatrick   bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
10973471bf0Spatrick                               SmallVectorImpl<MachineInstr *> &InsInstrs,
11073471bf0Spatrick                               SmallVectorImpl<MachineInstr *> &DelInstrs,
11173471bf0Spatrick                               MachineCombinerPattern Pattern);
11209467b48Spatrick   bool preservesResourceLen(MachineBasicBlock *MBB,
11309467b48Spatrick                             MachineTraceMetrics::Trace BlockTrace,
11409467b48Spatrick                             SmallVectorImpl<MachineInstr *> &InsInstrs,
11509467b48Spatrick                             SmallVectorImpl<MachineInstr *> &DelInstrs);
11609467b48Spatrick   void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
11709467b48Spatrick                      SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
11809467b48Spatrick   std::pair<unsigned, unsigned>
11909467b48Spatrick   getLatenciesForInstrSequences(MachineInstr &MI,
12009467b48Spatrick                                 SmallVectorImpl<MachineInstr *> &InsInstrs,
12109467b48Spatrick                                 SmallVectorImpl<MachineInstr *> &DelInstrs,
12209467b48Spatrick                                 MachineTraceMetrics::Trace BlockTrace);
12309467b48Spatrick 
12409467b48Spatrick   void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
12509467b48Spatrick                           SmallVector<MachineCombinerPattern, 16> &Patterns);
12609467b48Spatrick };
12709467b48Spatrick }
12809467b48Spatrick 
12909467b48Spatrick char MachineCombiner::ID = 0;
13009467b48Spatrick char &llvm::MachineCombinerID = MachineCombiner::ID;
13109467b48Spatrick 
13209467b48Spatrick INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
13309467b48Spatrick                       "Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)13409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
13509467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
13609467b48Spatrick INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
13709467b48Spatrick                     false, false)
13809467b48Spatrick 
13909467b48Spatrick void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
14009467b48Spatrick   AU.setPreservesCFG();
14109467b48Spatrick   AU.addPreserved<MachineDominatorTree>();
14209467b48Spatrick   AU.addRequired<MachineLoopInfo>();
14309467b48Spatrick   AU.addPreserved<MachineLoopInfo>();
14409467b48Spatrick   AU.addRequired<MachineTraceMetrics>();
14509467b48Spatrick   AU.addPreserved<MachineTraceMetrics>();
14609467b48Spatrick   AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
14709467b48Spatrick   AU.addRequired<ProfileSummaryInfoWrapperPass>();
14809467b48Spatrick   MachineFunctionPass::getAnalysisUsage(AU);
14909467b48Spatrick }
15009467b48Spatrick 
getOperandDef(const MachineOperand & MO)15109467b48Spatrick MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
15209467b48Spatrick   MachineInstr *DefInstr = nullptr;
15309467b48Spatrick   // We need a virtual register definition.
154*d415bd75Srobert   if (MO.isReg() && MO.getReg().isVirtual())
15509467b48Spatrick     DefInstr = MRI->getUniqueVRegDef(MO.getReg());
15609467b48Spatrick   // PHI's have no depth etc.
15709467b48Spatrick   if (DefInstr && DefInstr->isPHI())
15809467b48Spatrick     DefInstr = nullptr;
15909467b48Spatrick   return DefInstr;
16009467b48Spatrick }
16109467b48Spatrick 
162*d415bd75Srobert /// Return true if MI is unlikely to generate an actual target instruction.
isTransientMI(const MachineInstr * MI)163*d415bd75Srobert bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
164*d415bd75Srobert   if (!MI->isCopy())
165*d415bd75Srobert     return MI->isTransient();
166*d415bd75Srobert 
167*d415bd75Srobert   // If MI is a COPY, check if its src and dst registers can be coalesced.
168*d415bd75Srobert   Register Dst = MI->getOperand(0).getReg();
169*d415bd75Srobert   Register Src = MI->getOperand(1).getReg();
170*d415bd75Srobert 
171*d415bd75Srobert   if (!MI->isFullCopy()) {
172*d415bd75Srobert     // If src RC contains super registers of dst RC, it can also be coalesced.
173*d415bd75Srobert     if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
174*d415bd75Srobert       return false;
175*d415bd75Srobert 
176*d415bd75Srobert     auto SrcSub = MI->getOperand(1).getSubReg();
177*d415bd75Srobert     auto SrcRC = MRI->getRegClass(Src);
178*d415bd75Srobert     auto DstRC = MRI->getRegClass(Dst);
179*d415bd75Srobert     return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr;
180*d415bd75Srobert   }
181*d415bd75Srobert 
182*d415bd75Srobert   if (Src.isPhysical() && Dst.isPhysical())
183*d415bd75Srobert     return Src == Dst;
184*d415bd75Srobert 
185*d415bd75Srobert   if (Src.isVirtual() && Dst.isVirtual()) {
186*d415bd75Srobert     auto SrcRC = MRI->getRegClass(Src);
187*d415bd75Srobert     auto DstRC = MRI->getRegClass(Dst);
188*d415bd75Srobert     return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
189*d415bd75Srobert   }
190*d415bd75Srobert 
191*d415bd75Srobert   if (Src.isVirtual())
192*d415bd75Srobert     std::swap(Src, Dst);
193*d415bd75Srobert 
194*d415bd75Srobert   // Now Src is physical register, Dst is virtual register.
195*d415bd75Srobert   auto DstRC = MRI->getRegClass(Dst);
196*d415bd75Srobert   return DstRC->contains(Src);
197*d415bd75Srobert }
198*d415bd75Srobert 
19909467b48Spatrick /// Computes depth of instructions in vector \InsInstr.
20009467b48Spatrick ///
20109467b48Spatrick /// \param InsInstrs is a vector of machine instructions
20209467b48Spatrick /// \param InstrIdxForVirtReg is a dense map of virtual register to index
20309467b48Spatrick /// of defining machine instruction in \p InsInstrs
20409467b48Spatrick /// \param BlockTrace is a trace of machine instructions
20509467b48Spatrick ///
20609467b48Spatrick /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
20709467b48Spatrick unsigned
getDepth(SmallVectorImpl<MachineInstr * > & InsInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineTraceMetrics::Trace BlockTrace)20809467b48Spatrick MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
20909467b48Spatrick                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
21009467b48Spatrick                           MachineTraceMetrics::Trace BlockTrace) {
21109467b48Spatrick   SmallVector<unsigned, 16> InstrDepth;
21209467b48Spatrick   // For each instruction in the new sequence compute the depth based on the
21309467b48Spatrick   // operands. Use the trace information when possible. For new operands which
21409467b48Spatrick   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
21509467b48Spatrick   for (auto *InstrPtr : InsInstrs) { // for each Use
21609467b48Spatrick     unsigned IDepth = 0;
21709467b48Spatrick     for (const MachineOperand &MO : InstrPtr->operands()) {
21809467b48Spatrick       // Check for virtual register operand.
219*d415bd75Srobert       if (!(MO.isReg() && MO.getReg().isVirtual()))
22009467b48Spatrick         continue;
22109467b48Spatrick       if (!MO.isUse())
22209467b48Spatrick         continue;
22309467b48Spatrick       unsigned DepthOp = 0;
22409467b48Spatrick       unsigned LatencyOp = 0;
22509467b48Spatrick       DenseMap<unsigned, unsigned>::iterator II =
22609467b48Spatrick           InstrIdxForVirtReg.find(MO.getReg());
22709467b48Spatrick       if (II != InstrIdxForVirtReg.end()) {
22809467b48Spatrick         // Operand is new virtual register not in trace
22909467b48Spatrick         assert(II->second < InstrDepth.size() && "Bad Index");
23009467b48Spatrick         MachineInstr *DefInstr = InsInstrs[II->second];
23109467b48Spatrick         assert(DefInstr &&
23209467b48Spatrick                "There must be a definition for a new virtual register");
23309467b48Spatrick         DepthOp = InstrDepth[II->second];
23409467b48Spatrick         int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
23509467b48Spatrick         int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
23609467b48Spatrick         LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
23709467b48Spatrick                                                       InstrPtr, UseIdx);
23809467b48Spatrick       } else {
23909467b48Spatrick         MachineInstr *DefInstr = getOperandDef(MO);
24009467b48Spatrick         if (DefInstr) {
24109467b48Spatrick           DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
242*d415bd75Srobert           if (!isTransientMI(DefInstr))
24309467b48Spatrick             LatencyOp = TSchedModel.computeOperandLatency(
24409467b48Spatrick                 DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
24509467b48Spatrick                 InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
24609467b48Spatrick         }
24709467b48Spatrick       }
24809467b48Spatrick       IDepth = std::max(IDepth, DepthOp + LatencyOp);
24909467b48Spatrick     }
25009467b48Spatrick     InstrDepth.push_back(IDepth);
25109467b48Spatrick   }
25209467b48Spatrick   unsigned NewRootIdx = InsInstrs.size() - 1;
25309467b48Spatrick   return InstrDepth[NewRootIdx];
25409467b48Spatrick }
25509467b48Spatrick 
25609467b48Spatrick /// Computes instruction latency as max of latency of defined operands.
25709467b48Spatrick ///
25809467b48Spatrick /// \param Root is a machine instruction that could be replaced by NewRoot.
25909467b48Spatrick /// It is used to compute a more accurate latency information for NewRoot in
26009467b48Spatrick /// case there is a dependent instruction in the same trace (\p BlockTrace)
26109467b48Spatrick /// \param NewRoot is the instruction for which the latency is computed
26209467b48Spatrick /// \param BlockTrace is a trace of machine instructions
26309467b48Spatrick ///
26409467b48Spatrick /// \returns Latency of \p NewRoot
getLatency(MachineInstr * Root,MachineInstr * NewRoot,MachineTraceMetrics::Trace BlockTrace)26509467b48Spatrick unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
26609467b48Spatrick                                      MachineTraceMetrics::Trace BlockTrace) {
26709467b48Spatrick   // Check each definition in NewRoot and compute the latency
26809467b48Spatrick   unsigned NewRootLatency = 0;
26909467b48Spatrick 
27009467b48Spatrick   for (const MachineOperand &MO : NewRoot->operands()) {
27109467b48Spatrick     // Check for virtual register operand.
272*d415bd75Srobert     if (!(MO.isReg() && MO.getReg().isVirtual()))
27309467b48Spatrick       continue;
27409467b48Spatrick     if (!MO.isDef())
27509467b48Spatrick       continue;
27609467b48Spatrick     // Get the first instruction that uses MO
27709467b48Spatrick     MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
27809467b48Spatrick     RI++;
27909467b48Spatrick     if (RI == MRI->reg_end())
28009467b48Spatrick       continue;
28109467b48Spatrick     MachineInstr *UseMO = RI->getParent();
28209467b48Spatrick     unsigned LatencyOp = 0;
28309467b48Spatrick     if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
28409467b48Spatrick       LatencyOp = TSchedModel.computeOperandLatency(
28509467b48Spatrick           NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
28609467b48Spatrick           UseMO->findRegisterUseOperandIdx(MO.getReg()));
28709467b48Spatrick     } else {
28809467b48Spatrick       LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
28909467b48Spatrick     }
29009467b48Spatrick     NewRootLatency = std::max(NewRootLatency, LatencyOp);
29109467b48Spatrick   }
29209467b48Spatrick   return NewRootLatency;
29309467b48Spatrick }
29409467b48Spatrick 
29509467b48Spatrick /// The combiner's goal may differ based on which pattern it is attempting
29609467b48Spatrick /// to optimize.
29709467b48Spatrick enum class CombinerObjective {
29809467b48Spatrick   MustReduceDepth,            // The data dependency chain must be improved.
29973471bf0Spatrick   MustReduceRegisterPressure, // The register pressure must be reduced.
30009467b48Spatrick   Default                     // The critical path must not be lengthened.
30109467b48Spatrick };
30209467b48Spatrick 
getCombinerObjective(MachineCombinerPattern P)30309467b48Spatrick static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
30409467b48Spatrick   // TODO: If C++ ever gets a real enum class, make this part of the
30509467b48Spatrick   // MachineCombinerPattern class.
30609467b48Spatrick   switch (P) {
30709467b48Spatrick   case MachineCombinerPattern::REASSOC_AX_BY:
30809467b48Spatrick   case MachineCombinerPattern::REASSOC_AX_YB:
30909467b48Spatrick   case MachineCombinerPattern::REASSOC_XA_BY:
31009467b48Spatrick   case MachineCombinerPattern::REASSOC_XA_YB:
311097a140dSpatrick   case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
312097a140dSpatrick   case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
313*d415bd75Srobert   case MachineCombinerPattern::SUBADD_OP1:
314*d415bd75Srobert   case MachineCombinerPattern::SUBADD_OP2:
315*d415bd75Srobert   case MachineCombinerPattern::FMADD_AX:
316*d415bd75Srobert   case MachineCombinerPattern::FMADD_XA:
317*d415bd75Srobert   case MachineCombinerPattern::FMSUB:
318*d415bd75Srobert   case MachineCombinerPattern::FNMSUB:
31909467b48Spatrick     return CombinerObjective::MustReduceDepth;
32073471bf0Spatrick   case MachineCombinerPattern::REASSOC_XY_BCA:
32173471bf0Spatrick   case MachineCombinerPattern::REASSOC_XY_BAC:
32273471bf0Spatrick     return CombinerObjective::MustReduceRegisterPressure;
32309467b48Spatrick   default:
32409467b48Spatrick     return CombinerObjective::Default;
32509467b48Spatrick   }
32609467b48Spatrick }
32709467b48Spatrick 
32809467b48Spatrick /// Estimate the latency of the new and original instruction sequence by summing
32909467b48Spatrick /// up the latencies of the inserted and deleted instructions. This assumes
33009467b48Spatrick /// that the inserted and deleted instructions are dependent instruction chains,
33109467b48Spatrick /// which might not hold in all cases.
getLatenciesForInstrSequences(MachineInstr & MI,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineTraceMetrics::Trace BlockTrace)33209467b48Spatrick std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
33309467b48Spatrick     MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
33409467b48Spatrick     SmallVectorImpl<MachineInstr *> &DelInstrs,
33509467b48Spatrick     MachineTraceMetrics::Trace BlockTrace) {
33609467b48Spatrick   assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
33709467b48Spatrick   unsigned NewRootLatency = 0;
33809467b48Spatrick   // NewRoot is the last instruction in the \p InsInstrs vector.
33909467b48Spatrick   MachineInstr *NewRoot = InsInstrs.back();
34009467b48Spatrick   for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
34109467b48Spatrick     NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
34209467b48Spatrick   NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
34309467b48Spatrick 
34409467b48Spatrick   unsigned RootLatency = 0;
345*d415bd75Srobert   for (auto *I : DelInstrs)
34609467b48Spatrick     RootLatency += TSchedModel.computeInstrLatency(I);
34709467b48Spatrick 
34809467b48Spatrick   return {NewRootLatency, RootLatency};
34909467b48Spatrick }
35009467b48Spatrick 
reduceRegisterPressure(MachineInstr & Root,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineCombinerPattern Pattern)35173471bf0Spatrick bool MachineCombiner::reduceRegisterPressure(
35273471bf0Spatrick     MachineInstr &Root, MachineBasicBlock *MBB,
35373471bf0Spatrick     SmallVectorImpl<MachineInstr *> &InsInstrs,
35473471bf0Spatrick     SmallVectorImpl<MachineInstr *> &DelInstrs,
35573471bf0Spatrick     MachineCombinerPattern Pattern) {
35673471bf0Spatrick   // FIXME: for now, we don't do any check for the register pressure patterns.
35773471bf0Spatrick   // We treat them as always profitable. But we can do better if we make
35873471bf0Spatrick   // RegPressureTracker class be aware of TIE attribute. Then we can get an
35973471bf0Spatrick   // accurate compare of register pressure with DelInstrs or InsInstrs.
36073471bf0Spatrick   return true;
36173471bf0Spatrick }
36273471bf0Spatrick 
36309467b48Spatrick /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
36409467b48Spatrick /// The new code sequence ends in MI NewRoot. A necessary condition for the new
36509467b48Spatrick /// sequence to replace the old sequence is that it cannot lengthen the critical
36609467b48Spatrick /// path. The definition of "improve" may be restricted by specifying that the
36709467b48Spatrick /// new path improves the data dependency chain (MustReduceDepth).
improvesCriticalPathLen(MachineBasicBlock * MBB,MachineInstr * Root,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineCombinerPattern Pattern,bool SlackIsAccurate)36809467b48Spatrick bool MachineCombiner::improvesCriticalPathLen(
36909467b48Spatrick     MachineBasicBlock *MBB, MachineInstr *Root,
37009467b48Spatrick     MachineTraceMetrics::Trace BlockTrace,
37109467b48Spatrick     SmallVectorImpl<MachineInstr *> &InsInstrs,
37209467b48Spatrick     SmallVectorImpl<MachineInstr *> &DelInstrs,
37309467b48Spatrick     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
37409467b48Spatrick     MachineCombinerPattern Pattern,
37509467b48Spatrick     bool SlackIsAccurate) {
37609467b48Spatrick   // Get depth and latency of NewRoot and Root.
37709467b48Spatrick   unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
37809467b48Spatrick   unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
37909467b48Spatrick 
38009467b48Spatrick   LLVM_DEBUG(dbgs() << "  Dependence data for " << *Root << "\tNewRootDepth: "
38109467b48Spatrick                     << NewRootDepth << "\tRootDepth: " << RootDepth);
38209467b48Spatrick 
38309467b48Spatrick   // For a transform such as reassociation, the cost equation is
38409467b48Spatrick   // conservatively calculated so that we must improve the depth (data
38509467b48Spatrick   // dependency cycles) in the critical path to proceed with the transform.
38609467b48Spatrick   // Being conservative also protects against inaccuracies in the underlying
38709467b48Spatrick   // machine trace metrics and CPU models.
38809467b48Spatrick   if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
38909467b48Spatrick     LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
39009467b48Spatrick     LLVM_DEBUG(NewRootDepth < RootDepth
39109467b48Spatrick                    ? dbgs() << "\t  and it does it\n"
39209467b48Spatrick                    : dbgs() << "\t  but it does NOT do it\n");
39309467b48Spatrick     return NewRootDepth < RootDepth;
39409467b48Spatrick   }
39509467b48Spatrick 
39609467b48Spatrick   // A more flexible cost calculation for the critical path includes the slack
39709467b48Spatrick   // of the original code sequence. This may allow the transform to proceed
39809467b48Spatrick   // even if the instruction depths (data dependency cycles) become worse.
39909467b48Spatrick 
40009467b48Spatrick   // Account for the latency of the inserted and deleted instructions by
40109467b48Spatrick   unsigned NewRootLatency, RootLatency;
40209467b48Spatrick   std::tie(NewRootLatency, RootLatency) =
40309467b48Spatrick       getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
40409467b48Spatrick 
40509467b48Spatrick   unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
40609467b48Spatrick   unsigned NewCycleCount = NewRootDepth + NewRootLatency;
40709467b48Spatrick   unsigned OldCycleCount =
40809467b48Spatrick       RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
40909467b48Spatrick   LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
41009467b48Spatrick                     << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
41109467b48Spatrick                     << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
41209467b48Spatrick                     << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
41309467b48Spatrick                     << "\n\tRootDepth + RootLatency + RootSlack = "
41409467b48Spatrick                     << OldCycleCount;);
41509467b48Spatrick   LLVM_DEBUG(NewCycleCount <= OldCycleCount
41609467b48Spatrick                  ? dbgs() << "\n\t  It IMPROVES PathLen because"
41709467b48Spatrick                  : dbgs() << "\n\t  It DOES NOT improve PathLen because");
41809467b48Spatrick   LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
41909467b48Spatrick                     << ", OldCycleCount = " << OldCycleCount << "\n");
42009467b48Spatrick 
42109467b48Spatrick   return NewCycleCount <= OldCycleCount;
42209467b48Spatrick }
42309467b48Spatrick 
42409467b48Spatrick /// helper routine to convert instructions into SC
instr2instrSC(SmallVectorImpl<MachineInstr * > & Instrs,SmallVectorImpl<const MCSchedClassDesc * > & InstrsSC)42509467b48Spatrick void MachineCombiner::instr2instrSC(
42609467b48Spatrick     SmallVectorImpl<MachineInstr *> &Instrs,
42709467b48Spatrick     SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
42809467b48Spatrick   for (auto *InstrPtr : Instrs) {
42909467b48Spatrick     unsigned Opc = InstrPtr->getOpcode();
43009467b48Spatrick     unsigned Idx = TII->get(Opc).getSchedClass();
43109467b48Spatrick     const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
43209467b48Spatrick     InstrsSC.push_back(SC);
43309467b48Spatrick   }
43409467b48Spatrick }
43509467b48Spatrick 
43609467b48Spatrick /// True when the new instructions do not increase resource length
preservesResourceLen(MachineBasicBlock * MBB,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs)43709467b48Spatrick bool MachineCombiner::preservesResourceLen(
43809467b48Spatrick     MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
43909467b48Spatrick     SmallVectorImpl<MachineInstr *> &InsInstrs,
44009467b48Spatrick     SmallVectorImpl<MachineInstr *> &DelInstrs) {
44109467b48Spatrick   if (!TSchedModel.hasInstrSchedModel())
44209467b48Spatrick     return true;
44309467b48Spatrick 
44409467b48Spatrick   // Compute current resource length
44509467b48Spatrick 
44609467b48Spatrick   //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
44709467b48Spatrick   SmallVector <const MachineBasicBlock *, 1> MBBarr;
44809467b48Spatrick   MBBarr.push_back(MBB);
44909467b48Spatrick   unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
45009467b48Spatrick 
45109467b48Spatrick   // Deal with SC rather than Instructions.
45209467b48Spatrick   SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
45309467b48Spatrick   SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
45409467b48Spatrick 
45509467b48Spatrick   instr2instrSC(InsInstrs, InsInstrsSC);
45609467b48Spatrick   instr2instrSC(DelInstrs, DelInstrsSC);
45709467b48Spatrick 
458*d415bd75Srobert   ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC};
459*d415bd75Srobert   ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC};
46009467b48Spatrick 
46109467b48Spatrick   // Compute new resource length.
46209467b48Spatrick   unsigned ResLenAfterCombine =
46309467b48Spatrick       BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
46409467b48Spatrick 
46509467b48Spatrick   LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
46609467b48Spatrick                     << ResLenBeforeCombine
46709467b48Spatrick                     << " and after: " << ResLenAfterCombine << "\n";);
46809467b48Spatrick   LLVM_DEBUG(
469097a140dSpatrick       ResLenAfterCombine <=
470097a140dSpatrick       ResLenBeforeCombine + TII->getExtendResourceLenLimit()
47109467b48Spatrick           ? dbgs() << "\t\t  As result it IMPROVES/PRESERVES Resource Length\n"
47209467b48Spatrick           : dbgs() << "\t\t  As result it DOES NOT improve/preserve Resource "
47309467b48Spatrick                       "Length\n");
47409467b48Spatrick 
475097a140dSpatrick   return ResLenAfterCombine <=
476097a140dSpatrick          ResLenBeforeCombine + TII->getExtendResourceLenLimit();
47709467b48Spatrick }
47809467b48Spatrick 
47909467b48Spatrick /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
48009467b48Spatrick /// depths if requested.
48109467b48Spatrick ///
48209467b48Spatrick /// \param MBB basic block to insert instructions in
48309467b48Spatrick /// \param MI current machine instruction
48409467b48Spatrick /// \param InsInstrs new instructions to insert in \p MBB
48509467b48Spatrick /// \param DelInstrs instruction to delete from \p MBB
48609467b48Spatrick /// \param MinInstr is a pointer to the machine trace information
48709467b48Spatrick /// \param RegUnits set of live registers, needed to compute instruction depths
48873471bf0Spatrick /// \param TII is target instruction info, used to call target hook
48973471bf0Spatrick /// \param Pattern is used to call target hook finalizeInsInstrs
49009467b48Spatrick /// \param IncrementalUpdate if true, compute instruction depths incrementally,
49109467b48Spatrick ///                          otherwise invalidate the trace
insertDeleteInstructions(MachineBasicBlock * MBB,MachineInstr & MI,SmallVector<MachineInstr *,16> InsInstrs,SmallVector<MachineInstr *,16> DelInstrs,MachineTraceMetrics::Ensemble * MinInstr,SparseSet<LiveRegUnit> & RegUnits,const TargetInstrInfo * TII,MachineCombinerPattern Pattern,bool IncrementalUpdate)49209467b48Spatrick static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
49309467b48Spatrick                                      SmallVector<MachineInstr *, 16> InsInstrs,
49409467b48Spatrick                                      SmallVector<MachineInstr *, 16> DelInstrs,
49509467b48Spatrick                                      MachineTraceMetrics::Ensemble *MinInstr,
49609467b48Spatrick                                      SparseSet<LiveRegUnit> &RegUnits,
49773471bf0Spatrick                                      const TargetInstrInfo *TII,
49873471bf0Spatrick                                      MachineCombinerPattern Pattern,
49909467b48Spatrick                                      bool IncrementalUpdate) {
50073471bf0Spatrick   // If we want to fix up some placeholder for some target, do it now.
50173471bf0Spatrick   // We need this because in genAlternativeCodeSequence, we have not decided the
50273471bf0Spatrick   // better pattern InsInstrs or DelInstrs, so we don't want generate some
50373471bf0Spatrick   // sideeffect to the function. For example we need to delay the constant pool
50473471bf0Spatrick   // entry creation here after InsInstrs is selected as better pattern.
50573471bf0Spatrick   // Otherwise the constant pool entry created for InsInstrs will not be deleted
50673471bf0Spatrick   // even if InsInstrs is not the better pattern.
50773471bf0Spatrick   TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
50873471bf0Spatrick 
50909467b48Spatrick   for (auto *InstrPtr : InsInstrs)
51009467b48Spatrick     MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
51109467b48Spatrick 
51209467b48Spatrick   for (auto *InstrPtr : DelInstrs) {
513*d415bd75Srobert     InstrPtr->eraseFromParent();
51409467b48Spatrick     // Erase all LiveRegs defined by the removed instruction
515*d415bd75Srobert     for (auto *I = RegUnits.begin(); I != RegUnits.end();) {
51609467b48Spatrick       if (I->MI == InstrPtr)
51709467b48Spatrick         I = RegUnits.erase(I);
51809467b48Spatrick       else
51909467b48Spatrick         I++;
52009467b48Spatrick     }
52109467b48Spatrick   }
52209467b48Spatrick 
52309467b48Spatrick   if (IncrementalUpdate)
52409467b48Spatrick     for (auto *InstrPtr : InsInstrs)
52509467b48Spatrick       MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
52609467b48Spatrick   else
52709467b48Spatrick     MinInstr->invalidate(MBB);
52809467b48Spatrick 
52909467b48Spatrick   NumInstCombined++;
53009467b48Spatrick }
53109467b48Spatrick 
53209467b48Spatrick // Check that the difference between original and new latency is decreasing for
53309467b48Spatrick // later patterns. This helps to discover sub-optimal pattern orderings.
verifyPatternOrder(MachineBasicBlock * MBB,MachineInstr & Root,SmallVector<MachineCombinerPattern,16> & Patterns)53409467b48Spatrick void MachineCombiner::verifyPatternOrder(
53509467b48Spatrick     MachineBasicBlock *MBB, MachineInstr &Root,
53609467b48Spatrick     SmallVector<MachineCombinerPattern, 16> &Patterns) {
53709467b48Spatrick   long PrevLatencyDiff = std::numeric_limits<long>::max();
53809467b48Spatrick   (void)PrevLatencyDiff; // Variable is used in assert only.
53909467b48Spatrick   for (auto P : Patterns) {
54009467b48Spatrick     SmallVector<MachineInstr *, 16> InsInstrs;
54109467b48Spatrick     SmallVector<MachineInstr *, 16> DelInstrs;
54209467b48Spatrick     DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
54309467b48Spatrick     TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
54409467b48Spatrick                                     InstrIdxForVirtReg);
54509467b48Spatrick     // Found pattern, but did not generate alternative sequence.
54609467b48Spatrick     // This can happen e.g. when an immediate could not be materialized
54709467b48Spatrick     // in a single instruction.
54809467b48Spatrick     if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
54909467b48Spatrick       continue;
55009467b48Spatrick 
55109467b48Spatrick     unsigned NewRootLatency, RootLatency;
55209467b48Spatrick     std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
55309467b48Spatrick         Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
55409467b48Spatrick     long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
55509467b48Spatrick     assert(CurrentLatencyDiff <= PrevLatencyDiff &&
55609467b48Spatrick            "Current pattern is better than previous pattern.");
55709467b48Spatrick     PrevLatencyDiff = CurrentLatencyDiff;
55809467b48Spatrick   }
55909467b48Spatrick }
56009467b48Spatrick 
56109467b48Spatrick /// Substitute a slow code sequence with a faster one by
56209467b48Spatrick /// evaluating instruction combining pattern.
56309467b48Spatrick /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
56409467b48Spatrick /// combining based on machine trace metrics. Only combine a sequence of
56509467b48Spatrick /// instructions  when this neither lengthens the critical path nor increases
56609467b48Spatrick /// resource pressure. When optimizing for codesize always combine when the new
56709467b48Spatrick /// sequence is shorter.
combineInstructions(MachineBasicBlock * MBB)56809467b48Spatrick bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
56909467b48Spatrick   bool Changed = false;
57009467b48Spatrick   LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
57109467b48Spatrick 
57209467b48Spatrick   bool IncrementalUpdate = false;
57309467b48Spatrick   auto BlockIter = MBB->begin();
57409467b48Spatrick   decltype(BlockIter) LastUpdate;
57509467b48Spatrick   // Check if the block is in a loop.
57609467b48Spatrick   const MachineLoop *ML = MLI->getLoopFor(MBB);
57709467b48Spatrick   if (!MinInstr)
57809467b48Spatrick     MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
57909467b48Spatrick 
58009467b48Spatrick   SparseSet<LiveRegUnit> RegUnits;
58109467b48Spatrick   RegUnits.setUniverse(TRI->getNumRegUnits());
58209467b48Spatrick 
58309467b48Spatrick   bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
58409467b48Spatrick 
58573471bf0Spatrick   bool DoRegPressureReduce =
58673471bf0Spatrick       TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
58773471bf0Spatrick 
58809467b48Spatrick   while (BlockIter != MBB->end()) {
58909467b48Spatrick     auto &MI = *BlockIter++;
59009467b48Spatrick     SmallVector<MachineCombinerPattern, 16> Patterns;
59109467b48Spatrick     // The motivating example is:
59209467b48Spatrick     //
59309467b48Spatrick     //     MUL  Other        MUL_op1 MUL_op2  Other
59409467b48Spatrick     //      \    /               \      |    /
59509467b48Spatrick     //      ADD/SUB      =>        MADD/MSUB
59609467b48Spatrick     //      (=Root)                (=NewRoot)
59709467b48Spatrick 
59809467b48Spatrick     // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
59909467b48Spatrick     // usually beneficial for code size it unfortunately can hurt performance
60009467b48Spatrick     // when the ADD is on the critical path, but the MUL is not. With the
60109467b48Spatrick     // substitution the MUL becomes part of the critical path (in form of the
60209467b48Spatrick     // MADD) and can lengthen it on architectures where the MADD latency is
60309467b48Spatrick     // longer than the ADD latency.
60409467b48Spatrick     //
60509467b48Spatrick     // For each instruction we check if it can be the root of a combiner
60609467b48Spatrick     // pattern. Then for each pattern the new code sequence in form of MI is
60709467b48Spatrick     // generated and evaluated. When the efficiency criteria (don't lengthen
60809467b48Spatrick     // critical path, don't use more resources) is met the new sequence gets
60909467b48Spatrick     // hooked up into the basic block before the old sequence is removed.
61009467b48Spatrick     //
61109467b48Spatrick     // The algorithm does not try to evaluate all patterns and pick the best.
61209467b48Spatrick     // This is only an artificial restriction though. In practice there is
61309467b48Spatrick     // mostly one pattern, and getMachineCombinerPatterns() can order patterns
61409467b48Spatrick     // based on an internal cost heuristic. If
61509467b48Spatrick     // machine-combiner-verify-pattern-order is enabled, all patterns are
61609467b48Spatrick     // checked to ensure later patterns do not provide better latency savings.
61709467b48Spatrick 
61873471bf0Spatrick     if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
61909467b48Spatrick       continue;
62009467b48Spatrick 
62109467b48Spatrick     if (VerifyPatternOrder)
62209467b48Spatrick       verifyPatternOrder(MBB, MI, Patterns);
62309467b48Spatrick 
624*d415bd75Srobert     for (const auto P : Patterns) {
62509467b48Spatrick       SmallVector<MachineInstr *, 16> InsInstrs;
62609467b48Spatrick       SmallVector<MachineInstr *, 16> DelInstrs;
62709467b48Spatrick       DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
62809467b48Spatrick       TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
62909467b48Spatrick                                       InstrIdxForVirtReg);
63009467b48Spatrick       // Found pattern, but did not generate alternative sequence.
63109467b48Spatrick       // This can happen e.g. when an immediate could not be materialized
63209467b48Spatrick       // in a single instruction.
633*d415bd75Srobert       if (InsInstrs.empty())
63409467b48Spatrick         continue;
63509467b48Spatrick 
63609467b48Spatrick       LLVM_DEBUG(if (dump_intrs) {
63709467b48Spatrick         dbgs() << "\tFor the Pattern (" << (int)P
63809467b48Spatrick                << ") these instructions could be removed\n";
63909467b48Spatrick         for (auto const *InstrPtr : DelInstrs)
64009467b48Spatrick           InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
64109467b48Spatrick                           /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
64209467b48Spatrick         dbgs() << "\tThese instructions could replace the removed ones\n";
64309467b48Spatrick         for (auto const *InstrPtr : InsInstrs)
64409467b48Spatrick           InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
64509467b48Spatrick                           /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
64609467b48Spatrick       });
64709467b48Spatrick 
64873471bf0Spatrick       if (IncrementalUpdate && LastUpdate != BlockIter) {
64909467b48Spatrick         // Update depths since the last incremental update.
65009467b48Spatrick         MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
65109467b48Spatrick         LastUpdate = BlockIter;
65209467b48Spatrick       }
65309467b48Spatrick 
65473471bf0Spatrick       if (DoRegPressureReduce &&
65573471bf0Spatrick           getCombinerObjective(P) ==
65673471bf0Spatrick               CombinerObjective::MustReduceRegisterPressure) {
65773471bf0Spatrick         if (MBB->size() > inc_threshold) {
65873471bf0Spatrick           // Use incremental depth updates for basic blocks above threshold
65973471bf0Spatrick           IncrementalUpdate = true;
66073471bf0Spatrick           LastUpdate = BlockIter;
66173471bf0Spatrick         }
66273471bf0Spatrick         if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
66373471bf0Spatrick           // Replace DelInstrs with InsInstrs.
66473471bf0Spatrick           insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
66573471bf0Spatrick                                    RegUnits, TII, P, IncrementalUpdate);
66673471bf0Spatrick           Changed |= true;
66773471bf0Spatrick 
66873471bf0Spatrick           // Go back to previous instruction as it may have ILP reassociation
66973471bf0Spatrick           // opportunity.
67073471bf0Spatrick           BlockIter--;
67173471bf0Spatrick           break;
67273471bf0Spatrick         }
67373471bf0Spatrick       }
67473471bf0Spatrick 
675*d415bd75Srobert       if (ML && TII->isThroughputPattern(P)) {
676*d415bd75Srobert         LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
677*d415bd75Srobert         insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
678*d415bd75Srobert                                  RegUnits, TII, P, IncrementalUpdate);
679*d415bd75Srobert         // Eagerly stop after the first pattern fires.
680*d415bd75Srobert         Changed = true;
681*d415bd75Srobert         break;
682*d415bd75Srobert       } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
683*d415bd75Srobert         LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
684*d415bd75Srobert                           << InsInstrs.size() << " < "
685*d415bd75Srobert                           << DelInstrs.size() << ")\n");
68609467b48Spatrick         insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
68773471bf0Spatrick                                  RegUnits, TII, P, IncrementalUpdate);
68809467b48Spatrick         // Eagerly stop after the first pattern fires.
68909467b48Spatrick         Changed = true;
69009467b48Spatrick         break;
69109467b48Spatrick       } else {
69209467b48Spatrick         // For big basic blocks, we only compute the full trace the first time
69309467b48Spatrick         // we hit this. We do not invalidate the trace, but instead update the
69409467b48Spatrick         // instruction depths incrementally.
69509467b48Spatrick         // NOTE: Only the instruction depths up to MI are accurate. All other
69609467b48Spatrick         // trace information is not updated.
69709467b48Spatrick         MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
69809467b48Spatrick         Traces->verifyAnalysis();
69909467b48Spatrick         if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
70009467b48Spatrick                                     InstrIdxForVirtReg, P,
70109467b48Spatrick                                     !IncrementalUpdate) &&
70209467b48Spatrick             preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
70309467b48Spatrick           if (MBB->size() > inc_threshold) {
70409467b48Spatrick             // Use incremental depth updates for basic blocks above treshold
70509467b48Spatrick             IncrementalUpdate = true;
70609467b48Spatrick             LastUpdate = BlockIter;
70709467b48Spatrick           }
70809467b48Spatrick 
70909467b48Spatrick           insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
71073471bf0Spatrick                                    RegUnits, TII, P, IncrementalUpdate);
71109467b48Spatrick 
71209467b48Spatrick           // Eagerly stop after the first pattern fires.
71309467b48Spatrick           Changed = true;
71409467b48Spatrick           break;
71509467b48Spatrick         }
71609467b48Spatrick         // Cleanup instructions of the alternative code sequence. There is no
71709467b48Spatrick         // use for them.
71809467b48Spatrick         MachineFunction *MF = MBB->getParent();
71909467b48Spatrick         for (auto *InstrPtr : InsInstrs)
720*d415bd75Srobert           MF->deleteMachineInstr(InstrPtr);
72109467b48Spatrick       }
72209467b48Spatrick       InstrIdxForVirtReg.clear();
72309467b48Spatrick     }
72409467b48Spatrick   }
72509467b48Spatrick 
72609467b48Spatrick   if (Changed && IncrementalUpdate)
72709467b48Spatrick     Traces->invalidate(MBB);
72809467b48Spatrick   return Changed;
72909467b48Spatrick }
73009467b48Spatrick 
runOnMachineFunction(MachineFunction & MF)73109467b48Spatrick bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
73209467b48Spatrick   STI = &MF.getSubtarget();
73309467b48Spatrick   TII = STI->getInstrInfo();
73409467b48Spatrick   TRI = STI->getRegisterInfo();
73509467b48Spatrick   SchedModel = STI->getSchedModel();
73609467b48Spatrick   TSchedModel.init(STI);
73709467b48Spatrick   MRI = &MF.getRegInfo();
73809467b48Spatrick   MLI = &getAnalysis<MachineLoopInfo>();
73909467b48Spatrick   Traces = &getAnalysis<MachineTraceMetrics>();
74009467b48Spatrick   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
74109467b48Spatrick   MBFI = (PSI && PSI->hasProfileSummary()) ?
74209467b48Spatrick          &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
74309467b48Spatrick          nullptr;
74409467b48Spatrick   MinInstr = nullptr;
74509467b48Spatrick   OptSize = MF.getFunction().hasOptSize();
74673471bf0Spatrick   RegClassInfo.runOnMachineFunction(MF);
74709467b48Spatrick 
74809467b48Spatrick   LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
74909467b48Spatrick   if (!TII->useMachineCombiner()) {
75009467b48Spatrick     LLVM_DEBUG(
75109467b48Spatrick         dbgs()
75209467b48Spatrick         << "  Skipping pass: Target does not support machine combiner\n");
75309467b48Spatrick     return false;
75409467b48Spatrick   }
75509467b48Spatrick 
75609467b48Spatrick   bool Changed = false;
75709467b48Spatrick 
75809467b48Spatrick   // Try to combine instructions.
75909467b48Spatrick   for (auto &MBB : MF)
76009467b48Spatrick     Changed |= combineInstructions(&MBB);
76109467b48Spatrick 
76209467b48Spatrick   return Changed;
76309467b48Spatrick }
764