xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // The machine combiner pass uses machine trace metrics to ensure the combined
100b57cec5SDimitry Andric // instructions do not lengthen the critical path or the resource depth.
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
140b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
15480093f4SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
16480093f4SDimitry Andric #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
17bdd1243dSDimitry Andric #include "llvm/CodeGen/MachineCombinerPattern.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
23480093f4SDimitry Andric #include "llvm/CodeGen/MachineSizeOpts.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineTraceMetrics.h"
25e8d8bef9SDimitry Andric #include "llvm/CodeGen/RegisterClassInfo.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
30480093f4SDimitry Andric #include "llvm/InitializePasses.h"
310b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
320b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
330b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric using namespace llvm;
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric #define DEBUG_TYPE "machine-combiner"
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric STATISTIC(NumInstCombined, "Number of machineinst combined");
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric static cl::opt<unsigned>
420b57cec5SDimitry Andric inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
430b57cec5SDimitry Andric               cl::desc("Incremental depth computation will be used for basic "
440b57cec5SDimitry Andric                        "blocks with more instructions."), cl::init(500));
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
470b57cec5SDimitry Andric                                 cl::desc("Dump all substituted intrs"),
480b57cec5SDimitry Andric                                 cl::init(false));
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS
510b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder(
520b57cec5SDimitry Andric     "machine-combiner-verify-pattern-order", cl::Hidden,
530b57cec5SDimitry Andric     cl::desc(
540b57cec5SDimitry Andric         "Verify that the generated patterns are ordered by increasing latency"),
550b57cec5SDimitry Andric     cl::init(true));
560b57cec5SDimitry Andric #else
570b57cec5SDimitry Andric static cl::opt<bool> VerifyPatternOrder(
580b57cec5SDimitry Andric     "machine-combiner-verify-pattern-order", cl::Hidden,
590b57cec5SDimitry Andric     cl::desc(
600b57cec5SDimitry Andric         "Verify that the generated patterns are ordered by increasing latency"),
610b57cec5SDimitry Andric     cl::init(false));
620b57cec5SDimitry Andric #endif
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric namespace {
650b57cec5SDimitry Andric class MachineCombiner : public MachineFunctionPass {
6606c3fb27SDimitry Andric   const TargetSubtargetInfo *STI = nullptr;
6706c3fb27SDimitry Andric   const TargetInstrInfo *TII = nullptr;
6806c3fb27SDimitry Andric   const TargetRegisterInfo *TRI = nullptr;
690b57cec5SDimitry Andric   MCSchedModel SchedModel;
7006c3fb27SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
7106c3fb27SDimitry Andric   MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
7206c3fb27SDimitry Andric   MachineTraceMetrics *Traces = nullptr;
7306c3fb27SDimitry Andric   MachineTraceMetrics::Ensemble *TraceEnsemble = nullptr;
7406c3fb27SDimitry Andric   MachineBlockFrequencyInfo *MBFI = nullptr;
7506c3fb27SDimitry Andric   ProfileSummaryInfo *PSI = nullptr;
76e8d8bef9SDimitry Andric   RegisterClassInfo RegClassInfo;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   TargetSchedModel TSchedModel;
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric   /// True if optimizing for code size.
8106c3fb27SDimitry Andric   bool OptSize = false;
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric public:
840b57cec5SDimitry Andric   static char ID;
850b57cec5SDimitry Andric   MachineCombiner() : MachineFunctionPass(ID) {
860b57cec5SDimitry Andric     initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
870b57cec5SDimitry Andric   }
880b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
890b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
900b57cec5SDimitry Andric   StringRef getPassName() const override { return "Machine InstCombiner"; }
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric private:
930b57cec5SDimitry Andric   bool combineInstructions(MachineBasicBlock *);
940b57cec5SDimitry Andric   MachineInstr *getOperandDef(const MachineOperand &MO);
95fcaf7f86SDimitry Andric   bool isTransientMI(const MachineInstr *MI);
960b57cec5SDimitry Andric   unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
970b57cec5SDimitry Andric                     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
9806c3fb27SDimitry Andric                     MachineTraceMetrics::Trace BlockTrace,
9906c3fb27SDimitry Andric                     const MachineBasicBlock &MBB);
1000b57cec5SDimitry Andric   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
1010b57cec5SDimitry Andric                       MachineTraceMetrics::Trace BlockTrace);
102*0fca6ea1SDimitry Andric   bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
1030b57cec5SDimitry Andric                                MachineTraceMetrics::Trace BlockTrace,
1040b57cec5SDimitry Andric                                SmallVectorImpl<MachineInstr *> &InsInstrs,
1050b57cec5SDimitry Andric                                SmallVectorImpl<MachineInstr *> &DelInstrs,
1060b57cec5SDimitry Andric                                DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
107*0fca6ea1SDimitry Andric                                unsigned Pattern, bool SlackIsAccurate);
108e8d8bef9SDimitry Andric   bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
109e8d8bef9SDimitry Andric                               SmallVectorImpl<MachineInstr *> &InsInstrs,
110e8d8bef9SDimitry Andric                               SmallVectorImpl<MachineInstr *> &DelInstrs,
111*0fca6ea1SDimitry Andric                               unsigned Pattern);
1120b57cec5SDimitry Andric   bool preservesResourceLen(MachineBasicBlock *MBB,
1130b57cec5SDimitry Andric                             MachineTraceMetrics::Trace BlockTrace,
1140b57cec5SDimitry Andric                             SmallVectorImpl<MachineInstr *> &InsInstrs,
1150b57cec5SDimitry Andric                             SmallVectorImpl<MachineInstr *> &DelInstrs);
1160b57cec5SDimitry Andric   void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
1170b57cec5SDimitry Andric                      SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
1180b57cec5SDimitry Andric   std::pair<unsigned, unsigned>
1190b57cec5SDimitry Andric   getLatenciesForInstrSequences(MachineInstr &MI,
1200b57cec5SDimitry Andric                                 SmallVectorImpl<MachineInstr *> &InsInstrs,
1210b57cec5SDimitry Andric                                 SmallVectorImpl<MachineInstr *> &DelInstrs,
1220b57cec5SDimitry Andric                                 MachineTraceMetrics::Trace BlockTrace);
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric   void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
125*0fca6ea1SDimitry Andric                           SmallVector<unsigned, 16> &Patterns);
126*0fca6ea1SDimitry Andric   CombinerObjective getCombinerObjective(unsigned Pattern);
1270b57cec5SDimitry Andric };
1280b57cec5SDimitry Andric }
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric char MachineCombiner::ID = 0;
1310b57cec5SDimitry Andric char &llvm::MachineCombinerID = MachineCombiner::ID;
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
1340b57cec5SDimitry Andric                       "Machine InstCombiner", false, false)
135*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
1360b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
1370b57cec5SDimitry Andric INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
1380b57cec5SDimitry Andric                     false, false)
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
1410b57cec5SDimitry Andric   AU.setPreservesCFG();
142*0fca6ea1SDimitry Andric   AU.addPreserved<MachineDominatorTreeWrapperPass>();
143*0fca6ea1SDimitry Andric   AU.addRequired<MachineLoopInfoWrapperPass>();
144*0fca6ea1SDimitry Andric   AU.addPreserved<MachineLoopInfoWrapperPass>();
1450b57cec5SDimitry Andric   AU.addRequired<MachineTraceMetrics>();
1460b57cec5SDimitry Andric   AU.addPreserved<MachineTraceMetrics>();
147480093f4SDimitry Andric   AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
148480093f4SDimitry Andric   AU.addRequired<ProfileSummaryInfoWrapperPass>();
1490b57cec5SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric 
15206c3fb27SDimitry Andric MachineInstr *
15306c3fb27SDimitry Andric MachineCombiner::getOperandDef(const MachineOperand &MO) {
1540b57cec5SDimitry Andric   MachineInstr *DefInstr = nullptr;
1550b57cec5SDimitry Andric   // We need a virtual register definition.
156bdd1243dSDimitry Andric   if (MO.isReg() && MO.getReg().isVirtual())
1570b57cec5SDimitry Andric     DefInstr = MRI->getUniqueVRegDef(MO.getReg());
1580b57cec5SDimitry Andric   return DefInstr;
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
161fcaf7f86SDimitry Andric /// Return true if MI is unlikely to generate an actual target instruction.
162fcaf7f86SDimitry Andric bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
163fcaf7f86SDimitry Andric   if (!MI->isCopy())
164fcaf7f86SDimitry Andric     return MI->isTransient();
165fcaf7f86SDimitry Andric 
166fcaf7f86SDimitry Andric   // If MI is a COPY, check if its src and dst registers can be coalesced.
167fcaf7f86SDimitry Andric   Register Dst = MI->getOperand(0).getReg();
168fcaf7f86SDimitry Andric   Register Src = MI->getOperand(1).getReg();
169fcaf7f86SDimitry Andric 
170fcaf7f86SDimitry Andric   if (!MI->isFullCopy()) {
171fcaf7f86SDimitry Andric     // If src RC contains super registers of dst RC, it can also be coalesced.
172fcaf7f86SDimitry Andric     if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
173fcaf7f86SDimitry Andric       return false;
174fcaf7f86SDimitry Andric 
175fcaf7f86SDimitry Andric     auto SrcSub = MI->getOperand(1).getSubReg();
176fcaf7f86SDimitry Andric     auto SrcRC = MRI->getRegClass(Src);
177fcaf7f86SDimitry Andric     auto DstRC = MRI->getRegClass(Dst);
178fcaf7f86SDimitry Andric     return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr;
179fcaf7f86SDimitry Andric   }
180fcaf7f86SDimitry Andric 
181fcaf7f86SDimitry Andric   if (Src.isPhysical() && Dst.isPhysical())
182fcaf7f86SDimitry Andric     return Src == Dst;
183fcaf7f86SDimitry Andric 
184fcaf7f86SDimitry Andric   if (Src.isVirtual() && Dst.isVirtual()) {
185fcaf7f86SDimitry Andric     auto SrcRC = MRI->getRegClass(Src);
186fcaf7f86SDimitry Andric     auto DstRC = MRI->getRegClass(Dst);
187fcaf7f86SDimitry Andric     return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
188fcaf7f86SDimitry Andric   }
189fcaf7f86SDimitry Andric 
190fcaf7f86SDimitry Andric   if (Src.isVirtual())
191fcaf7f86SDimitry Andric     std::swap(Src, Dst);
192fcaf7f86SDimitry Andric 
193fcaf7f86SDimitry Andric   // Now Src is physical register, Dst is virtual register.
194fcaf7f86SDimitry Andric   auto DstRC = MRI->getRegClass(Dst);
195fcaf7f86SDimitry Andric   return DstRC->contains(Src);
196fcaf7f86SDimitry Andric }
197fcaf7f86SDimitry Andric 
1980b57cec5SDimitry Andric /// Computes depth of instructions in vector \InsInstr.
1990b57cec5SDimitry Andric ///
2000b57cec5SDimitry Andric /// \param InsInstrs is a vector of machine instructions
2010b57cec5SDimitry Andric /// \param InstrIdxForVirtReg is a dense map of virtual register to index
2020b57cec5SDimitry Andric /// of defining machine instruction in \p InsInstrs
2030b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions
2040b57cec5SDimitry Andric ///
2050b57cec5SDimitry Andric /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
2060b57cec5SDimitry Andric unsigned
2070b57cec5SDimitry Andric MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
2080b57cec5SDimitry Andric                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
20906c3fb27SDimitry Andric                           MachineTraceMetrics::Trace BlockTrace,
21006c3fb27SDimitry Andric                           const MachineBasicBlock &MBB) {
2110b57cec5SDimitry Andric   SmallVector<unsigned, 16> InstrDepth;
2120b57cec5SDimitry Andric   // For each instruction in the new sequence compute the depth based on the
2130b57cec5SDimitry Andric   // operands. Use the trace information when possible. For new operands which
2140b57cec5SDimitry Andric   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
2150b57cec5SDimitry Andric   for (auto *InstrPtr : InsInstrs) { // for each Use
2160b57cec5SDimitry Andric     unsigned IDepth = 0;
21706c3fb27SDimitry Andric     for (const MachineOperand &MO : InstrPtr->all_uses()) {
2180b57cec5SDimitry Andric       // Check for virtual register operand.
21906c3fb27SDimitry Andric       if (!MO.getReg().isVirtual())
2200b57cec5SDimitry Andric         continue;
2210b57cec5SDimitry Andric       unsigned DepthOp = 0;
2220b57cec5SDimitry Andric       unsigned LatencyOp = 0;
2230b57cec5SDimitry Andric       DenseMap<unsigned, unsigned>::iterator II =
2240b57cec5SDimitry Andric           InstrIdxForVirtReg.find(MO.getReg());
2250b57cec5SDimitry Andric       if (II != InstrIdxForVirtReg.end()) {
2260b57cec5SDimitry Andric         // Operand is new virtual register not in trace
2270b57cec5SDimitry Andric         assert(II->second < InstrDepth.size() && "Bad Index");
2280b57cec5SDimitry Andric         MachineInstr *DefInstr = InsInstrs[II->second];
2290b57cec5SDimitry Andric         assert(DefInstr &&
2300b57cec5SDimitry Andric                "There must be a definition for a new virtual register");
2310b57cec5SDimitry Andric         DepthOp = InstrDepth[II->second];
232*0fca6ea1SDimitry Andric         int DefIdx =
233*0fca6ea1SDimitry Andric             DefInstr->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr);
234*0fca6ea1SDimitry Andric         int UseIdx =
235*0fca6ea1SDimitry Andric             InstrPtr->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr);
2360b57cec5SDimitry Andric         LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
2370b57cec5SDimitry Andric                                                       InstrPtr, UseIdx);
2380b57cec5SDimitry Andric       } else {
2390b57cec5SDimitry Andric         MachineInstr *DefInstr = getOperandDef(MO);
24006c3fb27SDimitry Andric         if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
24106c3fb27SDimitry Andric                              MachineTraceStrategy::TS_Local ||
24206c3fb27SDimitry Andric                          DefInstr->getParent() == &MBB)) {
2430b57cec5SDimitry Andric           DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
244fcaf7f86SDimitry Andric           if (!isTransientMI(DefInstr))
2450b57cec5SDimitry Andric             LatencyOp = TSchedModel.computeOperandLatency(
246*0fca6ea1SDimitry Andric                 DefInstr,
247*0fca6ea1SDimitry Andric                 DefInstr->findRegisterDefOperandIdx(MO.getReg(),
248*0fca6ea1SDimitry Andric                                                     /*TRI=*/nullptr),
249*0fca6ea1SDimitry Andric                 InstrPtr,
250*0fca6ea1SDimitry Andric                 InstrPtr->findRegisterUseOperandIdx(MO.getReg(),
251*0fca6ea1SDimitry Andric                                                     /*TRI=*/nullptr));
2520b57cec5SDimitry Andric         }
2530b57cec5SDimitry Andric       }
2540b57cec5SDimitry Andric       IDepth = std::max(IDepth, DepthOp + LatencyOp);
2550b57cec5SDimitry Andric     }
2560b57cec5SDimitry Andric     InstrDepth.push_back(IDepth);
2570b57cec5SDimitry Andric   }
2580b57cec5SDimitry Andric   unsigned NewRootIdx = InsInstrs.size() - 1;
2590b57cec5SDimitry Andric   return InstrDepth[NewRootIdx];
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric /// Computes instruction latency as max of latency of defined operands.
2630b57cec5SDimitry Andric ///
2640b57cec5SDimitry Andric /// \param Root is a machine instruction that could be replaced by NewRoot.
2650b57cec5SDimitry Andric /// It is used to compute a more accurate latency information for NewRoot in
2660b57cec5SDimitry Andric /// case there is a dependent instruction in the same trace (\p BlockTrace)
2670b57cec5SDimitry Andric /// \param NewRoot is the instruction for which the latency is computed
2680b57cec5SDimitry Andric /// \param BlockTrace is a trace of machine instructions
2690b57cec5SDimitry Andric ///
2700b57cec5SDimitry Andric /// \returns Latency of \p NewRoot
2710b57cec5SDimitry Andric unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
2720b57cec5SDimitry Andric                                      MachineTraceMetrics::Trace BlockTrace) {
2730b57cec5SDimitry Andric   // Check each definition in NewRoot and compute the latency
2740b57cec5SDimitry Andric   unsigned NewRootLatency = 0;
2750b57cec5SDimitry Andric 
27606c3fb27SDimitry Andric   for (const MachineOperand &MO : NewRoot->all_defs()) {
2770b57cec5SDimitry Andric     // Check for virtual register operand.
27806c3fb27SDimitry Andric     if (!MO.getReg().isVirtual())
2790b57cec5SDimitry Andric       continue;
2800b57cec5SDimitry Andric     // Get the first instruction that uses MO
2810b57cec5SDimitry Andric     MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
2820b57cec5SDimitry Andric     RI++;
2830b57cec5SDimitry Andric     if (RI == MRI->reg_end())
2840b57cec5SDimitry Andric       continue;
2850b57cec5SDimitry Andric     MachineInstr *UseMO = RI->getParent();
2860b57cec5SDimitry Andric     unsigned LatencyOp = 0;
2870b57cec5SDimitry Andric     if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
2880b57cec5SDimitry Andric       LatencyOp = TSchedModel.computeOperandLatency(
289*0fca6ea1SDimitry Andric           NewRoot,
290*0fca6ea1SDimitry Andric           NewRoot->findRegisterDefOperandIdx(MO.getReg(), /*TRI=*/nullptr),
291*0fca6ea1SDimitry Andric           UseMO,
292*0fca6ea1SDimitry Andric           UseMO->findRegisterUseOperandIdx(MO.getReg(), /*TRI=*/nullptr));
2930b57cec5SDimitry Andric     } else {
2940b57cec5SDimitry Andric       LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
2950b57cec5SDimitry Andric     }
2960b57cec5SDimitry Andric     NewRootLatency = std::max(NewRootLatency, LatencyOp);
2970b57cec5SDimitry Andric   }
2980b57cec5SDimitry Andric   return NewRootLatency;
2990b57cec5SDimitry Andric }
3000b57cec5SDimitry Andric 
301*0fca6ea1SDimitry Andric CombinerObjective MachineCombiner::getCombinerObjective(unsigned Pattern) {
3020b57cec5SDimitry Andric   // TODO: If C++ ever gets a real enum class, make this part of the
3030b57cec5SDimitry Andric   // MachineCombinerPattern class.
304*0fca6ea1SDimitry Andric   switch (Pattern) {
3050b57cec5SDimitry Andric   case MachineCombinerPattern::REASSOC_AX_BY:
3060b57cec5SDimitry Andric   case MachineCombinerPattern::REASSOC_AX_YB:
3070b57cec5SDimitry Andric   case MachineCombinerPattern::REASSOC_XA_BY:
3080b57cec5SDimitry Andric   case MachineCombinerPattern::REASSOC_XA_YB:
3090b57cec5SDimitry Andric     return CombinerObjective::MustReduceDepth;
3100b57cec5SDimitry Andric   default:
311*0fca6ea1SDimitry Andric     return TII->getCombinerObjective(Pattern);
3120b57cec5SDimitry Andric   }
3130b57cec5SDimitry Andric }
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric /// Estimate the latency of the new and original instruction sequence by summing
3160b57cec5SDimitry Andric /// up the latencies of the inserted and deleted instructions. This assumes
3170b57cec5SDimitry Andric /// that the inserted and deleted instructions are dependent instruction chains,
3180b57cec5SDimitry Andric /// which might not hold in all cases.
3190b57cec5SDimitry Andric std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
3200b57cec5SDimitry Andric     MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
3210b57cec5SDimitry Andric     SmallVectorImpl<MachineInstr *> &DelInstrs,
3220b57cec5SDimitry Andric     MachineTraceMetrics::Trace BlockTrace) {
3230b57cec5SDimitry Andric   assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
3240b57cec5SDimitry Andric   unsigned NewRootLatency = 0;
3250b57cec5SDimitry Andric   // NewRoot is the last instruction in the \p InsInstrs vector.
3260b57cec5SDimitry Andric   MachineInstr *NewRoot = InsInstrs.back();
3270b57cec5SDimitry Andric   for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
3280b57cec5SDimitry Andric     NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
3290b57cec5SDimitry Andric   NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
3300b57cec5SDimitry Andric 
3310b57cec5SDimitry Andric   unsigned RootLatency = 0;
332fcaf7f86SDimitry Andric   for (auto *I : DelInstrs)
3330b57cec5SDimitry Andric     RootLatency += TSchedModel.computeInstrLatency(I);
3340b57cec5SDimitry Andric 
3350b57cec5SDimitry Andric   return {NewRootLatency, RootLatency};
3360b57cec5SDimitry Andric }
3370b57cec5SDimitry Andric 
338e8d8bef9SDimitry Andric bool MachineCombiner::reduceRegisterPressure(
339e8d8bef9SDimitry Andric     MachineInstr &Root, MachineBasicBlock *MBB,
340e8d8bef9SDimitry Andric     SmallVectorImpl<MachineInstr *> &InsInstrs,
341*0fca6ea1SDimitry Andric     SmallVectorImpl<MachineInstr *> &DelInstrs, unsigned Pattern) {
342e8d8bef9SDimitry Andric   // FIXME: for now, we don't do any check for the register pressure patterns.
343e8d8bef9SDimitry Andric   // We treat them as always profitable. But we can do better if we make
344e8d8bef9SDimitry Andric   // RegPressureTracker class be aware of TIE attribute. Then we can get an
345e8d8bef9SDimitry Andric   // accurate compare of register pressure with DelInstrs or InsInstrs.
346e8d8bef9SDimitry Andric   return true;
347e8d8bef9SDimitry Andric }
348e8d8bef9SDimitry Andric 
3490b57cec5SDimitry Andric /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
3500b57cec5SDimitry Andric /// The new code sequence ends in MI NewRoot. A necessary condition for the new
3510b57cec5SDimitry Andric /// sequence to replace the old sequence is that it cannot lengthen the critical
3520b57cec5SDimitry Andric /// path. The definition of "improve" may be restricted by specifying that the
3530b57cec5SDimitry Andric /// new path improves the data dependency chain (MustReduceDepth).
3540b57cec5SDimitry Andric bool MachineCombiner::improvesCriticalPathLen(
3550b57cec5SDimitry Andric     MachineBasicBlock *MBB, MachineInstr *Root,
3560b57cec5SDimitry Andric     MachineTraceMetrics::Trace BlockTrace,
3570b57cec5SDimitry Andric     SmallVectorImpl<MachineInstr *> &InsInstrs,
3580b57cec5SDimitry Andric     SmallVectorImpl<MachineInstr *> &DelInstrs,
359*0fca6ea1SDimitry Andric     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned Pattern,
3600b57cec5SDimitry Andric     bool SlackIsAccurate) {
3610b57cec5SDimitry Andric   // Get depth and latency of NewRoot and Root.
36206c3fb27SDimitry Andric   unsigned NewRootDepth =
36306c3fb27SDimitry Andric       getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
3640b57cec5SDimitry Andric   unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
3650b57cec5SDimitry Andric 
3660b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Dependence data for " << *Root << "\tNewRootDepth: "
3670b57cec5SDimitry Andric                     << NewRootDepth << "\tRootDepth: " << RootDepth);
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric   // For a transform such as reassociation, the cost equation is
3700b57cec5SDimitry Andric   // conservatively calculated so that we must improve the depth (data
3710b57cec5SDimitry Andric   // dependency cycles) in the critical path to proceed with the transform.
3720b57cec5SDimitry Andric   // Being conservative also protects against inaccuracies in the underlying
3730b57cec5SDimitry Andric   // machine trace metrics and CPU models.
3740b57cec5SDimitry Andric   if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
3750b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
3760b57cec5SDimitry Andric     LLVM_DEBUG(NewRootDepth < RootDepth
3770b57cec5SDimitry Andric                    ? dbgs() << "\t  and it does it\n"
3780b57cec5SDimitry Andric                    : dbgs() << "\t  but it does NOT do it\n");
3790b57cec5SDimitry Andric     return NewRootDepth < RootDepth;
3800b57cec5SDimitry Andric   }
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric   // A more flexible cost calculation for the critical path includes the slack
3830b57cec5SDimitry Andric   // of the original code sequence. This may allow the transform to proceed
3840b57cec5SDimitry Andric   // even if the instruction depths (data dependency cycles) become worse.
3850b57cec5SDimitry Andric 
3860b57cec5SDimitry Andric   // Account for the latency of the inserted and deleted instructions by
3870b57cec5SDimitry Andric   unsigned NewRootLatency, RootLatency;
38806c3fb27SDimitry Andric   if (TII->accumulateInstrSeqToRootLatency(*Root)) {
3890b57cec5SDimitry Andric     std::tie(NewRootLatency, RootLatency) =
3900b57cec5SDimitry Andric         getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
39106c3fb27SDimitry Andric   } else {
39206c3fb27SDimitry Andric     NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.back());
39306c3fb27SDimitry Andric     RootLatency = TSchedModel.computeInstrLatency(Root);
39406c3fb27SDimitry Andric   }
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
3970b57cec5SDimitry Andric   unsigned NewCycleCount = NewRootDepth + NewRootLatency;
3980b57cec5SDimitry Andric   unsigned OldCycleCount =
3990b57cec5SDimitry Andric       RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
4000b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
4010b57cec5SDimitry Andric                     << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
4020b57cec5SDimitry Andric                     << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
4030b57cec5SDimitry Andric                     << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
4040b57cec5SDimitry Andric                     << "\n\tRootDepth + RootLatency + RootSlack = "
4050b57cec5SDimitry Andric                     << OldCycleCount;);
4060b57cec5SDimitry Andric   LLVM_DEBUG(NewCycleCount <= OldCycleCount
4070b57cec5SDimitry Andric                  ? dbgs() << "\n\t  It IMPROVES PathLen because"
4080b57cec5SDimitry Andric                  : dbgs() << "\n\t  It DOES NOT improve PathLen because");
4090b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
4100b57cec5SDimitry Andric                     << ", OldCycleCount = " << OldCycleCount << "\n");
4110b57cec5SDimitry Andric 
4120b57cec5SDimitry Andric   return NewCycleCount <= OldCycleCount;
4130b57cec5SDimitry Andric }
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric /// helper routine to convert instructions into SC
4160b57cec5SDimitry Andric void MachineCombiner::instr2instrSC(
4170b57cec5SDimitry Andric     SmallVectorImpl<MachineInstr *> &Instrs,
4180b57cec5SDimitry Andric     SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
4190b57cec5SDimitry Andric   for (auto *InstrPtr : Instrs) {
4200b57cec5SDimitry Andric     unsigned Opc = InstrPtr->getOpcode();
4210b57cec5SDimitry Andric     unsigned Idx = TII->get(Opc).getSchedClass();
4220b57cec5SDimitry Andric     const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
4230b57cec5SDimitry Andric     InstrsSC.push_back(SC);
4240b57cec5SDimitry Andric   }
4250b57cec5SDimitry Andric }
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric /// True when the new instructions do not increase resource length
4280b57cec5SDimitry Andric bool MachineCombiner::preservesResourceLen(
4290b57cec5SDimitry Andric     MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
4300b57cec5SDimitry Andric     SmallVectorImpl<MachineInstr *> &InsInstrs,
4310b57cec5SDimitry Andric     SmallVectorImpl<MachineInstr *> &DelInstrs) {
4320b57cec5SDimitry Andric   if (!TSchedModel.hasInstrSchedModel())
4330b57cec5SDimitry Andric     return true;
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric   // Compute current resource length
4360b57cec5SDimitry Andric 
4370b57cec5SDimitry Andric   //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
4380b57cec5SDimitry Andric   SmallVector <const MachineBasicBlock *, 1> MBBarr;
4390b57cec5SDimitry Andric   MBBarr.push_back(MBB);
4400b57cec5SDimitry Andric   unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric   // Deal with SC rather than Instructions.
4430b57cec5SDimitry Andric   SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
4440b57cec5SDimitry Andric   SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric   instr2instrSC(InsInstrs, InsInstrsSC);
4470b57cec5SDimitry Andric   instr2instrSC(DelInstrs, DelInstrsSC);
4480b57cec5SDimitry Andric 
449bdd1243dSDimitry Andric   ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC};
450bdd1243dSDimitry Andric   ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC};
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric   // Compute new resource length.
4530b57cec5SDimitry Andric   unsigned ResLenAfterCombine =
4540b57cec5SDimitry Andric       BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
4550b57cec5SDimitry Andric 
4560b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
4570b57cec5SDimitry Andric                     << ResLenBeforeCombine
4580b57cec5SDimitry Andric                     << " and after: " << ResLenAfterCombine << "\n";);
4590b57cec5SDimitry Andric   LLVM_DEBUG(
4605ffd83dbSDimitry Andric       ResLenAfterCombine <=
4615ffd83dbSDimitry Andric       ResLenBeforeCombine + TII->getExtendResourceLenLimit()
4620b57cec5SDimitry Andric           ? dbgs() << "\t\t  As result it IMPROVES/PRESERVES Resource Length\n"
4630b57cec5SDimitry Andric           : dbgs() << "\t\t  As result it DOES NOT improve/preserve Resource "
4640b57cec5SDimitry Andric                       "Length\n");
4650b57cec5SDimitry Andric 
4665ffd83dbSDimitry Andric   return ResLenAfterCombine <=
4675ffd83dbSDimitry Andric          ResLenBeforeCombine + TII->getExtendResourceLenLimit();
4680b57cec5SDimitry Andric }
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
4710b57cec5SDimitry Andric /// depths if requested.
4720b57cec5SDimitry Andric ///
4730b57cec5SDimitry Andric /// \param MBB basic block to insert instructions in
4740b57cec5SDimitry Andric /// \param MI current machine instruction
4750b57cec5SDimitry Andric /// \param InsInstrs new instructions to insert in \p MBB
4760b57cec5SDimitry Andric /// \param DelInstrs instruction to delete from \p MBB
47706c3fb27SDimitry Andric /// \param TraceEnsemble is a pointer to the machine trace information
4780b57cec5SDimitry Andric /// \param RegUnits set of live registers, needed to compute instruction depths
479e8d8bef9SDimitry Andric /// \param TII is target instruction info, used to call target hook
480e8d8bef9SDimitry Andric /// \param Pattern is used to call target hook finalizeInsInstrs
4810b57cec5SDimitry Andric /// \param IncrementalUpdate if true, compute instruction depths incrementally,
4820b57cec5SDimitry Andric ///                          otherwise invalidate the trace
483*0fca6ea1SDimitry Andric static void
484*0fca6ea1SDimitry Andric insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
48506c3fb27SDimitry Andric                          SmallVectorImpl<MachineInstr *> &InsInstrs,
48606c3fb27SDimitry Andric                          SmallVectorImpl<MachineInstr *> &DelInstrs,
48706c3fb27SDimitry Andric                          MachineTraceMetrics::Ensemble *TraceEnsemble,
488*0fca6ea1SDimitry Andric                          SparseSet<LiveRegUnit> &RegUnits,
489*0fca6ea1SDimitry Andric                          const TargetInstrInfo *TII, unsigned Pattern,
490*0fca6ea1SDimitry Andric                          bool IncrementalUpdate) {
491e8d8bef9SDimitry Andric   // If we want to fix up some placeholder for some target, do it now.
492e8d8bef9SDimitry Andric   // We need this because in genAlternativeCodeSequence, we have not decided the
493e8d8bef9SDimitry Andric   // better pattern InsInstrs or DelInstrs, so we don't want generate some
494e8d8bef9SDimitry Andric   // sideeffect to the function. For example we need to delay the constant pool
495e8d8bef9SDimitry Andric   // entry creation here after InsInstrs is selected as better pattern.
496e8d8bef9SDimitry Andric   // Otherwise the constant pool entry created for InsInstrs will not be deleted
497e8d8bef9SDimitry Andric   // even if InsInstrs is not the better pattern.
498e8d8bef9SDimitry Andric   TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
499e8d8bef9SDimitry Andric 
5000b57cec5SDimitry Andric   for (auto *InstrPtr : InsInstrs)
5010b57cec5SDimitry Andric     MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
5020b57cec5SDimitry Andric 
5030b57cec5SDimitry Andric   for (auto *InstrPtr : DelInstrs) {
5040eae32dcSDimitry Andric     InstrPtr->eraseFromParent();
5050b57cec5SDimitry Andric     // Erase all LiveRegs defined by the removed instruction
506fcaf7f86SDimitry Andric     for (auto *I = RegUnits.begin(); I != RegUnits.end();) {
5070b57cec5SDimitry Andric       if (I->MI == InstrPtr)
5080b57cec5SDimitry Andric         I = RegUnits.erase(I);
5090b57cec5SDimitry Andric       else
5100b57cec5SDimitry Andric         I++;
5110b57cec5SDimitry Andric     }
5120b57cec5SDimitry Andric   }
5130b57cec5SDimitry Andric 
5140b57cec5SDimitry Andric   if (IncrementalUpdate)
5150b57cec5SDimitry Andric     for (auto *InstrPtr : InsInstrs)
51606c3fb27SDimitry Andric       TraceEnsemble->updateDepth(MBB, *InstrPtr, RegUnits);
5170b57cec5SDimitry Andric   else
51806c3fb27SDimitry Andric     TraceEnsemble->invalidate(MBB);
5190b57cec5SDimitry Andric 
5200b57cec5SDimitry Andric   NumInstCombined++;
5210b57cec5SDimitry Andric }
5220b57cec5SDimitry Andric 
5230b57cec5SDimitry Andric // Check that the difference between original and new latency is decreasing for
5240b57cec5SDimitry Andric // later patterns. This helps to discover sub-optimal pattern orderings.
525*0fca6ea1SDimitry Andric void MachineCombiner::verifyPatternOrder(MachineBasicBlock *MBB,
526*0fca6ea1SDimitry Andric                                          MachineInstr &Root,
527*0fca6ea1SDimitry Andric                                          SmallVector<unsigned, 16> &Patterns) {
5280b57cec5SDimitry Andric   long PrevLatencyDiff = std::numeric_limits<long>::max();
5290b57cec5SDimitry Andric   (void)PrevLatencyDiff; // Variable is used in assert only.
5300b57cec5SDimitry Andric   for (auto P : Patterns) {
5310b57cec5SDimitry Andric     SmallVector<MachineInstr *, 16> InsInstrs;
5320b57cec5SDimitry Andric     SmallVector<MachineInstr *, 16> DelInstrs;
5330b57cec5SDimitry Andric     DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
5340b57cec5SDimitry Andric     TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
5350b57cec5SDimitry Andric                                     InstrIdxForVirtReg);
5360b57cec5SDimitry Andric     // Found pattern, but did not generate alternative sequence.
5370b57cec5SDimitry Andric     // This can happen e.g. when an immediate could not be materialized
5380b57cec5SDimitry Andric     // in a single instruction.
5390b57cec5SDimitry Andric     if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
5400b57cec5SDimitry Andric       continue;
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric     unsigned NewRootLatency, RootLatency;
5430b57cec5SDimitry Andric     std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
54406c3fb27SDimitry Andric         Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(MBB));
5450b57cec5SDimitry Andric     long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
5460b57cec5SDimitry Andric     assert(CurrentLatencyDiff <= PrevLatencyDiff &&
5470b57cec5SDimitry Andric            "Current pattern is better than previous pattern.");
5480b57cec5SDimitry Andric     PrevLatencyDiff = CurrentLatencyDiff;
5490b57cec5SDimitry Andric   }
5500b57cec5SDimitry Andric }
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric /// Substitute a slow code sequence with a faster one by
5530b57cec5SDimitry Andric /// evaluating instruction combining pattern.
5540b57cec5SDimitry Andric /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
5550b57cec5SDimitry Andric /// combining based on machine trace metrics. Only combine a sequence of
5560b57cec5SDimitry Andric /// instructions  when this neither lengthens the critical path nor increases
5570b57cec5SDimitry Andric /// resource pressure. When optimizing for codesize always combine when the new
5580b57cec5SDimitry Andric /// sequence is shorter.
5590b57cec5SDimitry Andric bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
5600b57cec5SDimitry Andric   bool Changed = false;
5610b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
5620b57cec5SDimitry Andric 
5630b57cec5SDimitry Andric   bool IncrementalUpdate = false;
5640b57cec5SDimitry Andric   auto BlockIter = MBB->begin();
5650b57cec5SDimitry Andric   decltype(BlockIter) LastUpdate;
5660b57cec5SDimitry Andric   // Check if the block is in a loop.
5670b57cec5SDimitry Andric   const MachineLoop *ML = MLI->getLoopFor(MBB);
56806c3fb27SDimitry Andric   if (!TraceEnsemble)
56906c3fb27SDimitry Andric     TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy());
5700b57cec5SDimitry Andric 
5710b57cec5SDimitry Andric   SparseSet<LiveRegUnit> RegUnits;
5720b57cec5SDimitry Andric   RegUnits.setUniverse(TRI->getNumRegUnits());
5730b57cec5SDimitry Andric 
574480093f4SDimitry Andric   bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
575480093f4SDimitry Andric 
576e8d8bef9SDimitry Andric   bool DoRegPressureReduce =
577e8d8bef9SDimitry Andric       TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
578e8d8bef9SDimitry Andric 
5790b57cec5SDimitry Andric   while (BlockIter != MBB->end()) {
5800b57cec5SDimitry Andric     auto &MI = *BlockIter++;
581*0fca6ea1SDimitry Andric     SmallVector<unsigned, 16> Patterns;
5820b57cec5SDimitry Andric     // The motivating example is:
5830b57cec5SDimitry Andric     //
5840b57cec5SDimitry Andric     //     MUL  Other        MUL_op1 MUL_op2  Other
5850b57cec5SDimitry Andric     //      \    /               \      |    /
5860b57cec5SDimitry Andric     //      ADD/SUB      =>        MADD/MSUB
5870b57cec5SDimitry Andric     //      (=Root)                (=NewRoot)
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric     // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
5900b57cec5SDimitry Andric     // usually beneficial for code size it unfortunately can hurt performance
5910b57cec5SDimitry Andric     // when the ADD is on the critical path, but the MUL is not. With the
5920b57cec5SDimitry Andric     // substitution the MUL becomes part of the critical path (in form of the
5930b57cec5SDimitry Andric     // MADD) and can lengthen it on architectures where the MADD latency is
5940b57cec5SDimitry Andric     // longer than the ADD latency.
5950b57cec5SDimitry Andric     //
5960b57cec5SDimitry Andric     // For each instruction we check if it can be the root of a combiner
5970b57cec5SDimitry Andric     // pattern. Then for each pattern the new code sequence in form of MI is
5980b57cec5SDimitry Andric     // generated and evaluated. When the efficiency criteria (don't lengthen
5990b57cec5SDimitry Andric     // critical path, don't use more resources) is met the new sequence gets
6000b57cec5SDimitry Andric     // hooked up into the basic block before the old sequence is removed.
6010b57cec5SDimitry Andric     //
6020b57cec5SDimitry Andric     // The algorithm does not try to evaluate all patterns and pick the best.
6030b57cec5SDimitry Andric     // This is only an artificial restriction though. In practice there is
6040b57cec5SDimitry Andric     // mostly one pattern, and getMachineCombinerPatterns() can order patterns
6050b57cec5SDimitry Andric     // based on an internal cost heuristic. If
6060b57cec5SDimitry Andric     // machine-combiner-verify-pattern-order is enabled, all patterns are
6070b57cec5SDimitry Andric     // checked to ensure later patterns do not provide better latency savings.
6080b57cec5SDimitry Andric 
609e8d8bef9SDimitry Andric     if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
6100b57cec5SDimitry Andric       continue;
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric     if (VerifyPatternOrder)
6130b57cec5SDimitry Andric       verifyPatternOrder(MBB, MI, Patterns);
6140b57cec5SDimitry Andric 
615bdd1243dSDimitry Andric     for (const auto P : Patterns) {
6160b57cec5SDimitry Andric       SmallVector<MachineInstr *, 16> InsInstrs;
6170b57cec5SDimitry Andric       SmallVector<MachineInstr *, 16> DelInstrs;
6180b57cec5SDimitry Andric       DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
6190b57cec5SDimitry Andric       TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
6200b57cec5SDimitry Andric                                       InstrIdxForVirtReg);
6210b57cec5SDimitry Andric       // Found pattern, but did not generate alternative sequence.
6220b57cec5SDimitry Andric       // This can happen e.g. when an immediate could not be materialized
6230b57cec5SDimitry Andric       // in a single instruction.
624bdd1243dSDimitry Andric       if (InsInstrs.empty())
6250b57cec5SDimitry Andric         continue;
6260b57cec5SDimitry Andric 
6270b57cec5SDimitry Andric       LLVM_DEBUG(if (dump_intrs) {
6280b57cec5SDimitry Andric         dbgs() << "\tFor the Pattern (" << (int)P
6290b57cec5SDimitry Andric                << ") these instructions could be removed\n";
6300b57cec5SDimitry Andric         for (auto const *InstrPtr : DelInstrs)
6310b57cec5SDimitry Andric           InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
6320b57cec5SDimitry Andric                           /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
6330b57cec5SDimitry Andric         dbgs() << "\tThese instructions could replace the removed ones\n";
6340b57cec5SDimitry Andric         for (auto const *InstrPtr : InsInstrs)
6350b57cec5SDimitry Andric           InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
6360b57cec5SDimitry Andric                           /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
6370b57cec5SDimitry Andric       });
6380b57cec5SDimitry Andric 
639e8d8bef9SDimitry Andric       if (IncrementalUpdate && LastUpdate != BlockIter) {
6400b57cec5SDimitry Andric         // Update depths since the last incremental update.
64106c3fb27SDimitry Andric         TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits);
6420b57cec5SDimitry Andric         LastUpdate = BlockIter;
6430b57cec5SDimitry Andric       }
6440b57cec5SDimitry Andric 
645e8d8bef9SDimitry Andric       if (DoRegPressureReduce &&
646e8d8bef9SDimitry Andric           getCombinerObjective(P) ==
647e8d8bef9SDimitry Andric               CombinerObjective::MustReduceRegisterPressure) {
648e8d8bef9SDimitry Andric         if (MBB->size() > inc_threshold) {
649e8d8bef9SDimitry Andric           // Use incremental depth updates for basic blocks above threshold
650e8d8bef9SDimitry Andric           IncrementalUpdate = true;
651e8d8bef9SDimitry Andric           LastUpdate = BlockIter;
652e8d8bef9SDimitry Andric         }
653e8d8bef9SDimitry Andric         if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
654e8d8bef9SDimitry Andric           // Replace DelInstrs with InsInstrs.
65506c3fb27SDimitry Andric           insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
656e8d8bef9SDimitry Andric                                    RegUnits, TII, P, IncrementalUpdate);
657e8d8bef9SDimitry Andric           Changed |= true;
658e8d8bef9SDimitry Andric 
659e8d8bef9SDimitry Andric           // Go back to previous instruction as it may have ILP reassociation
660e8d8bef9SDimitry Andric           // opportunity.
661e8d8bef9SDimitry Andric           BlockIter--;
662e8d8bef9SDimitry Andric           break;
663e8d8bef9SDimitry Andric         }
664e8d8bef9SDimitry Andric       }
665e8d8bef9SDimitry Andric 
666bdd1243dSDimitry Andric       if (ML && TII->isThroughputPattern(P)) {
667bdd1243dSDimitry Andric         LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
66806c3fb27SDimitry Andric         insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
669bdd1243dSDimitry Andric                                  RegUnits, TII, P, IncrementalUpdate);
670bdd1243dSDimitry Andric         // Eagerly stop after the first pattern fires.
671bdd1243dSDimitry Andric         Changed = true;
672bdd1243dSDimitry Andric         break;
673bdd1243dSDimitry Andric       } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
674bdd1243dSDimitry Andric         LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
675bdd1243dSDimitry Andric                           << InsInstrs.size() << " < "
676bdd1243dSDimitry Andric                           << DelInstrs.size() << ")\n");
67706c3fb27SDimitry Andric         insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
678e8d8bef9SDimitry Andric                                  RegUnits, TII, P, IncrementalUpdate);
6790b57cec5SDimitry Andric         // Eagerly stop after the first pattern fires.
6800b57cec5SDimitry Andric         Changed = true;
6810b57cec5SDimitry Andric         break;
6820b57cec5SDimitry Andric       } else {
6830b57cec5SDimitry Andric         // For big basic blocks, we only compute the full trace the first time
6840b57cec5SDimitry Andric         // we hit this. We do not invalidate the trace, but instead update the
6850b57cec5SDimitry Andric         // instruction depths incrementally.
6860b57cec5SDimitry Andric         // NOTE: Only the instruction depths up to MI are accurate. All other
6870b57cec5SDimitry Andric         // trace information is not updated.
68806c3fb27SDimitry Andric         MachineTraceMetrics::Trace BlockTrace = TraceEnsemble->getTrace(MBB);
6890b57cec5SDimitry Andric         Traces->verifyAnalysis();
6900b57cec5SDimitry Andric         if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
6910b57cec5SDimitry Andric                                     InstrIdxForVirtReg, P,
6920b57cec5SDimitry Andric                                     !IncrementalUpdate) &&
6930b57cec5SDimitry Andric             preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
6940b57cec5SDimitry Andric           if (MBB->size() > inc_threshold) {
6950b57cec5SDimitry Andric             // Use incremental depth updates for basic blocks above treshold
6960b57cec5SDimitry Andric             IncrementalUpdate = true;
6970b57cec5SDimitry Andric             LastUpdate = BlockIter;
6980b57cec5SDimitry Andric           }
6990b57cec5SDimitry Andric 
70006c3fb27SDimitry Andric           insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
701e8d8bef9SDimitry Andric                                    RegUnits, TII, P, IncrementalUpdate);
7020b57cec5SDimitry Andric 
7030b57cec5SDimitry Andric           // Eagerly stop after the first pattern fires.
7040b57cec5SDimitry Andric           Changed = true;
7050b57cec5SDimitry Andric           break;
7060b57cec5SDimitry Andric         }
7070b57cec5SDimitry Andric         // Cleanup instructions of the alternative code sequence. There is no
7080b57cec5SDimitry Andric         // use for them.
7090b57cec5SDimitry Andric         MachineFunction *MF = MBB->getParent();
7100b57cec5SDimitry Andric         for (auto *InstrPtr : InsInstrs)
7110eae32dcSDimitry Andric           MF->deleteMachineInstr(InstrPtr);
7120b57cec5SDimitry Andric       }
7130b57cec5SDimitry Andric       InstrIdxForVirtReg.clear();
7140b57cec5SDimitry Andric     }
7150b57cec5SDimitry Andric   }
7160b57cec5SDimitry Andric 
7170b57cec5SDimitry Andric   if (Changed && IncrementalUpdate)
7180b57cec5SDimitry Andric     Traces->invalidate(MBB);
7190b57cec5SDimitry Andric   return Changed;
7200b57cec5SDimitry Andric }
7210b57cec5SDimitry Andric 
7220b57cec5SDimitry Andric bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
7230b57cec5SDimitry Andric   STI = &MF.getSubtarget();
7240b57cec5SDimitry Andric   TII = STI->getInstrInfo();
7250b57cec5SDimitry Andric   TRI = STI->getRegisterInfo();
7260b57cec5SDimitry Andric   SchedModel = STI->getSchedModel();
7270b57cec5SDimitry Andric   TSchedModel.init(STI);
7280b57cec5SDimitry Andric   MRI = &MF.getRegInfo();
729*0fca6ea1SDimitry Andric   MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
7300b57cec5SDimitry Andric   Traces = &getAnalysis<MachineTraceMetrics>();
731480093f4SDimitry Andric   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
732480093f4SDimitry Andric   MBFI = (PSI && PSI->hasProfileSummary()) ?
733480093f4SDimitry Andric          &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
734480093f4SDimitry Andric          nullptr;
73506c3fb27SDimitry Andric   TraceEnsemble = nullptr;
7360b57cec5SDimitry Andric   OptSize = MF.getFunction().hasOptSize();
737e8d8bef9SDimitry Andric   RegClassInfo.runOnMachineFunction(MF);
7380b57cec5SDimitry Andric 
7390b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
7400b57cec5SDimitry Andric   if (!TII->useMachineCombiner()) {
7410b57cec5SDimitry Andric     LLVM_DEBUG(
7420b57cec5SDimitry Andric         dbgs()
7430b57cec5SDimitry Andric         << "  Skipping pass: Target does not support machine combiner\n");
7440b57cec5SDimitry Andric     return false;
7450b57cec5SDimitry Andric   }
7460b57cec5SDimitry Andric 
7470b57cec5SDimitry Andric   bool Changed = false;
7480b57cec5SDimitry Andric 
7490b57cec5SDimitry Andric   // Try to combine instructions.
7500b57cec5SDimitry Andric   for (auto &MBB : MF)
7510b57cec5SDimitry Andric     Changed |= combineInstructions(&MBB);
7520b57cec5SDimitry Andric 
7530b57cec5SDimitry Andric   return Changed;
7540b57cec5SDimitry Andric }
755