xref: /minix3/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineCombiner.cpp (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
2*0a6a1f1dSLionel Sambuc //
3*0a6a1f1dSLionel Sambuc //                     The LLVM Compiler Infrastructure
4*0a6a1f1dSLionel Sambuc //
5*0a6a1f1dSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6*0a6a1f1dSLionel Sambuc // License. See LICENSE.TXT for details.
7*0a6a1f1dSLionel Sambuc //
8*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
9*0a6a1f1dSLionel Sambuc //
10*0a6a1f1dSLionel Sambuc // The machine combiner pass uses machine trace metrics to ensure the combined
11*0a6a1f1dSLionel Sambuc // instructions does not lengthen the critical path or the resource depth.
12*0a6a1f1dSLionel Sambuc //===----------------------------------------------------------------------===//
13*0a6a1f1dSLionel Sambuc #define DEBUG_TYPE "machine-combiner"
14*0a6a1f1dSLionel Sambuc 
15*0a6a1f1dSLionel Sambuc #include "llvm/ADT/Statistic.h"
16*0a6a1f1dSLionel Sambuc #include "llvm/ADT/DenseMap.h"
17*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineDominators.h"
18*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineFunction.h"
19*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineFunctionPass.h"
20*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineInstrBuilder.h"
21*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineLoopInfo.h"
22*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineRegisterInfo.h"
23*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/MachineTraceMetrics.h"
24*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/Passes.h"
25*0a6a1f1dSLionel Sambuc #include "llvm/CodeGen/TargetSchedule.h"
26*0a6a1f1dSLionel Sambuc #include "llvm/Support/CommandLine.h"
27*0a6a1f1dSLionel Sambuc #include "llvm/Support/Debug.h"
28*0a6a1f1dSLionel Sambuc #include "llvm/Support/raw_ostream.h"
29*0a6a1f1dSLionel Sambuc #include "llvm/Target/TargetInstrInfo.h"
30*0a6a1f1dSLionel Sambuc #include "llvm/Target/TargetRegisterInfo.h"
31*0a6a1f1dSLionel Sambuc #include "llvm/Target/TargetSubtargetInfo.h"
32*0a6a1f1dSLionel Sambuc 
33*0a6a1f1dSLionel Sambuc using namespace llvm;
34*0a6a1f1dSLionel Sambuc 
35*0a6a1f1dSLionel Sambuc STATISTIC(NumInstCombined, "Number of machineinst combined");
36*0a6a1f1dSLionel Sambuc 
37*0a6a1f1dSLionel Sambuc namespace {
38*0a6a1f1dSLionel Sambuc class MachineCombiner : public MachineFunctionPass {
39*0a6a1f1dSLionel Sambuc   const TargetInstrInfo *TII;
40*0a6a1f1dSLionel Sambuc   const TargetRegisterInfo *TRI;
41*0a6a1f1dSLionel Sambuc   MCSchedModel SchedModel;
42*0a6a1f1dSLionel Sambuc   MachineRegisterInfo *MRI;
43*0a6a1f1dSLionel Sambuc   MachineTraceMetrics *Traces;
44*0a6a1f1dSLionel Sambuc   MachineTraceMetrics::Ensemble *MinInstr;
45*0a6a1f1dSLionel Sambuc 
46*0a6a1f1dSLionel Sambuc   TargetSchedModel TSchedModel;
47*0a6a1f1dSLionel Sambuc 
48*0a6a1f1dSLionel Sambuc   /// OptSize - True if optimizing for code size.
49*0a6a1f1dSLionel Sambuc   bool OptSize;
50*0a6a1f1dSLionel Sambuc 
51*0a6a1f1dSLionel Sambuc public:
52*0a6a1f1dSLionel Sambuc   static char ID;
MachineCombiner()53*0a6a1f1dSLionel Sambuc   MachineCombiner() : MachineFunctionPass(ID) {
54*0a6a1f1dSLionel Sambuc     initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
55*0a6a1f1dSLionel Sambuc   }
56*0a6a1f1dSLionel Sambuc   void getAnalysisUsage(AnalysisUsage &AU) const override;
57*0a6a1f1dSLionel Sambuc   bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const58*0a6a1f1dSLionel Sambuc   const char *getPassName() const override { return "Machine InstCombiner"; }
59*0a6a1f1dSLionel Sambuc 
60*0a6a1f1dSLionel Sambuc private:
61*0a6a1f1dSLionel Sambuc   bool doSubstitute(unsigned NewSize, unsigned OldSize);
62*0a6a1f1dSLionel Sambuc   bool combineInstructions(MachineBasicBlock *);
63*0a6a1f1dSLionel Sambuc   MachineInstr *getOperandDef(const MachineOperand &MO);
64*0a6a1f1dSLionel Sambuc   unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
65*0a6a1f1dSLionel Sambuc                     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
66*0a6a1f1dSLionel Sambuc                     MachineTraceMetrics::Trace BlockTrace);
67*0a6a1f1dSLionel Sambuc   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
68*0a6a1f1dSLionel Sambuc                       MachineTraceMetrics::Trace BlockTrace);
69*0a6a1f1dSLionel Sambuc   bool
70*0a6a1f1dSLionel Sambuc   preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
71*0a6a1f1dSLionel Sambuc                            MachineTraceMetrics::Trace BlockTrace,
72*0a6a1f1dSLionel Sambuc                            SmallVectorImpl<MachineInstr *> &InsInstrs,
73*0a6a1f1dSLionel Sambuc                            DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
74*0a6a1f1dSLionel Sambuc   bool preservesResourceLen(MachineBasicBlock *MBB,
75*0a6a1f1dSLionel Sambuc                             MachineTraceMetrics::Trace BlockTrace,
76*0a6a1f1dSLionel Sambuc                             SmallVectorImpl<MachineInstr *> &InsInstrs,
77*0a6a1f1dSLionel Sambuc                             SmallVectorImpl<MachineInstr *> &DelInstrs);
78*0a6a1f1dSLionel Sambuc   void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
79*0a6a1f1dSLionel Sambuc                      SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
80*0a6a1f1dSLionel Sambuc };
81*0a6a1f1dSLionel Sambuc }
82*0a6a1f1dSLionel Sambuc 
83*0a6a1f1dSLionel Sambuc char MachineCombiner::ID = 0;
84*0a6a1f1dSLionel Sambuc char &llvm::MachineCombinerID = MachineCombiner::ID;
85*0a6a1f1dSLionel Sambuc 
86*0a6a1f1dSLionel Sambuc INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
87*0a6a1f1dSLionel Sambuc                       "Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)88*0a6a1f1dSLionel Sambuc INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
89*0a6a1f1dSLionel Sambuc INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
90*0a6a1f1dSLionel Sambuc                     false, false)
91*0a6a1f1dSLionel Sambuc 
92*0a6a1f1dSLionel Sambuc void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
93*0a6a1f1dSLionel Sambuc   AU.setPreservesCFG();
94*0a6a1f1dSLionel Sambuc   AU.addPreserved<MachineDominatorTree>();
95*0a6a1f1dSLionel Sambuc   AU.addPreserved<MachineLoopInfo>();
96*0a6a1f1dSLionel Sambuc   AU.addRequired<MachineTraceMetrics>();
97*0a6a1f1dSLionel Sambuc   AU.addPreserved<MachineTraceMetrics>();
98*0a6a1f1dSLionel Sambuc   MachineFunctionPass::getAnalysisUsage(AU);
99*0a6a1f1dSLionel Sambuc }
100*0a6a1f1dSLionel Sambuc 
getOperandDef(const MachineOperand & MO)101*0a6a1f1dSLionel Sambuc MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
102*0a6a1f1dSLionel Sambuc   MachineInstr *DefInstr = nullptr;
103*0a6a1f1dSLionel Sambuc   // We need a virtual register definition.
104*0a6a1f1dSLionel Sambuc   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
105*0a6a1f1dSLionel Sambuc     DefInstr = MRI->getUniqueVRegDef(MO.getReg());
106*0a6a1f1dSLionel Sambuc   // PHI's have no depth etc.
107*0a6a1f1dSLionel Sambuc   if (DefInstr && DefInstr->isPHI())
108*0a6a1f1dSLionel Sambuc     DefInstr = nullptr;
109*0a6a1f1dSLionel Sambuc   return DefInstr;
110*0a6a1f1dSLionel Sambuc }
111*0a6a1f1dSLionel Sambuc 
112*0a6a1f1dSLionel Sambuc /// getDepth - Computes depth of instructions in vector \InsInstr.
113*0a6a1f1dSLionel Sambuc ///
114*0a6a1f1dSLionel Sambuc /// \param InsInstrs is a vector of machine instructions
115*0a6a1f1dSLionel Sambuc /// \param InstrIdxForVirtReg is a dense map of virtual register to index
116*0a6a1f1dSLionel Sambuc /// of defining machine instruction in \p InsInstrs
117*0a6a1f1dSLionel Sambuc /// \param BlockTrace is a trace of machine instructions
118*0a6a1f1dSLionel Sambuc ///
119*0a6a1f1dSLionel Sambuc /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
120*0a6a1f1dSLionel Sambuc unsigned
getDepth(SmallVectorImpl<MachineInstr * > & InsInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineTraceMetrics::Trace BlockTrace)121*0a6a1f1dSLionel Sambuc MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
122*0a6a1f1dSLionel Sambuc                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
123*0a6a1f1dSLionel Sambuc                           MachineTraceMetrics::Trace BlockTrace) {
124*0a6a1f1dSLionel Sambuc 
125*0a6a1f1dSLionel Sambuc   SmallVector<unsigned, 16> InstrDepth;
126*0a6a1f1dSLionel Sambuc   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
127*0a6a1f1dSLionel Sambuc 
128*0a6a1f1dSLionel Sambuc   // Foreach instruction in in the new sequence compute the depth based on the
129*0a6a1f1dSLionel Sambuc   // operands. Use the trace information when possible. For new operands which
130*0a6a1f1dSLionel Sambuc   // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
131*0a6a1f1dSLionel Sambuc   for (auto *InstrPtr : InsInstrs) { // for each Use
132*0a6a1f1dSLionel Sambuc     unsigned IDepth = 0;
133*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
134*0a6a1f1dSLionel Sambuc     for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
135*0a6a1f1dSLionel Sambuc       const MachineOperand &MO = InstrPtr->getOperand(i);
136*0a6a1f1dSLionel Sambuc       // Check for virtual register operand.
137*0a6a1f1dSLionel Sambuc       if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
138*0a6a1f1dSLionel Sambuc         continue;
139*0a6a1f1dSLionel Sambuc       if (!MO.isUse())
140*0a6a1f1dSLionel Sambuc         continue;
141*0a6a1f1dSLionel Sambuc       unsigned DepthOp = 0;
142*0a6a1f1dSLionel Sambuc       unsigned LatencyOp = 0;
143*0a6a1f1dSLionel Sambuc       DenseMap<unsigned, unsigned>::iterator II =
144*0a6a1f1dSLionel Sambuc           InstrIdxForVirtReg.find(MO.getReg());
145*0a6a1f1dSLionel Sambuc       if (II != InstrIdxForVirtReg.end()) {
146*0a6a1f1dSLionel Sambuc         // Operand is new virtual register not in trace
147*0a6a1f1dSLionel Sambuc         assert(II->second < InstrDepth.size() && "Bad Index");
148*0a6a1f1dSLionel Sambuc         MachineInstr *DefInstr = InsInstrs[II->second];
149*0a6a1f1dSLionel Sambuc         assert(DefInstr &&
150*0a6a1f1dSLionel Sambuc                "There must be a definition for a new virtual register");
151*0a6a1f1dSLionel Sambuc         DepthOp = InstrDepth[II->second];
152*0a6a1f1dSLionel Sambuc         LatencyOp = TSchedModel.computeOperandLatency(
153*0a6a1f1dSLionel Sambuc             DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
154*0a6a1f1dSLionel Sambuc             InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
155*0a6a1f1dSLionel Sambuc       } else {
156*0a6a1f1dSLionel Sambuc         MachineInstr *DefInstr = getOperandDef(MO);
157*0a6a1f1dSLionel Sambuc         if (DefInstr) {
158*0a6a1f1dSLionel Sambuc           DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
159*0a6a1f1dSLionel Sambuc           LatencyOp = TSchedModel.computeOperandLatency(
160*0a6a1f1dSLionel Sambuc               DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
161*0a6a1f1dSLionel Sambuc               InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
162*0a6a1f1dSLionel Sambuc         }
163*0a6a1f1dSLionel Sambuc       }
164*0a6a1f1dSLionel Sambuc       IDepth = std::max(IDepth, DepthOp + LatencyOp);
165*0a6a1f1dSLionel Sambuc     }
166*0a6a1f1dSLionel Sambuc     InstrDepth.push_back(IDepth);
167*0a6a1f1dSLionel Sambuc   }
168*0a6a1f1dSLionel Sambuc   unsigned NewRootIdx = InsInstrs.size() - 1;
169*0a6a1f1dSLionel Sambuc   return InstrDepth[NewRootIdx];
170*0a6a1f1dSLionel Sambuc }
171*0a6a1f1dSLionel Sambuc 
172*0a6a1f1dSLionel Sambuc /// getLatency - Computes instruction latency as max of latency of defined
173*0a6a1f1dSLionel Sambuc /// operands
174*0a6a1f1dSLionel Sambuc ///
175*0a6a1f1dSLionel Sambuc /// \param Root is a machine instruction that could be replaced by NewRoot.
176*0a6a1f1dSLionel Sambuc /// It is used to compute a more accurate latency information for NewRoot in
177*0a6a1f1dSLionel Sambuc /// case there is a dependent instruction in the same trace (\p BlockTrace)
178*0a6a1f1dSLionel Sambuc /// \param NewRoot is the instruction for which the latency is computed
179*0a6a1f1dSLionel Sambuc /// \param BlockTrace is a trace of machine instructions
180*0a6a1f1dSLionel Sambuc ///
181*0a6a1f1dSLionel Sambuc /// \returns Latency of \p NewRoot
getLatency(MachineInstr * Root,MachineInstr * NewRoot,MachineTraceMetrics::Trace BlockTrace)182*0a6a1f1dSLionel Sambuc unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
183*0a6a1f1dSLionel Sambuc                                      MachineTraceMetrics::Trace BlockTrace) {
184*0a6a1f1dSLionel Sambuc 
185*0a6a1f1dSLionel Sambuc   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
186*0a6a1f1dSLionel Sambuc 
187*0a6a1f1dSLionel Sambuc   // Check each definition in NewRoot and compute the latency
188*0a6a1f1dSLionel Sambuc   unsigned NewRootLatency = 0;
189*0a6a1f1dSLionel Sambuc 
190*0a6a1f1dSLionel Sambuc   for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
191*0a6a1f1dSLionel Sambuc     const MachineOperand &MO = NewRoot->getOperand(i);
192*0a6a1f1dSLionel Sambuc     // Check for virtual register operand.
193*0a6a1f1dSLionel Sambuc     if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
194*0a6a1f1dSLionel Sambuc       continue;
195*0a6a1f1dSLionel Sambuc     if (!MO.isDef())
196*0a6a1f1dSLionel Sambuc       continue;
197*0a6a1f1dSLionel Sambuc     // Get the first instruction that uses MO
198*0a6a1f1dSLionel Sambuc     MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
199*0a6a1f1dSLionel Sambuc     RI++;
200*0a6a1f1dSLionel Sambuc     MachineInstr *UseMO = RI->getParent();
201*0a6a1f1dSLionel Sambuc     unsigned LatencyOp = 0;
202*0a6a1f1dSLionel Sambuc     if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
203*0a6a1f1dSLionel Sambuc       LatencyOp = TSchedModel.computeOperandLatency(
204*0a6a1f1dSLionel Sambuc           NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
205*0a6a1f1dSLionel Sambuc           UseMO->findRegisterUseOperandIdx(MO.getReg()));
206*0a6a1f1dSLionel Sambuc     } else {
207*0a6a1f1dSLionel Sambuc       LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
208*0a6a1f1dSLionel Sambuc     }
209*0a6a1f1dSLionel Sambuc     NewRootLatency = std::max(NewRootLatency, LatencyOp);
210*0a6a1f1dSLionel Sambuc   }
211*0a6a1f1dSLionel Sambuc   return NewRootLatency;
212*0a6a1f1dSLionel Sambuc }
213*0a6a1f1dSLionel Sambuc 
214*0a6a1f1dSLionel Sambuc /// preservesCriticalPathlen - True when the new instruction sequence does not
215*0a6a1f1dSLionel Sambuc /// lengthen the critical path. The DAGCombine code sequence ends in MI
216*0a6a1f1dSLionel Sambuc /// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
217*0a6a1f1dSLionel Sambuc /// necessary condition for the new sequence to replace the old sequence is that
218*0a6a1f1dSLionel Sambuc /// is cannot lengthen the critical path. This is decided by the formula
219*0a6a1f1dSLionel Sambuc /// (NewRootDepth + NewRootLatency) <=  (RootDepth + RootLatency + RootSlack)).
220*0a6a1f1dSLionel Sambuc /// The slack is the number of cycles Root can be delayed before the critical
221*0a6a1f1dSLionel Sambuc /// patch becomes longer.
preservesCriticalPathLen(MachineBasicBlock * MBB,MachineInstr * Root,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg)222*0a6a1f1dSLionel Sambuc bool MachineCombiner::preservesCriticalPathLen(
223*0a6a1f1dSLionel Sambuc     MachineBasicBlock *MBB, MachineInstr *Root,
224*0a6a1f1dSLionel Sambuc     MachineTraceMetrics::Trace BlockTrace,
225*0a6a1f1dSLionel Sambuc     SmallVectorImpl<MachineInstr *> &InsInstrs,
226*0a6a1f1dSLionel Sambuc     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
227*0a6a1f1dSLionel Sambuc 
228*0a6a1f1dSLionel Sambuc   assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
229*0a6a1f1dSLionel Sambuc   // NewRoot is the last instruction in the \p InsInstrs vector
230*0a6a1f1dSLionel Sambuc   // Get depth and latency of NewRoot
231*0a6a1f1dSLionel Sambuc   unsigned NewRootIdx = InsInstrs.size() - 1;
232*0a6a1f1dSLionel Sambuc   MachineInstr *NewRoot = InsInstrs[NewRootIdx];
233*0a6a1f1dSLionel Sambuc   unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
234*0a6a1f1dSLionel Sambuc   unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
235*0a6a1f1dSLionel Sambuc 
236*0a6a1f1dSLionel Sambuc   // Get depth, latency and slack of Root
237*0a6a1f1dSLionel Sambuc   unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
238*0a6a1f1dSLionel Sambuc   unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
239*0a6a1f1dSLionel Sambuc   unsigned RootSlack = BlockTrace.getInstrSlack(Root);
240*0a6a1f1dSLionel Sambuc 
241*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
242*0a6a1f1dSLionel Sambuc         dbgs() << " NewRootDepth: " << NewRootDepth
243*0a6a1f1dSLionel Sambuc                << " NewRootLatency: " << NewRootLatency << "\n";
244*0a6a1f1dSLionel Sambuc         dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
245*0a6a1f1dSLionel Sambuc                << " RootSlack: " << RootSlack << "\n";
246*0a6a1f1dSLionel Sambuc         dbgs() << " NewRootDepth + NewRootLatency "
247*0a6a1f1dSLionel Sambuc                << NewRootDepth + NewRootLatency << "\n";
248*0a6a1f1dSLionel Sambuc         dbgs() << " RootDepth + RootLatency + RootSlack "
249*0a6a1f1dSLionel Sambuc                << RootDepth + RootLatency + RootSlack << "\n";);
250*0a6a1f1dSLionel Sambuc 
251*0a6a1f1dSLionel Sambuc   /// True when the new sequence does not lenghten the critical path.
252*0a6a1f1dSLionel Sambuc   return ((NewRootDepth + NewRootLatency) <=
253*0a6a1f1dSLionel Sambuc           (RootDepth + RootLatency + RootSlack));
254*0a6a1f1dSLionel Sambuc }
255*0a6a1f1dSLionel Sambuc 
256*0a6a1f1dSLionel Sambuc /// helper routine to convert instructions into SC
instr2instrSC(SmallVectorImpl<MachineInstr * > & Instrs,SmallVectorImpl<const MCSchedClassDesc * > & InstrsSC)257*0a6a1f1dSLionel Sambuc void MachineCombiner::instr2instrSC(
258*0a6a1f1dSLionel Sambuc     SmallVectorImpl<MachineInstr *> &Instrs,
259*0a6a1f1dSLionel Sambuc     SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
260*0a6a1f1dSLionel Sambuc   for (auto *InstrPtr : Instrs) {
261*0a6a1f1dSLionel Sambuc     unsigned Opc = InstrPtr->getOpcode();
262*0a6a1f1dSLionel Sambuc     unsigned Idx = TII->get(Opc).getSchedClass();
263*0a6a1f1dSLionel Sambuc     const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
264*0a6a1f1dSLionel Sambuc     InstrsSC.push_back(SC);
265*0a6a1f1dSLionel Sambuc   }
266*0a6a1f1dSLionel Sambuc }
267*0a6a1f1dSLionel Sambuc /// preservesResourceLen - True when the new instructions do not increase
268*0a6a1f1dSLionel Sambuc /// resource length
preservesResourceLen(MachineBasicBlock * MBB,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs)269*0a6a1f1dSLionel Sambuc bool MachineCombiner::preservesResourceLen(
270*0a6a1f1dSLionel Sambuc     MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
271*0a6a1f1dSLionel Sambuc     SmallVectorImpl<MachineInstr *> &InsInstrs,
272*0a6a1f1dSLionel Sambuc     SmallVectorImpl<MachineInstr *> &DelInstrs) {
273*0a6a1f1dSLionel Sambuc 
274*0a6a1f1dSLionel Sambuc   // Compute current resource length
275*0a6a1f1dSLionel Sambuc 
276*0a6a1f1dSLionel Sambuc   //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
277*0a6a1f1dSLionel Sambuc   SmallVector <const MachineBasicBlock *, 1> MBBarr;
278*0a6a1f1dSLionel Sambuc   MBBarr.push_back(MBB);
279*0a6a1f1dSLionel Sambuc   unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
280*0a6a1f1dSLionel Sambuc 
281*0a6a1f1dSLionel Sambuc   // Deal with SC rather than Instructions.
282*0a6a1f1dSLionel Sambuc   SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
283*0a6a1f1dSLionel Sambuc   SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
284*0a6a1f1dSLionel Sambuc 
285*0a6a1f1dSLionel Sambuc   instr2instrSC(InsInstrs, InsInstrsSC);
286*0a6a1f1dSLionel Sambuc   instr2instrSC(DelInstrs, DelInstrsSC);
287*0a6a1f1dSLionel Sambuc 
288*0a6a1f1dSLionel Sambuc   ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
289*0a6a1f1dSLionel Sambuc   ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
290*0a6a1f1dSLionel Sambuc 
291*0a6a1f1dSLionel Sambuc   // Compute new resource length
292*0a6a1f1dSLionel Sambuc   unsigned ResLenAfterCombine =
293*0a6a1f1dSLionel Sambuc       BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
294*0a6a1f1dSLionel Sambuc 
295*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "RESOURCE DATA: \n";
296*0a6a1f1dSLionel Sambuc         dbgs() << " resource len before: " << ResLenBeforeCombine
297*0a6a1f1dSLionel Sambuc                << " after: " << ResLenAfterCombine << "\n";);
298*0a6a1f1dSLionel Sambuc 
299*0a6a1f1dSLionel Sambuc   return ResLenAfterCombine <= ResLenBeforeCombine;
300*0a6a1f1dSLionel Sambuc }
301*0a6a1f1dSLionel Sambuc 
302*0a6a1f1dSLionel Sambuc /// \returns true when new instruction sequence should be generated
303*0a6a1f1dSLionel Sambuc /// independent if it lenghtens critical path or not
doSubstitute(unsigned NewSize,unsigned OldSize)304*0a6a1f1dSLionel Sambuc bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
305*0a6a1f1dSLionel Sambuc   if (OptSize && (NewSize < OldSize))
306*0a6a1f1dSLionel Sambuc     return true;
307*0a6a1f1dSLionel Sambuc   if (!TSchedModel.hasInstrSchedModel())
308*0a6a1f1dSLionel Sambuc     return true;
309*0a6a1f1dSLionel Sambuc   return false;
310*0a6a1f1dSLionel Sambuc }
311*0a6a1f1dSLionel Sambuc 
312*0a6a1f1dSLionel Sambuc /// combineInstructions - substitute a slow code sequence with a faster one by
313*0a6a1f1dSLionel Sambuc /// evaluating instruction combining pattern.
314*0a6a1f1dSLionel Sambuc /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
315*0a6a1f1dSLionel Sambuc /// combining based on machine trace metrics. Only combine a sequence of
316*0a6a1f1dSLionel Sambuc /// instructions  when this neither lengthens the critical path nor increases
317*0a6a1f1dSLionel Sambuc /// resource pressure. When optimizing for codesize always combine when the new
318*0a6a1f1dSLionel Sambuc /// sequence is shorter.
combineInstructions(MachineBasicBlock * MBB)319*0a6a1f1dSLionel Sambuc bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
320*0a6a1f1dSLionel Sambuc   bool Changed = false;
321*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
322*0a6a1f1dSLionel Sambuc 
323*0a6a1f1dSLionel Sambuc   auto BlockIter = MBB->begin();
324*0a6a1f1dSLionel Sambuc 
325*0a6a1f1dSLionel Sambuc   while (BlockIter != MBB->end()) {
326*0a6a1f1dSLionel Sambuc     auto &MI = *BlockIter++;
327*0a6a1f1dSLionel Sambuc 
328*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
329*0a6a1f1dSLionel Sambuc     SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
330*0a6a1f1dSLionel Sambuc     // The motivating example is:
331*0a6a1f1dSLionel Sambuc     //
332*0a6a1f1dSLionel Sambuc     //     MUL  Other        MUL_op1 MUL_op2  Other
333*0a6a1f1dSLionel Sambuc     //      \    /               \      |    /
334*0a6a1f1dSLionel Sambuc     //      ADD/SUB      =>        MADD/MSUB
335*0a6a1f1dSLionel Sambuc     //      (=Root)                (=NewRoot)
336*0a6a1f1dSLionel Sambuc 
337*0a6a1f1dSLionel Sambuc     // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
338*0a6a1f1dSLionel Sambuc     // usually beneficial for code size it unfortunately can hurt performance
339*0a6a1f1dSLionel Sambuc     // when the ADD is on the critical path, but the MUL is not. With the
340*0a6a1f1dSLionel Sambuc     // substitution the MUL becomes part of the critical path (in form of the
341*0a6a1f1dSLionel Sambuc     // MADD) and can lengthen it on architectures where the MADD latency is
342*0a6a1f1dSLionel Sambuc     // longer than the ADD latency.
343*0a6a1f1dSLionel Sambuc     //
344*0a6a1f1dSLionel Sambuc     // For each instruction we check if it can be the root of a combiner
345*0a6a1f1dSLionel Sambuc     // pattern. Then for each pattern the new code sequence in form of MI is
346*0a6a1f1dSLionel Sambuc     // generated and evaluated. When the efficiency criteria (don't lengthen
347*0a6a1f1dSLionel Sambuc     // critical path, don't use more resources) is met the new sequence gets
348*0a6a1f1dSLionel Sambuc     // hooked up into the basic block before the old sequence is removed.
349*0a6a1f1dSLionel Sambuc     //
350*0a6a1f1dSLionel Sambuc     // The algorithm does not try to evaluate all patterns and pick the best.
351*0a6a1f1dSLionel Sambuc     // This is only an artificial restriction though. In practice there is
352*0a6a1f1dSLionel Sambuc     // mostly one pattern and hasPattern() can order patterns based on an
353*0a6a1f1dSLionel Sambuc     // internal cost heuristic.
354*0a6a1f1dSLionel Sambuc 
355*0a6a1f1dSLionel Sambuc     if (TII->hasPattern(MI, Pattern)) {
356*0a6a1f1dSLionel Sambuc       for (auto P : Pattern) {
357*0a6a1f1dSLionel Sambuc         SmallVector<MachineInstr *, 16> InsInstrs;
358*0a6a1f1dSLionel Sambuc         SmallVector<MachineInstr *, 16> DelInstrs;
359*0a6a1f1dSLionel Sambuc         DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
360*0a6a1f1dSLionel Sambuc         if (!MinInstr)
361*0a6a1f1dSLionel Sambuc           MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
362*0a6a1f1dSLionel Sambuc         MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
363*0a6a1f1dSLionel Sambuc         Traces->verifyAnalysis();
364*0a6a1f1dSLionel Sambuc         TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
365*0a6a1f1dSLionel Sambuc                                         InstrIdxForVirtReg);
366*0a6a1f1dSLionel Sambuc         // Found pattern, but did not generate alternative sequence.
367*0a6a1f1dSLionel Sambuc         // This can happen e.g. when an immediate could not be materialized
368*0a6a1f1dSLionel Sambuc         // in a single instruction.
369*0a6a1f1dSLionel Sambuc         if (!InsInstrs.size())
370*0a6a1f1dSLionel Sambuc           continue;
371*0a6a1f1dSLionel Sambuc         // Substitute when we optimize for codesize and the new sequence has
372*0a6a1f1dSLionel Sambuc         // fewer instructions OR
373*0a6a1f1dSLionel Sambuc         // the new sequence neither lenghten the critical path nor increases
374*0a6a1f1dSLionel Sambuc         // resource pressure.
375*0a6a1f1dSLionel Sambuc         if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
376*0a6a1f1dSLionel Sambuc             (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
377*0a6a1f1dSLionel Sambuc                                       InstrIdxForVirtReg) &&
378*0a6a1f1dSLionel Sambuc              preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
379*0a6a1f1dSLionel Sambuc           for (auto *InstrPtr : InsInstrs)
380*0a6a1f1dSLionel Sambuc             MBB->insert((MachineBasicBlock::iterator) & MI,
381*0a6a1f1dSLionel Sambuc                         (MachineInstr *)InstrPtr);
382*0a6a1f1dSLionel Sambuc           for (auto *InstrPtr : DelInstrs)
383*0a6a1f1dSLionel Sambuc             InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
384*0a6a1f1dSLionel Sambuc 
385*0a6a1f1dSLionel Sambuc           Changed = true;
386*0a6a1f1dSLionel Sambuc           ++NumInstCombined;
387*0a6a1f1dSLionel Sambuc 
388*0a6a1f1dSLionel Sambuc           Traces->invalidate(MBB);
389*0a6a1f1dSLionel Sambuc           Traces->verifyAnalysis();
390*0a6a1f1dSLionel Sambuc           // Eagerly stop after the first pattern fired
391*0a6a1f1dSLionel Sambuc           break;
392*0a6a1f1dSLionel Sambuc         } else {
393*0a6a1f1dSLionel Sambuc           // Cleanup instructions of the alternative code sequence. There is no
394*0a6a1f1dSLionel Sambuc           // use for them.
395*0a6a1f1dSLionel Sambuc           for (auto *InstrPtr : InsInstrs) {
396*0a6a1f1dSLionel Sambuc             MachineFunction *MF = MBB->getParent();
397*0a6a1f1dSLionel Sambuc             MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
398*0a6a1f1dSLionel Sambuc           }
399*0a6a1f1dSLionel Sambuc         }
400*0a6a1f1dSLionel Sambuc         InstrIdxForVirtReg.clear();
401*0a6a1f1dSLionel Sambuc       }
402*0a6a1f1dSLionel Sambuc     }
403*0a6a1f1dSLionel Sambuc   }
404*0a6a1f1dSLionel Sambuc 
405*0a6a1f1dSLionel Sambuc   return Changed;
406*0a6a1f1dSLionel Sambuc }
407*0a6a1f1dSLionel Sambuc 
runOnMachineFunction(MachineFunction & MF)408*0a6a1f1dSLionel Sambuc bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
409*0a6a1f1dSLionel Sambuc   const TargetSubtargetInfo &STI =
410*0a6a1f1dSLionel Sambuc       MF.getTarget().getSubtarget<TargetSubtargetInfo>();
411*0a6a1f1dSLionel Sambuc   TII = STI.getInstrInfo();
412*0a6a1f1dSLionel Sambuc   TRI = STI.getRegisterInfo();
413*0a6a1f1dSLionel Sambuc   SchedModel = STI.getSchedModel();
414*0a6a1f1dSLionel Sambuc   TSchedModel.init(SchedModel, &STI, TII);
415*0a6a1f1dSLionel Sambuc   MRI = &MF.getRegInfo();
416*0a6a1f1dSLionel Sambuc   Traces = &getAnalysis<MachineTraceMetrics>();
417*0a6a1f1dSLionel Sambuc   MinInstr = 0;
418*0a6a1f1dSLionel Sambuc 
419*0a6a1f1dSLionel Sambuc   OptSize = MF.getFunction()->getAttributes().hasAttribute(
420*0a6a1f1dSLionel Sambuc       AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
421*0a6a1f1dSLionel Sambuc 
422*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
423*0a6a1f1dSLionel Sambuc   if (!TII->useMachineCombiner()) {
424*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << "  Skipping pass: Target does not support machine combiner\n");
425*0a6a1f1dSLionel Sambuc     return false;
426*0a6a1f1dSLionel Sambuc   }
427*0a6a1f1dSLionel Sambuc 
428*0a6a1f1dSLionel Sambuc   bool Changed = false;
429*0a6a1f1dSLionel Sambuc 
430*0a6a1f1dSLionel Sambuc   // Try to combine instructions.
431*0a6a1f1dSLionel Sambuc   for (auto &MBB : MF)
432*0a6a1f1dSLionel Sambuc     Changed |= combineInstructions(&MBB);
433*0a6a1f1dSLionel Sambuc 
434*0a6a1f1dSLionel Sambuc   return Changed;
435*0a6a1f1dSLionel Sambuc }
436