109467b48Spatrick //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // The machine combiner pass uses machine trace metrics to ensure the combined
1009467b48Spatrick // instructions do not lengthen the critical path or the resource depth.
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick
1309467b48Spatrick #include "llvm/ADT/DenseMap.h"
1409467b48Spatrick #include "llvm/ADT/Statistic.h"
1509467b48Spatrick #include "llvm/Analysis/ProfileSummaryInfo.h"
1609467b48Spatrick #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
17*d415bd75Srobert #include "llvm/CodeGen/MachineCombinerPattern.h"
1809467b48Spatrick #include "llvm/CodeGen/MachineDominators.h"
1909467b48Spatrick #include "llvm/CodeGen/MachineFunction.h"
2009467b48Spatrick #include "llvm/CodeGen/MachineFunctionPass.h"
2109467b48Spatrick #include "llvm/CodeGen/MachineLoopInfo.h"
2209467b48Spatrick #include "llvm/CodeGen/MachineRegisterInfo.h"
2309467b48Spatrick #include "llvm/CodeGen/MachineSizeOpts.h"
2409467b48Spatrick #include "llvm/CodeGen/MachineTraceMetrics.h"
2573471bf0Spatrick #include "llvm/CodeGen/RegisterClassInfo.h"
2609467b48Spatrick #include "llvm/CodeGen/TargetInstrInfo.h"
2709467b48Spatrick #include "llvm/CodeGen/TargetRegisterInfo.h"
2809467b48Spatrick #include "llvm/CodeGen/TargetSchedule.h"
2909467b48Spatrick #include "llvm/CodeGen/TargetSubtargetInfo.h"
3009467b48Spatrick #include "llvm/InitializePasses.h"
3109467b48Spatrick #include "llvm/Support/CommandLine.h"
3209467b48Spatrick #include "llvm/Support/Debug.h"
3309467b48Spatrick #include "llvm/Support/raw_ostream.h"
3409467b48Spatrick
3509467b48Spatrick using namespace llvm;
3609467b48Spatrick
3709467b48Spatrick #define DEBUG_TYPE "machine-combiner"
3809467b48Spatrick
3909467b48Spatrick STATISTIC(NumInstCombined, "Number of machineinst combined");
4009467b48Spatrick
4109467b48Spatrick static cl::opt<unsigned>
4209467b48Spatrick inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
4309467b48Spatrick cl::desc("Incremental depth computation will be used for basic "
4409467b48Spatrick "blocks with more instructions."), cl::init(500));
4509467b48Spatrick
4609467b48Spatrick static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
4709467b48Spatrick cl::desc("Dump all substituted intrs"),
4809467b48Spatrick cl::init(false));
4909467b48Spatrick
5009467b48Spatrick #ifdef EXPENSIVE_CHECKS
5109467b48Spatrick static cl::opt<bool> VerifyPatternOrder(
5209467b48Spatrick "machine-combiner-verify-pattern-order", cl::Hidden,
5309467b48Spatrick cl::desc(
5409467b48Spatrick "Verify that the generated patterns are ordered by increasing latency"),
5509467b48Spatrick cl::init(true));
5609467b48Spatrick #else
5709467b48Spatrick static cl::opt<bool> VerifyPatternOrder(
5809467b48Spatrick "machine-combiner-verify-pattern-order", cl::Hidden,
5909467b48Spatrick cl::desc(
6009467b48Spatrick "Verify that the generated patterns are ordered by increasing latency"),
6109467b48Spatrick cl::init(false));
6209467b48Spatrick #endif
6309467b48Spatrick
6409467b48Spatrick namespace {
6509467b48Spatrick class MachineCombiner : public MachineFunctionPass {
6609467b48Spatrick const TargetSubtargetInfo *STI;
6709467b48Spatrick const TargetInstrInfo *TII;
6809467b48Spatrick const TargetRegisterInfo *TRI;
6909467b48Spatrick MCSchedModel SchedModel;
7009467b48Spatrick MachineRegisterInfo *MRI;
7109467b48Spatrick MachineLoopInfo *MLI; // Current MachineLoopInfo
7209467b48Spatrick MachineTraceMetrics *Traces;
7309467b48Spatrick MachineTraceMetrics::Ensemble *MinInstr;
7409467b48Spatrick MachineBlockFrequencyInfo *MBFI;
7509467b48Spatrick ProfileSummaryInfo *PSI;
7673471bf0Spatrick RegisterClassInfo RegClassInfo;
7709467b48Spatrick
7809467b48Spatrick TargetSchedModel TSchedModel;
7909467b48Spatrick
8009467b48Spatrick /// True if optimizing for code size.
8109467b48Spatrick bool OptSize;
8209467b48Spatrick
8309467b48Spatrick public:
8409467b48Spatrick static char ID;
MachineCombiner()8509467b48Spatrick MachineCombiner() : MachineFunctionPass(ID) {
8609467b48Spatrick initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
8709467b48Spatrick }
8809467b48Spatrick void getAnalysisUsage(AnalysisUsage &AU) const override;
8909467b48Spatrick bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const9009467b48Spatrick StringRef getPassName() const override { return "Machine InstCombiner"; }
9109467b48Spatrick
9209467b48Spatrick private:
9309467b48Spatrick bool combineInstructions(MachineBasicBlock *);
9409467b48Spatrick MachineInstr *getOperandDef(const MachineOperand &MO);
95*d415bd75Srobert bool isTransientMI(const MachineInstr *MI);
9609467b48Spatrick unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
9709467b48Spatrick DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
9809467b48Spatrick MachineTraceMetrics::Trace BlockTrace);
9909467b48Spatrick unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
10009467b48Spatrick MachineTraceMetrics::Trace BlockTrace);
10109467b48Spatrick bool
10209467b48Spatrick improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
10309467b48Spatrick MachineTraceMetrics::Trace BlockTrace,
10409467b48Spatrick SmallVectorImpl<MachineInstr *> &InsInstrs,
10509467b48Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs,
10609467b48Spatrick DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
10709467b48Spatrick MachineCombinerPattern Pattern, bool SlackIsAccurate);
10873471bf0Spatrick bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
10973471bf0Spatrick SmallVectorImpl<MachineInstr *> &InsInstrs,
11073471bf0Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs,
11173471bf0Spatrick MachineCombinerPattern Pattern);
11209467b48Spatrick bool preservesResourceLen(MachineBasicBlock *MBB,
11309467b48Spatrick MachineTraceMetrics::Trace BlockTrace,
11409467b48Spatrick SmallVectorImpl<MachineInstr *> &InsInstrs,
11509467b48Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs);
11609467b48Spatrick void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
11709467b48Spatrick SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
11809467b48Spatrick std::pair<unsigned, unsigned>
11909467b48Spatrick getLatenciesForInstrSequences(MachineInstr &MI,
12009467b48Spatrick SmallVectorImpl<MachineInstr *> &InsInstrs,
12109467b48Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs,
12209467b48Spatrick MachineTraceMetrics::Trace BlockTrace);
12309467b48Spatrick
12409467b48Spatrick void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
12509467b48Spatrick SmallVector<MachineCombinerPattern, 16> &Patterns);
12609467b48Spatrick };
12709467b48Spatrick }
12809467b48Spatrick
12909467b48Spatrick char MachineCombiner::ID = 0;
13009467b48Spatrick char &llvm::MachineCombinerID = MachineCombiner::ID;
13109467b48Spatrick
13209467b48Spatrick INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
13309467b48Spatrick "Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)13409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
13509467b48Spatrick INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
13609467b48Spatrick INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
13709467b48Spatrick false, false)
13809467b48Spatrick
13909467b48Spatrick void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
14009467b48Spatrick AU.setPreservesCFG();
14109467b48Spatrick AU.addPreserved<MachineDominatorTree>();
14209467b48Spatrick AU.addRequired<MachineLoopInfo>();
14309467b48Spatrick AU.addPreserved<MachineLoopInfo>();
14409467b48Spatrick AU.addRequired<MachineTraceMetrics>();
14509467b48Spatrick AU.addPreserved<MachineTraceMetrics>();
14609467b48Spatrick AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
14709467b48Spatrick AU.addRequired<ProfileSummaryInfoWrapperPass>();
14809467b48Spatrick MachineFunctionPass::getAnalysisUsage(AU);
14909467b48Spatrick }
15009467b48Spatrick
getOperandDef(const MachineOperand & MO)15109467b48Spatrick MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
15209467b48Spatrick MachineInstr *DefInstr = nullptr;
15309467b48Spatrick // We need a virtual register definition.
154*d415bd75Srobert if (MO.isReg() && MO.getReg().isVirtual())
15509467b48Spatrick DefInstr = MRI->getUniqueVRegDef(MO.getReg());
15609467b48Spatrick // PHI's have no depth etc.
15709467b48Spatrick if (DefInstr && DefInstr->isPHI())
15809467b48Spatrick DefInstr = nullptr;
15909467b48Spatrick return DefInstr;
16009467b48Spatrick }
16109467b48Spatrick
162*d415bd75Srobert /// Return true if MI is unlikely to generate an actual target instruction.
isTransientMI(const MachineInstr * MI)163*d415bd75Srobert bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
164*d415bd75Srobert if (!MI->isCopy())
165*d415bd75Srobert return MI->isTransient();
166*d415bd75Srobert
167*d415bd75Srobert // If MI is a COPY, check if its src and dst registers can be coalesced.
168*d415bd75Srobert Register Dst = MI->getOperand(0).getReg();
169*d415bd75Srobert Register Src = MI->getOperand(1).getReg();
170*d415bd75Srobert
171*d415bd75Srobert if (!MI->isFullCopy()) {
172*d415bd75Srobert // If src RC contains super registers of dst RC, it can also be coalesced.
173*d415bd75Srobert if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical())
174*d415bd75Srobert return false;
175*d415bd75Srobert
176*d415bd75Srobert auto SrcSub = MI->getOperand(1).getSubReg();
177*d415bd75Srobert auto SrcRC = MRI->getRegClass(Src);
178*d415bd75Srobert auto DstRC = MRI->getRegClass(Dst);
179*d415bd75Srobert return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr;
180*d415bd75Srobert }
181*d415bd75Srobert
182*d415bd75Srobert if (Src.isPhysical() && Dst.isPhysical())
183*d415bd75Srobert return Src == Dst;
184*d415bd75Srobert
185*d415bd75Srobert if (Src.isVirtual() && Dst.isVirtual()) {
186*d415bd75Srobert auto SrcRC = MRI->getRegClass(Src);
187*d415bd75Srobert auto DstRC = MRI->getRegClass(Dst);
188*d415bd75Srobert return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC);
189*d415bd75Srobert }
190*d415bd75Srobert
191*d415bd75Srobert if (Src.isVirtual())
192*d415bd75Srobert std::swap(Src, Dst);
193*d415bd75Srobert
194*d415bd75Srobert // Now Src is physical register, Dst is virtual register.
195*d415bd75Srobert auto DstRC = MRI->getRegClass(Dst);
196*d415bd75Srobert return DstRC->contains(Src);
197*d415bd75Srobert }
198*d415bd75Srobert
19909467b48Spatrick /// Computes depth of instructions in vector \InsInstr.
20009467b48Spatrick ///
20109467b48Spatrick /// \param InsInstrs is a vector of machine instructions
20209467b48Spatrick /// \param InstrIdxForVirtReg is a dense map of virtual register to index
20309467b48Spatrick /// of defining machine instruction in \p InsInstrs
20409467b48Spatrick /// \param BlockTrace is a trace of machine instructions
20509467b48Spatrick ///
20609467b48Spatrick /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
20709467b48Spatrick unsigned
getDepth(SmallVectorImpl<MachineInstr * > & InsInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineTraceMetrics::Trace BlockTrace)20809467b48Spatrick MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
20909467b48Spatrick DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
21009467b48Spatrick MachineTraceMetrics::Trace BlockTrace) {
21109467b48Spatrick SmallVector<unsigned, 16> InstrDepth;
21209467b48Spatrick // For each instruction in the new sequence compute the depth based on the
21309467b48Spatrick // operands. Use the trace information when possible. For new operands which
21409467b48Spatrick // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
21509467b48Spatrick for (auto *InstrPtr : InsInstrs) { // for each Use
21609467b48Spatrick unsigned IDepth = 0;
21709467b48Spatrick for (const MachineOperand &MO : InstrPtr->operands()) {
21809467b48Spatrick // Check for virtual register operand.
219*d415bd75Srobert if (!(MO.isReg() && MO.getReg().isVirtual()))
22009467b48Spatrick continue;
22109467b48Spatrick if (!MO.isUse())
22209467b48Spatrick continue;
22309467b48Spatrick unsigned DepthOp = 0;
22409467b48Spatrick unsigned LatencyOp = 0;
22509467b48Spatrick DenseMap<unsigned, unsigned>::iterator II =
22609467b48Spatrick InstrIdxForVirtReg.find(MO.getReg());
22709467b48Spatrick if (II != InstrIdxForVirtReg.end()) {
22809467b48Spatrick // Operand is new virtual register not in trace
22909467b48Spatrick assert(II->second < InstrDepth.size() && "Bad Index");
23009467b48Spatrick MachineInstr *DefInstr = InsInstrs[II->second];
23109467b48Spatrick assert(DefInstr &&
23209467b48Spatrick "There must be a definition for a new virtual register");
23309467b48Spatrick DepthOp = InstrDepth[II->second];
23409467b48Spatrick int DefIdx = DefInstr->findRegisterDefOperandIdx(MO.getReg());
23509467b48Spatrick int UseIdx = InstrPtr->findRegisterUseOperandIdx(MO.getReg());
23609467b48Spatrick LatencyOp = TSchedModel.computeOperandLatency(DefInstr, DefIdx,
23709467b48Spatrick InstrPtr, UseIdx);
23809467b48Spatrick } else {
23909467b48Spatrick MachineInstr *DefInstr = getOperandDef(MO);
24009467b48Spatrick if (DefInstr) {
24109467b48Spatrick DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
242*d415bd75Srobert if (!isTransientMI(DefInstr))
24309467b48Spatrick LatencyOp = TSchedModel.computeOperandLatency(
24409467b48Spatrick DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
24509467b48Spatrick InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
24609467b48Spatrick }
24709467b48Spatrick }
24809467b48Spatrick IDepth = std::max(IDepth, DepthOp + LatencyOp);
24909467b48Spatrick }
25009467b48Spatrick InstrDepth.push_back(IDepth);
25109467b48Spatrick }
25209467b48Spatrick unsigned NewRootIdx = InsInstrs.size() - 1;
25309467b48Spatrick return InstrDepth[NewRootIdx];
25409467b48Spatrick }
25509467b48Spatrick
25609467b48Spatrick /// Computes instruction latency as max of latency of defined operands.
25709467b48Spatrick ///
25809467b48Spatrick /// \param Root is a machine instruction that could be replaced by NewRoot.
25909467b48Spatrick /// It is used to compute a more accurate latency information for NewRoot in
26009467b48Spatrick /// case there is a dependent instruction in the same trace (\p BlockTrace)
26109467b48Spatrick /// \param NewRoot is the instruction for which the latency is computed
26209467b48Spatrick /// \param BlockTrace is a trace of machine instructions
26309467b48Spatrick ///
26409467b48Spatrick /// \returns Latency of \p NewRoot
getLatency(MachineInstr * Root,MachineInstr * NewRoot,MachineTraceMetrics::Trace BlockTrace)26509467b48Spatrick unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
26609467b48Spatrick MachineTraceMetrics::Trace BlockTrace) {
26709467b48Spatrick // Check each definition in NewRoot and compute the latency
26809467b48Spatrick unsigned NewRootLatency = 0;
26909467b48Spatrick
27009467b48Spatrick for (const MachineOperand &MO : NewRoot->operands()) {
27109467b48Spatrick // Check for virtual register operand.
272*d415bd75Srobert if (!(MO.isReg() && MO.getReg().isVirtual()))
27309467b48Spatrick continue;
27409467b48Spatrick if (!MO.isDef())
27509467b48Spatrick continue;
27609467b48Spatrick // Get the first instruction that uses MO
27709467b48Spatrick MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
27809467b48Spatrick RI++;
27909467b48Spatrick if (RI == MRI->reg_end())
28009467b48Spatrick continue;
28109467b48Spatrick MachineInstr *UseMO = RI->getParent();
28209467b48Spatrick unsigned LatencyOp = 0;
28309467b48Spatrick if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
28409467b48Spatrick LatencyOp = TSchedModel.computeOperandLatency(
28509467b48Spatrick NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
28609467b48Spatrick UseMO->findRegisterUseOperandIdx(MO.getReg()));
28709467b48Spatrick } else {
28809467b48Spatrick LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
28909467b48Spatrick }
29009467b48Spatrick NewRootLatency = std::max(NewRootLatency, LatencyOp);
29109467b48Spatrick }
29209467b48Spatrick return NewRootLatency;
29309467b48Spatrick }
29409467b48Spatrick
29509467b48Spatrick /// The combiner's goal may differ based on which pattern it is attempting
29609467b48Spatrick /// to optimize.
29709467b48Spatrick enum class CombinerObjective {
29809467b48Spatrick MustReduceDepth, // The data dependency chain must be improved.
29973471bf0Spatrick MustReduceRegisterPressure, // The register pressure must be reduced.
30009467b48Spatrick Default // The critical path must not be lengthened.
30109467b48Spatrick };
30209467b48Spatrick
getCombinerObjective(MachineCombinerPattern P)30309467b48Spatrick static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
30409467b48Spatrick // TODO: If C++ ever gets a real enum class, make this part of the
30509467b48Spatrick // MachineCombinerPattern class.
30609467b48Spatrick switch (P) {
30709467b48Spatrick case MachineCombinerPattern::REASSOC_AX_BY:
30809467b48Spatrick case MachineCombinerPattern::REASSOC_AX_YB:
30909467b48Spatrick case MachineCombinerPattern::REASSOC_XA_BY:
31009467b48Spatrick case MachineCombinerPattern::REASSOC_XA_YB:
311097a140dSpatrick case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
312097a140dSpatrick case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
313*d415bd75Srobert case MachineCombinerPattern::SUBADD_OP1:
314*d415bd75Srobert case MachineCombinerPattern::SUBADD_OP2:
315*d415bd75Srobert case MachineCombinerPattern::FMADD_AX:
316*d415bd75Srobert case MachineCombinerPattern::FMADD_XA:
317*d415bd75Srobert case MachineCombinerPattern::FMSUB:
318*d415bd75Srobert case MachineCombinerPattern::FNMSUB:
31909467b48Spatrick return CombinerObjective::MustReduceDepth;
32073471bf0Spatrick case MachineCombinerPattern::REASSOC_XY_BCA:
32173471bf0Spatrick case MachineCombinerPattern::REASSOC_XY_BAC:
32273471bf0Spatrick return CombinerObjective::MustReduceRegisterPressure;
32309467b48Spatrick default:
32409467b48Spatrick return CombinerObjective::Default;
32509467b48Spatrick }
32609467b48Spatrick }
32709467b48Spatrick
32809467b48Spatrick /// Estimate the latency of the new and original instruction sequence by summing
32909467b48Spatrick /// up the latencies of the inserted and deleted instructions. This assumes
33009467b48Spatrick /// that the inserted and deleted instructions are dependent instruction chains,
33109467b48Spatrick /// which might not hold in all cases.
getLatenciesForInstrSequences(MachineInstr & MI,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineTraceMetrics::Trace BlockTrace)33209467b48Spatrick std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
33309467b48Spatrick MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
33409467b48Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs,
33509467b48Spatrick MachineTraceMetrics::Trace BlockTrace) {
33609467b48Spatrick assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
33709467b48Spatrick unsigned NewRootLatency = 0;
33809467b48Spatrick // NewRoot is the last instruction in the \p InsInstrs vector.
33909467b48Spatrick MachineInstr *NewRoot = InsInstrs.back();
34009467b48Spatrick for (unsigned i = 0; i < InsInstrs.size() - 1; i++)
34109467b48Spatrick NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]);
34209467b48Spatrick NewRootLatency += getLatency(&MI, NewRoot, BlockTrace);
34309467b48Spatrick
34409467b48Spatrick unsigned RootLatency = 0;
345*d415bd75Srobert for (auto *I : DelInstrs)
34609467b48Spatrick RootLatency += TSchedModel.computeInstrLatency(I);
34709467b48Spatrick
34809467b48Spatrick return {NewRootLatency, RootLatency};
34909467b48Spatrick }
35009467b48Spatrick
reduceRegisterPressure(MachineInstr & Root,MachineBasicBlock * MBB,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,MachineCombinerPattern Pattern)35173471bf0Spatrick bool MachineCombiner::reduceRegisterPressure(
35273471bf0Spatrick MachineInstr &Root, MachineBasicBlock *MBB,
35373471bf0Spatrick SmallVectorImpl<MachineInstr *> &InsInstrs,
35473471bf0Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs,
35573471bf0Spatrick MachineCombinerPattern Pattern) {
35673471bf0Spatrick // FIXME: for now, we don't do any check for the register pressure patterns.
35773471bf0Spatrick // We treat them as always profitable. But we can do better if we make
35873471bf0Spatrick // RegPressureTracker class be aware of TIE attribute. Then we can get an
35973471bf0Spatrick // accurate compare of register pressure with DelInstrs or InsInstrs.
36073471bf0Spatrick return true;
36173471bf0Spatrick }
36273471bf0Spatrick
36309467b48Spatrick /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
36409467b48Spatrick /// The new code sequence ends in MI NewRoot. A necessary condition for the new
36509467b48Spatrick /// sequence to replace the old sequence is that it cannot lengthen the critical
36609467b48Spatrick /// path. The definition of "improve" may be restricted by specifying that the
36709467b48Spatrick /// new path improves the data dependency chain (MustReduceDepth).
improvesCriticalPathLen(MachineBasicBlock * MBB,MachineInstr * Root,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs,DenseMap<unsigned,unsigned> & InstrIdxForVirtReg,MachineCombinerPattern Pattern,bool SlackIsAccurate)36809467b48Spatrick bool MachineCombiner::improvesCriticalPathLen(
36909467b48Spatrick MachineBasicBlock *MBB, MachineInstr *Root,
37009467b48Spatrick MachineTraceMetrics::Trace BlockTrace,
37109467b48Spatrick SmallVectorImpl<MachineInstr *> &InsInstrs,
37209467b48Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs,
37309467b48Spatrick DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
37409467b48Spatrick MachineCombinerPattern Pattern,
37509467b48Spatrick bool SlackIsAccurate) {
37609467b48Spatrick // Get depth and latency of NewRoot and Root.
37709467b48Spatrick unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
37809467b48Spatrick unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
37909467b48Spatrick
38009467b48Spatrick LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
38109467b48Spatrick << NewRootDepth << "\tRootDepth: " << RootDepth);
38209467b48Spatrick
38309467b48Spatrick // For a transform such as reassociation, the cost equation is
38409467b48Spatrick // conservatively calculated so that we must improve the depth (data
38509467b48Spatrick // dependency cycles) in the critical path to proceed with the transform.
38609467b48Spatrick // Being conservative also protects against inaccuracies in the underlying
38709467b48Spatrick // machine trace metrics and CPU models.
38809467b48Spatrick if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
38909467b48Spatrick LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
39009467b48Spatrick LLVM_DEBUG(NewRootDepth < RootDepth
39109467b48Spatrick ? dbgs() << "\t and it does it\n"
39209467b48Spatrick : dbgs() << "\t but it does NOT do it\n");
39309467b48Spatrick return NewRootDepth < RootDepth;
39409467b48Spatrick }
39509467b48Spatrick
39609467b48Spatrick // A more flexible cost calculation for the critical path includes the slack
39709467b48Spatrick // of the original code sequence. This may allow the transform to proceed
39809467b48Spatrick // even if the instruction depths (data dependency cycles) become worse.
39909467b48Spatrick
40009467b48Spatrick // Account for the latency of the inserted and deleted instructions by
40109467b48Spatrick unsigned NewRootLatency, RootLatency;
40209467b48Spatrick std::tie(NewRootLatency, RootLatency) =
40309467b48Spatrick getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
40409467b48Spatrick
40509467b48Spatrick unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
40609467b48Spatrick unsigned NewCycleCount = NewRootDepth + NewRootLatency;
40709467b48Spatrick unsigned OldCycleCount =
40809467b48Spatrick RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0);
40909467b48Spatrick LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
41009467b48Spatrick << "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
41109467b48Spatrick << RootSlack << " SlackIsAccurate=" << SlackIsAccurate
41209467b48Spatrick << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
41309467b48Spatrick << "\n\tRootDepth + RootLatency + RootSlack = "
41409467b48Spatrick << OldCycleCount;);
41509467b48Spatrick LLVM_DEBUG(NewCycleCount <= OldCycleCount
41609467b48Spatrick ? dbgs() << "\n\t It IMPROVES PathLen because"
41709467b48Spatrick : dbgs() << "\n\t It DOES NOT improve PathLen because");
41809467b48Spatrick LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
41909467b48Spatrick << ", OldCycleCount = " << OldCycleCount << "\n");
42009467b48Spatrick
42109467b48Spatrick return NewCycleCount <= OldCycleCount;
42209467b48Spatrick }
42309467b48Spatrick
42409467b48Spatrick /// helper routine to convert instructions into SC
instr2instrSC(SmallVectorImpl<MachineInstr * > & Instrs,SmallVectorImpl<const MCSchedClassDesc * > & InstrsSC)42509467b48Spatrick void MachineCombiner::instr2instrSC(
42609467b48Spatrick SmallVectorImpl<MachineInstr *> &Instrs,
42709467b48Spatrick SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
42809467b48Spatrick for (auto *InstrPtr : Instrs) {
42909467b48Spatrick unsigned Opc = InstrPtr->getOpcode();
43009467b48Spatrick unsigned Idx = TII->get(Opc).getSchedClass();
43109467b48Spatrick const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
43209467b48Spatrick InstrsSC.push_back(SC);
43309467b48Spatrick }
43409467b48Spatrick }
43509467b48Spatrick
43609467b48Spatrick /// True when the new instructions do not increase resource length
preservesResourceLen(MachineBasicBlock * MBB,MachineTraceMetrics::Trace BlockTrace,SmallVectorImpl<MachineInstr * > & InsInstrs,SmallVectorImpl<MachineInstr * > & DelInstrs)43709467b48Spatrick bool MachineCombiner::preservesResourceLen(
43809467b48Spatrick MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
43909467b48Spatrick SmallVectorImpl<MachineInstr *> &InsInstrs,
44009467b48Spatrick SmallVectorImpl<MachineInstr *> &DelInstrs) {
44109467b48Spatrick if (!TSchedModel.hasInstrSchedModel())
44209467b48Spatrick return true;
44309467b48Spatrick
44409467b48Spatrick // Compute current resource length
44509467b48Spatrick
44609467b48Spatrick //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
44709467b48Spatrick SmallVector <const MachineBasicBlock *, 1> MBBarr;
44809467b48Spatrick MBBarr.push_back(MBB);
44909467b48Spatrick unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
45009467b48Spatrick
45109467b48Spatrick // Deal with SC rather than Instructions.
45209467b48Spatrick SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
45309467b48Spatrick SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
45409467b48Spatrick
45509467b48Spatrick instr2instrSC(InsInstrs, InsInstrsSC);
45609467b48Spatrick instr2instrSC(DelInstrs, DelInstrsSC);
45709467b48Spatrick
458*d415bd75Srobert ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC};
459*d415bd75Srobert ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC};
46009467b48Spatrick
46109467b48Spatrick // Compute new resource length.
46209467b48Spatrick unsigned ResLenAfterCombine =
46309467b48Spatrick BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
46409467b48Spatrick
46509467b48Spatrick LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
46609467b48Spatrick << ResLenBeforeCombine
46709467b48Spatrick << " and after: " << ResLenAfterCombine << "\n";);
46809467b48Spatrick LLVM_DEBUG(
469097a140dSpatrick ResLenAfterCombine <=
470097a140dSpatrick ResLenBeforeCombine + TII->getExtendResourceLenLimit()
47109467b48Spatrick ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
47209467b48Spatrick : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
47309467b48Spatrick "Length\n");
47409467b48Spatrick
475097a140dSpatrick return ResLenAfterCombine <=
476097a140dSpatrick ResLenBeforeCombine + TII->getExtendResourceLenLimit();
47709467b48Spatrick }
47809467b48Spatrick
47909467b48Spatrick /// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
48009467b48Spatrick /// depths if requested.
48109467b48Spatrick ///
48209467b48Spatrick /// \param MBB basic block to insert instructions in
48309467b48Spatrick /// \param MI current machine instruction
48409467b48Spatrick /// \param InsInstrs new instructions to insert in \p MBB
48509467b48Spatrick /// \param DelInstrs instruction to delete from \p MBB
48609467b48Spatrick /// \param MinInstr is a pointer to the machine trace information
48709467b48Spatrick /// \param RegUnits set of live registers, needed to compute instruction depths
48873471bf0Spatrick /// \param TII is target instruction info, used to call target hook
48973471bf0Spatrick /// \param Pattern is used to call target hook finalizeInsInstrs
49009467b48Spatrick /// \param IncrementalUpdate if true, compute instruction depths incrementally,
49109467b48Spatrick /// otherwise invalidate the trace
insertDeleteInstructions(MachineBasicBlock * MBB,MachineInstr & MI,SmallVector<MachineInstr *,16> InsInstrs,SmallVector<MachineInstr *,16> DelInstrs,MachineTraceMetrics::Ensemble * MinInstr,SparseSet<LiveRegUnit> & RegUnits,const TargetInstrInfo * TII,MachineCombinerPattern Pattern,bool IncrementalUpdate)49209467b48Spatrick static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
49309467b48Spatrick SmallVector<MachineInstr *, 16> InsInstrs,
49409467b48Spatrick SmallVector<MachineInstr *, 16> DelInstrs,
49509467b48Spatrick MachineTraceMetrics::Ensemble *MinInstr,
49609467b48Spatrick SparseSet<LiveRegUnit> &RegUnits,
49773471bf0Spatrick const TargetInstrInfo *TII,
49873471bf0Spatrick MachineCombinerPattern Pattern,
49909467b48Spatrick bool IncrementalUpdate) {
50073471bf0Spatrick // If we want to fix up some placeholder for some target, do it now.
50173471bf0Spatrick // We need this because in genAlternativeCodeSequence, we have not decided the
50273471bf0Spatrick // better pattern InsInstrs or DelInstrs, so we don't want generate some
50373471bf0Spatrick // sideeffect to the function. For example we need to delay the constant pool
50473471bf0Spatrick // entry creation here after InsInstrs is selected as better pattern.
50573471bf0Spatrick // Otherwise the constant pool entry created for InsInstrs will not be deleted
50673471bf0Spatrick // even if InsInstrs is not the better pattern.
50773471bf0Spatrick TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
50873471bf0Spatrick
50909467b48Spatrick for (auto *InstrPtr : InsInstrs)
51009467b48Spatrick MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
51109467b48Spatrick
51209467b48Spatrick for (auto *InstrPtr : DelInstrs) {
513*d415bd75Srobert InstrPtr->eraseFromParent();
51409467b48Spatrick // Erase all LiveRegs defined by the removed instruction
515*d415bd75Srobert for (auto *I = RegUnits.begin(); I != RegUnits.end();) {
51609467b48Spatrick if (I->MI == InstrPtr)
51709467b48Spatrick I = RegUnits.erase(I);
51809467b48Spatrick else
51909467b48Spatrick I++;
52009467b48Spatrick }
52109467b48Spatrick }
52209467b48Spatrick
52309467b48Spatrick if (IncrementalUpdate)
52409467b48Spatrick for (auto *InstrPtr : InsInstrs)
52509467b48Spatrick MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
52609467b48Spatrick else
52709467b48Spatrick MinInstr->invalidate(MBB);
52809467b48Spatrick
52909467b48Spatrick NumInstCombined++;
53009467b48Spatrick }
53109467b48Spatrick
53209467b48Spatrick // Check that the difference between original and new latency is decreasing for
53309467b48Spatrick // later patterns. This helps to discover sub-optimal pattern orderings.
verifyPatternOrder(MachineBasicBlock * MBB,MachineInstr & Root,SmallVector<MachineCombinerPattern,16> & Patterns)53409467b48Spatrick void MachineCombiner::verifyPatternOrder(
53509467b48Spatrick MachineBasicBlock *MBB, MachineInstr &Root,
53609467b48Spatrick SmallVector<MachineCombinerPattern, 16> &Patterns) {
53709467b48Spatrick long PrevLatencyDiff = std::numeric_limits<long>::max();
53809467b48Spatrick (void)PrevLatencyDiff; // Variable is used in assert only.
53909467b48Spatrick for (auto P : Patterns) {
54009467b48Spatrick SmallVector<MachineInstr *, 16> InsInstrs;
54109467b48Spatrick SmallVector<MachineInstr *, 16> DelInstrs;
54209467b48Spatrick DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
54309467b48Spatrick TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
54409467b48Spatrick InstrIdxForVirtReg);
54509467b48Spatrick // Found pattern, but did not generate alternative sequence.
54609467b48Spatrick // This can happen e.g. when an immediate could not be materialized
54709467b48Spatrick // in a single instruction.
54809467b48Spatrick if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries())
54909467b48Spatrick continue;
55009467b48Spatrick
55109467b48Spatrick unsigned NewRootLatency, RootLatency;
55209467b48Spatrick std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
55309467b48Spatrick Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
55409467b48Spatrick long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
55509467b48Spatrick assert(CurrentLatencyDiff <= PrevLatencyDiff &&
55609467b48Spatrick "Current pattern is better than previous pattern.");
55709467b48Spatrick PrevLatencyDiff = CurrentLatencyDiff;
55809467b48Spatrick }
55909467b48Spatrick }
56009467b48Spatrick
56109467b48Spatrick /// Substitute a slow code sequence with a faster one by
56209467b48Spatrick /// evaluating instruction combining pattern.
56309467b48Spatrick /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
56409467b48Spatrick /// combining based on machine trace metrics. Only combine a sequence of
56509467b48Spatrick /// instructions when this neither lengthens the critical path nor increases
56609467b48Spatrick /// resource pressure. When optimizing for codesize always combine when the new
56709467b48Spatrick /// sequence is shorter.
combineInstructions(MachineBasicBlock * MBB)56809467b48Spatrick bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
56909467b48Spatrick bool Changed = false;
57009467b48Spatrick LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
57109467b48Spatrick
57209467b48Spatrick bool IncrementalUpdate = false;
57309467b48Spatrick auto BlockIter = MBB->begin();
57409467b48Spatrick decltype(BlockIter) LastUpdate;
57509467b48Spatrick // Check if the block is in a loop.
57609467b48Spatrick const MachineLoop *ML = MLI->getLoopFor(MBB);
57709467b48Spatrick if (!MinInstr)
57809467b48Spatrick MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
57909467b48Spatrick
58009467b48Spatrick SparseSet<LiveRegUnit> RegUnits;
58109467b48Spatrick RegUnits.setUniverse(TRI->getNumRegUnits());
58209467b48Spatrick
58309467b48Spatrick bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
58409467b48Spatrick
58573471bf0Spatrick bool DoRegPressureReduce =
58673471bf0Spatrick TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
58773471bf0Spatrick
58809467b48Spatrick while (BlockIter != MBB->end()) {
58909467b48Spatrick auto &MI = *BlockIter++;
59009467b48Spatrick SmallVector<MachineCombinerPattern, 16> Patterns;
59109467b48Spatrick // The motivating example is:
59209467b48Spatrick //
59309467b48Spatrick // MUL Other MUL_op1 MUL_op2 Other
59409467b48Spatrick // \ / \ | /
59509467b48Spatrick // ADD/SUB => MADD/MSUB
59609467b48Spatrick // (=Root) (=NewRoot)
59709467b48Spatrick
59809467b48Spatrick // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
59909467b48Spatrick // usually beneficial for code size it unfortunately can hurt performance
60009467b48Spatrick // when the ADD is on the critical path, but the MUL is not. With the
60109467b48Spatrick // substitution the MUL becomes part of the critical path (in form of the
60209467b48Spatrick // MADD) and can lengthen it on architectures where the MADD latency is
60309467b48Spatrick // longer than the ADD latency.
60409467b48Spatrick //
60509467b48Spatrick // For each instruction we check if it can be the root of a combiner
60609467b48Spatrick // pattern. Then for each pattern the new code sequence in form of MI is
60709467b48Spatrick // generated and evaluated. When the efficiency criteria (don't lengthen
60809467b48Spatrick // critical path, don't use more resources) is met the new sequence gets
60909467b48Spatrick // hooked up into the basic block before the old sequence is removed.
61009467b48Spatrick //
61109467b48Spatrick // The algorithm does not try to evaluate all patterns and pick the best.
61209467b48Spatrick // This is only an artificial restriction though. In practice there is
61309467b48Spatrick // mostly one pattern, and getMachineCombinerPatterns() can order patterns
61409467b48Spatrick // based on an internal cost heuristic. If
61509467b48Spatrick // machine-combiner-verify-pattern-order is enabled, all patterns are
61609467b48Spatrick // checked to ensure later patterns do not provide better latency savings.
61709467b48Spatrick
61873471bf0Spatrick if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
61909467b48Spatrick continue;
62009467b48Spatrick
62109467b48Spatrick if (VerifyPatternOrder)
62209467b48Spatrick verifyPatternOrder(MBB, MI, Patterns);
62309467b48Spatrick
624*d415bd75Srobert for (const auto P : Patterns) {
62509467b48Spatrick SmallVector<MachineInstr *, 16> InsInstrs;
62609467b48Spatrick SmallVector<MachineInstr *, 16> DelInstrs;
62709467b48Spatrick DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
62809467b48Spatrick TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
62909467b48Spatrick InstrIdxForVirtReg);
63009467b48Spatrick // Found pattern, but did not generate alternative sequence.
63109467b48Spatrick // This can happen e.g. when an immediate could not be materialized
63209467b48Spatrick // in a single instruction.
633*d415bd75Srobert if (InsInstrs.empty())
63409467b48Spatrick continue;
63509467b48Spatrick
63609467b48Spatrick LLVM_DEBUG(if (dump_intrs) {
63709467b48Spatrick dbgs() << "\tFor the Pattern (" << (int)P
63809467b48Spatrick << ") these instructions could be removed\n";
63909467b48Spatrick for (auto const *InstrPtr : DelInstrs)
64009467b48Spatrick InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
64109467b48Spatrick /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
64209467b48Spatrick dbgs() << "\tThese instructions could replace the removed ones\n";
64309467b48Spatrick for (auto const *InstrPtr : InsInstrs)
64409467b48Spatrick InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
64509467b48Spatrick /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
64609467b48Spatrick });
64709467b48Spatrick
64873471bf0Spatrick if (IncrementalUpdate && LastUpdate != BlockIter) {
64909467b48Spatrick // Update depths since the last incremental update.
65009467b48Spatrick MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
65109467b48Spatrick LastUpdate = BlockIter;
65209467b48Spatrick }
65309467b48Spatrick
65473471bf0Spatrick if (DoRegPressureReduce &&
65573471bf0Spatrick getCombinerObjective(P) ==
65673471bf0Spatrick CombinerObjective::MustReduceRegisterPressure) {
65773471bf0Spatrick if (MBB->size() > inc_threshold) {
65873471bf0Spatrick // Use incremental depth updates for basic blocks above threshold
65973471bf0Spatrick IncrementalUpdate = true;
66073471bf0Spatrick LastUpdate = BlockIter;
66173471bf0Spatrick }
66273471bf0Spatrick if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
66373471bf0Spatrick // Replace DelInstrs with InsInstrs.
66473471bf0Spatrick insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
66573471bf0Spatrick RegUnits, TII, P, IncrementalUpdate);
66673471bf0Spatrick Changed |= true;
66773471bf0Spatrick
66873471bf0Spatrick // Go back to previous instruction as it may have ILP reassociation
66973471bf0Spatrick // opportunity.
67073471bf0Spatrick BlockIter--;
67173471bf0Spatrick break;
67273471bf0Spatrick }
67373471bf0Spatrick }
67473471bf0Spatrick
675*d415bd75Srobert if (ML && TII->isThroughputPattern(P)) {
676*d415bd75Srobert LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
677*d415bd75Srobert insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
678*d415bd75Srobert RegUnits, TII, P, IncrementalUpdate);
679*d415bd75Srobert // Eagerly stop after the first pattern fires.
680*d415bd75Srobert Changed = true;
681*d415bd75Srobert break;
682*d415bd75Srobert } else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
683*d415bd75Srobert LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
684*d415bd75Srobert << InsInstrs.size() << " < "
685*d415bd75Srobert << DelInstrs.size() << ")\n");
68609467b48Spatrick insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
68773471bf0Spatrick RegUnits, TII, P, IncrementalUpdate);
68809467b48Spatrick // Eagerly stop after the first pattern fires.
68909467b48Spatrick Changed = true;
69009467b48Spatrick break;
69109467b48Spatrick } else {
69209467b48Spatrick // For big basic blocks, we only compute the full trace the first time
69309467b48Spatrick // we hit this. We do not invalidate the trace, but instead update the
69409467b48Spatrick // instruction depths incrementally.
69509467b48Spatrick // NOTE: Only the instruction depths up to MI are accurate. All other
69609467b48Spatrick // trace information is not updated.
69709467b48Spatrick MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
69809467b48Spatrick Traces->verifyAnalysis();
69909467b48Spatrick if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
70009467b48Spatrick InstrIdxForVirtReg, P,
70109467b48Spatrick !IncrementalUpdate) &&
70209467b48Spatrick preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
70309467b48Spatrick if (MBB->size() > inc_threshold) {
70409467b48Spatrick // Use incremental depth updates for basic blocks above treshold
70509467b48Spatrick IncrementalUpdate = true;
70609467b48Spatrick LastUpdate = BlockIter;
70709467b48Spatrick }
70809467b48Spatrick
70909467b48Spatrick insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
71073471bf0Spatrick RegUnits, TII, P, IncrementalUpdate);
71109467b48Spatrick
71209467b48Spatrick // Eagerly stop after the first pattern fires.
71309467b48Spatrick Changed = true;
71409467b48Spatrick break;
71509467b48Spatrick }
71609467b48Spatrick // Cleanup instructions of the alternative code sequence. There is no
71709467b48Spatrick // use for them.
71809467b48Spatrick MachineFunction *MF = MBB->getParent();
71909467b48Spatrick for (auto *InstrPtr : InsInstrs)
720*d415bd75Srobert MF->deleteMachineInstr(InstrPtr);
72109467b48Spatrick }
72209467b48Spatrick InstrIdxForVirtReg.clear();
72309467b48Spatrick }
72409467b48Spatrick }
72509467b48Spatrick
72609467b48Spatrick if (Changed && IncrementalUpdate)
72709467b48Spatrick Traces->invalidate(MBB);
72809467b48Spatrick return Changed;
72909467b48Spatrick }
73009467b48Spatrick
runOnMachineFunction(MachineFunction & MF)73109467b48Spatrick bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
73209467b48Spatrick STI = &MF.getSubtarget();
73309467b48Spatrick TII = STI->getInstrInfo();
73409467b48Spatrick TRI = STI->getRegisterInfo();
73509467b48Spatrick SchedModel = STI->getSchedModel();
73609467b48Spatrick TSchedModel.init(STI);
73709467b48Spatrick MRI = &MF.getRegInfo();
73809467b48Spatrick MLI = &getAnalysis<MachineLoopInfo>();
73909467b48Spatrick Traces = &getAnalysis<MachineTraceMetrics>();
74009467b48Spatrick PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
74109467b48Spatrick MBFI = (PSI && PSI->hasProfileSummary()) ?
74209467b48Spatrick &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
74309467b48Spatrick nullptr;
74409467b48Spatrick MinInstr = nullptr;
74509467b48Spatrick OptSize = MF.getFunction().hasOptSize();
74673471bf0Spatrick RegClassInfo.runOnMachineFunction(MF);
74709467b48Spatrick
74809467b48Spatrick LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
74909467b48Spatrick if (!TII->useMachineCombiner()) {
75009467b48Spatrick LLVM_DEBUG(
75109467b48Spatrick dbgs()
75209467b48Spatrick << " Skipping pass: Target does not support machine combiner\n");
75309467b48Spatrick return false;
75409467b48Spatrick }
75509467b48Spatrick
75609467b48Spatrick bool Changed = false;
75709467b48Spatrick
75809467b48Spatrick // Try to combine instructions.
75909467b48Spatrick for (auto &MBB : MF)
76009467b48Spatrick Changed |= combineInstructions(&MBB);
76109467b48Spatrick
76209467b48Spatrick return Changed;
76309467b48Spatrick }
764