xref: /llvm-project/llvm/lib/CodeGen/MachineLICM.cpp (revision 19032bfe87fa0f4a3a7b3e68daafc93331b71e0d)
1 //===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion on machine instructions. We
10 // attempt to remove as much code from the body of a loop as possible.
11 //
12 // This pass is not intended to be a replacement or a complete alternative
13 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14 // constructs that are not exposed before lowering and instruction selection.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/CodeGen/MachineLICM.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
27 #include "llvm/CodeGen/MachineDomTreeUpdater.h"
28 #include "llvm/CodeGen/MachineDominators.h"
29 #include "llvm/CodeGen/MachineFrameInfo.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include "llvm/CodeGen/MachineInstr.h"
33 #include "llvm/CodeGen/MachineLoopInfo.h"
34 #include "llvm/CodeGen/MachineMemOperand.h"
35 #include "llvm/CodeGen/MachineOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/PseudoSourceValue.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetLowering.h"
40 #include "llvm/CodeGen/TargetRegisterInfo.h"
41 #include "llvm/CodeGen/TargetSchedule.h"
42 #include "llvm/CodeGen/TargetSubtargetInfo.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/InitializePasses.h"
45 #include "llvm/MC/MCInstrDesc.h"
46 #include "llvm/MC/MCRegister.h"
47 #include "llvm/MC/MCRegisterInfo.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <limits>
56 #include <vector>
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "machinelicm"
61 
62 static cl::opt<bool>
63 AvoidSpeculation("avoid-speculation",
64                  cl::desc("MachineLICM should avoid speculation"),
65                  cl::init(true), cl::Hidden);
66 
67 static cl::opt<bool>
68 HoistCheapInsts("hoist-cheap-insts",
69                 cl::desc("MachineLICM should hoist even cheap instructions"),
70                 cl::init(false), cl::Hidden);
71 
72 static cl::opt<bool>
73 HoistConstStores("hoist-const-stores",
74                  cl::desc("Hoist invariant stores"),
75                  cl::init(true), cl::Hidden);
76 
77 static cl::opt<bool> HoistConstLoads("hoist-const-loads",
78                                      cl::desc("Hoist invariant loads"),
79                                      cl::init(true), cl::Hidden);
80 
81 // The default threshold of 100 (i.e. if target block is 100 times hotter)
82 // is based on empirical data on a single target and is subject to tuning.
83 static cl::opt<unsigned>
84 BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
85                              cl::desc("Do not hoist instructions if target"
86                              "block is N times hotter than the source."),
87                              cl::init(100), cl::Hidden);
88 
89 enum class UseBFI { None, PGO, All };
90 
91 static cl::opt<UseBFI>
92 DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
93                               cl::desc("Disable hoisting instructions to"
94                               " hotter blocks"),
95                               cl::init(UseBFI::PGO), cl::Hidden,
96                               cl::values(clEnumValN(UseBFI::None, "none",
97                               "disable the feature"),
98                               clEnumValN(UseBFI::PGO, "pgo",
99                               "enable the feature when using profile data"),
100                               clEnumValN(UseBFI::All, "all",
101                               "enable the feature with/wo profile data")));
102 
// Pass statistics, reported with -stats.
STATISTIC(NumHoisted,
          "Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
          "Number of instructions hoisted in low reg pressure situation");
STATISTIC(NumHighLatency,
          "Number of high latency instructions hoisted");
STATISTIC(NumCSEed,
          "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
          "Number of machine instructions hoisted out of loops post regalloc");
STATISTIC(NumStoreConst,
          "Number of stores of const phys reg hoisted out of loops");
STATISTIC(NumNotHoistedDueToHotness,
          "Number of instructions not hoisted due to block frequency");
117 
118 namespace {
  // Bitmask describing the outcome of a hoisting attempt: an instruction may
  // be left in place, hoisted, and/or erased entirely.
  enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };

  /// Shared implementation of machine loop-invariant code motion, driven
  /// either by the legacy passes (MachineLICM / EarlyMachineLICM) or by the
  /// new pass manager. Behavior differs before vs. after register allocation,
  /// selected by the PreRegAlloc flag.
  class MachineLICMImpl {
    const TargetInstrInfo *TII = nullptr;
    const TargetLoweringBase *TLI = nullptr;
    const TargetRegisterInfo *TRI = nullptr;
    const MachineFrameInfo *MFI = nullptr;
    MachineRegisterInfo *MRI = nullptr;
    TargetSchedModel SchedModel;
    bool PreRegAlloc = false;
    bool HasProfileData = false;
    // Exactly one of the following is non-null (asserted in the constructor);
    // they identify which pass-manager flavor supplies our analyses.
    Pass *LegacyPass;
    MachineFunctionAnalysisManager *MFAM;

    // Various analyses that we use...
    AliasAnalysis *AA = nullptr;               // Alias analysis info.
    MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequency info
    MachineLoopInfo *MLI = nullptr;            // Current MachineLoopInfo
    MachineDomTreeUpdater *MDTU = nullptr;     // Wraps current dominator tree

    // State that is updated as we process loops
    bool Changed = false;           // True if a loop is changed.
    bool FirstInLoop = false;       // True if it's the first LICM in the loop.

    // Holds information about whether it is allowed to move load instructions
    // out of the loop
    SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;

    // Exit blocks of each Loop, computed lazily and cached by isExitBlock().
    DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;

    // Return true if MBB is an exit block of CurLoop. The exit-block list is
    // computed on the first query for a given loop and cached thereafter.
    bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
      auto [It, Inserted] = ExitBlockMap.try_emplace(CurLoop);
      if (Inserted) {
        SmallVector<MachineBasicBlock *, 8> ExitBlocks;
        CurLoop->getExitBlocks(ExitBlocks);
        It->second = std::move(ExitBlocks);
      }
      return is_contained(It->second, MBB);
    }

    // Track 'estimated' register pressure.
    SmallDenseSet<Register> RegSeen;
    SmallVector<unsigned, 8> RegPressure;

    // Register pressure "limit" per register pressure set. If the pressure
    // is higher than the limit, then it's considered high.
    SmallVector<unsigned, 8> RegLimit;

    // Register pressure on path leading from loop preheader to current BB.
    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;

    // For each opcode per preheader, keep a list of potential CSE instructions.
    DenseMap<MachineBasicBlock *,
             DenseMap<unsigned, std::vector<MachineInstr *>>>
        CSEMap;

    // Values for SpeculationState below.
    enum {
      SpeculateFalse   = 0,
      SpeculateTrue    = 1,
      SpeculateUnknown = 2
    };

    // If a MBB does not dominate loop exiting blocks then it may not be safe
    // to hoist loads from this block.
    // Tri-state: 0 - false, 1 - true, 2 - unknown
    // Cached per-block answer for IsGuaranteedToExecute; reset to
    // SpeculateUnknown before each block is scanned.
    unsigned SpeculationState = SpeculateUnknown;

  public:
    MachineLICMImpl(bool PreRegAlloc, Pass *LegacyPass,
                    MachineFunctionAnalysisManager *MFAM)
        : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
      assert((LegacyPass || MFAM) && "LegacyPass or MFAM must be provided");
      assert(!(LegacyPass && MFAM) &&
             "LegacyPass and MFAM cannot be provided at the same time");
    }

    /// Run LICM over all loops in \p MF; returns true if anything changed.
    bool run(MachineFunction &MF);

    /// Drop all per-function state.
    void releaseMemory() {
      RegSeen.clear();
      RegPressure.clear();
      RegLimit.clear();
      BackTrace.clear();
      CSEMap.clear();
      ExitBlockMap.clear();
    }

  private:
    /// Keep track of information about hoisting candidates.
    struct CandidateInfo {
      MachineInstr *MI;
      unsigned      Def;  // The single register the candidate defines.
      int           FI;   // Spill-slot index for reloads, otherwise INT_MIN.

      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
        : MI(mi), Def(def), FI(fi) {}
    };

    /// Post-regalloc hoisting over an entire loop region.
    void HoistRegionPostRA(MachineLoop *CurLoop,
                           MachineBasicBlock *CurPreheader);

    /// Move a single qualified post-RA candidate into the loop preheader.
    void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
                     MachineBasicBlock *CurPreheader);

    /// Scan one instruction for post-RA candidacy, updating register-unit
    /// def/clobber sets and the set of stored-to spill slots.
    void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers,
                   SmallDenseSet<int> &StoredFIs,
                   SmallVectorImpl<CandidateInfo> &Candidates,
                   MachineLoop *CurLoop);

    /// Make Reg live-in to all loop blocks and clear overlapping kill flags.
    void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);

    bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);

    bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);

    bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);

    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,
                               MachineLoop *CurLoop) const;

    bool IsCheapInstruction(MachineInstr &MI) const;

    bool CanCauseHighRegPressure(const SmallDenseMap<unsigned, int> &Cost,
                                 bool Cheap);

    void UpdateBackTraceRegPressure(const MachineInstr *MI);

    bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);

    bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);

    bool isTriviallyReMaterializable(const MachineInstr &MI) const;

    // Scope handling for the dominator-tree walk: each scope snapshots the
    // register-pressure state (BackTrace) on entry and restores it on exit.
    void EnterScope(MachineBasicBlock *MBB);

    void ExitScope(MachineBasicBlock *MBB);

    void ExitScopeIfDone(
        MachineDomTreeNode *Node,
        DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
        const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);

    /// Pre-regalloc hoisting: walk the dominator subtree rooted at HeaderN.
    void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop,
                        MachineBasicBlock *CurPreheader);

    void InitRegPressure(MachineBasicBlock *BB);

    SmallDenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
                                                  bool ConsiderSeen,
                                                  bool ConsiderUnseenAsDef);

    void UpdateRegPressure(const MachineInstr *MI,
                           bool ConsiderUnseenAsDef = false);

    MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);

    MachineInstr *LookForDuplicate(const MachineInstr *MI,
                                   std::vector<MachineInstr *> &PrevMIs);

    bool
    EliminateCSE(MachineInstr *MI,
                 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);

    bool MayCSE(MachineInstr *MI);

    /// Attempt to hoist MI; the return value is a mask of HoistResult bits.
    unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
                   MachineLoop *CurLoop);

    void InitCSEMap(MachineBasicBlock *BB);

    void InitializeLoadsHoistableLoops();

    bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
                            MachineBasicBlock *TgtBlock);
    MachineBasicBlock *getCurPreheader(MachineLoop *CurLoop,
                                       MachineBasicBlock *CurPreheader);
  };
297 
  /// Legacy pass-manager boilerplate shared by the pre- and post-regalloc
  /// LICM variants; delegates the real work to MachineLICMImpl.
  class MachineLICMBase : public MachineFunctionPass {
    bool PreRegAlloc;

  public:
    MachineLICMBase(char &ID, bool PreRegAlloc)
        : MachineFunctionPass(ID), PreRegAlloc(PreRegAlloc) {}

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineLoopInfoWrapperPass>();
      // Block frequency info is only required when hotness-gated hoisting is
      // in effect (see DisableHoistingToHotterBlocks).
      if (DisableHoistingToHotterBlocks != UseBFI::None)
        AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
      AU.addRequired<MachineDominatorTreeWrapperPass>();
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreserved<MachineLoopInfoWrapperPass>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };
317 
  /// Post-regalloc variant (constructed with PreRegAlloc = false).
  class MachineLICM : public MachineLICMBase {
  public:
    static char ID;
    MachineLICM() : MachineLICMBase(ID, false) {
      initializeMachineLICMPass(*PassRegistry::getPassRegistry());
    }
  };
325 
  /// Pre-regalloc variant (constructed with PreRegAlloc = true).
  class EarlyMachineLICM : public MachineLICMBase {
  public:
    static char ID;
    EarlyMachineLICM() : MachineLICMBase(ID, true) {
      initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
    }
  };
333 
334 } // end anonymous namespace
335 
char MachineLICM::ID;
char EarlyMachineLICM::ID;

// Externally-visible pass IDs used to reference these passes from pipelines.
char &llvm::MachineLICMID = MachineLICM::ID;
char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;

// Register the post-regalloc pass with the legacy pass registry.
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
                      "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
                    "Machine Loop Invariant Code Motion", false, false)

// Register the pre-regalloc (early) variant.
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
                      "Early Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
                    "Early Machine Loop Invariant Code Motion", false, false)
359 
// Legacy pass-manager entry point: construct the shared implementation with
// no new-PM analysis manager and hand it the function.
bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
  // Honor mechanisms that disable optimization for this function.
  if (skipFunction(MF.getFunction()))
    return false;

  MachineLICMImpl Impl(PreRegAlloc, this, nullptr);
  return Impl.run(MF);
}
367 
// Fetch an analysis result from whichever pass manager is active: the legacy
// wrapper pass when LegacyPass is set, otherwise the new-PM analysis via
// MFAM. INFIX is spliced into the wrapper-pass name (e.g. MachineLoop+Info
// selects MachineLoopInfoWrapperPass but MachineLoopAnalysis).
#define GET_RESULT(RESULT, GETTER, INFIX)                                      \
  ((LegacyPass)                                                                \
       ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER()       \
       : &MFAM->getResult<RESULT##Analysis>(MF))
372 
373 bool MachineLICMImpl::run(MachineFunction &MF) {
374   AA = MFAM != nullptr
375            ? &MFAM->getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
376                   .getManager()
377                   .getResult<AAManager>(MF.getFunction())
378            : &LegacyPass->getAnalysis<AAResultsWrapperPass>().getAAResults();
379   MachineDomTreeUpdater DTU(GET_RESULT(MachineDominatorTree, getDomTree, ),
380                             MachineDomTreeUpdater::UpdateStrategy::Lazy);
381   MDTU = &DTU;
382   MLI = GET_RESULT(MachineLoop, getLI, Info);
383   MBFI = DisableHoistingToHotterBlocks != UseBFI::None
384              ? GET_RESULT(MachineBlockFrequency, getMBFI, Info)
385              : nullptr;
386 
387   Changed = FirstInLoop = false;
388   const TargetSubtargetInfo &ST = MF.getSubtarget();
389   TII = ST.getInstrInfo();
390   TLI = ST.getTargetLowering();
391   TRI = ST.getRegisterInfo();
392   MFI = &MF.getFrameInfo();
393   MRI = &MF.getRegInfo();
394   SchedModel.init(&ST);
395 
396   HasProfileData = MF.getFunction().hasProfileData();
397 
398   if (PreRegAlloc)
399     LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
400   else
401     LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
402   LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
403 
404   if (PreRegAlloc) {
405     // Estimate register pressure during pre-regalloc pass.
406     unsigned NumRPS = TRI->getNumRegPressureSets();
407     RegPressure.resize(NumRPS);
408     std::fill(RegPressure.begin(), RegPressure.end(), 0);
409     RegLimit.resize(NumRPS);
410     for (unsigned i = 0, e = NumRPS; i != e; ++i)
411       RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
412   }
413 
414   if (HoistConstLoads)
415     InitializeLoadsHoistableLoops();
416 
417   SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
418   while (!Worklist.empty()) {
419     MachineLoop *CurLoop = Worklist.pop_back_val();
420     MachineBasicBlock *CurPreheader = nullptr;
421 
422     if (!PreRegAlloc)
423       HoistRegionPostRA(CurLoop, CurPreheader);
424     else {
425       // CSEMap is initialized for loop header when the first instruction is
426       // being hoisted.
427       MachineDomTreeNode *N = MDTU->getDomTree().getNode(CurLoop->getHeader());
428       FirstInLoop = true;
429       HoistOutOfLoop(N, CurLoop, CurPreheader);
430       CSEMap.clear();
431     }
432   }
433   releaseMemory();
434   return Changed;
435 }
436 
437 /// Return true if instruction stores to the specified frame.
438 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
439   // Check mayStore before memory operands so that e.g. DBG_VALUEs will return
440   // true since they have no memory operands.
441   if (!MI->mayStore())
442      return false;
443   // If we lost memory operands, conservatively assume that the instruction
444   // writes to all slots.
445   if (MI->memoperands_empty())
446     return true;
447   for (const MachineMemOperand *MemOp : MI->memoperands()) {
448     if (!MemOp->isStore() || !MemOp->getPseudoValue())
449       continue;
450     if (const FixedStackPseudoSourceValue *Value =
451         dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
452       if (Value->getFrameIndex() == FI)
453         return true;
454     }
455   }
456   return false;
457 }
458 
459 static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI,
460                                                 BitVector &RUs,
461                                                 const uint32_t *Mask) {
462   // FIXME: This intentionally works in reverse due to some issues with the
463   // Register Units infrastructure.
464   //
465   // This is used to apply callee-saved-register masks to the clobbered regunits
466   // mask.
467   //
468   // The right way to approach this is to start with a BitVector full of ones,
469   // then reset all the bits of the regunits of each register that is set in the
470   // mask (registers preserved), then OR the resulting bits with the Clobbers
471   // mask. This correctly prioritizes the saved registers, so if a RU is shared
472   // between a register that is preserved, and one that is NOT preserved, that
473   // RU will not be set in the output vector (the clobbers).
474   //
475   // What we have to do for now is the opposite: we have to assume that the
476   // regunits of all registers that are NOT preserved are clobbered, even if
477   // those regunits are preserved by another register. So if a RU is shared
478   // like described previously, that RU will be set.
479   //
480   // This is to work around an issue which appears in AArch64, but isn't
481   // exclusive to that target: AArch64's Qn registers (128 bits) have Dn
482   // register (lower 64 bits). A few Dn registers are preserved by some calling
483   // conventions, but Qn and Dn share exactly the same reg units.
484   //
485   // If we do this the right way, Qn will be marked as NOT clobbered even though
486   // its upper 64 bits are NOT preserved. The conservative approach handles this
487   // correctly at the cost of some missed optimizations on other targets.
488   //
489   // This is caused by how RegUnits are handled within TableGen. Ideally, Qn
490   // should have an extra RegUnit to model the "unknown" bits not covered by the
491   // subregs.
492   BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());
493   const unsigned NumRegs = TRI.getNumRegs();
494   const unsigned MaskWords = (NumRegs + 31) / 32;
495   for (unsigned K = 0; K < MaskWords; ++K) {
496     const uint32_t Word = Mask[K];
497     for (unsigned Bit = 0; Bit < 32; ++Bit) {
498       const unsigned PhysReg = (K * 32) + Bit;
499       if (PhysReg == NumRegs)
500         break;
501 
502       if (PhysReg && !((Word >> Bit) & 1)) {
503         for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI)
504           RUsFromRegsNotInMask.set(*RUI);
505       }
506     }
507   }
508 
509   RUs |= RUsFromRegsNotInMask;
510 }
511 
/// Examine the instruction for potential LICM candidate. Also
/// gather register def and frame object update information.
/// This is the post-regalloc scan (the assert below requires physical
/// registers); liveness is tracked per register unit: \p RUDefs holds units
/// defined at least once in the loop, \p RUClobbers units defined more than
/// once or otherwise unsafe to hoist across.
void MachineLICMImpl::ProcessMI(MachineInstr *MI, BitVector &RUDefs,
                                BitVector &RUClobbers,
                                SmallDenseSet<int> &StoredFIs,
                                SmallVectorImpl<CandidateInfo> &Candidates,
                                MachineLoop *CurLoop) {
  bool RuledOut = false;
  bool HasNonInvariantUse = false;
  unsigned Def = 0; // The single register MI defines, once discovered.
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isFI()) {
      // Remember if the instruction stores to the frame index.
      int FI = MO.getIndex();
      if (!StoredFIs.count(FI) &&
          MFI->isSpillSlotObjectIndex(FI) &&
          InstructionStoresToFI(MI, FI))
        StoredFIs.insert(FI);
      // Conservatively treat any frame-index operand as a non-invariant use.
      HasNonInvariantUse = true;
      continue;
    }

    // We can't hoist an instruction defining a physreg that is clobbered in
    // the loop.
    if (MO.isRegMask()) {
      applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, MO.getRegMask());
      continue;
    }

    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    assert(Reg.isPhysical() && "Not expecting virtual register!");

    if (!MO.isDef()) {
      if (!HasNonInvariantUse) {
        for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
            HasNonInvariantUse = true;
            break;
          }
        }
      }
      continue;
    }

    if (MO.isImplicit()) {
      // Implicit defs are recorded as clobbers so no other def of the same
      // units can be considered invariant.
      for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
        RUClobbers.set(*RUI);
      if (!MO.isDead())
        // Non-dead implicit def? This cannot be hoisted.
        RuledOut = true;
      // No need to check if a dead implicit def is also defined by
      // another instruction.
      continue;
    }

    // FIXME: For now, avoid instructions with multiple defs, unless
    // it's a dead implicit def.
    if (Def)
      RuledOut = true;
    else
      Def = Reg;

    // If we have already seen another instruction that defines the same
    // register, then this is not safe.  Two defs is indicated by setting a
    // PhysRegClobbers bit.
    for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
      if (RUDefs.test(*RUI)) {
        RUClobbers.set(*RUI);
        RuledOut = true;
      } else if (RUClobbers.test(*RUI)) {
        // MI defined register is seen defined by another instruction in
        // the loop, it cannot be a LICM candidate.
        RuledOut = true;
      }

      RUDefs.set(*RUI);
    }
  }

  // Only consider reloads for now and remats which do not have register
  // operands. FIXME: Consider unfold load folding instructions.
  if (Def && !RuledOut) {
    // FI remains INT_MIN unless MI is a reload from a spill slot; the
    // sentinel is checked later against StoredFIs.
    int FI = std::numeric_limits<int>::min();
    if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
        (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
      Candidates.push_back(CandidateInfo(MI, Def, FI));
  }
}
606 
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader. Post-regalloc variant: candidates are collected by ProcessMI
/// with def/clobber information tracked per register unit, then filtered and
/// hoisted via HoistPostRA.
void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop,
                                        MachineBasicBlock *CurPreheader) {
  MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
  if (!Preheader)
    return;

  unsigned NumRegUnits = TRI->getNumRegUnits();
  BitVector RUDefs(NumRegUnits);     // RUs defined once in the loop.
  BitVector RUClobbers(NumRegUnits); // RUs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallDenseSet<int> StoredFIs; // Spill slots stored to inside the loop.

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-ins as an external def.
    // FIXME: That means a reload that is reused in successor block(s) will
    // not be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegUnitIterator RUI(LI.PhysReg, TRI); RUI.isValid(); ++RUI)
        RUDefs.set(*RUI);
    }

    // Funclet entry blocks will clobber all registers
    if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
      applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, Mask);

    // Reset the guaranteed-to-execute cache for each block.
    SpeculationState = SpeculateUnknown;
    for (MachineInstr &MI : *BB)
      ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRUs(NumRegUnits);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (const MachineOperand &MO : TI->operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
        TermRUs.set(*RUI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (CandidateInfo &Candidate : Candidates) {
    // Reject reloads whose source slot is overwritten inside the loop
    // (FI == INT_MIN is the "not a reload" sentinel from ProcessMI).
    if (Candidate.FI != std::numeric_limits<int>::min() &&
        StoredFIs.count(Candidate.FI))
      continue;

    unsigned Def = Candidate.Def;
    bool Safe = true;
    for (MCRegUnitIterator RUI(Def, TRI); RUI.isValid(); ++RUI) {
      if (RUClobbers.test(*RUI) || TermRUs.test(*RUI)) {
        Safe = false;
        break;
      }
    }

    if (!Safe)
      continue;

    MachineInstr *MI = Candidate.MI;
    // Every register the candidate reads must be loop-invariant as well.
    for (const MachineOperand &MO : MI->all_uses()) {
      if (!MO.getReg())
        continue;
      for (MCRegUnitIterator RUI(MO.getReg(), TRI); RUI.isValid(); ++RUI) {
        if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }

      if (!Safe)
        break;
    }

    if (Safe)
      HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
  }
}
708 
709 /// Add register 'Reg' to the livein sets of BBs in the current loop, and make
710 /// sure it is not killed by any instructions in the loop.
711 void MachineLICMImpl::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {
712   for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
713     if (!BB->isLiveIn(Reg))
714       BB->addLiveIn(Reg);
715     for (MachineInstr &MI : *BB) {
716       for (MachineOperand &MO : MI.all_uses()) {
717         if (!MO.getReg())
718           continue;
719         if (TRI->regsOverlap(Reg, MO.getReg()))
720           MO.setIsKill(false);
721       }
722     }
723   }
724 }
725 
/// When an instruction is found to only use loop invariant operands that is
/// safe to hoist, this function is called to do the dirty work.
void MachineLICMImpl::HoistPostRA(MachineInstr *MI, unsigned Def,
                                  MachineLoop *CurLoop,
                                  MachineBasicBlock *CurPreheader) {
  // NOTE(review): the result is dereferenced unchecked below — this assumes
  // getCurPreheader succeeds here because HoistRegionPostRA already bailed
  // out when no preheader could be found for this loop; confirm if the
  // calling convention ever changes.
  MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
                    << " from " << printMBBReference(*MI->getParent()) << ": "
                    << *MI);

  // Splice the instruction to the preheader.
  MachineBasicBlock *MBB = MI->getParent();
  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);

  // Since we are moving the instruction out of its basic block, we do not
  // retain its debug location. Doing so would degrade the debugging
  // experience and adversely affect the accuracy of profiling information.
  assert(!MI->isDebugInstr() && "Should not hoist debug inst");
  MI->setDebugLoc(DebugLoc());

  // Add register to livein list to all the BBs in the current loop since a
  // loop invariant must be kept live throughout the whole loop. This is
  // important to ensure later passes do not scavenge the def register.
  AddToLiveIns(Def, CurLoop);

  ++NumPostRAHoisted;
  Changed = true;
}
757 
758 /// Check if this mbb is guaranteed to execute. If not then a load from this mbb
759 /// may not be safe to hoist.
760 bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
761                                             MachineLoop *CurLoop) {
762   if (SpeculationState != SpeculateUnknown)
763     return SpeculationState == SpeculateFalse;
764 
765   if (BB != CurLoop->getHeader()) {
766     // Check loop exiting blocks.
767     SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
768     CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
769     for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
770       if (!MDTU->getDomTree().dominates(BB, CurrentLoopExitingBlock)) {
771         SpeculationState = SpeculateTrue;
772         return false;
773       }
774   }
775 
776   SpeculationState = SpeculateFalse;
777   return true;
778 }
779 
/// Check if \p MI is trivially rematerializable and if it does not have any
/// virtual register uses. Even though such an instruction is rematerializable,
/// RA might not actually rematerialize it in this scenario. In that case we do
/// not want to hoist the instruction out of the loop in the belief that RA
/// will sink it back if needed.
784 bool MachineLICMImpl::isTriviallyReMaterializable(
785     const MachineInstr &MI) const {
786   if (!TII->isTriviallyReMaterializable(MI))
787     return false;
788 
789   for (const MachineOperand &MO : MI.all_uses()) {
790     if (MO.getReg().isVirtual())
791       return false;
792   }
793 
794   return true;
795 }
796 
797 void MachineLICMImpl::EnterScope(MachineBasicBlock *MBB) {
798   LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
799 
800   // Remember livein register pressure.
801   BackTrace.push_back(RegPressure);
802 }
803 
void MachineLICMImpl::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
  // Drop the register-pressure snapshot recorded when this scope was entered.
  BackTrace.pop_back();
}
808 
/// Destroy the scope for the MBB that corresponds to the given dominator tree
/// node if it's a leaf or all of its children are done. Walk up the dominator
/// tree to destroy ancestors which are now done.
812 void MachineLICMImpl::ExitScopeIfDone(
813     MachineDomTreeNode *Node,
814     DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
815     const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
816   if (OpenChildren[Node])
817     return;
818 
819   for(;;) {
820     ExitScope(Node->getBlock());
821     // Now traverse upwards to pop ancestors whose offsprings are all done.
822     MachineDomTreeNode *Parent = ParentMap.lookup(Node);
823     if (!Parent || --OpenChildren[Parent] != 0)
824       break;
825     Node = Parent;
826   }
827 }
828 
829 /// Walk the specified loop in the CFG (defined by all blocks dominated by the
830 /// specified header block, and that are in the current loop) in depth first
831 /// order w.r.t the DominatorTree. This allows us to visit definitions before
832 /// uses, allowing us to hoist a loop body in one pass without iteration.
void MachineLICMImpl::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
                                     MachineLoop *CurLoop,
                                     MachineBasicBlock *CurPreheader) {
  MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
  // Without a preheader there is nowhere to hoist to.
  if (!Preheader)
    return;

  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(HeaderN);
  while (!WorkList.empty()) {
    MachineDomTreeNode *Node = WorkList.pop_back_val();
    assert(Node && "Null dominator tree node?");
    MachineBasicBlock *BB = Node->getBlock();

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad())
      continue;

    // If this subregion is not in the top level loop at all, exit.
    if (!CurLoop->contains(BB))
      continue;

    Scopes.push_back(Node);
    unsigned NumChildren = Node->getNumChildren();

    // Don't hoist things out of a large switch statement.  This often causes
    // code to be hoisted that wasn't going to be executed, and increases
    // register pressure in a situation where it's likely to matter.
    if (BB->succ_size() >= 25)
      NumChildren = 0;

    OpenChildren[Node] = NumChildren;
    if (NumChildren) {
      // Add children in reverse order as then the next popped worklist node is
      // the first child of this node.  This means we ultimately traverse the
      // DOM tree in exactly the same order as if we'd recursed.
      for (MachineDomTreeNode *Child : reverse(Node->children())) {
        ParentMap[Child] = Node;
        WorkList.push_back(Child);
      }
    }
  }

  // Nothing dominated by the header is inside the loop; nothing to do.
  if (Scopes.size() == 0)
    return;

  // Compute registers which are livein into the loop headers.
  RegSeen.clear();
  BackTrace.clear();
  InitRegPressure(Preheader);

  // Now perform LICM. Scopes is in DFS order, so definitions are always
  // visited before their uses and one pass suffices.
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();

    EnterScope(MBB);

    // Process the block. The speculation cache is only valid per block.
    SpeculationState = SpeculateUnknown;
    for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
      // First try hoisting all the way to the outermost loop's preheader.
      unsigned HoistRes = HoistResult::NotHoisted;
      HoistRes = Hoist(&MI, Preheader, CurLoop);
      if (HoistRes & HoistResult::NotHoisted) {
        // We have failed to hoist MI to outermost loop's preheader. If MI is in
        // a subloop, try to hoist it to subloop's preheader.
        SmallVector<MachineLoop *> InnerLoopWorkList;
        for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;
             L = L->getParentLoop())
          InnerLoopWorkList.push_back(L);

        // Try the outermost subloop first (work list is popped back-to-front).
        while (!InnerLoopWorkList.empty()) {
          MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
          MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
          if (InnerLoopPreheader) {
            HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
            if (HoistRes & HoistResult::Hoisted)
              break;
          }
        }
      }

      // MI no longer exists (e.g. CSE'd away); don't touch it further.
      if (HoistRes & HoistResult::ErasedMI)
        continue;

      UpdateRegPressure(&MI);
    }

    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
  }
}
931 
932 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
933   return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
934 }
935 
936 /// Find all virtual register references that are liveout of the preheader to
937 /// initialize the starting "register pressure". Note this does not count live
938 /// through (livein but not used) registers.
939 void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
940   std::fill(RegPressure.begin(), RegPressure.end(), 0);
941 
942   // If the preheader has only a single predecessor and it ends with a
943   // fallthrough or an unconditional branch, then scan its predecessor for live
944   // defs as well. This happens whenever the preheader is created by splitting
945   // the critical edge from the loop predecessor to the loop header.
946   if (BB->pred_size() == 1) {
947     MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
948     SmallVector<MachineOperand, 4> Cond;
949     if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
950       InitRegPressure(*BB->pred_begin());
951   }
952 
953   for (const MachineInstr &MI : *BB)
954     UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
955 }
956 
957 /// Update estimate of register pressure after the specified instruction.
958 void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,
959                                         bool ConsiderUnseenAsDef) {
960   auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
961   for (const auto &RPIdAndCost : Cost) {
962     unsigned Class = RPIdAndCost.first;
963     if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
964       RegPressure[Class] = 0;
965     else
966       RegPressure[Class] += RPIdAndCost.second;
967   }
968 }
969 
970 /// Calculate the additional register pressure that the registers used in MI
971 /// cause.
972 ///
973 /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
974 /// figure out which usages are live-ins.
975 /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
SmallDenseMap<unsigned, int>
MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
                                  bool ConsiderUnseenAsDef) {
  // Maps a register-pressure-set id to the signed pressure delta MI causes.
  SmallDenseMap<unsigned, int> Cost;
  // IMPLICIT_DEF produces no real value and contributes no pressure.
  if (MI->isImplicitDef())
    return Cost;
  // Only explicit operands described by the instruction descriptor count.
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;

    // FIXME: It seems bad to use RegSeen only for some of these calculations.
    // Note: the insert is a side effect — it marks Reg as seen for later
    // calls even if this operand ends up contributing zero cost.
    bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);

    RegClassWeight W = TRI->getRegClassWeight(RC);
    int RCCost = 0;
    if (MO.isDef())
      // A def creates a new live value: pressure increases.
      RCCost = W.RegWeight;
    else {
      bool isKill = isOperandKill(MO, MRI);
      if (isNew && !isKill && ConsiderUnseenAsDef)
        // Haven't seen this, it must be a livein.
        RCCost = W.RegWeight;
      else if (!isNew && isKill)
        // Killing an already-seen value ends its live range: pressure drops.
        RCCost = -W.RegWeight;
    }
    if (RCCost == 0)
      continue;
    // Charge the delta to every pressure set this register class belongs to;
    // the set list is terminated by -1.
    const int *PS = TRI->getRegClassPressureSets(RC);
    for (; *PS != -1; ++PS)
      Cost[*PS] += RCCost;
  }
  return Cost;
}
1014 
1015 /// Return true if this machine instruction loads from global offset table or
1016 /// constant pool.
1017 static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
1018   assert(MI.mayLoad() && "Expected MI that loads!");
1019 
1020   // If we lost memory operands, conservatively assume that the instruction
1021   // reads from everything..
1022   if (MI.memoperands_empty())
1023     return true;
1024 
1025   for (MachineMemOperand *MemOp : MI.memoperands())
1026     if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
1027       if (PSV->isGOT() || PSV->isConstantPool())
1028         return true;
1029 
1030   return false;
1031 }
1032 
// This function iterates through all the operands of the input store MI and
// checks that each register operand satisfies isCallerPreservedPhysReg.
// This means the value being stored and the address where it is being stored
// are constant throughout the body of the function (not including prologue
// and epilogue). When called with an MI that isn't a store, it returns false.
// A future improvement can be to check if the store registers are constant
// throughout the loop rather than throughout the function.
1040 static bool isInvariantStore(const MachineInstr &MI,
1041                              const TargetRegisterInfo *TRI,
1042                              const MachineRegisterInfo *MRI) {
1043 
1044   bool FoundCallerPresReg = false;
1045   if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
1046       (MI.getNumOperands() == 0))
1047     return false;
1048 
1049   // Check that all register operands are caller-preserved physical registers.
1050   for (const MachineOperand &MO : MI.operands()) {
1051     if (MO.isReg()) {
1052       Register Reg = MO.getReg();
1053       // If operand is a virtual register, check if it comes from a copy of a
1054       // physical register.
1055       if (Reg.isVirtual())
1056         Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
1057       if (Reg.isVirtual())
1058         return false;
1059       if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
1060         return false;
1061       else
1062         FoundCallerPresReg = true;
1063     } else if (!MO.isImm()) {
1064         return false;
1065     }
1066   }
1067   return FoundCallerPresReg;
1068 }
1069 
// Return true if the input MI is a copy instruction that feeds an invariant
// store instruction. This means that the src of the copy has to satisfy
// isCallerPreservedPhysReg and at least one of its users should satisfy
// isInvariantStore.
1074 static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
1075                                         const MachineRegisterInfo *MRI,
1076                                         const TargetRegisterInfo *TRI) {
1077 
1078   // FIXME: If targets would like to look through instructions that aren't
1079   // pure copies, this can be updated to a query.
1080   if (!MI.isCopy())
1081     return false;
1082 
1083   const MachineFunction *MF = MI.getMF();
1084   // Check that we are copying a constant physical register.
1085   Register CopySrcReg = MI.getOperand(1).getReg();
1086   if (CopySrcReg.isVirtual())
1087     return false;
1088 
1089   if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
1090     return false;
1091 
1092   Register CopyDstReg = MI.getOperand(0).getReg();
1093   // Check if any of the uses of the copy are invariant stores.
1094   assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
1095 
1096   for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
1097     if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
1098       return true;
1099   }
1100   return false;
1101 }
1102 
1103 /// Returns true if the instruction may be a suitable candidate for LICM.
1104 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
1105 bool MachineLICMImpl::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
1106   // Check if it's safe to move the instruction.
1107   bool DontMoveAcrossStore = !HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1108   if ((!I.isSafeToMove(DontMoveAcrossStore)) &&
1109       !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
1110     LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
1111     return false;
1112   }
1113 
1114   // If it is a load then check if it is guaranteed to execute by making sure
1115   // that it dominates all exiting blocks. If it doesn't, then there is a path
1116   // out of the loop which does not execute this load, so we can't hoist it.
1117   // Loads from constant memory are safe to speculate, for example indexed load
1118   // from a jump table.
1119   // Stores and side effects are already checked by isSafeToMove.
1120   if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
1121       !IsGuaranteedToExecute(I.getParent(), CurLoop)) {
1122     LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
1123     return false;
1124   }
1125 
1126   // Convergent attribute has been used on operations that involve inter-thread
1127   // communication which results are implicitly affected by the enclosing
1128   // control flows. It is not safe to hoist or sink such operations across
1129   // control flow.
1130   if (I.isConvergent())
1131     return false;
1132 
1133   if (!TII->shouldHoist(I, CurLoop))
1134     return false;
1135 
1136   return true;
1137 }
1138 
1139 /// Returns true if the instruction is loop invariant.
1140 bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &I,
1141                                           MachineLoop *CurLoop) {
1142   if (!IsLICMCandidate(I, CurLoop)) {
1143     LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1144     return false;
1145   }
1146   return CurLoop->isLoopInvariant(I);
1147 }
1148 
1149 /// Return true if the specified instruction is used by a phi node and hoisting
1150 /// it could cause a copy to be inserted.
1151 bool MachineLICMImpl::HasLoopPHIUse(const MachineInstr *MI,
1152                                     MachineLoop *CurLoop) {
1153   SmallVector<const MachineInstr *, 8> Work(1, MI);
1154   do {
1155     MI = Work.pop_back_val();
1156     for (const MachineOperand &MO : MI->all_defs()) {
1157       Register Reg = MO.getReg();
1158       if (!Reg.isVirtual())
1159         continue;
1160       for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1161         // A PHI may cause a copy to be inserted.
1162         if (UseMI.isPHI()) {
1163           // A PHI inside the loop causes a copy because the live range of Reg is
1164           // extended across the PHI.
1165           if (CurLoop->contains(&UseMI))
1166             return true;
1167           // A PHI in an exit block can cause a copy to be inserted if the PHI
1168           // has multiple predecessors in the loop with different values.
1169           // For now, approximate by rejecting all exit blocks.
1170           if (isExitBlock(CurLoop, UseMI.getParent()))
1171             return true;
1172           continue;
1173         }
1174         // Look past copies as well.
1175         if (UseMI.isCopy() && CurLoop->contains(&UseMI))
1176           Work.push_back(&UseMI);
1177       }
1178     }
1179   } while (!Work.empty());
1180   return false;
1181 }
1182 
1183 /// Compute operand latency between a def of 'Reg' and an use in the current
1184 /// loop, return true if the target considered it high.
1185 bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1186                                             Register Reg,
1187                                             MachineLoop *CurLoop) const {
1188   if (MRI->use_nodbg_empty(Reg))
1189     return false;
1190 
1191   for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1192     if (UseMI.isCopyLike())
1193       continue;
1194     if (!CurLoop->contains(UseMI.getParent()))
1195       continue;
1196     for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1197       const MachineOperand &MO = UseMI.getOperand(i);
1198       if (!MO.isReg() || !MO.isUse())
1199         continue;
1200       Register MOReg = MO.getReg();
1201       if (MOReg != Reg)
1202         continue;
1203 
1204       if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1205         return true;
1206     }
1207 
1208     // Only look at the first in loop use.
1209     break;
1210   }
1211 
1212   return false;
1213 }
1214 
1215 /// Return true if the instruction is marked "cheap" or the operand latency
1216 /// between its def and a use is one or less.
1217 bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
1218   if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1219     return true;
1220 
1221   bool isCheap = false;
1222   unsigned NumDefs = MI.getDesc().getNumDefs();
1223   for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1224     MachineOperand &DefMO = MI.getOperand(i);
1225     if (!DefMO.isReg() || !DefMO.isDef())
1226       continue;
1227     --NumDefs;
1228     Register Reg = DefMO.getReg();
1229     if (Reg.isPhysical())
1230       continue;
1231 
1232     if (!TII->hasLowDefLatency(SchedModel, MI, i))
1233       return false;
1234     isCheap = true;
1235   }
1236 
1237   return isCheap;
1238 }
1239 
1240 /// Visit BBs from header to current BB, check if hoisting an instruction of the
1241 /// given cost matrix can cause high register pressure.
1242 bool MachineLICMImpl::CanCauseHighRegPressure(
1243     const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) {
1244   for (const auto &RPIdAndCost : Cost) {
1245     if (RPIdAndCost.second <= 0)
1246       continue;
1247 
1248     unsigned Class = RPIdAndCost.first;
1249     int Limit = RegLimit[Class];
1250 
1251     // Don't hoist cheap instructions if they would increase register pressure,
1252     // even if we're under the limit.
1253     if (CheapInstr && !HoistCheapInsts)
1254       return true;
1255 
1256     for (const auto &RP : BackTrace)
1257       if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1258         return true;
1259   }
1260 
1261   return false;
1262 }
1263 
1264 /// Traverse the back trace from header to the current block and update their
1265 /// register pressures to reflect the effect of hoisting MI from the current
1266 /// block to the preheader.
1267 void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1268   // First compute the 'cost' of the instruction, i.e. its contribution
1269   // to register pressure.
1270   auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1271                                /*ConsiderUnseenAsDef=*/false);
1272 
1273   // Update register pressure of blocks from loop header to current block.
1274   for (auto &RP : BackTrace)
1275     for (const auto &RPIdAndCost : Cost)
1276       RP[RPIdAndCost.first] += RPIdAndCost.second;
1277 }
1278 
1279 /// Return true if it is potentially profitable to hoist the given loop
1280 /// invariant.
bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &MI,
                                          MachineLoop *CurLoop) {
  // IMPLICIT_DEF is free: it creates no value and no pressure.
  if (MI.isImplicitDef())
    return true;

  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  // A copy that feeds an invariant store is hoisted so the store itself can
  // follow it out of the loop.
  if (HoistConstStores &&  isCopyFeedingInvariantStore(MI, MRI, TRI))
    return true;

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }

  // Rematerializable instructions should always be hoisted providing the
  // register allocator can just pull them down again when needed.
  if (isTriviallyReMaterializable(MI))
    return true;

  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    // A def feeding a high-latency in-loop use is worth hoisting regardless
    // of the pressure estimate below.
    if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {
      LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
      ++NumHighLatency;
      return true;
    }
  }

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }

  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }

  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {
    LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  }

  // If we have a COPY with other uses in the loop, hoist to allow the users to
  // also be hoisted.
  // TODO: Handle all isCopyLike?
  if (MI.isCopy() || MI.isRegSequence()) {
    Register DefReg = MI.getOperand(0).getReg();
    if (DefReg.isVirtual() &&
        all_of(MI.uses(),
               [this](const MachineOperand &UseOp) {
                 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
                        MRI->isConstantPhysReg(UseOp.getReg());
               }) &&
        IsLoopInvariantInst(MI, CurLoop) &&
        any_of(MRI->use_nodbg_instructions(DefReg),
               [&CurLoop, this, DefReg,
                Cost = std::move(Cost)](MachineInstr &UseMI) {
                 if (!CurLoop->contains(&UseMI))
                   return false;

                 // COPY is a cheap instruction, but if moving it won't cause
                 // high RP we're fine to hoist it even if the user can't be
                 // hoisted later Otherwise we want to check the user if it's
                 // hoistable
                 if (CanCauseHighRegPressure(Cost, false) &&
                     !CurLoop->isLoopInvariant(UseMI, DefReg))
                   return false;

                 return true;
               }))
      return true;
  }

  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!isTriviallyReMaterializable(MI) &&
      !MI.isDereferenceableInvariantLoad()) {
    LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    return false;
  }

  return true;
}
1404 
1405 /// Unfold a load from the given machineinstr if the load itself could be
1406 /// hoisted. Return the unfolded and hoistable load, or null if the load
1407 /// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI,
                                                    MachineLoop *CurLoop) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return nullptr;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isDereferenceableInvariantLoad())
    return nullptr;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  // The target has no unfolded form of this instruction.
  if (NewOpc == 0) return nullptr;
  const MCInstrDesc &MID = TII->get(NewOpc);
  MachineFunction &MF = *MI->getMF();
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  Register Reg = MRI->createVirtualRegister(RC);

  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
                                          /*UnfoldLoad=*/true,
                                          /*UnfoldStore=*/false, NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  // Insert both new instructions (NewMIs[0] is the candidate load that may be
  // hoisted, NewMIs[1] stays in the loop) where MI currently is.
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
      !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return nullptr;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.

  // Update the call info.
  if (MI->shouldUpdateAdditionalCallInfo())
    MF.eraseAdditionalCallInfo(MI);

  MI->eraseFromParent();
  return NewMIs[0];
}
1469 
1470 /// Initialize the CSE map with instructions that are in the current loop
1471 /// preheader that may become duplicates of instructions that are hoisted
1472 /// out of the loop.
1473 void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1474   for (MachineInstr &MI : *BB)
1475     CSEMap[BB][MI.getOpcode()].push_back(&MI);
1476 }
1477 
1478 /// Initialize AllowedToHoistLoads with information about whether invariant
1479 /// loads can be moved outside a given loop
void MachineLICMImpl::InitializeLoadsHoistableLoops() {
  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  SmallVector<MachineLoop *, 8> LoopsInPreOrder;

  // Mark all loops as hoistable initially and prepare a list of loops in
  // pre-order DFS.
  while (!Worklist.empty()) {
    auto *L = Worklist.pop_back_val();
    AllowedToHoistLoads[L] = true;
    LoopsInPreOrder.push_back(L);
    Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
                    L->getSubLoops().end());
  }

  // Going from the innermost to outermost loops, check if a loop has
  // instructions preventing invariant load hoisting. If such instruction is
  // found, mark this loop and its parent as non-hoistable and continue
  // investigating the next loop.
  // Visiting in a reversed pre-ordered DFS manner
  // allows us to not process all the instructions of the outer loop if the
  // inner loop is proved to be non-load-hoistable.
  for (auto *Loop : reverse(LoopsInPreOrder)) {
    for (auto *MBB : Loop->blocks()) {
      // If this loop has already been marked as non-hoistable, skip it.
      // (Checked per block so that a barrier found in an earlier block of
      // this same loop also short-circuits the remaining blocks.)
      if (!AllowedToHoistLoads[Loop])
        continue;
      for (auto &MI : *MBB) {
        // Barriers for load hoisting: explicit load-fold barriers, anything
        // that may write memory, calls, and ordered (volatile/atomic) loads.
        if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() &&
            !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
          continue;
        // Poison this loop and every enclosing loop.
        for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
          AllowedToHoistLoads[L] = false;
        break;
      }
    }
  }
}
1517 
/// Find an instruction among PrevMIs that is a duplicate of MI.
/// Return this instruction if it's found.
1520 MachineInstr *
1521 MachineLICMImpl::LookForDuplicate(const MachineInstr *MI,
1522                                   std::vector<MachineInstr *> &PrevMIs) {
1523   for (MachineInstr *PrevMI : PrevMIs)
1524     if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1525       return PrevMI;
1526 
1527   return nullptr;
1528 }
1529 
/// Given a LICM'ed instruction, look for an instruction in the preheader that
/// computes the same value. If one is found, replace all uses of the
/// instruction's definition with the definition of the existing instruction
/// rather than hoisting the instruction to the preheader.
1534 bool MachineLICMImpl::EliminateCSE(
1535     MachineInstr *MI,
1536     DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1537   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1538   // the undef property onto uses.
1539   if (MI->isImplicitDef())
1540     return false;
1541 
1542   // Do not CSE normal loads because between them could be store instructions
1543   // that change the loaded value
1544   if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
1545     return false;
1546 
1547   if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1548     LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1549 
1550     // Replace virtual registers defined by MI by their counterparts defined
1551     // by Dup.
1552     SmallVector<unsigned, 2> Defs;
1553     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1554       const MachineOperand &MO = MI->getOperand(i);
1555 
1556       // Physical registers may not differ here.
1557       assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
1558               MO.getReg() == Dup->getOperand(i).getReg()) &&
1559              "Instructions with different phys regs are not identical!");
1560 
1561       if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
1562         Defs.push_back(i);
1563     }
1564 
1565     SmallVector<const TargetRegisterClass*, 2> OrigRCs;
1566     for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1567       unsigned Idx = Defs[i];
1568       Register Reg = MI->getOperand(Idx).getReg();
1569       Register DupReg = Dup->getOperand(Idx).getReg();
1570       OrigRCs.push_back(MRI->getRegClass(DupReg));
1571 
1572       if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1573         // Restore old RCs if more than one defs.
1574         for (unsigned j = 0; j != i; ++j)
1575           MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1576         return false;
1577       }
1578     }
1579 
1580     for (unsigned Idx : Defs) {
1581       Register Reg = MI->getOperand(Idx).getReg();
1582       Register DupReg = Dup->getOperand(Idx).getReg();
1583       MRI->replaceRegWith(Reg, DupReg);
1584       MRI->clearKillFlags(DupReg);
1585       // Clear Dup dead flag if any, we reuse it for Reg.
1586       if (!MRI->use_nodbg_empty(DupReg))
1587         Dup->getOperand(Idx).setIsDead(false);
1588     }
1589 
1590     MI->eraseFromParent();
1591     ++NumCSEed;
1592     return true;
1593   }
1594   return false;
1595 }
1596 
1597 /// Return true if the given instruction will be CSE'd if it's hoisted out of
1598 /// the loop.
1599 bool MachineLICMImpl::MayCSE(MachineInstr *MI) {
1600   if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
1601     return false;
1602 
1603   unsigned Opcode = MI->getOpcode();
1604   for (auto &Map : CSEMap) {
1605     // Check this CSEMap's preheader dominates MI's basic block.
1606     if (MDTU->getDomTree().dominates(Map.first, MI->getParent())) {
1607       DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1608           Map.second.find(Opcode);
1609       // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1610       // the undef property onto uses.
1611       if (CI == Map.second.end() || MI->isImplicitDef())
1612         continue;
1613       if (LookForDuplicate(MI, CI->second) != nullptr)
1614         return true;
1615     }
1616   }
1617 
1618   return false;
1619 }
1620 
/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///
/// \param MI        the candidate instruction (may be replaced by an unfolded
///                  load if the original is not directly hoistable).
/// \param Preheader the block instructions are hoisted into.
/// \param CurLoop   the loop being processed.
/// \returns a bitmask of HoistResult flags: NotHoisted, or Hoisted possibly
///          combined with ErasedMI when the original MI no longer exists
///          (it was CSE'd away or replaced via load unfolding).
unsigned MachineLICMImpl::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
                                MachineLoop *CurLoop) {
  MachineBasicBlock *SrcBlock = MI->getParent();

  // Disable the instruction hoisting due to block hotness: bail out if the
  // preheader is hotter than the source block (per MBFI), when that check is
  // enabled unconditionally or enabled for PGO builds with profile data.
  if ((DisableHoistingToHotterBlocks == UseBFI::All ||
      (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
      isTgtHotterThanSrc(SrcBlock, Preheader)) {
    ++NumNotHoistedDueToHotness;
    return HoistResult::NotHoisted;
  }
  // First check whether we should hoist this instruction.
  bool HasExtractHoistableLoad = false;
  if (!IsLoopInvariantInst(*MI, CurLoop) ||
      !IsProfitableToHoist(*MI, CurLoop)) {
    // If not, try unfolding a hoistable load. On success MI is replaced by
    // the new load instruction; the original has been erased.
    MI = ExtractHoistableLoad(MI, CurLoop);
    if (!MI)
      return HoistResult::NotHoisted;
    HasExtractHoistableLoad = true;
  }

  // If we have hoisted an instruction that may store, it can only be a constant
  // store.
  if (MI->mayStore())
    NumStoreConst++;

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  LLVM_DEBUG({
    dbgs() << "Hoisting " << *MI;
    if (MI->getParent()->getBasicBlock())
      dbgs() << " from " << printMBBReference(*MI->getParent());
    if (Preheader->getBasicBlock())
      dbgs() << " to " << printMBBReference(*Preheader);
    dbgs() << "\n";
  });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction: a previously hoisted
  // instruction with the same opcode, in a preheader dominating MI, that
  // produces the same value.
  unsigned Opcode = MI->getOpcode();
  bool HasCSEDone = false;
  for (auto &Map : CSEMap) {
    // Check this CSEMap's preheader dominates MI's basic block.
    if (MDTU->getDomTree().dominates(Map.first, MI->getParent())) {
      DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
          Map.second.find(Opcode);
      if (CI != Map.second.end()) {
        if (EliminateCSE(MI, CI)) {
          // MI has been erased; its defs now refer to the duplicate's defs.
          HasCSEDone = true;
          break;
        }
      }
    }
  }

  if (!HasCSEDone) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);

    // Since we are moving the instruction out of its basic block, we do not
    // retain its debug location. Doing so would degrade the debugging
    // experience and adversely affect the accuracy of profiling information.
    assert(!MI->isDebugInstr() && "Should not hoist debug inst");
    MI->setDebugLoc(DebugLoc());

    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (MachineOperand &MO : MI->all_defs())
      if (!MO.isDead())
        MRI->clearKillFlags(MO.getReg());

    // Record the hoisted instruction so later hoists can CSE against it.
    CSEMap[Preheader][Opcode].push_back(MI);
  }

  ++NumHoisted;
  Changed = true;

  if (HasCSEDone || HasExtractHoistableLoad)
    return HoistResult::Hoisted | HoistResult::ErasedMI;
  return HoistResult::Hoisted;
}
1716 
1717 /// Get the preheader for the current loop, splitting a critical edge if needed.
1718 MachineBasicBlock *
1719 MachineLICMImpl::getCurPreheader(MachineLoop *CurLoop,
1720                                  MachineBasicBlock *CurPreheader) {
1721   // Determine the block to which to hoist instructions. If we can't find a
1722   // suitable loop predecessor, we can't do any hoisting.
1723 
1724   // If we've tried to get a preheader and failed, don't try again.
1725   if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1726     return nullptr;
1727 
1728   if (!CurPreheader) {
1729     CurPreheader = CurLoop->getLoopPreheader();
1730     if (!CurPreheader) {
1731       MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1732       if (!Pred) {
1733         CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1734         return nullptr;
1735       }
1736 
1737       CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), LegacyPass,
1738                                              MFAM, nullptr, MDTU);
1739       if (!CurPreheader) {
1740         CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1741         return nullptr;
1742       }
1743     }
1744   }
1745   return CurPreheader;
1746 }
1747 
1748 /// Is the target basic block at least "BlockFrequencyRatioThreshold"
1749 /// times hotter than the source basic block.
1750 bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1751                                          MachineBasicBlock *TgtBlock) {
1752   // Parse source and target basic block frequency from MBFI
1753   uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
1754   uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
1755 
1756   // Disable the hoisting if source block frequency is zero
1757   if (!SrcBF)
1758     return true;
1759 
1760   double Ratio = (double)DstBF / SrcBF;
1761 
1762   // Compare the block frequency ratio with the threshold
1763   return Ratio > BlockFrequencyRatioThreshold;
1764 }
1765 
1766 template <typename DerivedT, bool PreRegAlloc>
1767 PreservedAnalyses MachineLICMBasePass<DerivedT, PreRegAlloc>::run(
1768     MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) {
1769   bool Changed = MachineLICMImpl(PreRegAlloc, nullptr, &MFAM).run(MF);
1770   if (!Changed)
1771     return PreservedAnalyses::all();
1772   auto PA = getMachineFunctionPassPreservedAnalyses();
1773   PA.preserve<MachineLoopAnalysis>();
1774   return PA;
1775 }
1776 
// Explicit instantiations of the pass template: the early (pre-regalloc)
// variant and the standard (post-regalloc) MachineLICM variant.
template class llvm::MachineLICMBasePass<EarlyMachineLICMPass, true>;
template class llvm::MachineLICMBasePass<MachineLICMPass, false>;
1779