xref: /llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp (revision 1be19e6f5bf8bbddc0fb1881040c98e47281980b)
1 //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the Hexagon hardware
11 // loop instruction.  The hardware loop can perform loop branches with a
12 // zero-cycle overhead.
13 //
14 // The pattern that defines the induction variable can changed depending on
15 // prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
16 // normalizes induction variables, and the Loop Strength Reduction pass
17 // run by 'llc' may also make changes to the induction variable.
18 // The pattern detected by this phase is due to running Strength Reduction.
19 //
20 // Criteria for hardware loops:
21 //  - Countable loops (w/ ind. var for a trip count)
22 //  - Assumes loops are normalized by IndVarSimplify
23 //  - Try inner-most loops first
24 //  - No function calls in loops.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "HexagonInstrInfo.h"
29 #include "HexagonSubtarget.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/CodeGen/MachineBasicBlock.h"
37 #include "llvm/CodeGen/MachineDominators.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineFunctionPass.h"
40 #include "llvm/CodeGen/MachineInstr.h"
41 #include "llvm/CodeGen/MachineInstrBuilder.h"
42 #include "llvm/CodeGen/MachineLoopInfo.h"
43 #include "llvm/CodeGen/MachineOperand.h"
44 #include "llvm/CodeGen/MachineRegisterInfo.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/MathExtras.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Target/TargetRegisterInfo.h"
54 #include <cassert>
55 #include <cstdint>
56 #include <cstdlib>
57 #include <iterator>
58 #include <map>
59 #include <set>
60 #include <string>
61 #include <utility>
62 #include <vector>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "hwloops"
67 
68 #ifndef NDEBUG
69 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70 
71 // Option to create preheader only for a specific function.
72 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
73                                  cl::init(""));
74 #endif
75 
76 // Option to create a preheader if one doesn't exist.
77 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
78     cl::Hidden, cl::init(true),
79     cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80 
81 // Turn it off by default. If a preheader block is not created here, the
82 // software pipeliner may be unable to find a block suitable to serve as
83 // a preheader. In that case SWP will not run.
84 static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
85   cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
86   "instructions"));
87 
88 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89 
90 namespace llvm {
91 
92   FunctionPass *createHexagonHardwareLoops();
93   void initializeHexagonHardwareLoopsPass(PassRegistry&);
94 
95 } // end namespace llvm
96 
97 namespace {
98 
99   class CountValue;
100 
101   struct HexagonHardwareLoops : public MachineFunctionPass {
102     MachineLoopInfo            *MLI;
103     MachineRegisterInfo        *MRI;
104     MachineDominatorTree       *MDT;
105     const HexagonInstrInfo     *TII;
106     const HexagonRegisterInfo  *TRI;
107 #ifndef NDEBUG
108     static int Counter;
109 #endif
110 
111   public:
112     static char ID;
113 
114     HexagonHardwareLoops() : MachineFunctionPass(ID) {
115       initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
116     }
117 
118     bool runOnMachineFunction(MachineFunction &MF) override;
119 
120     StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
121 
122     void getAnalysisUsage(AnalysisUsage &AU) const override {
123       AU.addRequired<MachineDominatorTree>();
124       AU.addRequired<MachineLoopInfo>();
125       MachineFunctionPass::getAnalysisUsage(AU);
126     }
127 
128   private:
129     using LoopFeederMap = std::map<unsigned, MachineInstr *>;
130 
131     /// Kinds of comparisons in the compare instructions.
132     struct Comparison {
133       enum Kind {
134         EQ  = 0x01,
135         NE  = 0x02,
136         L   = 0x04,
137         G   = 0x08,
138         U   = 0x40,
139         LTs = L,
140         LEs = L | EQ,
141         GTs = G,
142         GEs = G | EQ,
143         LTu = L      | U,
144         LEu = L | EQ | U,
145         GTu = G      | U,
146         GEu = G | EQ | U
147       };
148 
149       static Kind getSwappedComparison(Kind Cmp) {
150         assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
151         if ((Cmp & L) || (Cmp & G))
152           return (Kind)(Cmp ^ (L|G));
153         return Cmp;
154       }
155 
156       static Kind getNegatedComparison(Kind Cmp) {
157         if ((Cmp & L) || (Cmp & G))
158           return (Kind)((Cmp ^ (L | G)) ^ EQ);
159         if ((Cmp & NE) || (Cmp & EQ))
160           return (Kind)(Cmp ^ (EQ | NE));
161         return (Kind)0;
162       }
163 
164       static bool isSigned(Kind Cmp) {
165         return (Cmp & (L | G) && !(Cmp & U));
166       }
167 
168       static bool isUnsigned(Kind Cmp) {
169         return (Cmp & U);
170       }
171     };
172 
173     /// \brief Find the register that contains the loop controlling
174     /// induction variable.
175     /// If successful, it will return true and set the \p Reg, \p IVBump
176     /// and \p IVOp arguments.  Otherwise it will return false.
177     /// The returned induction register is the register R that follows the
178     /// following induction pattern:
179     /// loop:
180     ///   R = phi ..., [ R.next, LatchBlock ]
181     ///   R.next = R + #bump
182     ///   if (R.next < #N) goto loop
183     /// IVBump is the immediate value added to R, and IVOp is the instruction
184     /// "R.next = R + #bump".
185     bool findInductionRegister(MachineLoop *L, unsigned &Reg,
186                                int64_t &IVBump, MachineInstr *&IVOp) const;
187 
188     /// \brief Return the comparison kind for the specified opcode.
189     Comparison::Kind getComparisonKind(unsigned CondOpc,
190                                        MachineOperand *InitialValue,
191                                        const MachineOperand *Endvalue,
192                                        int64_t IVBump) const;
193 
194     /// \brief Analyze the statements in a loop to determine if the loop
195     /// has a computable trip count and, if so, return a value that represents
196     /// the trip count expression.
197     CountValue *getLoopTripCount(MachineLoop *L,
198                                  SmallVectorImpl<MachineInstr *> &OldInsts);
199 
200     /// \brief Return the expression that represents the number of times
201     /// a loop iterates.  The function takes the operands that represent the
202     /// loop start value, loop end value, and induction value.  Based upon
203     /// these operands, the function attempts to compute the trip count.
204     /// If the trip count is not directly available (as an immediate value,
205     /// or a register), the function will attempt to insert computation of it
206     /// to the loop's preheader.
207     CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
208                              const MachineOperand *End, unsigned IVReg,
209                              int64_t IVBump, Comparison::Kind Cmp) const;
210 
211     /// \brief Return true if the instruction is not valid within a hardware
212     /// loop.
213     bool isInvalidLoopOperation(const MachineInstr *MI,
214                                 bool IsInnerHWLoop) const;
215 
216     /// \brief Return true if the loop contains an instruction that inhibits
217     /// using the hardware loop.
218     bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
219 
220     /// \brief Given a loop, check if we can convert it to a hardware loop.
221     /// If so, then perform the conversion and return true.
222     bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
223 
224     /// \brief Return true if the instruction is now dead.
225     bool isDead(const MachineInstr *MI,
226                 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
227 
228     /// \brief Remove the instruction if it is now dead.
229     void removeIfDead(MachineInstr *MI);
230 
231     /// \brief Make sure that the "bump" instruction executes before the
232     /// compare.  We need that for the IV fixup, so that the compare
233     /// instruction would not use a bumped value that has not yet been
234     /// defined.  If the instructions are out of order, try to reorder them.
235     bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
236 
237     /// \brief Return true if MO and MI pair is visited only once. If visited
238     /// more than once, this indicates there is recursion. In such a case,
239     /// return false.
240     bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
241                       const MachineOperand *MO,
242                       LoopFeederMap &LoopFeederPhi) const;
243 
244     /// \brief Return true if the Phi may generate a value that may underflow,
245     /// or may wrap.
246     bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
247                                MachineBasicBlock *MBB, MachineLoop *L,
248                                LoopFeederMap &LoopFeederPhi) const;
249 
250     /// \brief Return true if the induction variable may underflow an unsigned
251     /// value in the first iteration.
252     bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
253                                      const MachineOperand *EndVal,
254                                      MachineBasicBlock *MBB, MachineLoop *L,
255                                      LoopFeederMap &LoopFeederPhi) const;
256 
257     /// \brief Check if the given operand has a compile-time known constant
258     /// value. Return true if yes, and false otherwise. When returning true, set
259     /// Val to the corresponding constant value.
260     bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
261 
262     /// \brief Check if the operand has a compile-time known constant value.
263     bool isImmediate(const MachineOperand &MO) const {
264       int64_t V;
265       return checkForImmediate(MO, V);
266     }
267 
268     /// \brief Return the immediate for the specified operand.
269     int64_t getImmediate(const MachineOperand &MO) const {
270       int64_t V;
271       if (!checkForImmediate(MO, V))
272         llvm_unreachable("Invalid operand");
273       return V;
274     }
275 
276     /// \brief Reset the given machine operand to now refer to a new immediate
277     /// value.  Assumes that the operand was already referencing an immediate
278     /// value, either directly, or via a register.
279     void setImmediate(MachineOperand &MO, int64_t Val);
280 
281     /// \brief Fix the data flow of the induction variable.
282     /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
283     ///                                     |
284     ///                                     +-> back to phi
285     /// where "bump" is the increment of the induction variable:
286     ///   iv = iv + #const.
287     /// Due to some prior code transformations, the actual flow may look
288     /// like this:
289     ///   phi -+-> bump ---> back to phi
290     ///        |
291     ///        +-> comparison-in-latch (against upper_bound-bump),
292     /// i.e. the comparison that controls the loop execution may be using
293     /// the value of the induction variable from before the increment.
294     ///
295     /// Return true if the loop's flow is the desired one (i.e. it's
296     /// either been fixed, or no fixing was necessary).
297     /// Otherwise, return false.  This can happen if the induction variable
298     /// couldn't be identified, or if the value in the latch's comparison
299     /// cannot be adjusted to reflect the post-bump value.
300     bool fixupInductionVariable(MachineLoop *L);
301 
302     /// \brief Given a loop, if it does not have a preheader, create one.
303     /// Return the block that is the preheader.
304     MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
305   };
306 
307   char HexagonHardwareLoops::ID = 0;
308 #ifndef NDEBUG
309   int HexagonHardwareLoops::Counter = 0;
310 #endif
311 
312   /// \brief Abstraction for a trip count of a loop. A smaller version
313   /// of the MachineOperand class without the concerns of changing the
314   /// operand representation.
315   class CountValue {
316   public:
317     enum CountValueType {
318       CV_Register,
319       CV_Immediate
320     };
321 
322   private:
323     CountValueType Kind;
324     union Values {
325       struct {
326         unsigned Reg;
327         unsigned Sub;
328       } R;
329       unsigned ImmVal;
330     } Contents;
331 
332   public:
333     explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
334       Kind = t;
335       if (Kind == CV_Register) {
336         Contents.R.Reg = v;
337         Contents.R.Sub = u;
338       } else {
339         Contents.ImmVal = v;
340       }
341     }
342 
343     bool isReg() const { return Kind == CV_Register; }
344     bool isImm() const { return Kind == CV_Immediate; }
345 
346     unsigned getReg() const {
347       assert(isReg() && "Wrong CountValue accessor");
348       return Contents.R.Reg;
349     }
350 
351     unsigned getSubReg() const {
352       assert(isReg() && "Wrong CountValue accessor");
353       return Contents.R.Sub;
354     }
355 
356     unsigned getImm() const {
357       assert(isImm() && "Wrong CountValue accessor");
358       return Contents.ImmVal;
359     }
360 
361     void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
362       if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
363       if (isImm()) { OS << Contents.ImmVal; }
364     }
365   };
366 
367 } // end anonymous namespace
368 
369 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
370                       "Hexagon Hardware Loops", false, false)
371 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
372 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
373 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
374                     "Hexagon Hardware Loops", false, false)
375 
376 FunctionPass *llvm::createHexagonHardwareLoops() {
377   return new HexagonHardwareLoops();
378 }
379 
380 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
381   DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
382   if (skipFunction(*MF.getFunction()))
383     return false;
384 
385   bool Changed = false;
386 
387   MLI = &getAnalysis<MachineLoopInfo>();
388   MRI = &MF.getRegInfo();
389   MDT = &getAnalysis<MachineDominatorTree>();
390   const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
391   TII = HST.getInstrInfo();
392   TRI = HST.getRegisterInfo();
393 
394   for (auto &L : *MLI)
395     if (!L->getParentLoop()) {
396       bool L0Used = false;
397       bool L1Used = false;
398       Changed |= convertToHardwareLoop(L, L0Used, L1Used);
399     }
400 
401   return Changed;
402 }
403 
404 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
405                                                  unsigned &Reg,
406                                                  int64_t &IVBump,
407                                                  MachineInstr *&IVOp
408                                                  ) const {
409   MachineBasicBlock *Header = L->getHeader();
410   MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
411   MachineBasicBlock *Latch = L->getLoopLatch();
412   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
413   if (!Header || !Preheader || !Latch || !ExitingBlock)
414     return false;
415 
416   // This pair represents an induction register together with an immediate
417   // value that will be added to it in each loop iteration.
418   using RegisterBump = std::pair<unsigned, int64_t>;
419 
420   // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
421   // from an induction operation
422   //   R.next = R + bump
423   // where bump is an immediate value.
424   using InductionMap = std::map<unsigned, RegisterBump>;
425 
426   InductionMap IndMap;
427 
428   using instr_iterator = MachineBasicBlock::instr_iterator;
429 
430   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
431        I != E && I->isPHI(); ++I) {
432     MachineInstr *Phi = &*I;
433 
434     // Have a PHI instruction.  Get the operand that corresponds to the
435     // latch block, and see if is a result of an addition of form "reg+imm",
436     // where the "reg" is defined by the PHI node we are looking at.
437     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
438       if (Phi->getOperand(i+1).getMBB() != Latch)
439         continue;
440 
441       unsigned PhiOpReg = Phi->getOperand(i).getReg();
442       MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
443 
444       if (DI->getDesc().isAdd()) {
445         // If the register operand to the add is the PHI we're looking at, this
446         // meets the induction pattern.
447         unsigned IndReg = DI->getOperand(1).getReg();
448         MachineOperand &Opnd2 = DI->getOperand(2);
449         int64_t V;
450         if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
451           unsigned UpdReg = DI->getOperand(0).getReg();
452           IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
453         }
454       }
455     }  // for (i)
456   }  // for (instr)
457 
458   SmallVector<MachineOperand,2> Cond;
459   MachineBasicBlock *TB = nullptr, *FB = nullptr;
460   bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
461   if (NotAnalyzed)
462     return false;
463 
464   unsigned PredR, PredPos, PredRegFlags;
465   if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
466     return false;
467 
468   MachineInstr *PredI = MRI->getVRegDef(PredR);
469   if (!PredI->isCompare())
470     return false;
471 
472   unsigned CmpReg1 = 0, CmpReg2 = 0;
473   int CmpImm = 0, CmpMask = 0;
474   bool CmpAnalyzed =
475       TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
476   // Fail if the compare was not analyzed, or it's not comparing a register
477   // with an immediate value.  Not checking the mask here, since we handle
478   // the individual compare opcodes (including A4_cmpb*) later on.
479   if (!CmpAnalyzed)
480     return false;
481 
482   // Exactly one of the input registers to the comparison should be among
483   // the induction registers.
484   InductionMap::iterator IndMapEnd = IndMap.end();
485   InductionMap::iterator F = IndMapEnd;
486   if (CmpReg1 != 0) {
487     InductionMap::iterator F1 = IndMap.find(CmpReg1);
488     if (F1 != IndMapEnd)
489       F = F1;
490   }
491   if (CmpReg2 != 0) {
492     InductionMap::iterator F2 = IndMap.find(CmpReg2);
493     if (F2 != IndMapEnd) {
494       if (F != IndMapEnd)
495         return false;
496       F = F2;
497     }
498   }
499   if (F == IndMapEnd)
500     return false;
501 
502   Reg = F->second.first;
503   IVBump = F->second.second;
504   IVOp = MRI->getVRegDef(F->first);
505   return true;
506 }
507 
508 // Return the comparison kind for the specified opcode.
509 HexagonHardwareLoops::Comparison::Kind
510 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
511                                         MachineOperand *InitialValue,
512                                         const MachineOperand *EndValue,
513                                         int64_t IVBump) const {
514   Comparison::Kind Cmp = (Comparison::Kind)0;
515   switch (CondOpc) {
516   case Hexagon::C2_cmpeqi:
517   case Hexagon::C2_cmpeq:
518   case Hexagon::C2_cmpeqp:
519     Cmp = Comparison::EQ;
520     break;
521   case Hexagon::C4_cmpneq:
522   case Hexagon::C4_cmpneqi:
523     Cmp = Comparison::NE;
524     break;
525   case Hexagon::C4_cmplte:
526     Cmp = Comparison::LEs;
527     break;
528   case Hexagon::C4_cmplteu:
529     Cmp = Comparison::LEu;
530     break;
531   case Hexagon::C2_cmpgtui:
532   case Hexagon::C2_cmpgtu:
533   case Hexagon::C2_cmpgtup:
534     Cmp = Comparison::GTu;
535     break;
536   case Hexagon::C2_cmpgti:
537   case Hexagon::C2_cmpgt:
538   case Hexagon::C2_cmpgtp:
539     Cmp = Comparison::GTs;
540     break;
541   default:
542     return (Comparison::Kind)0;
543   }
544   return Cmp;
545 }
546 
547 /// \brief Analyze the statements in a loop to determine if the loop has
548 /// a computable trip count and, if so, return a value that represents
549 /// the trip count expression.
550 ///
551 /// This function iterates over the phi nodes in the loop to check for
552 /// induction variable patterns that are used in the calculation for
553 /// the number of time the loop is executed.
554 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
555     SmallVectorImpl<MachineInstr *> &OldInsts) {
556   MachineBasicBlock *TopMBB = L->getTopBlock();
557   MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
558   assert(PI != TopMBB->pred_end() &&
559          "Loop must have more than one incoming edge!");
560   MachineBasicBlock *Backedge = *PI++;
561   if (PI == TopMBB->pred_end())  // dead loop?
562     return nullptr;
563   MachineBasicBlock *Incoming = *PI++;
564   if (PI != TopMBB->pred_end())  // multiple backedges?
565     return nullptr;
566 
567   // Make sure there is one incoming and one backedge and determine which
568   // is which.
569   if (L->contains(Incoming)) {
570     if (L->contains(Backedge))
571       return nullptr;
572     std::swap(Incoming, Backedge);
573   } else if (!L->contains(Backedge))
574     return nullptr;
575 
576   // Look for the cmp instruction to determine if we can get a useful trip
577   // count.  The trip count can be either a register or an immediate.  The
578   // location of the value depends upon the type (reg or imm).
579   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
580   if (!ExitingBlock)
581     return nullptr;
582 
583   unsigned IVReg = 0;
584   int64_t IVBump = 0;
585   MachineInstr *IVOp;
586   bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
587   if (!FoundIV)
588     return nullptr;
589 
590   MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
591 
592   MachineOperand *InitialValue = nullptr;
593   MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
594   MachineBasicBlock *Latch = L->getLoopLatch();
595   for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
596     MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
597     if (MBB == Preheader)
598       InitialValue = &IV_Phi->getOperand(i);
599     else if (MBB == Latch)
600       IVReg = IV_Phi->getOperand(i).getReg();  // Want IV reg after bump.
601   }
602   if (!InitialValue)
603     return nullptr;
604 
605   SmallVector<MachineOperand,2> Cond;
606   MachineBasicBlock *TB = nullptr, *FB = nullptr;
607   bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
608   if (NotAnalyzed)
609     return nullptr;
610 
611   MachineBasicBlock *Header = L->getHeader();
612   // TB must be non-null.  If FB is also non-null, one of them must be
613   // the header.  Otherwise, branch to TB could be exiting the loop, and
614   // the fall through can go to the header.
615   assert (TB && "Exit block without a branch?");
616   if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
617     MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
618     SmallVector<MachineOperand,2> LCond;
619     bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
620     if (NotAnalyzed)
621       return nullptr;
622     if (TB == Latch)
623       TB = (LTB == Header) ? LTB : LFB;
624     else
625       FB = (LTB == Header) ? LTB: LFB;
626   }
627   assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
628   if (!TB || (FB && TB != Header && FB != Header))
629     return nullptr;
630 
631   // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
632   // to put imm(0), followed by P in the vector Cond.
633   // If TB is not the header, it means that the "not-taken" path must lead
634   // to the header.
635   bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
636   unsigned PredReg, PredPos, PredRegFlags;
637   if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
638     return nullptr;
639   MachineInstr *CondI = MRI->getVRegDef(PredReg);
640   unsigned CondOpc = CondI->getOpcode();
641 
642   unsigned CmpReg1 = 0, CmpReg2 = 0;
643   int Mask = 0, ImmValue = 0;
644   bool AnalyzedCmp =
645       TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
646   if (!AnalyzedCmp)
647     return nullptr;
648 
649   // The comparison operator type determines how we compute the loop
650   // trip count.
651   OldInsts.push_back(CondI);
652   OldInsts.push_back(IVOp);
653 
654   // Sadly, the following code gets information based on the position
655   // of the operands in the compare instruction.  This has to be done
656   // this way, because the comparisons check for a specific relationship
657   // between the operands (e.g. is-less-than), rather than to find out
658   // what relationship the operands are in (as on PPC).
659   Comparison::Kind Cmp;
660   bool isSwapped = false;
661   const MachineOperand &Op1 = CondI->getOperand(1);
662   const MachineOperand &Op2 = CondI->getOperand(2);
663   const MachineOperand *EndValue = nullptr;
664 
665   if (Op1.isReg()) {
666     if (Op2.isImm() || Op1.getReg() == IVReg)
667       EndValue = &Op2;
668     else {
669       EndValue = &Op1;
670       isSwapped = true;
671     }
672   }
673 
674   if (!EndValue)
675     return nullptr;
676 
677   Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
678   if (!Cmp)
679     return nullptr;
680   if (Negated)
681     Cmp = Comparison::getNegatedComparison(Cmp);
682   if (isSwapped)
683     Cmp = Comparison::getSwappedComparison(Cmp);
684 
685   if (InitialValue->isReg()) {
686     unsigned R = InitialValue->getReg();
687     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
688     if (!MDT->properlyDominates(DefBB, Header))
689       return nullptr;
690     OldInsts.push_back(MRI->getVRegDef(R));
691   }
692   if (EndValue->isReg()) {
693     unsigned R = EndValue->getReg();
694     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
695     if (!MDT->properlyDominates(DefBB, Header))
696       return nullptr;
697     OldInsts.push_back(MRI->getVRegDef(R));
698   }
699 
700   return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
701 }
702 
703 /// \brief Helper function that returns the expression that represents the
704 /// number of times a loop iterates.  The function takes the operands that
705 /// represent the loop start value, loop end value, and induction value.
706 /// Based upon these operands, the function attempts to compute the trip count.
707 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
708                                                const MachineOperand *Start,
709                                                const MachineOperand *End,
710                                                unsigned IVReg,
711                                                int64_t IVBump,
712                                                Comparison::Kind Cmp) const {
713   // Cannot handle comparison EQ, i.e. while (A == B).
714   if (Cmp == Comparison::EQ)
715     return nullptr;
716 
717   // Check if either the start or end values are an assignment of an immediate.
718   // If so, use the immediate value rather than the register.
719   if (Start->isReg()) {
720     const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
721     if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
722                           StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
723       Start = &StartValInstr->getOperand(1);
724   }
725   if (End->isReg()) {
726     const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
727     if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
728                         EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
729       End = &EndValInstr->getOperand(1);
730   }
731 
732   if (!Start->isReg() && !Start->isImm())
733     return nullptr;
734   if (!End->isReg() && !End->isImm())
735     return nullptr;
736 
737   bool CmpLess =     Cmp & Comparison::L;
738   bool CmpGreater =  Cmp & Comparison::G;
739   bool CmpHasEqual = Cmp & Comparison::EQ;
740 
741   // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
742   if (CmpLess && IVBump < 0)
743     // Loop going while iv is "less" with the iv value going down.  Must wrap.
744     return nullptr;
745 
746   if (CmpGreater && IVBump > 0)
747     // Loop going while iv is "greater" with the iv value going up.  Must wrap.
748     return nullptr;
749 
750   // Phis that may feed into the loop.
751   LoopFeederMap LoopFeederPhi;
752 
753   // Check if the initial value may be zero and can be decremented in the first
754   // iteration. If the value is zero, the endloop instruction will not decrement
755   // the loop counter, so we shouldn't generate a hardware loop in this case.
756   if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
757                                   LoopFeederPhi))
758       return nullptr;
759 
760   if (Start->isImm() && End->isImm()) {
761     // Both, start and end are immediates.
762     int64_t StartV = Start->getImm();
763     int64_t EndV = End->getImm();
764     int64_t Dist = EndV - StartV;
765     if (Dist == 0)
766       return nullptr;
767 
768     bool Exact = (Dist % IVBump) == 0;
769 
770     if (Cmp == Comparison::NE) {
771       if (!Exact)
772         return nullptr;
773       if ((Dist < 0) ^ (IVBump < 0))
774         return nullptr;
775     }
776 
777     // For comparisons that include the final value (i.e. include equality
778     // with the final value), we need to increase the distance by 1.
779     if (CmpHasEqual)
780       Dist = Dist > 0 ? Dist+1 : Dist-1;
781 
782     // For the loop to iterate, CmpLess should imply Dist > 0.  Similarly,
783     // CmpGreater should imply Dist < 0.  These conditions could actually
784     // fail, for example, in unreachable code (which may still appear to be
785     // reachable in the CFG).
786     if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
787       return nullptr;
788 
789     // "Normalized" distance, i.e. with the bump set to +-1.
790     int64_t Dist1 = (IVBump > 0) ? (Dist +  (IVBump - 1)) / IVBump
791                                  : (-Dist + (-IVBump - 1)) / (-IVBump);
792     assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
793 
794     uint64_t Count = Dist1;
795 
796     if (Count > 0xFFFFFFFFULL)
797       return nullptr;
798 
799     return new CountValue(CountValue::CV_Immediate, Count);
800   }
801 
802   // A general case: Start and End are some values, but the actual
803   // iteration count may not be available.  If it is not, insert
804   // a computation of it into the preheader.
805 
806   // If the induction variable bump is not a power of 2, quit.
807   // Othwerise we'd need a general integer division.
808   if (!isPowerOf2_64(std::abs(IVBump)))
809     return nullptr;
810 
811   MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
812   assert (PH && "Should have a preheader by now");
813   MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
814   DebugLoc DL;
815   if (InsertPos != PH->end())
816     DL = InsertPos->getDebugLoc();
817 
818   // If Start is an immediate and End is a register, the trip count
819   // will be "reg - imm".  Hexagon's "subtract immediate" instruction
820   // is actually "reg + -imm".
821 
822   // If the loop IV is going downwards, i.e. if the bump is negative,
823   // then the iteration count (computed as End-Start) will need to be
824   // negated.  To avoid the negation, just swap Start and End.
825   if (IVBump < 0) {
826     std::swap(Start, End);
827     IVBump = -IVBump;
828   }
829   // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
830   // Signedness, and "including equality" are preserved.
831 
832   bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
833   bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
834 
835   int64_t StartV = 0, EndV = 0;
836   if (Start->isImm())
837     StartV = Start->getImm();
838   if (End->isImm())
839     EndV = End->getImm();
840 
841   int64_t AdjV = 0;
842   // To compute the iteration count, we would need this computation:
843   //   Count = (End - Start + (IVBump-1)) / IVBump
844   // or, when CmpHasEqual:
845   //   Count = (End - Start + (IVBump-1)+1) / IVBump
846   // The "IVBump-1" part is the adjustment (AdjV).  We can avoid
847   // generating an instruction specifically to add it if we can adjust
848   // the immediate values for Start or End.
849 
850   if (CmpHasEqual) {
851     // Need to add 1 to the total iteration count.
852     if (Start->isImm())
853       StartV--;
854     else if (End->isImm())
855       EndV++;
856     else
857       AdjV += 1;
858   }
859 
860   if (Cmp != Comparison::NE) {
861     if (Start->isImm())
862       StartV -= (IVBump-1);
863     else if (End->isImm())
864       EndV += (IVBump-1);
865     else
866       AdjV += (IVBump-1);
867   }
868 
869   unsigned R = 0, SR = 0;
870   if (Start->isReg()) {
871     R = Start->getReg();
872     SR = Start->getSubReg();
873   } else {
874     R = End->getReg();
875     SR = End->getSubReg();
876   }
877   const TargetRegisterClass *RC = MRI->getRegClass(R);
878   // Hardware loops cannot handle 64-bit registers.  If it's a double
879   // register, it has to have a subregister.
880   if (!SR && RC == &Hexagon::DoubleRegsRegClass)
881     return nullptr;
882   const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
883 
884   // Compute DistR (register with the distance between Start and End).
885   unsigned DistR, DistSR;
886 
887   // Avoid special case, where the start value is an imm(0).
888   if (Start->isImm() && StartV == 0) {
889     DistR = End->getReg();
890     DistSR = End->getSubReg();
891   } else {
892     const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
893                               (RegToImm ? TII->get(Hexagon::A2_subri) :
894                                           TII->get(Hexagon::A2_addi));
895     if (RegToReg || RegToImm) {
896       unsigned SubR = MRI->createVirtualRegister(IntRC);
897       MachineInstrBuilder SubIB =
898         BuildMI(*PH, InsertPos, DL, SubD, SubR);
899 
900       if (RegToReg)
901         SubIB.addReg(End->getReg(), 0, End->getSubReg())
902           .addReg(Start->getReg(), 0, Start->getSubReg());
903       else
904         SubIB.addImm(EndV)
905           .addReg(Start->getReg(), 0, Start->getSubReg());
906       DistR = SubR;
907     } else {
908       // If the loop has been unrolled, we should use the original loop count
909       // instead of recalculating the value. This will avoid additional
910       // 'Add' instruction.
911       const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
912       if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
913           EndValInstr->getOperand(2).getImm() == StartV) {
914         DistR = EndValInstr->getOperand(1).getReg();
915       } else {
916         unsigned SubR = MRI->createVirtualRegister(IntRC);
917         MachineInstrBuilder SubIB =
918           BuildMI(*PH, InsertPos, DL, SubD, SubR);
919         SubIB.addReg(End->getReg(), 0, End->getSubReg())
920              .addImm(-StartV);
921         DistR = SubR;
922       }
923     }
924     DistSR = 0;
925   }
926 
927   // From DistR, compute AdjR (register with the adjusted distance).
928   unsigned AdjR, AdjSR;
929 
930   if (AdjV == 0) {
931     AdjR = DistR;
932     AdjSR = DistSR;
933   } else {
934     // Generate CountR = ADD DistR, AdjVal
935     unsigned AddR = MRI->createVirtualRegister(IntRC);
936     MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
937     BuildMI(*PH, InsertPos, DL, AddD, AddR)
938       .addReg(DistR, 0, DistSR)
939       .addImm(AdjV);
940 
941     AdjR = AddR;
942     AdjSR = 0;
943   }
944 
945   // From AdjR, compute CountR (register with the final count).
946   unsigned CountR, CountSR;
947 
948   if (IVBump == 1) {
949     CountR = AdjR;
950     CountSR = AdjSR;
951   } else {
952     // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
953     unsigned Shift = Log2_32(IVBump);
954 
955     // Generate NormR = LSR DistR, Shift.
956     unsigned LsrR = MRI->createVirtualRegister(IntRC);
957     const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
958     BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
959       .addReg(AdjR, 0, AdjSR)
960       .addImm(Shift);
961 
962     CountR = LsrR;
963     CountSR = 0;
964   }
965 
966   return new CountValue(CountValue::CV_Register, CountR, CountSR);
967 }
968 
969 /// \brief Return true if the operation is invalid within hardware loop.
970 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
971                                                   bool IsInnerHWLoop) const {
972   // Call is not allowed because the callee may use a hardware loop except for
973   // the case when the call never returns.
974   if (MI->getDesc().isCall())
975     return !TII->doesNotReturn(*MI);
976 
977   // Check if the instruction defines a hardware loop register.
978   using namespace Hexagon;
979 
980   static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
981   static const unsigned Regs1[]  = { LC1, SA1 };
982   auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
983                                  : makeArrayRef(Regs1, array_lengthof(Regs1));
984   for (unsigned R : CheckRegs)
985     if (MI->modifiesRegister(R, TRI))
986       return true;
987 
988   return false;
989 }
990 
991 /// \brief Return true if the loop contains an instruction that inhibits
992 /// the use of the hardware loop instruction.
993 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
994     bool IsInnerHWLoop) const {
995   const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
996   DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
997   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
998     MachineBasicBlock *MBB = Blocks[i];
999     for (MachineBasicBlock::iterator
1000            MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
1001       const MachineInstr *MI = &*MII;
1002       if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
1003         DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
1004         return true;
1005       }
1006     }
1007   }
1008   return false;
1009 }
1010 
1011 /// \brief Returns true if the instruction is dead.  This was essentially
1012 /// copied from DeadMachineInstructionElim::isDead, but with special cases
1013 /// for inline asm, physical registers and instructions with side effects
1014 /// removed.
1015 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1016                               SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1017   // Examine each operand.
1018   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1019     const MachineOperand &MO = MI->getOperand(i);
1020     if (!MO.isReg() || !MO.isDef())
1021       continue;
1022 
1023     unsigned Reg = MO.getReg();
1024     if (MRI->use_nodbg_empty(Reg))
1025       continue;
1026 
1027     using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1028 
1029     // This instruction has users, but if the only user is the phi node for the
1030     // parent block, and the only use of that phi node is this instruction, then
1031     // this instruction is dead: both it (and the phi node) can be removed.
1032     use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1033     use_nodbg_iterator End = MRI->use_nodbg_end();
1034     if (std::next(I) != End || !I->getParent()->isPHI())
1035       return false;
1036 
1037     MachineInstr *OnePhi = I->getParent();
1038     for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1039       const MachineOperand &OPO = OnePhi->getOperand(j);
1040       if (!OPO.isReg() || !OPO.isDef())
1041         continue;
1042 
1043       unsigned OPReg = OPO.getReg();
1044       use_nodbg_iterator nextJ;
1045       for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1046            J != End; J = nextJ) {
1047         nextJ = std::next(J);
1048         MachineOperand &Use = *J;
1049         MachineInstr *UseMI = Use.getParent();
1050 
1051         // If the phi node has a user that is not MI, bail.
1052         if (MI != UseMI)
1053           return false;
1054       }
1055     }
1056     DeadPhis.push_back(OnePhi);
1057   }
1058 
1059   // If there are no defs with uses, the instruction is dead.
1060   return true;
1061 }
1062 
1063 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1064   // This procedure was essentially copied from DeadMachineInstructionElim.
1065 
1066   SmallVector<MachineInstr*, 1> DeadPhis;
1067   if (isDead(MI, DeadPhis)) {
1068     DEBUG(dbgs() << "HW looping will remove: " << *MI);
1069 
1070     // It is possible that some DBG_VALUE instructions refer to this
1071     // instruction.  Examine each def operand for such references;
1072     // if found, mark the DBG_VALUE as undef (but don't delete it).
1073     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1074       const MachineOperand &MO = MI->getOperand(i);
1075       if (!MO.isReg() || !MO.isDef())
1076         continue;
1077       unsigned Reg = MO.getReg();
1078       MachineRegisterInfo::use_iterator nextI;
1079       for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1080            E = MRI->use_end(); I != E; I = nextI) {
1081         nextI = std::next(I);  // I is invalidated by the setReg
1082         MachineOperand &Use = *I;
1083         MachineInstr *UseMI = I->getParent();
1084         if (UseMI == MI)
1085           continue;
1086         if (Use.isDebug())
1087           UseMI->getOperand(0).setReg(0U);
1088       }
1089     }
1090 
1091     MI->eraseFromParent();
1092     for (unsigned i = 0; i < DeadPhis.size(); ++i)
1093       DeadPhis[i]->eraseFromParent();
1094   }
1095 }
1096 
1097 /// \brief Check if the loop is a candidate for converting to a hardware
1098 /// loop.  If so, then perform the transformation.
1099 ///
1100 /// This function works on innermost loops first.  A loop can be converted
1101 /// if it is a counting loop; either a register value or an immediate.
1102 ///
1103 /// The code makes several assumptions about the representation of the loop
1104 /// in llvm.
1105 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1106                                                  bool &RecL0used,
1107                                                  bool &RecL1used) {
1108   // This is just for sanity.
1109   assert(L->getHeader() && "Loop without a header?");
1110 
1111   bool Changed = false;
1112   bool L0Used = false;
1113   bool L1Used = false;
1114 
1115   // Process nested loops first.
1116   for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1117     Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1118     L0Used |= RecL0used;
1119     L1Used |= RecL1used;
1120   }
1121 
1122   // If a nested loop has been converted, then we can't convert this loop.
1123   if (Changed && L0Used && L1Used)
1124     return Changed;
1125 
1126   unsigned LOOP_i;
1127   unsigned LOOP_r;
1128   unsigned ENDLOOP;
1129 
1130   // Flag used to track loopN instruction:
1131   // 1 - Hardware loop is being generated for the inner most loop.
1132   // 0 - Hardware loop is being generated for the outer loop.
1133   unsigned IsInnerHWLoop = 1;
1134 
1135   if (L0Used) {
1136     LOOP_i = Hexagon::J2_loop1i;
1137     LOOP_r = Hexagon::J2_loop1r;
1138     ENDLOOP = Hexagon::ENDLOOP1;
1139     IsInnerHWLoop = 0;
1140   } else {
1141     LOOP_i = Hexagon::J2_loop0i;
1142     LOOP_r = Hexagon::J2_loop0r;
1143     ENDLOOP = Hexagon::ENDLOOP0;
1144   }
1145 
1146 #ifndef NDEBUG
1147   // Stop trying after reaching the limit (if any).
1148   int Limit = HWLoopLimit;
1149   if (Limit >= 0) {
1150     if (Counter >= HWLoopLimit)
1151       return false;
1152     Counter++;
1153   }
1154 #endif
1155 
1156   // Does the loop contain any invalid instructions?
1157   if (containsInvalidInstruction(L, IsInnerHWLoop))
1158     return false;
1159 
1160   MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1161   // Don't generate hw loop if the loop has more than one exit.
1162   if (!LastMBB)
1163     return false;
1164 
1165   MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1166   if (LastI == LastMBB->end())
1167     return false;
1168 
1169   // Is the induction variable bump feeding the latch condition?
1170   if (!fixupInductionVariable(L))
1171     return false;
1172 
1173   // Ensure the loop has a preheader: the loop instruction will be
1174   // placed there.
1175   MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1176   if (!Preheader) {
1177     Preheader = createPreheaderForLoop(L);
1178     if (!Preheader)
1179       return false;
1180   }
1181 
1182   MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1183 
1184   SmallVector<MachineInstr*, 2> OldInsts;
1185   // Are we able to determine the trip count for the loop?
1186   CountValue *TripCount = getLoopTripCount(L, OldInsts);
1187   if (!TripCount)
1188     return false;
1189 
1190   // Is the trip count available in the preheader?
1191   if (TripCount->isReg()) {
1192     // There will be a use of the register inserted into the preheader,
1193     // so make sure that the register is actually defined at that point.
1194     MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1195     MachineBasicBlock *BBDef = TCDef->getParent();
1196     if (!MDT->dominates(BBDef, Preheader))
1197       return false;
1198   }
1199 
1200   // Determine the loop start.
1201   MachineBasicBlock *TopBlock = L->getTopBlock();
1202   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1203   MachineBasicBlock *LoopStart = nullptr;
1204   if (ExitingBlock !=  L->getLoopLatch()) {
1205     MachineBasicBlock *TB = nullptr, *FB = nullptr;
1206     SmallVector<MachineOperand, 2> Cond;
1207 
1208     if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1209       return false;
1210 
1211     if (L->contains(TB))
1212       LoopStart = TB;
1213     else if (L->contains(FB))
1214       LoopStart = FB;
1215     else
1216       return false;
1217   }
1218   else
1219     LoopStart = TopBlock;
1220 
1221   // Convert the loop to a hardware loop.
1222   DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1223   DebugLoc DL;
1224   if (InsertPos != Preheader->end())
1225     DL = InsertPos->getDebugLoc();
1226 
1227   if (TripCount->isReg()) {
1228     // Create a copy of the loop count register.
1229     unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1230     BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1231       .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1232     // Add the Loop instruction to the beginning of the loop.
1233     BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1234       .addReg(CountReg);
1235   } else {
1236     assert(TripCount->isImm() && "Expecting immediate value for trip count");
1237     // Add the Loop immediate instruction to the beginning of the loop,
1238     // if the immediate fits in the instructions.  Otherwise, we need to
1239     // create a new virtual register.
1240     int64_t CountImm = TripCount->getImm();
1241     if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1242       unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1243       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1244         .addImm(CountImm);
1245       BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1246         .addMBB(LoopStart).addReg(CountReg);
1247     } else
1248       BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1249         .addMBB(LoopStart).addImm(CountImm);
1250   }
1251 
1252   // Make sure the loop start always has a reference in the CFG.  We need
1253   // to create a BlockAddress operand to get this mechanism to work both the
1254   // MachineBasicBlock and BasicBlock objects need the flag set.
1255   LoopStart->setHasAddressTaken();
1256   // This line is needed to set the hasAddressTaken flag on the BasicBlock
1257   // object.
1258   BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1259 
1260   // Replace the loop branch with an endloop instruction.
1261   DebugLoc LastIDL = LastI->getDebugLoc();
1262   BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1263 
1264   // The loop ends with either:
1265   //  - a conditional branch followed by an unconditional branch, or
1266   //  - a conditional branch to the loop start.
1267   if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1268       LastI->getOpcode() == Hexagon::J2_jumpf) {
1269     // Delete one and change/add an uncond. branch to out of the loop.
1270     MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1271     LastI = LastMBB->erase(LastI);
1272     if (!L->contains(BranchTarget)) {
1273       if (LastI != LastMBB->end())
1274         LastI = LastMBB->erase(LastI);
1275       SmallVector<MachineOperand, 0> Cond;
1276       TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1277     }
1278   } else {
1279     // Conditional branch to loop start; just delete it.
1280     LastMBB->erase(LastI);
1281   }
1282   delete TripCount;
1283 
1284   // The induction operation and the comparison may now be
1285   // unneeded. If these are unneeded, then remove them.
1286   for (unsigned i = 0; i < OldInsts.size(); ++i)
1287     removeIfDead(OldInsts[i]);
1288 
1289   ++NumHWLoops;
1290 
1291   // Set RecL1used and RecL0used only after hardware loop has been
1292   // successfully generated. Doing it earlier can cause wrong loop instruction
1293   // to be used.
1294   if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1295     RecL1used = true;
1296   else
1297     RecL0used = true;
1298 
1299   return true;
1300 }
1301 
1302 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1303                                             MachineInstr *CmpI) {
1304   assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1305 
1306   MachineBasicBlock *BB = BumpI->getParent();
1307   if (CmpI->getParent() != BB)
1308     return false;
1309 
1310   using instr_iterator = MachineBasicBlock::instr_iterator;
1311 
1312   // Check if things are in order to begin with.
1313   for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1314     if (&*I == CmpI)
1315       return true;
1316 
1317   // Out of order.
1318   unsigned PredR = CmpI->getOperand(0).getReg();
1319   bool FoundBump = false;
1320   instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1321   for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1322     MachineInstr *In = &*I;
1323     for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1324       MachineOperand &MO = In->getOperand(i);
1325       if (MO.isReg() && MO.isUse()) {
1326         if (MO.getReg() == PredR)  // Found an intervening use of PredR.
1327           return false;
1328       }
1329     }
1330 
1331     if (In == BumpI) {
1332       BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1333       FoundBump = true;
1334       break;
1335     }
1336   }
1337   assert (FoundBump && "Cannot determine instruction order");
1338   return FoundBump;
1339 }
1340 
1341 /// This function is required to break recursion. Visiting phis in a loop may
1342 /// result in recursion during compilation. We break the recursion by making
1343 /// sure that we visit a MachineOperand and its definition in a
1344 /// MachineInstruction only once. If we attempt to visit more than once, then
1345 /// there is recursion, and will return false.
1346 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1347                                         MachineInstr *MI,
1348                                         const MachineOperand *MO,
1349                                         LoopFeederMap &LoopFeederPhi) const {
1350   if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1351     const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1352     DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1353     // Ignore all BBs that form Loop.
1354     for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1355       MachineBasicBlock *MBB = Blocks[i];
1356       if (A == MBB)
1357         return false;
1358     }
1359     MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1360     LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1361     return true;
1362   } else
1363     // Already visited node.
1364     return false;
1365 }
1366 
1367 /// Return true if a Phi may generate a value that can underflow.
1368 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1369 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1370     MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1371     MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1372   assert(Phi->isPHI() && "Expecting a Phi.");
1373   // Walk through each Phi, and its used operands. Make sure that
1374   // if there is recursion in Phi, we won't generate hardware loops.
1375   for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1376     if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1377       if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1378                                       Phi->getParent(), L, LoopFeederPhi))
1379         return true;
1380   return false;
1381 }
1382 
1383 /// Return true if the induction variable can underflow in the first iteration.
1384 /// An example, is an initial unsigned value that is 0 and is decrement in the
1385 /// first itertion of a do-while loop.  In this case, we cannot generate a
1386 /// hardware loop because the endloop instruction does not decrement the loop
1387 /// counter if it is <= 1. We only need to perform this analysis if the
1388 /// initial value is a register.
1389 ///
1390 /// This function assumes the initial value may underfow unless proven
1391 /// otherwise. If the type is signed, then we don't care because signed
1392 /// underflow is undefined. We attempt to prove the initial value is not
1393 /// zero by perfoming a crude analysis of the loop counter. This function
1394 /// checks if the initial value is used in any comparison prior to the loop
1395 /// and, if so, assumes the comparison is a range check. This is inexact,
1396 /// but will catch the simple cases.
1397 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1398     const MachineOperand *InitVal, const MachineOperand *EndVal,
1399     MachineBasicBlock *MBB, MachineLoop *L,
1400     LoopFeederMap &LoopFeederPhi) const {
1401   // Only check register values since they are unknown.
1402   if (!InitVal->isReg())
1403     return false;
1404 
1405   if (!EndVal->isImm())
1406     return false;
1407 
1408   // A register value that is assigned an immediate is a known value, and it
1409   // won't underflow in the first iteration.
1410   int64_t Imm;
1411   if (checkForImmediate(*InitVal, Imm))
1412     return (EndVal->getImm() == Imm);
1413 
1414   unsigned Reg = InitVal->getReg();
1415 
1416   // We don't know the value of a physical register.
1417   if (!TargetRegisterInfo::isVirtualRegister(Reg))
1418     return true;
1419 
1420   MachineInstr *Def = MRI->getVRegDef(Reg);
1421   if (!Def)
1422     return true;
1423 
1424   // If the initial value is a Phi or copy and the operands may not underflow,
1425   // then the definition cannot be underflow either.
1426   if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1427                                              L, LoopFeederPhi))
1428     return false;
1429   if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1430                                                     EndVal, Def->getParent(),
1431                                                     L, LoopFeederPhi))
1432     return false;
1433 
1434   // Iterate over the uses of the initial value. If the initial value is used
1435   // in a compare, then we assume this is a range check that ensures the loop
1436   // doesn't underflow. This is not an exact test and should be improved.
1437   for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1438          E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1439     MachineInstr *MI = &*I;
1440     unsigned CmpReg1 = 0, CmpReg2 = 0;
1441     int CmpMask = 0, CmpValue = 0;
1442 
1443     if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1444       continue;
1445 
1446     MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1447     SmallVector<MachineOperand, 2> Cond;
1448     if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1449       continue;
1450 
1451     Comparison::Kind Cmp =
1452         getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1453     if (Cmp == 0)
1454       continue;
1455     if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1456       Cmp = Comparison::getNegatedComparison(Cmp);
1457     if (CmpReg2 != 0 && CmpReg2 == Reg)
1458       Cmp = Comparison::getSwappedComparison(Cmp);
1459 
1460     // Signed underflow is undefined.
1461     if (Comparison::isSigned(Cmp))
1462       return false;
1463 
1464     // Check if there is a comparison of the initial value. If the initial value
1465     // is greater than or not equal to another value, then assume this is a
1466     // range check.
1467     if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1468       return false;
1469   }
1470 
1471   // OK - this is a hack that needs to be improved. We really need to analyze
1472   // the instructions performed on the initial value. This works on the simplest
1473   // cases only.
1474   if (!Def->isCopy() && !Def->isPHI())
1475     return false;
1476 
1477   return true;
1478 }
1479 
1480 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1481                                              int64_t &Val) const {
1482   if (MO.isImm()) {
1483     Val = MO.getImm();
1484     return true;
1485   }
1486   if (!MO.isReg())
1487     return false;
1488 
1489   // MO is a register. Check whether it is defined as an immediate value,
1490   // and if so, get the value of it in TV. That value will then need to be
1491   // processed to handle potential subregisters in MO.
1492   int64_t TV;
1493 
1494   unsigned R = MO.getReg();
1495   if (!TargetRegisterInfo::isVirtualRegister(R))
1496     return false;
1497   MachineInstr *DI = MRI->getVRegDef(R);
1498   unsigned DOpc = DI->getOpcode();
1499   switch (DOpc) {
1500     case TargetOpcode::COPY:
1501     case Hexagon::A2_tfrsi:
1502     case Hexagon::A2_tfrpi:
1503     case Hexagon::CONST32:
1504     case Hexagon::CONST64:
1505       // Call recursively to avoid an extra check whether operand(1) is
1506       // indeed an immediate (it could be a global address, for example),
1507       // plus we can handle COPY at the same time.
1508       if (!checkForImmediate(DI->getOperand(1), TV))
1509         return false;
1510       break;
1511     case Hexagon::A2_combineii:
1512     case Hexagon::A4_combineir:
1513     case Hexagon::A4_combineii:
1514     case Hexagon::A4_combineri:
1515     case Hexagon::A2_combinew: {
1516       const MachineOperand &S1 = DI->getOperand(1);
1517       const MachineOperand &S2 = DI->getOperand(2);
1518       int64_t V1, V2;
1519       if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1520         return false;
1521       TV = V2 | (static_cast<uint64_t>(V1) << 32);
1522       break;
1523     }
1524     case TargetOpcode::REG_SEQUENCE: {
1525       const MachineOperand &S1 = DI->getOperand(1);
1526       const MachineOperand &S3 = DI->getOperand(3);
1527       int64_t V1, V3;
1528       if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1529         return false;
1530       unsigned Sub2 = DI->getOperand(2).getImm();
1531       unsigned Sub4 = DI->getOperand(4).getImm();
1532       if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1533         TV = V1 | (V3 << 32);
1534       else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1535         TV = V3 | (V1 << 32);
1536       else
1537         llvm_unreachable("Unexpected form of REG_SEQUENCE");
1538       break;
1539     }
1540 
1541     default:
1542       return false;
1543   }
1544 
1545   // By now, we should have successfully obtained the immediate value defining
1546   // the register referenced in MO. Handle a potential use of a subregister.
1547   switch (MO.getSubReg()) {
1548     case Hexagon::isub_lo:
1549       Val = TV & 0xFFFFFFFFULL;
1550       break;
1551     case Hexagon::isub_hi:
1552       Val = (TV >> 32) & 0xFFFFFFFFULL;
1553       break;
1554     default:
1555       Val = TV;
1556       break;
1557   }
1558   return true;
1559 }
1560 
1561 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1562   if (MO.isImm()) {
1563     MO.setImm(Val);
1564     return;
1565   }
1566 
1567   assert(MO.isReg());
1568   unsigned R = MO.getReg();
1569   MachineInstr *DI = MRI->getVRegDef(R);
1570 
1571   const TargetRegisterClass *RC = MRI->getRegClass(R);
1572   unsigned NewR = MRI->createVirtualRegister(RC);
1573   MachineBasicBlock &B = *DI->getParent();
1574   DebugLoc DL = DI->getDebugLoc();
1575   BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1576   MO.setReg(NewR);
1577 }
1578 
1579 static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1580   // These two instructions are not extendable.
1581   if (CmpOpc == Hexagon::A4_cmpbeqi)
1582     return isUInt<8>(Imm);
1583   if (CmpOpc == Hexagon::A4_cmpbgti)
1584     return isInt<8>(Imm);
1585   // The rest of the comparison-with-immediate instructions are extendable.
1586   return true;
1587 }
1588 
1589 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1590   MachineBasicBlock *Header = L->getHeader();
1591   MachineBasicBlock *Latch = L->getLoopLatch();
1592   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1593 
1594   if (!(Header && Latch && ExitingBlock))
1595     return false;
1596 
1597   // These data structures follow the same concept as the corresponding
1598   // ones in findInductionRegister (where some comments are).
1599   using RegisterBump = std::pair<unsigned, int64_t>;
1600   using RegisterInduction = std::pair<unsigned, RegisterBump>;
1601   using RegisterInductionSet = std::set<RegisterInduction>;
1602 
1603   // Register candidates for induction variables, with their associated bumps.
1604   RegisterInductionSet IndRegs;
1605 
1606   // Look for induction patterns:
1607   //   vreg1 = PHI ..., [ latch, vreg2 ]
1608   //   vreg2 = ADD vreg1, imm
1609   using instr_iterator = MachineBasicBlock::instr_iterator;
1610 
1611   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1612        I != E && I->isPHI(); ++I) {
1613     MachineInstr *Phi = &*I;
1614 
1615     // Have a PHI instruction.
1616     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1617       if (Phi->getOperand(i+1).getMBB() != Latch)
1618         continue;
1619 
1620       unsigned PhiReg = Phi->getOperand(i).getReg();
1621       MachineInstr *DI = MRI->getVRegDef(PhiReg);
1622 
1623       if (DI->getDesc().isAdd()) {
1624         // If the register operand to the add/sub is the PHI we are looking
1625         // at, this meets the induction pattern.
1626         unsigned IndReg = DI->getOperand(1).getReg();
1627         MachineOperand &Opnd2 = DI->getOperand(2);
1628         int64_t V;
1629         if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1630           unsigned UpdReg = DI->getOperand(0).getReg();
1631           IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1632         }
1633       }
1634     }  // for (i)
1635   }  // for (instr)
1636 
1637   if (IndRegs.empty())
1638     return false;
1639 
1640   MachineBasicBlock *TB = nullptr, *FB = nullptr;
1641   SmallVector<MachineOperand,2> Cond;
1642   // AnalyzeBranch returns true if it fails to analyze branch.
1643   bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1644   if (NotAnalyzed || Cond.empty())
1645     return false;
1646 
1647   if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1648     MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1649     SmallVector<MachineOperand,2> LCond;
1650     bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1651     if (NotAnalyzed)
1652       return false;
1653 
1654     // Since latch is not the exiting block, the latch branch should be an
1655     // unconditional branch to the loop header.
1656     if (TB == Latch)
1657       TB = (LTB == Header) ? LTB : LFB;
1658     else
1659       FB = (LTB == Header) ? LTB : LFB;
1660   }
1661   if (TB != Header) {
1662     if (FB != Header) {
1663       // The latch/exit block does not go back to the header.
1664       return false;
1665     }
1666     // FB is the header (i.e., uncond. jump to branch header)
1667     // In this case, the LoopBody -> TB should not be a back edge otherwise
1668     // it could result in an infinite loop after conversion to hw_loop.
1669     // This case can happen when the Latch has two jumps like this:
1670     // Jmp_c OuterLoopHeader <-- TB
1671     // Jmp   InnerLoopHeader <-- FB
1672     if (MDT->dominates(TB, FB))
1673       return false;
1674   }
1675 
1676   // Expecting a predicate register as a condition.  It won't be a hardware
1677   // predicate register at this point yet, just a vreg.
1678   // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1679   // into Cond, followed by the predicate register.  For non-negated branches
1680   // it's just the register.
1681   unsigned CSz = Cond.size();
1682   if (CSz != 1 && CSz != 2)
1683     return false;
1684 
1685   if (!Cond[CSz-1].isReg())
1686     return false;
1687 
1688   unsigned P = Cond[CSz-1].getReg();
1689   MachineInstr *PredDef = MRI->getVRegDef(P);
1690 
1691   if (!PredDef->isCompare())
1692     return false;
1693 
1694   SmallSet<unsigned,2> CmpRegs;
1695   MachineOperand *CmpImmOp = nullptr;
1696 
1697   // Go over all operands to the compare and look for immediate and register
1698   // operands.  Assume that if the compare has a single register use and a
1699   // single immediate operand, then the register is being compared with the
1700   // immediate value.
1701   for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1702     MachineOperand &MO = PredDef->getOperand(i);
1703     if (MO.isReg()) {
1704       // Skip all implicit references.  In one case there was:
1705       //   %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1706       if (MO.isImplicit())
1707         continue;
1708       if (MO.isUse()) {
1709         if (!isImmediate(MO)) {
1710           CmpRegs.insert(MO.getReg());
1711           continue;
1712         }
1713         // Consider the register to be the "immediate" operand.
1714         if (CmpImmOp)
1715           return false;
1716         CmpImmOp = &MO;
1717       }
1718     } else if (MO.isImm()) {
1719       if (CmpImmOp)    // A second immediate argument?  Confusing.  Bail out.
1720         return false;
1721       CmpImmOp = &MO;
1722     }
1723   }
1724 
1725   if (CmpRegs.empty())
1726     return false;
1727 
1728   // Check if the compared register follows the order we want.  Fix if needed.
1729   for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1730        I != E; ++I) {
1731     // This is a success.  If the register used in the comparison is one that
1732     // we have identified as a bumped (updated) induction register, there is
1733     // nothing to do.
1734     if (CmpRegs.count(I->first))
1735       return true;
1736 
1737     // Otherwise, if the register being compared comes out of a PHI node,
1738     // and has been recognized as following the induction pattern, and is
1739     // compared against an immediate, we can fix it.
1740     const RegisterBump &RB = I->second;
1741     if (CmpRegs.count(RB.first)) {
1742       if (!CmpImmOp) {
1743         // If both operands to the compare instruction are registers, see if
1744         // it can be changed to use induction register as one of the operands.
1745         MachineInstr *IndI = nullptr;
1746         MachineInstr *nonIndI = nullptr;
1747         MachineOperand *IndMO = nullptr;
1748         MachineOperand *nonIndMO = nullptr;
1749 
1750         for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1751           MachineOperand &MO = PredDef->getOperand(i);
1752           if (MO.isReg() && MO.getReg() == RB.first) {
1753             DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1754                          << *(MRI->getVRegDef(I->first)));
1755             if (IndI)
1756               return false;
1757 
1758             IndI = MRI->getVRegDef(I->first);
1759             IndMO = &MO;
1760           } else if (MO.isReg()) {
1761             DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1762                          << *(MRI->getVRegDef(MO.getReg())));
1763             if (nonIndI)
1764               return false;
1765 
1766             nonIndI = MRI->getVRegDef(MO.getReg());
1767             nonIndMO = &MO;
1768           }
1769         }
1770         if (IndI && nonIndI &&
1771             nonIndI->getOpcode() == Hexagon::A2_addi &&
1772             nonIndI->getOperand(2).isImm() &&
1773             nonIndI->getOperand(2).getImm() == - RB.second) {
1774           bool Order = orderBumpCompare(IndI, PredDef);
1775           if (Order) {
1776             IndMO->setReg(I->first);
1777             nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1778             return true;
1779           }
1780         }
1781         return false;
1782       }
1783 
1784       // It is not valid to do this transformation on an unsigned comparison
1785       // because it may underflow.
1786       Comparison::Kind Cmp =
1787           getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1788       if (!Cmp || Comparison::isUnsigned(Cmp))
1789         return false;
1790 
1791       // If the register is being compared against an immediate, try changing
1792       // the compare instruction to use induction register and adjust the
1793       // immediate operand.
1794       int64_t CmpImm = getImmediate(*CmpImmOp);
1795       int64_t V = RB.second;
1796       // Handle Overflow (64-bit).
1797       if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1798           ((V < 0) && (CmpImm < INT64_MIN - V)))
1799         return false;
1800       CmpImm += V;
1801       // Most comparisons of register against an immediate value allow
1802       // the immediate to be constant-extended. There are some exceptions
1803       // though. Make sure the new combination will work.
1804       if (CmpImmOp->isImm())
1805         if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1806           return false;
1807 
1808       // Make sure that the compare happens after the bump.  Otherwise,
1809       // after the fixup, the compare would use a yet-undefined register.
1810       MachineInstr *BumpI = MRI->getVRegDef(I->first);
1811       bool Order = orderBumpCompare(BumpI, PredDef);
1812       if (!Order)
1813         return false;
1814 
1815       // Finally, fix the compare instruction.
1816       setImmediate(*CmpImmOp, CmpImm);
1817       for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1818         MachineOperand &MO = PredDef->getOperand(i);
1819         if (MO.isReg() && MO.getReg() == RB.first) {
1820           MO.setReg(I->first);
1821           return true;
1822         }
1823       }
1824     }
1825   }
1826 
1827   return false;
1828 }
1829 
1830 /// createPreheaderForLoop - Create a preheader for a given loop.
1831 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1832       MachineLoop *L) {
1833   if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1834     return TmpPH;
1835   if (!HWCreatePreheader)
1836     return nullptr;
1837 
1838   MachineBasicBlock *Header = L->getHeader();
1839   MachineBasicBlock *Latch = L->getLoopLatch();
1840   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1841   MachineFunction *MF = Header->getParent();
1842   DebugLoc DL;
1843 
1844 #ifndef NDEBUG
1845   if ((!PHFn.empty()) && (PHFn != MF->getName()))
1846     return nullptr;
1847 #endif
1848 
1849   if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1850     return nullptr;
1851 
1852   using instr_iterator = MachineBasicBlock::instr_iterator;
1853 
1854   // Verify that all existing predecessors have analyzable branches
1855   // (or no branches at all).
1856   using MBBVector = std::vector<MachineBasicBlock *>;
1857 
1858   MBBVector Preds(Header->pred_begin(), Header->pred_end());
1859   SmallVector<MachineOperand,2> Tmp1;
1860   MachineBasicBlock *TB = nullptr, *FB = nullptr;
1861 
1862   if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1863     return nullptr;
1864 
1865   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1866     MachineBasicBlock *PB = *I;
1867     bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1868     if (NotAnalyzed)
1869       return nullptr;
1870   }
1871 
1872   MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1873   MF->insert(Header->getIterator(), NewPH);
1874 
1875   if (Header->pred_size() > 2) {
1876     // Ensure that the header has only two predecessors: the preheader and
1877     // the loop latch.  Any additional predecessors of the header should
1878     // join at the newly created preheader. Inspect all PHI nodes from the
1879     // header and create appropriate corresponding PHI nodes in the preheader.
1880 
1881     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1882          I != E && I->isPHI(); ++I) {
1883       MachineInstr *PN = &*I;
1884 
1885       const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1886       MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1887       NewPH->insert(NewPH->end(), NewPN);
1888 
1889       unsigned PR = PN->getOperand(0).getReg();
1890       const TargetRegisterClass *RC = MRI->getRegClass(PR);
1891       unsigned NewPR = MRI->createVirtualRegister(RC);
1892       NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1893 
1894       // Copy all non-latch operands of a header's PHI node to the newly
1895       // created PHI node in the preheader.
1896       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1897         unsigned PredR = PN->getOperand(i).getReg();
1898         unsigned PredRSub = PN->getOperand(i).getSubReg();
1899         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1900         if (PredB == Latch)
1901           continue;
1902 
1903         MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1904         MO.setSubReg(PredRSub);
1905         NewPN->addOperand(MO);
1906         NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1907       }
1908 
1909       // Remove copied operands from the old PHI node and add the value
1910       // coming from the preheader's PHI.
1911       for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1912         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1913         if (PredB != Latch) {
1914           PN->RemoveOperand(i+1);
1915           PN->RemoveOperand(i);
1916         }
1917       }
1918       PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1919       PN->addOperand(MachineOperand::CreateMBB(NewPH));
1920     }
1921   } else {
1922     assert(Header->pred_size() == 2);
1923 
1924     // The header has only two predecessors, but the non-latch predecessor
1925     // is not a preheader (e.g. it has other successors, etc.)
1926     // In such a case we don't need any extra PHI nodes in the new preheader,
1927     // all we need is to adjust existing PHIs in the header to now refer to
1928     // the new preheader.
1929     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1930          I != E && I->isPHI(); ++I) {
1931       MachineInstr *PN = &*I;
1932       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1933         MachineOperand &MO = PN->getOperand(i+1);
1934         if (MO.getMBB() != Latch)
1935           MO.setMBB(NewPH);
1936       }
1937     }
1938   }
1939 
1940   // "Reroute" the CFG edges to link in the new preheader.
1941   // If any of the predecessors falls through to the header, insert a branch
1942   // to the new preheader in that place.
1943   SmallVector<MachineOperand,1> Tmp2;
1944   SmallVector<MachineOperand,1> EmptyCond;
1945 
1946   TB = FB = nullptr;
1947 
1948   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1949     MachineBasicBlock *PB = *I;
1950     if (PB != Latch) {
1951       Tmp2.clear();
1952       bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1953       (void)NotAnalyzed; // suppress compiler warning
1954       assert (!NotAnalyzed && "Should be analyzable!");
1955       if (TB != Header && (Tmp2.empty() || FB != Header))
1956         TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1957       PB->ReplaceUsesOfBlockWith(Header, NewPH);
1958     }
1959   }
1960 
1961   // It can happen that the latch block will fall through into the header.
1962   // Insert an unconditional branch to the header.
1963   TB = FB = nullptr;
1964   bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1965   (void)LatchNotAnalyzed; // suppress compiler warning
1966   assert (!LatchNotAnalyzed && "Should be analyzable!");
1967   if (!TB && !FB)
1968     TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1969 
1970   // Finally, the branch from the preheader to the header.
1971   TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1972   NewPH->addSuccessor(Header);
1973 
1974   MachineLoop *ParentLoop = L->getParentLoop();
1975   if (ParentLoop)
1976     ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1977 
1978   // Update the dominator information with the new preheader.
1979   if (MDT) {
1980     if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1981       if (MachineDomTreeNode *DHN = HN->getIDom()) {
1982         MDT->addNewBlock(NewPH, DHN->getBlock());
1983         MDT->changeImmediateDominator(Header, NewPH);
1984       }
1985     }
1986   }
1987 
1988   return NewPH;
1989 }
1990