xref: /llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp (revision 6ba672e542f9f8143d0da1ca8942df7519a29a60)
1 //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass identifies loops where we can generate the Hexagon hardware
11 // loop instruction.  The hardware loop can perform loop branches with a
12 // zero-cycle overhead.
13 //
14 // The pattern that defines the induction variable can changed depending on
15 // prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
16 // normalizes induction variables, and the Loop Strength Reduction pass
17 // run by 'llc' may also make changes to the induction variable.
18 // The pattern detected by this phase is due to running Strength Reduction.
19 //
20 // Criteria for hardware loops:
21 //  - Countable loops (w/ ind. var for a trip count)
22 //  - Assumes loops are normalized by IndVarSimplify
23 //  - Try inner-most loops first
24 //  - No function calls in loops.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "llvm/ADT/SmallSet.h"
29 #include "Hexagon.h"
30 #include "HexagonSubtarget.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/CodeGen/MachineDominators.h"
33 #include "llvm/CodeGen/MachineFunction.h"
34 #include "llvm/CodeGen/MachineFunctionPass.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineLoopInfo.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/PassSupport.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetInstrInfo.h"
43 #include <algorithm>
44 #include <vector>
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "hwloops"
49 
50 #ifndef NDEBUG
51 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
52 
53 // Option to create preheader only for a specific function.
54 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
55                                  cl::init(""));
56 #endif
57 
58 // Option to create a preheader if one doesn't exist.
59 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
60     cl::Hidden, cl::init(true),
61     cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
62 
63 // Turn it off by default. If a preheader block is not created here, the
64 // software pipeliner may be unable to find a block suitable to serve as
65 // a preheader. In that case SWP will not run.
66 static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
67   cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
68   "instructions"));
69 
70 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
71 
72 namespace llvm {
73   FunctionPass *createHexagonHardwareLoops();
74   void initializeHexagonHardwareLoopsPass(PassRegistry&);
75 }
76 
77 namespace {
78   class CountValue;
79   struct HexagonHardwareLoops : public MachineFunctionPass {
80     MachineLoopInfo            *MLI;
81     MachineRegisterInfo        *MRI;
82     MachineDominatorTree       *MDT;
83     const HexagonInstrInfo     *TII;
84 #ifndef NDEBUG
85     static int Counter;
86 #endif
87 
88   public:
89     static char ID;
90 
91     HexagonHardwareLoops() : MachineFunctionPass(ID) {
92       initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
93     }
94 
95     bool runOnMachineFunction(MachineFunction &MF) override;
96 
97     StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
98 
99     void getAnalysisUsage(AnalysisUsage &AU) const override {
100       AU.addRequired<MachineDominatorTree>();
101       AU.addRequired<MachineLoopInfo>();
102       MachineFunctionPass::getAnalysisUsage(AU);
103     }
104 
105   private:
106     typedef std::map<unsigned, MachineInstr *> LoopFeederMap;
107 
108     /// Kinds of comparisons in the compare instructions.
109     struct Comparison {
110       enum Kind {
111         EQ  = 0x01,
112         NE  = 0x02,
113         L   = 0x04,
114         G   = 0x08,
115         U   = 0x40,
116         LTs = L,
117         LEs = L | EQ,
118         GTs = G,
119         GEs = G | EQ,
120         LTu = L      | U,
121         LEu = L | EQ | U,
122         GTu = G      | U,
123         GEu = G | EQ | U
124       };
125 
126       static Kind getSwappedComparison(Kind Cmp) {
127         assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
128         if ((Cmp & L) || (Cmp & G))
129           return (Kind)(Cmp ^ (L|G));
130         return Cmp;
131       }
132 
133       static Kind getNegatedComparison(Kind Cmp) {
134         if ((Cmp & L) || (Cmp & G))
135           return (Kind)((Cmp ^ (L | G)) ^ EQ);
136         if ((Cmp & NE) || (Cmp & EQ))
137           return (Kind)(Cmp ^ (EQ | NE));
138         return (Kind)0;
139       }
140 
141       static bool isSigned(Kind Cmp) {
142         return (Cmp & (L | G) && !(Cmp & U));
143       }
144 
145       static bool isUnsigned(Kind Cmp) {
146         return (Cmp & U);
147       }
148 
149     };
150 
151     /// \brief Find the register that contains the loop controlling
152     /// induction variable.
153     /// If successful, it will return true and set the \p Reg, \p IVBump
154     /// and \p IVOp arguments.  Otherwise it will return false.
155     /// The returned induction register is the register R that follows the
156     /// following induction pattern:
157     /// loop:
158     ///   R = phi ..., [ R.next, LatchBlock ]
159     ///   R.next = R + #bump
160     ///   if (R.next < #N) goto loop
161     /// IVBump is the immediate value added to R, and IVOp is the instruction
162     /// "R.next = R + #bump".
163     bool findInductionRegister(MachineLoop *L, unsigned &Reg,
164                                int64_t &IVBump, MachineInstr *&IVOp) const;
165 
166     /// \brief Return the comparison kind for the specified opcode.
167     Comparison::Kind getComparisonKind(unsigned CondOpc,
168                                        MachineOperand *InitialValue,
169                                        const MachineOperand *Endvalue,
170                                        int64_t IVBump) const;
171 
172     /// \brief Analyze the statements in a loop to determine if the loop
173     /// has a computable trip count and, if so, return a value that represents
174     /// the trip count expression.
175     CountValue *getLoopTripCount(MachineLoop *L,
176                                  SmallVectorImpl<MachineInstr *> &OldInsts);
177 
178     /// \brief Return the expression that represents the number of times
179     /// a loop iterates.  The function takes the operands that represent the
180     /// loop start value, loop end value, and induction value.  Based upon
181     /// these operands, the function attempts to compute the trip count.
182     /// If the trip count is not directly available (as an immediate value,
183     /// or a register), the function will attempt to insert computation of it
184     /// to the loop's preheader.
185     CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
186                              const MachineOperand *End, unsigned IVReg,
187                              int64_t IVBump, Comparison::Kind Cmp) const;
188 
189     /// \brief Return true if the instruction is not valid within a hardware
190     /// loop.
191     bool isInvalidLoopOperation(const MachineInstr *MI,
192                                 bool IsInnerHWLoop) const;
193 
194     /// \brief Return true if the loop contains an instruction that inhibits
195     /// using the hardware loop.
196     bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
197 
198     /// \brief Given a loop, check if we can convert it to a hardware loop.
199     /// If so, then perform the conversion and return true.
200     bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
201 
202     /// \brief Return true if the instruction is now dead.
203     bool isDead(const MachineInstr *MI,
204                 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
205 
206     /// \brief Remove the instruction if it is now dead.
207     void removeIfDead(MachineInstr *MI);
208 
209     /// \brief Make sure that the "bump" instruction executes before the
210     /// compare.  We need that for the IV fixup, so that the compare
211     /// instruction would not use a bumped value that has not yet been
212     /// defined.  If the instructions are out of order, try to reorder them.
213     bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
214 
215     /// \brief Return true if MO and MI pair is visited only once. If visited
216     /// more than once, this indicates there is recursion. In such a case,
217     /// return false.
218     bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
219                       const MachineOperand *MO,
220                       LoopFeederMap &LoopFeederPhi) const;
221 
222     /// \brief Return true if the Phi may generate a value that may underflow,
223     /// or may wrap.
224     bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
225                                MachineBasicBlock *MBB, MachineLoop *L,
226                                LoopFeederMap &LoopFeederPhi) const;
227 
228     /// \brief Return true if the induction variable may underflow an unsigned
229     /// value in the first iteration.
230     bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
231                                      const MachineOperand *EndVal,
232                                      MachineBasicBlock *MBB, MachineLoop *L,
233                                      LoopFeederMap &LoopFeederPhi) const;
234 
235     /// \brief Check if the given operand has a compile-time known constant
236     /// value. Return true if yes, and false otherwise. When returning true, set
237     /// Val to the corresponding constant value.
238     bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
239 
240     /// \brief Check if the operand has a compile-time known constant value.
241     bool isImmediate(const MachineOperand &MO) const {
242       int64_t V;
243       return checkForImmediate(MO, V);
244     }
245 
246     /// \brief Return the immediate for the specified operand.
247     int64_t getImmediate(const MachineOperand &MO) const {
248       int64_t V;
249       if (!checkForImmediate(MO, V))
250         llvm_unreachable("Invalid operand");
251       return V;
252     }
253 
254     /// \brief Reset the given machine operand to now refer to a new immediate
255     /// value.  Assumes that the operand was already referencing an immediate
256     /// value, either directly, or via a register.
257     void setImmediate(MachineOperand &MO, int64_t Val);
258 
259     /// \brief Fix the data flow of the induction varible.
260     /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
261     ///                                     |
262     ///                                     +-> back to phi
263     /// where "bump" is the increment of the induction variable:
264     ///   iv = iv + #const.
265     /// Due to some prior code transformations, the actual flow may look
266     /// like this:
267     ///   phi -+-> bump ---> back to phi
268     ///        |
269     ///        +-> comparison-in-latch (against upper_bound-bump),
270     /// i.e. the comparison that controls the loop execution may be using
271     /// the value of the induction variable from before the increment.
272     ///
273     /// Return true if the loop's flow is the desired one (i.e. it's
274     /// either been fixed, or no fixing was necessary).
275     /// Otherwise, return false.  This can happen if the induction variable
276     /// couldn't be identified, or if the value in the latch's comparison
277     /// cannot be adjusted to reflect the post-bump value.
278     bool fixupInductionVariable(MachineLoop *L);
279 
280     /// \brief Given a loop, if it does not have a preheader, create one.
281     /// Return the block that is the preheader.
282     MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
283   };
284 
285   char HexagonHardwareLoops::ID = 0;
286 #ifndef NDEBUG
287   int HexagonHardwareLoops::Counter = 0;
288 #endif
289 
290   /// \brief Abstraction for a trip count of a loop. A smaller version
291   /// of the MachineOperand class without the concerns of changing the
292   /// operand representation.
293   class CountValue {
294   public:
295     enum CountValueType {
296       CV_Register,
297       CV_Immediate
298     };
299   private:
300     CountValueType Kind;
301     union Values {
302       struct {
303         unsigned Reg;
304         unsigned Sub;
305       } R;
306       unsigned ImmVal;
307     } Contents;
308 
309   public:
310     explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
311       Kind = t;
312       if (Kind == CV_Register) {
313         Contents.R.Reg = v;
314         Contents.R.Sub = u;
315       } else {
316         Contents.ImmVal = v;
317       }
318     }
319     bool isReg() const { return Kind == CV_Register; }
320     bool isImm() const { return Kind == CV_Immediate; }
321 
322     unsigned getReg() const {
323       assert(isReg() && "Wrong CountValue accessor");
324       return Contents.R.Reg;
325     }
326     unsigned getSubReg() const {
327       assert(isReg() && "Wrong CountValue accessor");
328       return Contents.R.Sub;
329     }
330     unsigned getImm() const {
331       assert(isImm() && "Wrong CountValue accessor");
332       return Contents.ImmVal;
333     }
334 
335     void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
336       if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
337       if (isImm()) { OS << Contents.ImmVal; }
338     }
339   };
340 } // end anonymous namespace
341 
342 
343 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
344                       "Hexagon Hardware Loops", false, false)
345 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
346 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
347 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
348                     "Hexagon Hardware Loops", false, false)
349 
350 FunctionPass *llvm::createHexagonHardwareLoops() {
351   return new HexagonHardwareLoops();
352 }
353 
354 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
355   DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
356   if (skipFunction(*MF.getFunction()))
357     return false;
358 
359   bool Changed = false;
360 
361   MLI = &getAnalysis<MachineLoopInfo>();
362   MRI = &MF.getRegInfo();
363   MDT = &getAnalysis<MachineDominatorTree>();
364   TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
365 
366   for (auto &L : *MLI)
367     if (!L->getParentLoop()) {
368       bool L0Used = false;
369       bool L1Used = false;
370       Changed |= convertToHardwareLoop(L, L0Used, L1Used);
371     }
372 
373   return Changed;
374 }
375 
376 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
377                                                  unsigned &Reg,
378                                                  int64_t &IVBump,
379                                                  MachineInstr *&IVOp
380                                                  ) const {
381   MachineBasicBlock *Header = L->getHeader();
382   MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
383   MachineBasicBlock *Latch = L->getLoopLatch();
384   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
385   if (!Header || !Preheader || !Latch || !ExitingBlock)
386     return false;
387 
388   // This pair represents an induction register together with an immediate
389   // value that will be added to it in each loop iteration.
390   typedef std::pair<unsigned,int64_t> RegisterBump;
391 
392   // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
393   // from an induction operation
394   //   R.next = R + bump
395   // where bump is an immediate value.
396   typedef std::map<unsigned,RegisterBump> InductionMap;
397 
398   InductionMap IndMap;
399 
400   typedef MachineBasicBlock::instr_iterator instr_iterator;
401   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
402        I != E && I->isPHI(); ++I) {
403     MachineInstr *Phi = &*I;
404 
405     // Have a PHI instruction.  Get the operand that corresponds to the
406     // latch block, and see if is a result of an addition of form "reg+imm",
407     // where the "reg" is defined by the PHI node we are looking at.
408     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
409       if (Phi->getOperand(i+1).getMBB() != Latch)
410         continue;
411 
412       unsigned PhiOpReg = Phi->getOperand(i).getReg();
413       MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
414 
415       if (DI->getDesc().isAdd()) {
416         // If the register operand to the add is the PHI we're looking at, this
417         // meets the induction pattern.
418         unsigned IndReg = DI->getOperand(1).getReg();
419         MachineOperand &Opnd2 = DI->getOperand(2);
420         int64_t V;
421         if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
422           unsigned UpdReg = DI->getOperand(0).getReg();
423           IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
424         }
425       }
426     }  // for (i)
427   }  // for (instr)
428 
429   SmallVector<MachineOperand,2> Cond;
430   MachineBasicBlock *TB = nullptr, *FB = nullptr;
431   bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
432   if (NotAnalyzed)
433     return false;
434 
435   unsigned PredR, PredPos, PredRegFlags;
436   if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
437     return false;
438 
439   MachineInstr *PredI = MRI->getVRegDef(PredR);
440   if (!PredI->isCompare())
441     return false;
442 
443   unsigned CmpReg1 = 0, CmpReg2 = 0;
444   int CmpImm = 0, CmpMask = 0;
445   bool CmpAnalyzed =
446       TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
447   // Fail if the compare was not analyzed, or it's not comparing a register
448   // with an immediate value.  Not checking the mask here, since we handle
449   // the individual compare opcodes (including A4_cmpb*) later on.
450   if (!CmpAnalyzed)
451     return false;
452 
453   // Exactly one of the input registers to the comparison should be among
454   // the induction registers.
455   InductionMap::iterator IndMapEnd = IndMap.end();
456   InductionMap::iterator F = IndMapEnd;
457   if (CmpReg1 != 0) {
458     InductionMap::iterator F1 = IndMap.find(CmpReg1);
459     if (F1 != IndMapEnd)
460       F = F1;
461   }
462   if (CmpReg2 != 0) {
463     InductionMap::iterator F2 = IndMap.find(CmpReg2);
464     if (F2 != IndMapEnd) {
465       if (F != IndMapEnd)
466         return false;
467       F = F2;
468     }
469   }
470   if (F == IndMapEnd)
471     return false;
472 
473   Reg = F->second.first;
474   IVBump = F->second.second;
475   IVOp = MRI->getVRegDef(F->first);
476   return true;
477 }
478 
479 // Return the comparison kind for the specified opcode.
480 HexagonHardwareLoops::Comparison::Kind
481 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
482                                         MachineOperand *InitialValue,
483                                         const MachineOperand *EndValue,
484                                         int64_t IVBump) const {
485   Comparison::Kind Cmp = (Comparison::Kind)0;
486   switch (CondOpc) {
487   case Hexagon::C2_cmpeqi:
488   case Hexagon::C2_cmpeq:
489   case Hexagon::C2_cmpeqp:
490     Cmp = Comparison::EQ;
491     break;
492   case Hexagon::C4_cmpneq:
493   case Hexagon::C4_cmpneqi:
494     Cmp = Comparison::NE;
495     break;
496   case Hexagon::C4_cmplte:
497     Cmp = Comparison::LEs;
498     break;
499   case Hexagon::C4_cmplteu:
500     Cmp = Comparison::LEu;
501     break;
502   case Hexagon::C2_cmpgtui:
503   case Hexagon::C2_cmpgtu:
504   case Hexagon::C2_cmpgtup:
505     Cmp = Comparison::GTu;
506     break;
507   case Hexagon::C2_cmpgti:
508   case Hexagon::C2_cmpgt:
509   case Hexagon::C2_cmpgtp:
510     Cmp = Comparison::GTs;
511     break;
512   default:
513     return (Comparison::Kind)0;
514   }
515   return Cmp;
516 }
517 
518 /// \brief Analyze the statements in a loop to determine if the loop has
519 /// a computable trip count and, if so, return a value that represents
520 /// the trip count expression.
521 ///
522 /// This function iterates over the phi nodes in the loop to check for
523 /// induction variable patterns that are used in the calculation for
524 /// the number of time the loop is executed.
525 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
526     SmallVectorImpl<MachineInstr *> &OldInsts) {
527   MachineBasicBlock *TopMBB = L->getTopBlock();
528   MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
529   assert(PI != TopMBB->pred_end() &&
530          "Loop must have more than one incoming edge!");
531   MachineBasicBlock *Backedge = *PI++;
532   if (PI == TopMBB->pred_end())  // dead loop?
533     return nullptr;
534   MachineBasicBlock *Incoming = *PI++;
535   if (PI != TopMBB->pred_end())  // multiple backedges?
536     return nullptr;
537 
538   // Make sure there is one incoming and one backedge and determine which
539   // is which.
540   if (L->contains(Incoming)) {
541     if (L->contains(Backedge))
542       return nullptr;
543     std::swap(Incoming, Backedge);
544   } else if (!L->contains(Backedge))
545     return nullptr;
546 
547   // Look for the cmp instruction to determine if we can get a useful trip
548   // count.  The trip count can be either a register or an immediate.  The
549   // location of the value depends upon the type (reg or imm).
550   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
551   if (!ExitingBlock)
552     return nullptr;
553 
554   unsigned IVReg = 0;
555   int64_t IVBump = 0;
556   MachineInstr *IVOp;
557   bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
558   if (!FoundIV)
559     return nullptr;
560 
561   MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
562 
563   MachineOperand *InitialValue = nullptr;
564   MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
565   MachineBasicBlock *Latch = L->getLoopLatch();
566   for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
567     MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
568     if (MBB == Preheader)
569       InitialValue = &IV_Phi->getOperand(i);
570     else if (MBB == Latch)
571       IVReg = IV_Phi->getOperand(i).getReg();  // Want IV reg after bump.
572   }
573   if (!InitialValue)
574     return nullptr;
575 
576   SmallVector<MachineOperand,2> Cond;
577   MachineBasicBlock *TB = nullptr, *FB = nullptr;
578   bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
579   if (NotAnalyzed)
580     return nullptr;
581 
582   MachineBasicBlock *Header = L->getHeader();
583   // TB must be non-null.  If FB is also non-null, one of them must be
584   // the header.  Otherwise, branch to TB could be exiting the loop, and
585   // the fall through can go to the header.
586   assert (TB && "Exit block without a branch?");
587   if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
588     MachineBasicBlock *LTB = 0, *LFB = 0;
589     SmallVector<MachineOperand,2> LCond;
590     bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
591     if (NotAnalyzed)
592       return nullptr;
593     if (TB == Latch)
594       TB = (LTB == Header) ? LTB : LFB;
595     else
596       FB = (LTB == Header) ? LTB: LFB;
597   }
598   assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
599   if (!TB || (FB && TB != Header && FB != Header))
600     return nullptr;
601 
602   // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
603   // to put imm(0), followed by P in the vector Cond.
604   // If TB is not the header, it means that the "not-taken" path must lead
605   // to the header.
606   bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
607   unsigned PredReg, PredPos, PredRegFlags;
608   if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
609     return nullptr;
610   MachineInstr *CondI = MRI->getVRegDef(PredReg);
611   unsigned CondOpc = CondI->getOpcode();
612 
613   unsigned CmpReg1 = 0, CmpReg2 = 0;
614   int Mask = 0, ImmValue = 0;
615   bool AnalyzedCmp =
616       TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
617   if (!AnalyzedCmp)
618     return nullptr;
619 
620   // The comparison operator type determines how we compute the loop
621   // trip count.
622   OldInsts.push_back(CondI);
623   OldInsts.push_back(IVOp);
624 
625   // Sadly, the following code gets information based on the position
626   // of the operands in the compare instruction.  This has to be done
627   // this way, because the comparisons check for a specific relationship
628   // between the operands (e.g. is-less-than), rather than to find out
629   // what relationship the operands are in (as on PPC).
630   Comparison::Kind Cmp;
631   bool isSwapped = false;
632   const MachineOperand &Op1 = CondI->getOperand(1);
633   const MachineOperand &Op2 = CondI->getOperand(2);
634   const MachineOperand *EndValue = nullptr;
635 
636   if (Op1.isReg()) {
637     if (Op2.isImm() || Op1.getReg() == IVReg)
638       EndValue = &Op2;
639     else {
640       EndValue = &Op1;
641       isSwapped = true;
642     }
643   }
644 
645   if (!EndValue)
646     return nullptr;
647 
648   Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
649   if (!Cmp)
650     return nullptr;
651   if (Negated)
652     Cmp = Comparison::getNegatedComparison(Cmp);
653   if (isSwapped)
654     Cmp = Comparison::getSwappedComparison(Cmp);
655 
656   if (InitialValue->isReg()) {
657     unsigned R = InitialValue->getReg();
658     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
659     if (!MDT->properlyDominates(DefBB, Header))
660       return nullptr;
661     OldInsts.push_back(MRI->getVRegDef(R));
662   }
663   if (EndValue->isReg()) {
664     unsigned R = EndValue->getReg();
665     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
666     if (!MDT->properlyDominates(DefBB, Header))
667       return nullptr;
668     OldInsts.push_back(MRI->getVRegDef(R));
669   }
670 
671   return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
672 }
673 
674 /// \brief Helper function that returns the expression that represents the
675 /// number of times a loop iterates.  The function takes the operands that
676 /// represent the loop start value, loop end value, and induction value.
677 /// Based upon these operands, the function attempts to compute the trip count.
678 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
679                                                const MachineOperand *Start,
680                                                const MachineOperand *End,
681                                                unsigned IVReg,
682                                                int64_t IVBump,
683                                                Comparison::Kind Cmp) const {
684   // Cannot handle comparison EQ, i.e. while (A == B).
685   if (Cmp == Comparison::EQ)
686     return nullptr;
687 
688   // Check if either the start or end values are an assignment of an immediate.
689   // If so, use the immediate value rather than the register.
690   if (Start->isReg()) {
691     const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
692     if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
693                           StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
694       Start = &StartValInstr->getOperand(1);
695   }
696   if (End->isReg()) {
697     const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
698     if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
699                         EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
700       End = &EndValInstr->getOperand(1);
701   }
702 
703   if (!Start->isReg() && !Start->isImm())
704     return nullptr;
705   if (!End->isReg() && !End->isImm())
706     return nullptr;
707 
708   bool CmpLess =     Cmp & Comparison::L;
709   bool CmpGreater =  Cmp & Comparison::G;
710   bool CmpHasEqual = Cmp & Comparison::EQ;
711 
712   // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
713   if (CmpLess && IVBump < 0)
714     // Loop going while iv is "less" with the iv value going down.  Must wrap.
715     return nullptr;
716 
717   if (CmpGreater && IVBump > 0)
718     // Loop going while iv is "greater" with the iv value going up.  Must wrap.
719     return nullptr;
720 
721   // Phis that may feed into the loop.
722   LoopFeederMap LoopFeederPhi;
723 
724   // Check if the initial value may be zero and can be decremented in the first
725   // iteration. If the value is zero, the endloop instruction will not decrement
726   // the loop counter, so we shouldn't generate a hardware loop in this case.
727   if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
728                                   LoopFeederPhi))
729       return nullptr;
730 
731   if (Start->isImm() && End->isImm()) {
732     // Both, start and end are immediates.
733     int64_t StartV = Start->getImm();
734     int64_t EndV = End->getImm();
735     int64_t Dist = EndV - StartV;
736     if (Dist == 0)
737       return nullptr;
738 
739     bool Exact = (Dist % IVBump) == 0;
740 
741     if (Cmp == Comparison::NE) {
742       if (!Exact)
743         return nullptr;
744       if ((Dist < 0) ^ (IVBump < 0))
745         return nullptr;
746     }
747 
748     // For comparisons that include the final value (i.e. include equality
749     // with the final value), we need to increase the distance by 1.
750     if (CmpHasEqual)
751       Dist = Dist > 0 ? Dist+1 : Dist-1;
752 
753     // For the loop to iterate, CmpLess should imply Dist > 0.  Similarly,
754     // CmpGreater should imply Dist < 0.  These conditions could actually
755     // fail, for example, in unreachable code (which may still appear to be
756     // reachable in the CFG).
757     if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
758       return nullptr;
759 
760     // "Normalized" distance, i.e. with the bump set to +-1.
761     int64_t Dist1 = (IVBump > 0) ? (Dist +  (IVBump - 1)) / IVBump
762                                  : (-Dist + (-IVBump - 1)) / (-IVBump);
763     assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
764 
765     uint64_t Count = Dist1;
766 
767     if (Count > 0xFFFFFFFFULL)
768       return nullptr;
769 
770     return new CountValue(CountValue::CV_Immediate, Count);
771   }
772 
773   // A general case: Start and End are some values, but the actual
774   // iteration count may not be available.  If it is not, insert
775   // a computation of it into the preheader.
776 
777   // If the induction variable bump is not a power of 2, quit.
778   // Othwerise we'd need a general integer division.
779   if (!isPowerOf2_64(std::abs(IVBump)))
780     return nullptr;
781 
782   MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
783   assert (PH && "Should have a preheader by now");
784   MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
785   DebugLoc DL;
786   if (InsertPos != PH->end())
787     DL = InsertPos->getDebugLoc();
788 
789   // If Start is an immediate and End is a register, the trip count
790   // will be "reg - imm".  Hexagon's "subtract immediate" instruction
791   // is actually "reg + -imm".
792 
793   // If the loop IV is going downwards, i.e. if the bump is negative,
794   // then the iteration count (computed as End-Start) will need to be
795   // negated.  To avoid the negation, just swap Start and End.
796   if (IVBump < 0) {
797     std::swap(Start, End);
798     IVBump = -IVBump;
799   }
800   // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
801   // Signedness, and "including equality" are preserved.
802 
803   bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
804   bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
805 
806   int64_t StartV = 0, EndV = 0;
807   if (Start->isImm())
808     StartV = Start->getImm();
809   if (End->isImm())
810     EndV = End->getImm();
811 
812   int64_t AdjV = 0;
813   // To compute the iteration count, we would need this computation:
814   //   Count = (End - Start + (IVBump-1)) / IVBump
815   // or, when CmpHasEqual:
816   //   Count = (End - Start + (IVBump-1)+1) / IVBump
817   // The "IVBump-1" part is the adjustment (AdjV).  We can avoid
818   // generating an instruction specifically to add it if we can adjust
819   // the immediate values for Start or End.
820 
821   if (CmpHasEqual) {
822     // Need to add 1 to the total iteration count.
823     if (Start->isImm())
824       StartV--;
825     else if (End->isImm())
826       EndV++;
827     else
828       AdjV += 1;
829   }
830 
831   if (Cmp != Comparison::NE) {
832     if (Start->isImm())
833       StartV -= (IVBump-1);
834     else if (End->isImm())
835       EndV += (IVBump-1);
836     else
837       AdjV += (IVBump-1);
838   }
839 
840   unsigned R = 0, SR = 0;
841   if (Start->isReg()) {
842     R = Start->getReg();
843     SR = Start->getSubReg();
844   } else {
845     R = End->getReg();
846     SR = End->getSubReg();
847   }
848   const TargetRegisterClass *RC = MRI->getRegClass(R);
849   // Hardware loops cannot handle 64-bit registers.  If it's a double
850   // register, it has to have a subregister.
851   if (!SR && RC == &Hexagon::DoubleRegsRegClass)
852     return nullptr;
853   const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
854 
855   // Compute DistR (register with the distance between Start and End).
856   unsigned DistR, DistSR;
857 
858   // Avoid special case, where the start value is an imm(0).
859   if (Start->isImm() && StartV == 0) {
860     DistR = End->getReg();
861     DistSR = End->getSubReg();
862   } else {
863     const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
864                               (RegToImm ? TII->get(Hexagon::A2_subri) :
865                                           TII->get(Hexagon::A2_addi));
866     if (RegToReg || RegToImm) {
867       unsigned SubR = MRI->createVirtualRegister(IntRC);
868       MachineInstrBuilder SubIB =
869         BuildMI(*PH, InsertPos, DL, SubD, SubR);
870 
871       if (RegToReg)
872         SubIB.addReg(End->getReg(), 0, End->getSubReg())
873           .addReg(Start->getReg(), 0, Start->getSubReg());
874       else
875         SubIB.addImm(EndV)
876           .addReg(Start->getReg(), 0, Start->getSubReg());
877       DistR = SubR;
878     } else {
879       // If the loop has been unrolled, we should use the original loop count
880       // instead of recalculating the value. This will avoid additional
881       // 'Add' instruction.
882       const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
883       if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
884           EndValInstr->getOperand(2).getImm() == StartV) {
885         DistR = EndValInstr->getOperand(1).getReg();
886       } else {
887         unsigned SubR = MRI->createVirtualRegister(IntRC);
888         MachineInstrBuilder SubIB =
889           BuildMI(*PH, InsertPos, DL, SubD, SubR);
890         SubIB.addReg(End->getReg(), 0, End->getSubReg())
891              .addImm(-StartV);
892         DistR = SubR;
893       }
894     }
895     DistSR = 0;
896   }
897 
898   // From DistR, compute AdjR (register with the adjusted distance).
899   unsigned AdjR, AdjSR;
900 
901   if (AdjV == 0) {
902     AdjR = DistR;
903     AdjSR = DistSR;
904   } else {
905     // Generate CountR = ADD DistR, AdjVal
906     unsigned AddR = MRI->createVirtualRegister(IntRC);
907     MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
908     BuildMI(*PH, InsertPos, DL, AddD, AddR)
909       .addReg(DistR, 0, DistSR)
910       .addImm(AdjV);
911 
912     AdjR = AddR;
913     AdjSR = 0;
914   }
915 
916   // From AdjR, compute CountR (register with the final count).
917   unsigned CountR, CountSR;
918 
919   if (IVBump == 1) {
920     CountR = AdjR;
921     CountSR = AdjSR;
922   } else {
923     // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
924     unsigned Shift = Log2_32(IVBump);
925 
926     // Generate NormR = LSR DistR, Shift.
927     unsigned LsrR = MRI->createVirtualRegister(IntRC);
928     const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
929     BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
930       .addReg(AdjR, 0, AdjSR)
931       .addImm(Shift);
932 
933     CountR = LsrR;
934     CountSR = 0;
935   }
936 
937   return new CountValue(CountValue::CV_Register, CountR, CountSR);
938 }
939 
940 /// \brief Return true if the operation is invalid within hardware loop.
941 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
942                                                   bool IsInnerHWLoop) const {
943 
944   // Call is not allowed because the callee may use a hardware loop except for
945   // the case when the call never returns.
946   if (MI->getDesc().isCall())
947     return !TII->doesNotReturn(*MI);
948 
949   // Check if the instruction defines a hardware loop register.
950   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
951     const MachineOperand &MO = MI->getOperand(i);
952     if (!MO.isReg() || !MO.isDef())
953       continue;
954     unsigned R = MO.getReg();
955     if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
956                           R == Hexagon::LC1 || R == Hexagon::SA1))
957       return true;
958     if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
959       return true;
960   }
961   return false;
962 }
963 
964 /// \brief Return true if the loop contains an instruction that inhibits
965 /// the use of the hardware loop instruction.
966 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
967     bool IsInnerHWLoop) const {
968   const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
969   DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
970   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
971     MachineBasicBlock *MBB = Blocks[i];
972     for (MachineBasicBlock::iterator
973            MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
974       const MachineInstr *MI = &*MII;
975       if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
976         DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
977         return true;
978       }
979     }
980   }
981   return false;
982 }
983 
984 /// \brief Returns true if the instruction is dead.  This was essentially
985 /// copied from DeadMachineInstructionElim::isDead, but with special cases
986 /// for inline asm, physical registers and instructions with side effects
987 /// removed.
988 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
989                               SmallVectorImpl<MachineInstr *> &DeadPhis) const {
990   // Examine each operand.
991   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
992     const MachineOperand &MO = MI->getOperand(i);
993     if (!MO.isReg() || !MO.isDef())
994       continue;
995 
996     unsigned Reg = MO.getReg();
997     if (MRI->use_nodbg_empty(Reg))
998       continue;
999 
1000     typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
1001 
1002     // This instruction has users, but if the only user is the phi node for the
1003     // parent block, and the only use of that phi node is this instruction, then
1004     // this instruction is dead: both it (and the phi node) can be removed.
1005     use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1006     use_nodbg_iterator End = MRI->use_nodbg_end();
1007     if (std::next(I) != End || !I->getParent()->isPHI())
1008       return false;
1009 
1010     MachineInstr *OnePhi = I->getParent();
1011     for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1012       const MachineOperand &OPO = OnePhi->getOperand(j);
1013       if (!OPO.isReg() || !OPO.isDef())
1014         continue;
1015 
1016       unsigned OPReg = OPO.getReg();
1017       use_nodbg_iterator nextJ;
1018       for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1019            J != End; J = nextJ) {
1020         nextJ = std::next(J);
1021         MachineOperand &Use = *J;
1022         MachineInstr *UseMI = Use.getParent();
1023 
1024         // If the phi node has a user that is not MI, bail.
1025         if (MI != UseMI)
1026           return false;
1027       }
1028     }
1029     DeadPhis.push_back(OnePhi);
1030   }
1031 
1032   // If there are no defs with uses, the instruction is dead.
1033   return true;
1034 }
1035 
1036 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1037   // This procedure was essentially copied from DeadMachineInstructionElim.
1038 
1039   SmallVector<MachineInstr*, 1> DeadPhis;
1040   if (isDead(MI, DeadPhis)) {
1041     DEBUG(dbgs() << "HW looping will remove: " << *MI);
1042 
1043     // It is possible that some DBG_VALUE instructions refer to this
1044     // instruction.  Examine each def operand for such references;
1045     // if found, mark the DBG_VALUE as undef (but don't delete it).
1046     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1047       const MachineOperand &MO = MI->getOperand(i);
1048       if (!MO.isReg() || !MO.isDef())
1049         continue;
1050       unsigned Reg = MO.getReg();
1051       MachineRegisterInfo::use_iterator nextI;
1052       for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
1053            E = MRI->use_end(); I != E; I = nextI) {
1054         nextI = std::next(I);  // I is invalidated by the setReg
1055         MachineOperand &Use = *I;
1056         MachineInstr *UseMI = I->getParent();
1057         if (UseMI == MI)
1058           continue;
1059         if (Use.isDebug())
1060           UseMI->getOperand(0).setReg(0U);
1061       }
1062     }
1063 
1064     MI->eraseFromParent();
1065     for (unsigned i = 0; i < DeadPhis.size(); ++i)
1066       DeadPhis[i]->eraseFromParent();
1067   }
1068 }
1069 
1070 /// \brief Check if the loop is a candidate for converting to a hardware
1071 /// loop.  If so, then perform the transformation.
1072 ///
1073 /// This function works on innermost loops first.  A loop can be converted
1074 /// if it is a counting loop; either a register value or an immediate.
1075 ///
1076 /// The code makes several assumptions about the representation of the loop
1077 /// in llvm.
1078 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1079                                                  bool &RecL0used,
1080                                                  bool &RecL1used) {
1081   // This is just for sanity.
1082   assert(L->getHeader() && "Loop without a header?");
1083 
1084   bool Changed = false;
1085   bool L0Used = false;
1086   bool L1Used = false;
1087 
1088   // Process nested loops first.
1089   for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1090     Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1091     L0Used |= RecL0used;
1092     L1Used |= RecL1used;
1093   }
1094 
1095   // If a nested loop has been converted, then we can't convert this loop.
1096   if (Changed && L0Used && L1Used)
1097     return Changed;
1098 
1099   unsigned LOOP_i;
1100   unsigned LOOP_r;
1101   unsigned ENDLOOP;
1102 
1103   // Flag used to track loopN instruction:
1104   // 1 - Hardware loop is being generated for the inner most loop.
1105   // 0 - Hardware loop is being generated for the outer loop.
1106   unsigned IsInnerHWLoop = 1;
1107 
1108   if (L0Used) {
1109     LOOP_i = Hexagon::J2_loop1i;
1110     LOOP_r = Hexagon::J2_loop1r;
1111     ENDLOOP = Hexagon::ENDLOOP1;
1112     IsInnerHWLoop = 0;
1113   } else {
1114     LOOP_i = Hexagon::J2_loop0i;
1115     LOOP_r = Hexagon::J2_loop0r;
1116     ENDLOOP = Hexagon::ENDLOOP0;
1117   }
1118 
1119 #ifndef NDEBUG
1120   // Stop trying after reaching the limit (if any).
1121   int Limit = HWLoopLimit;
1122   if (Limit >= 0) {
1123     if (Counter >= HWLoopLimit)
1124       return false;
1125     Counter++;
1126   }
1127 #endif
1128 
1129   // Does the loop contain any invalid instructions?
1130   if (containsInvalidInstruction(L, IsInnerHWLoop))
1131     return false;
1132 
1133   MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1134   // Don't generate hw loop if the loop has more than one exit.
1135   if (!LastMBB)
1136     return false;
1137 
1138   MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1139   if (LastI == LastMBB->end())
1140     return false;
1141 
1142   // Is the induction variable bump feeding the latch condition?
1143   if (!fixupInductionVariable(L))
1144     return false;
1145 
1146   // Ensure the loop has a preheader: the loop instruction will be
1147   // placed there.
1148   MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1149   if (!Preheader) {
1150     Preheader = createPreheaderForLoop(L);
1151     if (!Preheader)
1152       return false;
1153   }
1154 
1155   MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1156 
1157   SmallVector<MachineInstr*, 2> OldInsts;
1158   // Are we able to determine the trip count for the loop?
1159   CountValue *TripCount = getLoopTripCount(L, OldInsts);
1160   if (!TripCount)
1161     return false;
1162 
1163   // Is the trip count available in the preheader?
1164   if (TripCount->isReg()) {
1165     // There will be a use of the register inserted into the preheader,
1166     // so make sure that the register is actually defined at that point.
1167     MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1168     MachineBasicBlock *BBDef = TCDef->getParent();
1169     if (!MDT->dominates(BBDef, Preheader))
1170       return false;
1171   }
1172 
1173   // Determine the loop start.
1174   MachineBasicBlock *TopBlock = L->getTopBlock();
1175   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1176   MachineBasicBlock *LoopStart = 0;
1177   if (ExitingBlock !=  L->getLoopLatch()) {
1178     MachineBasicBlock *TB = 0, *FB = 0;
1179     SmallVector<MachineOperand, 2> Cond;
1180 
1181     if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1182       return false;
1183 
1184     if (L->contains(TB))
1185       LoopStart = TB;
1186     else if (L->contains(FB))
1187       LoopStart = FB;
1188     else
1189       return false;
1190   }
1191   else
1192     LoopStart = TopBlock;
1193 
1194   // Convert the loop to a hardware loop.
1195   DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1196   DebugLoc DL;
1197   if (InsertPos != Preheader->end())
1198     DL = InsertPos->getDebugLoc();
1199 
1200   if (TripCount->isReg()) {
1201     // Create a copy of the loop count register.
1202     unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1203     BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1204       .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1205     // Add the Loop instruction to the beginning of the loop.
1206     BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1207       .addReg(CountReg);
1208   } else {
1209     assert(TripCount->isImm() && "Expecting immediate value for trip count");
1210     // Add the Loop immediate instruction to the beginning of the loop,
1211     // if the immediate fits in the instructions.  Otherwise, we need to
1212     // create a new virtual register.
1213     int64_t CountImm = TripCount->getImm();
1214     if (!TII->isValidOffset(LOOP_i, CountImm)) {
1215       unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1216       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1217         .addImm(CountImm);
1218       BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1219         .addMBB(LoopStart).addReg(CountReg);
1220     } else
1221       BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1222         .addMBB(LoopStart).addImm(CountImm);
1223   }
1224 
1225   // Make sure the loop start always has a reference in the CFG.  We need
1226   // to create a BlockAddress operand to get this mechanism to work both the
1227   // MachineBasicBlock and BasicBlock objects need the flag set.
1228   LoopStart->setHasAddressTaken();
1229   // This line is needed to set the hasAddressTaken flag on the BasicBlock
1230   // object.
1231   BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1232 
1233   // Replace the loop branch with an endloop instruction.
1234   DebugLoc LastIDL = LastI->getDebugLoc();
1235   BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1236 
1237   // The loop ends with either:
1238   //  - a conditional branch followed by an unconditional branch, or
1239   //  - a conditional branch to the loop start.
1240   if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1241       LastI->getOpcode() == Hexagon::J2_jumpf) {
1242     // Delete one and change/add an uncond. branch to out of the loop.
1243     MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1244     LastI = LastMBB->erase(LastI);
1245     if (!L->contains(BranchTarget)) {
1246       if (LastI != LastMBB->end())
1247         LastI = LastMBB->erase(LastI);
1248       SmallVector<MachineOperand, 0> Cond;
1249       TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1250     }
1251   } else {
1252     // Conditional branch to loop start; just delete it.
1253     LastMBB->erase(LastI);
1254   }
1255   delete TripCount;
1256 
1257   // The induction operation and the comparison may now be
1258   // unneeded. If these are unneeded, then remove them.
1259   for (unsigned i = 0; i < OldInsts.size(); ++i)
1260     removeIfDead(OldInsts[i]);
1261 
1262   ++NumHWLoops;
1263 
1264   // Set RecL1used and RecL0used only after hardware loop has been
1265   // successfully generated. Doing it earlier can cause wrong loop instruction
1266   // to be used.
1267   if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1268     RecL1used = true;
1269   else
1270     RecL0used = true;
1271 
1272   return true;
1273 }
1274 
1275 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1276                                             MachineInstr *CmpI) {
1277   assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1278 
1279   MachineBasicBlock *BB = BumpI->getParent();
1280   if (CmpI->getParent() != BB)
1281     return false;
1282 
1283   typedef MachineBasicBlock::instr_iterator instr_iterator;
1284   // Check if things are in order to begin with.
1285   for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1286     if (&*I == CmpI)
1287       return true;
1288 
1289   // Out of order.
1290   unsigned PredR = CmpI->getOperand(0).getReg();
1291   bool FoundBump = false;
1292   instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1293   for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1294     MachineInstr *In = &*I;
1295     for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1296       MachineOperand &MO = In->getOperand(i);
1297       if (MO.isReg() && MO.isUse()) {
1298         if (MO.getReg() == PredR)  // Found an intervening use of PredR.
1299           return false;
1300       }
1301     }
1302 
1303     if (In == BumpI) {
1304       BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1305       FoundBump = true;
1306       break;
1307     }
1308   }
1309   assert (FoundBump && "Cannot determine instruction order");
1310   return FoundBump;
1311 }
1312 
1313 /// This function is required to break recursion. Visiting phis in a loop may
1314 /// result in recursion during compilation. We break the recursion by making
1315 /// sure that we visit a MachineOperand and its definition in a
1316 /// MachineInstruction only once. If we attempt to visit more than once, then
1317 /// there is recursion, and will return false.
1318 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1319                                         MachineInstr *MI,
1320                                         const MachineOperand *MO,
1321                                         LoopFeederMap &LoopFeederPhi) const {
1322   if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1323     const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
1324     DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber(););
1325     // Ignore all BBs that form Loop.
1326     for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1327       MachineBasicBlock *MBB = Blocks[i];
1328       if (A == MBB)
1329         return false;
1330     }
1331     MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1332     LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1333     return true;
1334   } else
1335     // Already visited node.
1336     return false;
1337 }
1338 
1339 /// Return true if a Phi may generate a value that can underflow.
1340 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1341 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1342     MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1343     MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1344   assert(Phi->isPHI() && "Expecting a Phi.");
1345   // Walk through each Phi, and its used operands. Make sure that
1346   // if there is recursion in Phi, we won't generate hardware loops.
1347   for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1348     if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1349       if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1350                                       Phi->getParent(), L, LoopFeederPhi))
1351         return true;
1352   return false;
1353 }
1354 
1355 /// Return true if the induction variable can underflow in the first iteration.
1356 /// An example, is an initial unsigned value that is 0 and is decrement in the
1357 /// first itertion of a do-while loop.  In this case, we cannot generate a
1358 /// hardware loop because the endloop instruction does not decrement the loop
1359 /// counter if it is <= 1. We only need to perform this analysis if the
1360 /// initial value is a register.
1361 ///
1362 /// This function assumes the initial value may underfow unless proven
1363 /// otherwise. If the type is signed, then we don't care because signed
1364 /// underflow is undefined. We attempt to prove the initial value is not
1365 /// zero by perfoming a crude analysis of the loop counter. This function
1366 /// checks if the initial value is used in any comparison prior to the loop
1367 /// and, if so, assumes the comparison is a range check. This is inexact,
1368 /// but will catch the simple cases.
1369 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1370     const MachineOperand *InitVal, const MachineOperand *EndVal,
1371     MachineBasicBlock *MBB, MachineLoop *L,
1372     LoopFeederMap &LoopFeederPhi) const {
1373   // Only check register values since they are unknown.
1374   if (!InitVal->isReg())
1375     return false;
1376 
1377   if (!EndVal->isImm())
1378     return false;
1379 
1380   // A register value that is assigned an immediate is a known value, and it
1381   // won't underflow in the first iteration.
1382   int64_t Imm;
1383   if (checkForImmediate(*InitVal, Imm))
1384     return (EndVal->getImm() == Imm);
1385 
1386   unsigned Reg = InitVal->getReg();
1387 
1388   // We don't know the value of a physical register.
1389   if (!TargetRegisterInfo::isVirtualRegister(Reg))
1390     return true;
1391 
1392   MachineInstr *Def = MRI->getVRegDef(Reg);
1393   if (!Def)
1394     return true;
1395 
1396   // If the initial value is a Phi or copy and the operands may not underflow,
1397   // then the definition cannot be underflow either.
1398   if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1399                                              L, LoopFeederPhi))
1400     return false;
1401   if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1402                                                     EndVal, Def->getParent(),
1403                                                     L, LoopFeederPhi))
1404     return false;
1405 
1406   // Iterate over the uses of the initial value. If the initial value is used
1407   // in a compare, then we assume this is a range check that ensures the loop
1408   // doesn't underflow. This is not an exact test and should be improved.
1409   for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1410          E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1411     MachineInstr *MI = &*I;
1412     unsigned CmpReg1 = 0, CmpReg2 = 0;
1413     int CmpMask = 0, CmpValue = 0;
1414 
1415     if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1416       continue;
1417 
1418     MachineBasicBlock *TBB = 0, *FBB = 0;
1419     SmallVector<MachineOperand, 2> Cond;
1420     if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1421       continue;
1422 
1423     Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0);
1424     if (Cmp == 0)
1425       continue;
1426     if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1427       Cmp = Comparison::getNegatedComparison(Cmp);
1428     if (CmpReg2 != 0 && CmpReg2 == Reg)
1429       Cmp = Comparison::getSwappedComparison(Cmp);
1430 
1431     // Signed underflow is undefined.
1432     if (Comparison::isSigned(Cmp))
1433       return false;
1434 
1435     // Check if there is a comparison of the initial value. If the initial value
1436     // is greater than or not equal to another value, then assume this is a
1437     // range check.
1438     if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1439       return false;
1440   }
1441 
1442   // OK - this is a hack that needs to be improved. We really need to analyze
1443   // the instructions performed on the initial value. This works on the simplest
1444   // cases only.
1445   if (!Def->isCopy() && !Def->isPHI())
1446     return false;
1447 
1448   return true;
1449 }
1450 
1451 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1452                                              int64_t &Val) const {
1453   if (MO.isImm()) {
1454     Val = MO.getImm();
1455     return true;
1456   }
1457   if (!MO.isReg())
1458     return false;
1459 
1460   // MO is a register. Check whether it is defined as an immediate value,
1461   // and if so, get the value of it in TV. That value will then need to be
1462   // processed to handle potential subregisters in MO.
1463   int64_t TV;
1464 
1465   unsigned R = MO.getReg();
1466   if (!TargetRegisterInfo::isVirtualRegister(R))
1467     return false;
1468   MachineInstr *DI = MRI->getVRegDef(R);
1469   unsigned DOpc = DI->getOpcode();
1470   switch (DOpc) {
1471     case TargetOpcode::COPY:
1472     case Hexagon::A2_tfrsi:
1473     case Hexagon::A2_tfrpi:
1474     case Hexagon::CONST32:
1475     case Hexagon::CONST64: {
1476       // Call recursively to avoid an extra check whether operand(1) is
1477       // indeed an immediate (it could be a global address, for example),
1478       // plus we can handle COPY at the same time.
1479       if (!checkForImmediate(DI->getOperand(1), TV))
1480         return false;
1481       break;
1482     }
1483     case Hexagon::A2_combineii:
1484     case Hexagon::A4_combineir:
1485     case Hexagon::A4_combineii:
1486     case Hexagon::A4_combineri:
1487     case Hexagon::A2_combinew: {
1488       const MachineOperand &S1 = DI->getOperand(1);
1489       const MachineOperand &S2 = DI->getOperand(2);
1490       int64_t V1, V2;
1491       if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1492         return false;
1493       TV = V2 | (V1 << 32);
1494       break;
1495     }
1496     case TargetOpcode::REG_SEQUENCE: {
1497       const MachineOperand &S1 = DI->getOperand(1);
1498       const MachineOperand &S3 = DI->getOperand(3);
1499       int64_t V1, V3;
1500       if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1501         return false;
1502       unsigned Sub2 = DI->getOperand(2).getImm();
1503       unsigned Sub4 = DI->getOperand(4).getImm();
1504       if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1505         TV = V1 | (V3 << 32);
1506       else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1507         TV = V3 | (V1 << 32);
1508       else
1509         llvm_unreachable("Unexpected form of REG_SEQUENCE");
1510       break;
1511     }
1512 
1513     default:
1514       return false;
1515   }
1516 
1517   // By now, we should have successfully obtained the immediate value defining
1518   // the register referenced in MO. Handle a potential use of a subregister.
1519   switch (MO.getSubReg()) {
1520     case Hexagon::isub_lo:
1521       Val = TV & 0xFFFFFFFFULL;
1522       break;
1523     case Hexagon::isub_hi:
1524       Val = (TV >> 32) & 0xFFFFFFFFULL;
1525       break;
1526     default:
1527       Val = TV;
1528       break;
1529   }
1530   return true;
1531 }
1532 
1533 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1534   if (MO.isImm()) {
1535     MO.setImm(Val);
1536     return;
1537   }
1538 
1539   assert(MO.isReg());
1540   unsigned R = MO.getReg();
1541   MachineInstr *DI = MRI->getVRegDef(R);
1542 
1543   const TargetRegisterClass *RC = MRI->getRegClass(R);
1544   unsigned NewR = MRI->createVirtualRegister(RC);
1545   MachineBasicBlock &B = *DI->getParent();
1546   DebugLoc DL = DI->getDebugLoc();
1547   BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1548   MO.setReg(NewR);
1549 }
1550 
1551 static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1552   // These two instructions are not extendable.
1553   if (CmpOpc == Hexagon::A4_cmpbeqi)
1554     return isUInt<8>(Imm);
1555   if (CmpOpc == Hexagon::A4_cmpbgti)
1556     return isInt<8>(Imm);
1557   // The rest of the comparison-with-immediate instructions are extendable.
1558   return true;
1559 }
1560 
1561 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1562   MachineBasicBlock *Header = L->getHeader();
1563   MachineBasicBlock *Latch = L->getLoopLatch();
1564   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1565 
1566   if (!(Header && Latch && ExitingBlock))
1567     return false;
1568 
1569   // These data structures follow the same concept as the corresponding
1570   // ones in findInductionRegister (where some comments are).
1571   typedef std::pair<unsigned,int64_t> RegisterBump;
1572   typedef std::pair<unsigned,RegisterBump> RegisterInduction;
1573   typedef std::set<RegisterInduction> RegisterInductionSet;
1574 
1575   // Register candidates for induction variables, with their associated bumps.
1576   RegisterInductionSet IndRegs;
1577 
1578   // Look for induction patterns:
1579   //   vreg1 = PHI ..., [ latch, vreg2 ]
1580   //   vreg2 = ADD vreg1, imm
1581   typedef MachineBasicBlock::instr_iterator instr_iterator;
1582   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1583        I != E && I->isPHI(); ++I) {
1584     MachineInstr *Phi = &*I;
1585 
1586     // Have a PHI instruction.
1587     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1588       if (Phi->getOperand(i+1).getMBB() != Latch)
1589         continue;
1590 
1591       unsigned PhiReg = Phi->getOperand(i).getReg();
1592       MachineInstr *DI = MRI->getVRegDef(PhiReg);
1593 
1594       if (DI->getDesc().isAdd()) {
1595         // If the register operand to the add/sub is the PHI we are looking
1596         // at, this meets the induction pattern.
1597         unsigned IndReg = DI->getOperand(1).getReg();
1598         MachineOperand &Opnd2 = DI->getOperand(2);
1599         int64_t V;
1600         if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1601           unsigned UpdReg = DI->getOperand(0).getReg();
1602           IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1603         }
1604       }
1605     }  // for (i)
1606   }  // for (instr)
1607 
1608   if (IndRegs.empty())
1609     return false;
1610 
1611   MachineBasicBlock *TB = nullptr, *FB = nullptr;
1612   SmallVector<MachineOperand,2> Cond;
1613   // AnalyzeBranch returns true if it fails to analyze branch.
1614   bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1615   if (NotAnalyzed || Cond.empty())
1616     return false;
1617 
1618   if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1619     MachineBasicBlock *LTB = 0, *LFB = 0;
1620     SmallVector<MachineOperand,2> LCond;
1621     bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1622     if (NotAnalyzed)
1623       return false;
1624 
1625     // Since latch is not the exiting block, the latch branch should be an
1626     // unconditional branch to the loop header.
1627     if (TB == Latch)
1628       TB = (LTB == Header) ? LTB : LFB;
1629     else
1630       FB = (LTB == Header) ? LTB : LFB;
1631   }
1632   if (TB != Header) {
1633     if (FB != Header) {
1634       // The latch/exit block does not go back to the header.
1635       return false;
1636     }
1637     // FB is the header (i.e., uncond. jump to branch header)
1638     // In this case, the LoopBody -> TB should not be a back edge otherwise
1639     // it could result in an infinite loop after conversion to hw_loop.
1640     // This case can happen when the Latch has two jumps like this:
1641     // Jmp_c OuterLoopHeader <-- TB
1642     // Jmp   InnerLoopHeader <-- FB
1643     if (MDT->dominates(TB, FB))
1644       return false;
1645   }
1646 
1647   // Expecting a predicate register as a condition.  It won't be a hardware
1648   // predicate register at this point yet, just a vreg.
1649   // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1650   // into Cond, followed by the predicate register.  For non-negated branches
1651   // it's just the register.
1652   unsigned CSz = Cond.size();
1653   if (CSz != 1 && CSz != 2)
1654     return false;
1655 
1656   if (!Cond[CSz-1].isReg())
1657     return false;
1658 
1659   unsigned P = Cond[CSz-1].getReg();
1660   MachineInstr *PredDef = MRI->getVRegDef(P);
1661 
1662   if (!PredDef->isCompare())
1663     return false;
1664 
1665   SmallSet<unsigned,2> CmpRegs;
1666   MachineOperand *CmpImmOp = nullptr;
1667 
1668   // Go over all operands to the compare and look for immediate and register
1669   // operands.  Assume that if the compare has a single register use and a
1670   // single immediate operand, then the register is being compared with the
1671   // immediate value.
1672   for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1673     MachineOperand &MO = PredDef->getOperand(i);
1674     if (MO.isReg()) {
1675       // Skip all implicit references.  In one case there was:
1676       //   %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
1677       if (MO.isImplicit())
1678         continue;
1679       if (MO.isUse()) {
1680         if (!isImmediate(MO)) {
1681           CmpRegs.insert(MO.getReg());
1682           continue;
1683         }
1684         // Consider the register to be the "immediate" operand.
1685         if (CmpImmOp)
1686           return false;
1687         CmpImmOp = &MO;
1688       }
1689     } else if (MO.isImm()) {
1690       if (CmpImmOp)    // A second immediate argument?  Confusing.  Bail out.
1691         return false;
1692       CmpImmOp = &MO;
1693     }
1694   }
1695 
1696   if (CmpRegs.empty())
1697     return false;
1698 
1699   // Check if the compared register follows the order we want.  Fix if needed.
1700   for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1701        I != E; ++I) {
1702     // This is a success.  If the register used in the comparison is one that
1703     // we have identified as a bumped (updated) induction register, there is
1704     // nothing to do.
1705     if (CmpRegs.count(I->first))
1706       return true;
1707 
1708     // Otherwise, if the register being compared comes out of a PHI node,
1709     // and has been recognized as following the induction pattern, and is
1710     // compared against an immediate, we can fix it.
1711     const RegisterBump &RB = I->second;
1712     if (CmpRegs.count(RB.first)) {
1713       if (!CmpImmOp) {
1714         // If both operands to the compare instruction are registers, see if
1715         // it can be changed to use induction register as one of the operands.
1716         MachineInstr *IndI = nullptr;
1717         MachineInstr *nonIndI = nullptr;
1718         MachineOperand *IndMO = nullptr;
1719         MachineOperand *nonIndMO = nullptr;
1720 
1721         for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1722           MachineOperand &MO = PredDef->getOperand(i);
1723           if (MO.isReg() && MO.getReg() == RB.first) {
1724             DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1725                          << *(MRI->getVRegDef(I->first)));
1726             if (IndI)
1727               return false;
1728 
1729             IndI = MRI->getVRegDef(I->first);
1730             IndMO = &MO;
1731           } else if (MO.isReg()) {
1732             DEBUG(dbgs() << "\n DefMI(" << i << ") = "
1733                          << *(MRI->getVRegDef(MO.getReg())));
1734             if (nonIndI)
1735               return false;
1736 
1737             nonIndI = MRI->getVRegDef(MO.getReg());
1738             nonIndMO = &MO;
1739           }
1740         }
1741         if (IndI && nonIndI &&
1742             nonIndI->getOpcode() == Hexagon::A2_addi &&
1743             nonIndI->getOperand(2).isImm() &&
1744             nonIndI->getOperand(2).getImm() == - RB.second) {
1745           bool Order = orderBumpCompare(IndI, PredDef);
1746           if (Order) {
1747             IndMO->setReg(I->first);
1748             nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1749             return true;
1750           }
1751         }
1752         return false;
1753       }
1754 
1755       // It is not valid to do this transformation on an unsigned comparison
1756       // because it may underflow.
1757       Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0);
1758       if (!Cmp || Comparison::isUnsigned(Cmp))
1759         return false;
1760 
1761       // If the register is being compared against an immediate, try changing
1762       // the compare instruction to use induction register and adjust the
1763       // immediate operand.
1764       int64_t CmpImm = getImmediate(*CmpImmOp);
1765       int64_t V = RB.second;
1766       // Handle Overflow (64-bit).
1767       if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1768           ((V < 0) && (CmpImm < INT64_MIN - V)))
1769         return false;
1770       CmpImm += V;
1771       // Most comparisons of register against an immediate value allow
1772       // the immediate to be constant-extended. There are some exceptions
1773       // though. Make sure the new combination will work.
1774       if (CmpImmOp->isImm())
1775         if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1776           return false;
1777 
1778       // Make sure that the compare happens after the bump.  Otherwise,
1779       // after the fixup, the compare would use a yet-undefined register.
1780       MachineInstr *BumpI = MRI->getVRegDef(I->first);
1781       bool Order = orderBumpCompare(BumpI, PredDef);
1782       if (!Order)
1783         return false;
1784 
1785       // Finally, fix the compare instruction.
1786       setImmediate(*CmpImmOp, CmpImm);
1787       for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1788         MachineOperand &MO = PredDef->getOperand(i);
1789         if (MO.isReg() && MO.getReg() == RB.first) {
1790           MO.setReg(I->first);
1791           return true;
1792         }
1793       }
1794     }
1795   }
1796 
1797   return false;
1798 }
1799 
1800 /// createPreheaderForLoop - Create a preheader for a given loop.
1801 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1802       MachineLoop *L) {
1803   if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1804     return TmpPH;
1805   if (!HWCreatePreheader)
1806     return nullptr;
1807 
1808   MachineBasicBlock *Header = L->getHeader();
1809   MachineBasicBlock *Latch = L->getLoopLatch();
1810   MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1811   MachineFunction *MF = Header->getParent();
1812   DebugLoc DL;
1813 
1814 #ifndef NDEBUG
1815   if ((PHFn != "") && (PHFn != MF->getName()))
1816     return nullptr;
1817 #endif
1818 
1819   if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1820     return nullptr;
1821 
1822   typedef MachineBasicBlock::instr_iterator instr_iterator;
1823 
1824   // Verify that all existing predecessors have analyzable branches
1825   // (or no branches at all).
1826   typedef std::vector<MachineBasicBlock*> MBBVector;
1827   MBBVector Preds(Header->pred_begin(), Header->pred_end());
1828   SmallVector<MachineOperand,2> Tmp1;
1829   MachineBasicBlock *TB = nullptr, *FB = nullptr;
1830 
1831   if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1832     return nullptr;
1833 
1834   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1835     MachineBasicBlock *PB = *I;
1836     bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1837     if (NotAnalyzed)
1838       return nullptr;
1839   }
1840 
1841   MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1842   MF->insert(Header->getIterator(), NewPH);
1843 
1844   if (Header->pred_size() > 2) {
1845     // Ensure that the header has only two predecessors: the preheader and
1846     // the loop latch.  Any additional predecessors of the header should
1847     // join at the newly created preheader. Inspect all PHI nodes from the
1848     // header and create appropriate corresponding PHI nodes in the preheader.
1849 
1850     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1851          I != E && I->isPHI(); ++I) {
1852       MachineInstr *PN = &*I;
1853 
1854       const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1855       MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1856       NewPH->insert(NewPH->end(), NewPN);
1857 
1858       unsigned PR = PN->getOperand(0).getReg();
1859       const TargetRegisterClass *RC = MRI->getRegClass(PR);
1860       unsigned NewPR = MRI->createVirtualRegister(RC);
1861       NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1862 
1863       // Copy all non-latch operands of a header's PHI node to the newly
1864       // created PHI node in the preheader.
1865       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1866         unsigned PredR = PN->getOperand(i).getReg();
1867         unsigned PredRSub = PN->getOperand(i).getSubReg();
1868         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1869         if (PredB == Latch)
1870           continue;
1871 
1872         MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1873         MO.setSubReg(PredRSub);
1874         NewPN->addOperand(MO);
1875         NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1876       }
1877 
1878       // Remove copied operands from the old PHI node and add the value
1879       // coming from the preheader's PHI.
1880       for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1881         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1882         if (PredB != Latch) {
1883           PN->RemoveOperand(i+1);
1884           PN->RemoveOperand(i);
1885         }
1886       }
1887       PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1888       PN->addOperand(MachineOperand::CreateMBB(NewPH));
1889     }
1890 
1891   } else {
1892     assert(Header->pred_size() == 2);
1893 
1894     // The header has only two predecessors, but the non-latch predecessor
1895     // is not a preheader (e.g. it has other successors, etc.)
1896     // In such a case we don't need any extra PHI nodes in the new preheader,
1897     // all we need is to adjust existing PHIs in the header to now refer to
1898     // the new preheader.
1899     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1900          I != E && I->isPHI(); ++I) {
1901       MachineInstr *PN = &*I;
1902       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1903         MachineOperand &MO = PN->getOperand(i+1);
1904         if (MO.getMBB() != Latch)
1905           MO.setMBB(NewPH);
1906       }
1907     }
1908   }
1909 
1910   // "Reroute" the CFG edges to link in the new preheader.
1911   // If any of the predecessors falls through to the header, insert a branch
1912   // to the new preheader in that place.
1913   SmallVector<MachineOperand,1> Tmp2;
1914   SmallVector<MachineOperand,1> EmptyCond;
1915 
1916   TB = FB = nullptr;
1917 
1918   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1919     MachineBasicBlock *PB = *I;
1920     if (PB != Latch) {
1921       Tmp2.clear();
1922       bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1923       (void)NotAnalyzed; // suppress compiler warning
1924       assert (!NotAnalyzed && "Should be analyzable!");
1925       if (TB != Header && (Tmp2.empty() || FB != Header))
1926         TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1927       PB->ReplaceUsesOfBlockWith(Header, NewPH);
1928     }
1929   }
1930 
1931   // It can happen that the latch block will fall through into the header.
1932   // Insert an unconditional branch to the header.
1933   TB = FB = nullptr;
1934   bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1935   (void)LatchNotAnalyzed; // suppress compiler warning
1936   assert (!LatchNotAnalyzed && "Should be analyzable!");
1937   if (!TB && !FB)
1938     TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1939 
1940   // Finally, the branch from the preheader to the header.
1941   TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1942   NewPH->addSuccessor(Header);
1943 
1944   MachineLoop *ParentLoop = L->getParentLoop();
1945   if (ParentLoop)
1946     ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1947 
1948   // Update the dominator information with the new preheader.
1949   if (MDT) {
1950     if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1951       if (MachineDomTreeNode *DHN = HN->getIDom()) {
1952         MDT->addNewBlock(NewPH, DHN->getBlock());
1953         MDT->changeImmediateDominator(Header, NewPH);
1954       }
1955     }
1956   }
1957 
1958   return NewPH;
1959 }
1960