10b57cec5SDimitry Andric //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass identifies loops where we can generate the Hexagon hardware 100b57cec5SDimitry Andric // loop instruction. The hardware loop can perform loop branches with a 110b57cec5SDimitry Andric // zero-cycle overhead. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // The pattern that defines the induction variable can changed depending on 140b57cec5SDimitry Andric // prior optimizations. For example, the IndVarSimplify phase run by 'opt' 150b57cec5SDimitry Andric // normalizes induction variables, and the Loop Strength Reduction pass 160b57cec5SDimitry Andric // run by 'llc' may also make changes to the induction variable. 170b57cec5SDimitry Andric // The pattern detected by this phase is due to running Strength Reduction. 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // Criteria for hardware loops: 200b57cec5SDimitry Andric // - Countable loops (w/ ind. var for a trip count) 210b57cec5SDimitry Andric // - Assumes loops are normalized by IndVarSimplify 220b57cec5SDimitry Andric // - Try inner-most loops first 230b57cec5SDimitry Andric // - No function calls in loops. 240b57cec5SDimitry Andric // 250b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric #include "HexagonInstrInfo.h" 280b57cec5SDimitry Andric #include "HexagonSubtarget.h" 290b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 300b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 310b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h" 320b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 330b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 340b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 350b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 360b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 370b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 380b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 390b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 400b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 410b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 420b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 430b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 440b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 450b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 460b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 47480093f4SDimitry Andric #include "llvm/InitializePasses.h" 480b57cec5SDimitry Andric #include "llvm/Pass.h" 490b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 500b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 510b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 520b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 530b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 540b57cec5SDimitry Andric #include <cassert> 550b57cec5SDimitry Andric #include <cstdint> 560b57cec5SDimitry Andric #include <cstdlib> 570b57cec5SDimitry Andric #include <iterator> 580b57cec5SDimitry Andric #include <map> 590b57cec5SDimitry Andric #include <set> 600b57cec5SDimitry Andric #include <string> 610b57cec5SDimitry Andric #include <utility> 620b57cec5SDimitry Andric #include <vector> 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric using namespace llvm; 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric #define DEBUG_TYPE "hwloops" 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric #ifndef NDEBUG 690b57cec5SDimitry Andric static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1)); 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric // Option to create preheader only for a specific function. 720b57cec5SDimitry Andric static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden, 730b57cec5SDimitry Andric cl::init("")); 740b57cec5SDimitry Andric #endif 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric // Option to create a preheader if one doesn't exist. 770b57cec5SDimitry Andric static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader", 780b57cec5SDimitry Andric cl::Hidden, cl::init(true), 790b57cec5SDimitry Andric cl::desc("Add a preheader to a hardware loop if one doesn't exist")); 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric // Turn it off by default. If a preheader block is not created here, the 820b57cec5SDimitry Andric // software pipeliner may be unable to find a block suitable to serve as 830b57cec5SDimitry Andric // a preheader. In that case SWP will not run. 8481ad6265SDimitry Andric static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden, 8581ad6265SDimitry Andric cl::desc("Allow speculation of preheader " 860b57cec5SDimitry Andric "instructions")); 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric namespace llvm { 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric FunctionPass *createHexagonHardwareLoops(); 930b57cec5SDimitry Andric void initializeHexagonHardwareLoopsPass(PassRegistry&); 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric } // end namespace llvm 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric namespace { 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric class CountValue; 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric struct HexagonHardwareLoops : public MachineFunctionPass { 1020b57cec5SDimitry Andric MachineLoopInfo *MLI; 1030b57cec5SDimitry Andric MachineRegisterInfo *MRI; 1040b57cec5SDimitry Andric MachineDominatorTree *MDT; 1050b57cec5SDimitry Andric const HexagonInstrInfo *TII; 1060b57cec5SDimitry Andric const HexagonRegisterInfo *TRI; 1070b57cec5SDimitry Andric #ifndef NDEBUG 1080b57cec5SDimitry Andric static int Counter; 1090b57cec5SDimitry Andric #endif 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric public: 1120b57cec5SDimitry Andric static char ID; 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric HexagonHardwareLoops() : MachineFunctionPass(ID) {} 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric StringRef getPassName() const override { return "Hexagon Hardware Loops"; } 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 121*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>(); 122*0fca6ea1SDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>(); 1230b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric private: 127bdd1243dSDimitry Andric using LoopFeederMap = std::map<Register, MachineInstr *>; 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric /// Kinds of comparisons in the compare instructions. 1300b57cec5SDimitry Andric struct Comparison { 1310b57cec5SDimitry Andric enum Kind { 1320b57cec5SDimitry Andric EQ = 0x01, 1330b57cec5SDimitry Andric NE = 0x02, 1340b57cec5SDimitry Andric L = 0x04, 1350b57cec5SDimitry Andric G = 0x08, 1360b57cec5SDimitry Andric U = 0x40, 1370b57cec5SDimitry Andric LTs = L, 1380b57cec5SDimitry Andric LEs = L | EQ, 1390b57cec5SDimitry Andric GTs = G, 1400b57cec5SDimitry Andric GEs = G | EQ, 1410b57cec5SDimitry Andric LTu = L | U, 1420b57cec5SDimitry Andric LEu = L | EQ | U, 1430b57cec5SDimitry Andric GTu = G | U, 1440b57cec5SDimitry Andric GEu = G | EQ | U 1450b57cec5SDimitry Andric }; 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric static Kind getSwappedComparison(Kind Cmp) { 1480b57cec5SDimitry Andric assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator"); 1490b57cec5SDimitry Andric if ((Cmp & L) || (Cmp & G)) 1500b57cec5SDimitry Andric return (Kind)(Cmp ^ (L|G)); 1510b57cec5SDimitry Andric return Cmp; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric static Kind getNegatedComparison(Kind Cmp) { 1550b57cec5SDimitry Andric if ((Cmp & L) || (Cmp & G)) 1560b57cec5SDimitry Andric return (Kind)((Cmp ^ (L | G)) ^ EQ); 1570b57cec5SDimitry Andric if ((Cmp & NE) || (Cmp & EQ)) 1580b57cec5SDimitry Andric return (Kind)(Cmp ^ (EQ | NE)); 1590b57cec5SDimitry Andric return (Kind)0; 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric static bool isSigned(Kind Cmp) { 1630b57cec5SDimitry Andric return (Cmp & (L | G) && !(Cmp & U)); 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric static bool isUnsigned(Kind Cmp) { 1670b57cec5SDimitry Andric return (Cmp & U); 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric }; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric /// Find the register that contains the loop controlling 1720b57cec5SDimitry Andric /// induction variable. 1730b57cec5SDimitry Andric /// If successful, it will return true and set the \p Reg, \p IVBump 1740b57cec5SDimitry Andric /// and \p IVOp arguments. Otherwise it will return false. 1750b57cec5SDimitry Andric /// The returned induction register is the register R that follows the 1760b57cec5SDimitry Andric /// following induction pattern: 1770b57cec5SDimitry Andric /// loop: 1780b57cec5SDimitry Andric /// R = phi ..., [ R.next, LatchBlock ] 1790b57cec5SDimitry Andric /// R.next = R + #bump 1800b57cec5SDimitry Andric /// if (R.next < #N) goto loop 1810b57cec5SDimitry Andric /// IVBump is the immediate value added to R, and IVOp is the instruction 1820b57cec5SDimitry Andric /// "R.next = R + #bump". 183bdd1243dSDimitry Andric bool findInductionRegister(MachineLoop *L, Register &Reg, 1840b57cec5SDimitry Andric int64_t &IVBump, MachineInstr *&IVOp) const; 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric /// Return the comparison kind for the specified opcode. 1870b57cec5SDimitry Andric Comparison::Kind getComparisonKind(unsigned CondOpc, 1880b57cec5SDimitry Andric MachineOperand *InitialValue, 1890b57cec5SDimitry Andric const MachineOperand *Endvalue, 1900b57cec5SDimitry Andric int64_t IVBump) const; 1910b57cec5SDimitry Andric 1920b57cec5SDimitry Andric /// Analyze the statements in a loop to determine if the loop 1930b57cec5SDimitry Andric /// has a computable trip count and, if so, return a value that represents 1940b57cec5SDimitry Andric /// the trip count expression. 1950b57cec5SDimitry Andric CountValue *getLoopTripCount(MachineLoop *L, 1960b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &OldInsts); 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric /// Return the expression that represents the number of times 1990b57cec5SDimitry Andric /// a loop iterates. The function takes the operands that represent the 2000b57cec5SDimitry Andric /// loop start value, loop end value, and induction value. Based upon 2010b57cec5SDimitry Andric /// these operands, the function attempts to compute the trip count. 2020b57cec5SDimitry Andric /// If the trip count is not directly available (as an immediate value, 2030b57cec5SDimitry Andric /// or a register), the function will attempt to insert computation of it 2040b57cec5SDimitry Andric /// to the loop's preheader. 2050b57cec5SDimitry Andric CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start, 206bdd1243dSDimitry Andric const MachineOperand *End, Register IVReg, 2070b57cec5SDimitry Andric int64_t IVBump, Comparison::Kind Cmp) const; 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric /// Return true if the instruction is not valid within a hardware 2100b57cec5SDimitry Andric /// loop. 2110b57cec5SDimitry Andric bool isInvalidLoopOperation(const MachineInstr *MI, 2120b57cec5SDimitry Andric bool IsInnerHWLoop) const; 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric /// Return true if the loop contains an instruction that inhibits 2150b57cec5SDimitry Andric /// using the hardware loop. 2160b57cec5SDimitry Andric bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric /// Given a loop, check if we can convert it to a hardware loop. 2190b57cec5SDimitry Andric /// If so, then perform the conversion and return true. 2200b57cec5SDimitry Andric bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric /// Return true if the instruction is now dead. 2230b57cec5SDimitry Andric bool isDead(const MachineInstr *MI, 2240b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DeadPhis) const; 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric /// Remove the instruction if it is now dead. 2270b57cec5SDimitry Andric void removeIfDead(MachineInstr *MI); 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric /// Make sure that the "bump" instruction executes before the 2300b57cec5SDimitry Andric /// compare. We need that for the IV fixup, so that the compare 2310b57cec5SDimitry Andric /// instruction would not use a bumped value that has not yet been 2320b57cec5SDimitry Andric /// defined. If the instructions are out of order, try to reorder them. 2330b57cec5SDimitry Andric bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric /// Return true if MO and MI pair is visited only once. If visited 2360b57cec5SDimitry Andric /// more than once, this indicates there is recursion. In such a case, 2370b57cec5SDimitry Andric /// return false. 2380b57cec5SDimitry Andric bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, 2390b57cec5SDimitry Andric const MachineOperand *MO, 2400b57cec5SDimitry Andric LoopFeederMap &LoopFeederPhi) const; 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric /// Return true if the Phi may generate a value that may underflow, 2430b57cec5SDimitry Andric /// or may wrap. 2440b57cec5SDimitry Andric bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, 2450b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineLoop *L, 2460b57cec5SDimitry Andric LoopFeederMap &LoopFeederPhi) const; 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric /// Return true if the induction variable may underflow an unsigned 2490b57cec5SDimitry Andric /// value in the first iteration. 2500b57cec5SDimitry Andric bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, 2510b57cec5SDimitry Andric const MachineOperand *EndVal, 2520b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineLoop *L, 2530b57cec5SDimitry Andric LoopFeederMap &LoopFeederPhi) const; 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric /// Check if the given operand has a compile-time known constant 2560b57cec5SDimitry Andric /// value. Return true if yes, and false otherwise. When returning true, set 2570b57cec5SDimitry Andric /// Val to the corresponding constant value. 2580b57cec5SDimitry Andric bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric /// Check if the operand has a compile-time known constant value. 2610b57cec5SDimitry Andric bool isImmediate(const MachineOperand &MO) const { 2620b57cec5SDimitry Andric int64_t V; 2630b57cec5SDimitry Andric return checkForImmediate(MO, V); 2640b57cec5SDimitry Andric } 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric /// Return the immediate for the specified operand. 2670b57cec5SDimitry Andric int64_t getImmediate(const MachineOperand &MO) const { 2680b57cec5SDimitry Andric int64_t V; 2690b57cec5SDimitry Andric if (!checkForImmediate(MO, V)) 2700b57cec5SDimitry Andric llvm_unreachable("Invalid operand"); 2710b57cec5SDimitry Andric return V; 2720b57cec5SDimitry Andric } 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric /// Reset the given machine operand to now refer to a new immediate 2750b57cec5SDimitry Andric /// value. Assumes that the operand was already referencing an immediate 2760b57cec5SDimitry Andric /// value, either directly, or via a register. 2770b57cec5SDimitry Andric void setImmediate(MachineOperand &MO, int64_t Val); 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric /// Fix the data flow of the induction variable. 2800b57cec5SDimitry Andric /// The desired flow is: phi ---> bump -+-> comparison-in-latch. 2810b57cec5SDimitry Andric /// | 2820b57cec5SDimitry Andric /// +-> back to phi 2830b57cec5SDimitry Andric /// where "bump" is the increment of the induction variable: 2840b57cec5SDimitry Andric /// iv = iv + #const. 2850b57cec5SDimitry Andric /// Due to some prior code transformations, the actual flow may look 2860b57cec5SDimitry Andric /// like this: 2870b57cec5SDimitry Andric /// phi -+-> bump ---> back to phi 2880b57cec5SDimitry Andric /// | 2890b57cec5SDimitry Andric /// +-> comparison-in-latch (against upper_bound-bump), 2900b57cec5SDimitry Andric /// i.e. the comparison that controls the loop execution may be using 2910b57cec5SDimitry Andric /// the value of the induction variable from before the increment. 2920b57cec5SDimitry Andric /// 2930b57cec5SDimitry Andric /// Return true if the loop's flow is the desired one (i.e. it's 2940b57cec5SDimitry Andric /// either been fixed, or no fixing was necessary). 2950b57cec5SDimitry Andric /// Otherwise, return false. This can happen if the induction variable 2960b57cec5SDimitry Andric /// couldn't be identified, or if the value in the latch's comparison 2970b57cec5SDimitry Andric /// cannot be adjusted to reflect the post-bump value. 2980b57cec5SDimitry Andric bool fixupInductionVariable(MachineLoop *L); 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric /// Given a loop, if it does not have a preheader, create one. 3010b57cec5SDimitry Andric /// Return the block that is the preheader. 3020b57cec5SDimitry Andric MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); 3030b57cec5SDimitry Andric }; 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric char HexagonHardwareLoops::ID = 0; 3060b57cec5SDimitry Andric #ifndef NDEBUG 3070b57cec5SDimitry Andric int HexagonHardwareLoops::Counter = 0; 3080b57cec5SDimitry Andric #endif 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric /// Abstraction for a trip count of a loop. A smaller version 3110b57cec5SDimitry Andric /// of the MachineOperand class without the concerns of changing the 3120b57cec5SDimitry Andric /// operand representation. 3130b57cec5SDimitry Andric class CountValue { 3140b57cec5SDimitry Andric public: 3150b57cec5SDimitry Andric enum CountValueType { 3160b57cec5SDimitry Andric CV_Register, 3170b57cec5SDimitry Andric CV_Immediate 3180b57cec5SDimitry Andric }; 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric private: 3210b57cec5SDimitry Andric CountValueType Kind; 3220b57cec5SDimitry Andric union Values { 323bdd1243dSDimitry Andric Values() : R{Register(), 0} {} 324bdd1243dSDimitry Andric Values(const Values&) = default; 3250b57cec5SDimitry Andric struct { 326bdd1243dSDimitry Andric Register Reg; 3270b57cec5SDimitry Andric unsigned Sub; 3280b57cec5SDimitry Andric } R; 3290b57cec5SDimitry Andric unsigned ImmVal; 3300b57cec5SDimitry Andric } Contents; 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric public: 333bdd1243dSDimitry Andric explicit CountValue(CountValueType t, Register v, unsigned u = 0) { 3340b57cec5SDimitry Andric Kind = t; 3350b57cec5SDimitry Andric if (Kind == CV_Register) { 3360b57cec5SDimitry Andric Contents.R.Reg = v; 3370b57cec5SDimitry Andric Contents.R.Sub = u; 3380b57cec5SDimitry Andric } else { 3390b57cec5SDimitry Andric Contents.ImmVal = v; 3400b57cec5SDimitry Andric } 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric 3430b57cec5SDimitry Andric bool isReg() const { return Kind == CV_Register; } 3440b57cec5SDimitry Andric bool isImm() const { return Kind == CV_Immediate; } 3450b57cec5SDimitry Andric 346bdd1243dSDimitry Andric Register getReg() const { 3470b57cec5SDimitry Andric assert(isReg() && "Wrong CountValue accessor"); 3480b57cec5SDimitry Andric return Contents.R.Reg; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric unsigned getSubReg() const { 3520b57cec5SDimitry Andric assert(isReg() && "Wrong CountValue accessor"); 3530b57cec5SDimitry Andric return Contents.R.Sub; 3540b57cec5SDimitry Andric } 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric unsigned getImm() const { 3570b57cec5SDimitry Andric assert(isImm() && "Wrong CountValue accessor"); 3580b57cec5SDimitry Andric return Contents.ImmVal; 3590b57cec5SDimitry Andric } 3600b57cec5SDimitry Andric 3610b57cec5SDimitry Andric void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const { 3620b57cec5SDimitry Andric if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); } 3630b57cec5SDimitry Andric if (isImm()) { OS << Contents.ImmVal; } 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric }; 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric } // end anonymous namespace 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", 3700b57cec5SDimitry Andric "Hexagon Hardware Loops", false, false) 371*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 372*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 3730b57cec5SDimitry Andric INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops", 3740b57cec5SDimitry Andric "Hexagon Hardware Loops", false, false) 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric FunctionPass *llvm::createHexagonHardwareLoops() { 3770b57cec5SDimitry Andric return new HexagonHardwareLoops(); 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { 3810b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); 3820b57cec5SDimitry Andric if (skipFunction(MF.getFunction())) 3830b57cec5SDimitry Andric return false; 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric bool Changed = false; 3860b57cec5SDimitry Andric 387*0fca6ea1SDimitry Andric MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 3880b57cec5SDimitry Andric MRI = &MF.getRegInfo(); 389*0fca6ea1SDimitry Andric MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 3900b57cec5SDimitry Andric const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); 3910b57cec5SDimitry Andric TII = HST.getInstrInfo(); 3920b57cec5SDimitry Andric TRI = HST.getRegisterInfo(); 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric for (auto &L : *MLI) 395e8d8bef9SDimitry Andric if (L->isOutermost()) { 3960b57cec5SDimitry Andric bool L0Used = false; 3970b57cec5SDimitry Andric bool L1Used = false; 3980b57cec5SDimitry Andric Changed |= convertToHardwareLoop(L, L0Used, L1Used); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric return Changed; 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, 405bdd1243dSDimitry Andric Register &Reg, 4060b57cec5SDimitry Andric int64_t &IVBump, 4070b57cec5SDimitry Andric MachineInstr *&IVOp 4080b57cec5SDimitry Andric ) const { 4090b57cec5SDimitry Andric MachineBasicBlock *Header = L->getHeader(); 4100b57cec5SDimitry Andric MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 4110b57cec5SDimitry Andric MachineBasicBlock *Latch = L->getLoopLatch(); 4120b57cec5SDimitry Andric MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 4130b57cec5SDimitry Andric if (!Header || !Preheader || !Latch || !ExitingBlock) 4140b57cec5SDimitry Andric return false; 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric // This pair represents an induction register together with an immediate 4170b57cec5SDimitry Andric // value that will be added to it in each loop iteration. 418bdd1243dSDimitry Andric using RegisterBump = std::pair<Register, int64_t>; 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric // Mapping: R.next -> (R, bump), where R, R.next and bump are derived 4210b57cec5SDimitry Andric // from an induction operation 4220b57cec5SDimitry Andric // R.next = R + bump 4230b57cec5SDimitry Andric // where bump is an immediate value. 424bdd1243dSDimitry Andric using InductionMap = std::map<Register, RegisterBump>; 4250b57cec5SDimitry Andric 4260b57cec5SDimitry Andric InductionMap IndMap; 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric using instr_iterator = MachineBasicBlock::instr_iterator; 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 4310b57cec5SDimitry Andric I != E && I->isPHI(); ++I) { 4320b57cec5SDimitry Andric MachineInstr *Phi = &*I; 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric // Have a PHI instruction. Get the operand that corresponds to the 4350b57cec5SDimitry Andric // latch block, and see if is a result of an addition of form "reg+imm", 4360b57cec5SDimitry Andric // where the "reg" is defined by the PHI node we are looking at. 4370b57cec5SDimitry Andric for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 4380b57cec5SDimitry Andric if (Phi->getOperand(i+1).getMBB() != Latch) 4390b57cec5SDimitry Andric continue; 4400b57cec5SDimitry Andric 4418bcb0991SDimitry Andric Register PhiOpReg = Phi->getOperand(i).getReg(); 4420b57cec5SDimitry Andric MachineInstr *DI = MRI->getVRegDef(PhiOpReg); 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric if (DI->getDesc().isAdd()) { 4450b57cec5SDimitry Andric // If the register operand to the add is the PHI we're looking at, this 4460b57cec5SDimitry Andric // meets the induction pattern. 4478bcb0991SDimitry Andric Register IndReg = DI->getOperand(1).getReg(); 4480b57cec5SDimitry Andric MachineOperand &Opnd2 = DI->getOperand(2); 4490b57cec5SDimitry Andric int64_t V; 4500b57cec5SDimitry Andric if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { 4518bcb0991SDimitry Andric Register UpdReg = DI->getOperand(0).getReg(); 4520b57cec5SDimitry Andric IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 4530b57cec5SDimitry Andric } 4540b57cec5SDimitry Andric } 4550b57cec5SDimitry Andric } // for (i) 4560b57cec5SDimitry Andric } // for (instr) 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric SmallVector<MachineOperand,2> Cond; 4590b57cec5SDimitry Andric MachineBasicBlock *TB = nullptr, *FB = nullptr; 4600b57cec5SDimitry Andric bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 4610b57cec5SDimitry Andric if (NotAnalyzed) 4620b57cec5SDimitry Andric return false; 4630b57cec5SDimitry Andric 464bdd1243dSDimitry Andric Register PredR; 465bdd1243dSDimitry Andric unsigned PredPos, PredRegFlags; 4660b57cec5SDimitry Andric if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags)) 4670b57cec5SDimitry Andric return false; 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric MachineInstr *PredI = MRI->getVRegDef(PredR); 4700b57cec5SDimitry Andric if (!PredI->isCompare()) 4710b57cec5SDimitry Andric return false; 4720b57cec5SDimitry Andric 4735ffd83dbSDimitry Andric Register CmpReg1, CmpReg2; 474349cc55cSDimitry Andric int64_t CmpImm = 0, CmpMask = 0; 4750b57cec5SDimitry Andric bool CmpAnalyzed = 4760b57cec5SDimitry Andric TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm); 4770b57cec5SDimitry Andric // Fail if the compare was not analyzed, or it's not comparing a register 4780b57cec5SDimitry Andric // with an immediate value. Not checking the mask here, since we handle 4790b57cec5SDimitry Andric // the individual compare opcodes (including A4_cmpb*) later on. 4800b57cec5SDimitry Andric if (!CmpAnalyzed) 4810b57cec5SDimitry Andric return false; 4820b57cec5SDimitry Andric 4830b57cec5SDimitry Andric // Exactly one of the input registers to the comparison should be among 4840b57cec5SDimitry Andric // the induction registers. 4850b57cec5SDimitry Andric InductionMap::iterator IndMapEnd = IndMap.end(); 4860b57cec5SDimitry Andric InductionMap::iterator F = IndMapEnd; 4870b57cec5SDimitry Andric if (CmpReg1 != 0) { 4880b57cec5SDimitry Andric InductionMap::iterator F1 = IndMap.find(CmpReg1); 4890b57cec5SDimitry Andric if (F1 != IndMapEnd) 4900b57cec5SDimitry Andric F = F1; 4910b57cec5SDimitry Andric } 4920b57cec5SDimitry Andric if (CmpReg2 != 0) { 4930b57cec5SDimitry Andric InductionMap::iterator F2 = IndMap.find(CmpReg2); 4940b57cec5SDimitry Andric if (F2 != IndMapEnd) { 4950b57cec5SDimitry Andric if (F != IndMapEnd) 4960b57cec5SDimitry Andric return false; 4970b57cec5SDimitry Andric F = F2; 4980b57cec5SDimitry Andric } 4990b57cec5SDimitry Andric } 5000b57cec5SDimitry Andric if (F == IndMapEnd) 5010b57cec5SDimitry Andric return false; 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric Reg = F->second.first; 5040b57cec5SDimitry Andric IVBump = F->second.second; 5050b57cec5SDimitry Andric IVOp = MRI->getVRegDef(F->first); 5060b57cec5SDimitry Andric return true; 5070b57cec5SDimitry Andric } 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric // Return the comparison kind for the specified opcode. 5100b57cec5SDimitry Andric HexagonHardwareLoops::Comparison::Kind 5110b57cec5SDimitry Andric HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, 5120b57cec5SDimitry Andric MachineOperand *InitialValue, 5130b57cec5SDimitry Andric const MachineOperand *EndValue, 5140b57cec5SDimitry Andric int64_t IVBump) const { 5150b57cec5SDimitry Andric Comparison::Kind Cmp = (Comparison::Kind)0; 5160b57cec5SDimitry Andric switch (CondOpc) { 5170b57cec5SDimitry Andric case Hexagon::C2_cmpeq: 5180b57cec5SDimitry Andric case Hexagon::C2_cmpeqi: 5190b57cec5SDimitry Andric case Hexagon::C2_cmpeqp: 5200b57cec5SDimitry Andric Cmp = Comparison::EQ; 5210b57cec5SDimitry Andric break; 5220b57cec5SDimitry Andric case Hexagon::C4_cmpneq: 5230b57cec5SDimitry Andric case Hexagon::C4_cmpneqi: 5240b57cec5SDimitry Andric Cmp = Comparison::NE; 5250b57cec5SDimitry Andric break; 5260b57cec5SDimitry Andric case Hexagon::C2_cmplt: 5270b57cec5SDimitry Andric Cmp = Comparison::LTs; 5280b57cec5SDimitry Andric break; 5290b57cec5SDimitry Andric case Hexagon::C2_cmpltu: 5300b57cec5SDimitry Andric Cmp = Comparison::LTu; 5310b57cec5SDimitry Andric break; 5320b57cec5SDimitry Andric case Hexagon::C4_cmplte: 5330b57cec5SDimitry Andric case Hexagon::C4_cmpltei: 5340b57cec5SDimitry Andric Cmp = Comparison::LEs; 5350b57cec5SDimitry Andric break; 5360b57cec5SDimitry Andric case Hexagon::C4_cmplteu: 5370b57cec5SDimitry Andric case Hexagon::C4_cmplteui: 5380b57cec5SDimitry Andric Cmp = Comparison::LEu; 5390b57cec5SDimitry Andric break; 5400b57cec5SDimitry Andric case Hexagon::C2_cmpgt: 5410b57cec5SDimitry Andric case Hexagon::C2_cmpgti: 5420b57cec5SDimitry Andric case Hexagon::C2_cmpgtp: 5430b57cec5SDimitry Andric Cmp = Comparison::GTs; 5440b57cec5SDimitry Andric break; 5450b57cec5SDimitry Andric case Hexagon::C2_cmpgtu: 5460b57cec5SDimitry Andric case Hexagon::C2_cmpgtui: 5470b57cec5SDimitry Andric case Hexagon::C2_cmpgtup: 5480b57cec5SDimitry Andric Cmp = Comparison::GTu; 5490b57cec5SDimitry Andric break; 5500b57cec5SDimitry Andric case Hexagon::C2_cmpgei: 5510b57cec5SDimitry Andric Cmp = Comparison::GEs; 5520b57cec5SDimitry Andric break; 5530b57cec5SDimitry Andric case Hexagon::C2_cmpgeui: 5540b57cec5SDimitry Andric Cmp = Comparison::GEs; 5550b57cec5SDimitry Andric break; 5560b57cec5SDimitry Andric default: 5570b57cec5SDimitry Andric return (Comparison::Kind)0; 5580b57cec5SDimitry Andric } 5590b57cec5SDimitry Andric return Cmp; 5600b57cec5SDimitry Andric } 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric /// Analyze the statements in a loop to determine if the loop has 5630b57cec5SDimitry Andric /// a computable trip count and, if so, return a value that represents 5640b57cec5SDimitry Andric /// the trip count expression. 5650b57cec5SDimitry Andric /// 5660b57cec5SDimitry Andric /// This function iterates over the phi nodes in the loop to check for 5670b57cec5SDimitry Andric /// induction variable patterns that are used in the calculation for 5680b57cec5SDimitry Andric /// the number of time the loop is executed. 5690b57cec5SDimitry Andric CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, 5700b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &OldInsts) { 5710b57cec5SDimitry Andric MachineBasicBlock *TopMBB = L->getTopBlock(); 5720b57cec5SDimitry Andric MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); 5730b57cec5SDimitry Andric assert(PI != TopMBB->pred_end() && 5740b57cec5SDimitry Andric "Loop must have more than one incoming edge!"); 5750b57cec5SDimitry Andric MachineBasicBlock *Backedge = *PI++; 5760b57cec5SDimitry Andric if (PI == TopMBB->pred_end()) // dead loop? 5770b57cec5SDimitry Andric return nullptr; 5780b57cec5SDimitry Andric MachineBasicBlock *Incoming = *PI++; 5790b57cec5SDimitry Andric if (PI != TopMBB->pred_end()) // multiple backedges? 5800b57cec5SDimitry Andric return nullptr; 5810b57cec5SDimitry Andric 5820b57cec5SDimitry Andric // Make sure there is one incoming and one backedge and determine which 5830b57cec5SDimitry Andric // is which. 5840b57cec5SDimitry Andric if (L->contains(Incoming)) { 5850b57cec5SDimitry Andric if (L->contains(Backedge)) 5860b57cec5SDimitry Andric return nullptr; 5870b57cec5SDimitry Andric std::swap(Incoming, Backedge); 5880b57cec5SDimitry Andric } else if (!L->contains(Backedge)) 5890b57cec5SDimitry Andric return nullptr; 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric // Look for the cmp instruction to determine if we can get a useful trip 5920b57cec5SDimitry Andric // count. The trip count can be either a register or an immediate. The 5930b57cec5SDimitry Andric // location of the value depends upon the type (reg or imm). 5940b57cec5SDimitry Andric MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 5950b57cec5SDimitry Andric if (!ExitingBlock) 5960b57cec5SDimitry Andric return nullptr; 5970b57cec5SDimitry Andric 598bdd1243dSDimitry Andric Register IVReg = 0; 5990b57cec5SDimitry Andric int64_t IVBump = 0; 6000b57cec5SDimitry Andric MachineInstr *IVOp; 6010b57cec5SDimitry Andric bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); 6020b57cec5SDimitry Andric if (!FoundIV) 6030b57cec5SDimitry Andric return nullptr; 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 6060b57cec5SDimitry Andric 6070b57cec5SDimitry Andric MachineOperand *InitialValue = nullptr; 6080b57cec5SDimitry Andric MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); 6090b57cec5SDimitry Andric MachineBasicBlock *Latch = L->getLoopLatch(); 6100b57cec5SDimitry Andric for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { 6110b57cec5SDimitry Andric MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); 6120b57cec5SDimitry Andric if (MBB == Preheader) 6130b57cec5SDimitry Andric InitialValue = &IV_Phi->getOperand(i); 6140b57cec5SDimitry Andric else if (MBB == Latch) 6150b57cec5SDimitry Andric IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. 6160b57cec5SDimitry Andric } 6170b57cec5SDimitry Andric if (!InitialValue) 6180b57cec5SDimitry Andric return nullptr; 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric SmallVector<MachineOperand,2> Cond; 6210b57cec5SDimitry Andric MachineBasicBlock *TB = nullptr, *FB = nullptr; 6220b57cec5SDimitry Andric bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 6230b57cec5SDimitry Andric if (NotAnalyzed) 6240b57cec5SDimitry Andric return nullptr; 6250b57cec5SDimitry Andric 6260b57cec5SDimitry Andric MachineBasicBlock *Header = L->getHeader(); 6270b57cec5SDimitry Andric // TB must be non-null. If FB is also non-null, one of them must be 6280b57cec5SDimitry Andric // the header. Otherwise, branch to TB could be exiting the loop, and 6290b57cec5SDimitry Andric // the fall through can go to the header. 6300b57cec5SDimitry Andric assert (TB && "Exit block without a branch?"); 6310b57cec5SDimitry Andric if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { 6320b57cec5SDimitry Andric MachineBasicBlock *LTB = nullptr, *LFB = nullptr; 6330b57cec5SDimitry Andric SmallVector<MachineOperand,2> LCond; 6340b57cec5SDimitry Andric bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); 6350b57cec5SDimitry Andric if (NotAnalyzed) 6360b57cec5SDimitry Andric return nullptr; 6370b57cec5SDimitry Andric if (TB == Latch) 6380b57cec5SDimitry Andric TB = (LTB == Header) ? LTB : LFB; 6390b57cec5SDimitry Andric else 6400b57cec5SDimitry Andric FB = (LTB == Header) ? LTB: LFB; 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); 6430b57cec5SDimitry Andric if (!TB || (FB && TB != Header && FB != Header)) 6440b57cec5SDimitry Andric return nullptr; 6450b57cec5SDimitry Andric 6465ffd83dbSDimitry Andric // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch 6470b57cec5SDimitry Andric // to put imm(0), followed by P in the vector Cond. 6480b57cec5SDimitry Andric // If TB is not the header, it means that the "not-taken" path must lead 6490b57cec5SDimitry Andric // to the header. 6500b57cec5SDimitry Andric bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header); 651bdd1243dSDimitry Andric Register PredReg; 652bdd1243dSDimitry Andric unsigned PredPos, PredRegFlags; 6530b57cec5SDimitry Andric if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags)) 6540b57cec5SDimitry Andric return nullptr; 6550b57cec5SDimitry Andric MachineInstr *CondI = MRI->getVRegDef(PredReg); 6560b57cec5SDimitry Andric unsigned CondOpc = CondI->getOpcode(); 6570b57cec5SDimitry Andric 6585ffd83dbSDimitry Andric Register CmpReg1, CmpReg2; 659349cc55cSDimitry Andric int64_t Mask = 0, ImmValue = 0; 6600b57cec5SDimitry Andric bool AnalyzedCmp = 6610b57cec5SDimitry Andric TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue); 6620b57cec5SDimitry Andric if (!AnalyzedCmp) 6630b57cec5SDimitry Andric return nullptr; 6640b57cec5SDimitry Andric 6650b57cec5SDimitry Andric // The comparison operator type determines how we compute the loop 6660b57cec5SDimitry Andric // trip count. 6670b57cec5SDimitry Andric OldInsts.push_back(CondI); 6680b57cec5SDimitry Andric OldInsts.push_back(IVOp); 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric // Sadly, the following code gets information based on the position 6710b57cec5SDimitry Andric // of the operands in the compare instruction. This has to be done 6720b57cec5SDimitry Andric // this way, because the comparisons check for a specific relationship 6730b57cec5SDimitry Andric // between the operands (e.g. is-less-than), rather than to find out 6740b57cec5SDimitry Andric // what relationship the operands are in (as on PPC). 6750b57cec5SDimitry Andric Comparison::Kind Cmp; 6760b57cec5SDimitry Andric bool isSwapped = false; 6770b57cec5SDimitry Andric const MachineOperand &Op1 = CondI->getOperand(1); 6780b57cec5SDimitry Andric const MachineOperand &Op2 = CondI->getOperand(2); 6790b57cec5SDimitry Andric const MachineOperand *EndValue = nullptr; 6800b57cec5SDimitry Andric 6810b57cec5SDimitry Andric if (Op1.isReg()) { 6820b57cec5SDimitry Andric if (Op2.isImm() || Op1.getReg() == IVReg) 6830b57cec5SDimitry Andric EndValue = &Op2; 6840b57cec5SDimitry Andric else { 6850b57cec5SDimitry Andric EndValue = &Op1; 6860b57cec5SDimitry Andric isSwapped = true; 6870b57cec5SDimitry Andric } 6880b57cec5SDimitry Andric } 6890b57cec5SDimitry Andric 6900b57cec5SDimitry Andric if (!EndValue) 6910b57cec5SDimitry Andric return nullptr; 6920b57cec5SDimitry Andric 6930b57cec5SDimitry Andric Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump); 6940b57cec5SDimitry Andric if (!Cmp) 6950b57cec5SDimitry Andric return nullptr; 6960b57cec5SDimitry Andric if (Negated) 6970b57cec5SDimitry Andric Cmp = Comparison::getNegatedComparison(Cmp); 6980b57cec5SDimitry Andric if (isSwapped) 6990b57cec5SDimitry Andric Cmp = Comparison::getSwappedComparison(Cmp); 7000b57cec5SDimitry Andric 7010b57cec5SDimitry Andric if (InitialValue->isReg()) { 7028bcb0991SDimitry Andric Register R = InitialValue->getReg(); 7030b57cec5SDimitry Andric MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 7040b57cec5SDimitry Andric if (!MDT->properlyDominates(DefBB, Header)) { 7050b57cec5SDimitry Andric int64_t V; 7060b57cec5SDimitry Andric if (!checkForImmediate(*InitialValue, V)) 7070b57cec5SDimitry Andric return nullptr; 7080b57cec5SDimitry Andric } 7090b57cec5SDimitry Andric OldInsts.push_back(MRI->getVRegDef(R)); 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric if (EndValue->isReg()) { 7128bcb0991SDimitry Andric Register R = EndValue->getReg(); 7130b57cec5SDimitry Andric MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); 7140b57cec5SDimitry Andric if (!MDT->properlyDominates(DefBB, Header)) { 7150b57cec5SDimitry Andric int64_t V; 7160b57cec5SDimitry Andric if (!checkForImmediate(*EndValue, V)) 7170b57cec5SDimitry Andric return nullptr; 7180b57cec5SDimitry Andric } 7190b57cec5SDimitry Andric OldInsts.push_back(MRI->getVRegDef(R)); 7200b57cec5SDimitry Andric } 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); 7230b57cec5SDimitry Andric } 7240b57cec5SDimitry Andric 7250b57cec5SDimitry Andric /// Helper function that returns the expression that represents the 7260b57cec5SDimitry Andric /// number of times a loop iterates. The function takes the operands that 7270b57cec5SDimitry Andric /// represent the loop start value, loop end value, and induction value. 7280b57cec5SDimitry Andric /// Based upon these operands, the function attempts to compute the trip count. 7290b57cec5SDimitry Andric CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, 7300b57cec5SDimitry Andric const MachineOperand *Start, 7310b57cec5SDimitry Andric const MachineOperand *End, 732bdd1243dSDimitry Andric Register IVReg, 7330b57cec5SDimitry Andric int64_t IVBump, 7340b57cec5SDimitry Andric Comparison::Kind Cmp) const { 7350b57cec5SDimitry Andric // Cannot handle comparison EQ, i.e. while (A == B). 7360b57cec5SDimitry Andric if (Cmp == Comparison::EQ) 7370b57cec5SDimitry Andric return nullptr; 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric // Check if either the start or end values are an assignment of an immediate. 7400b57cec5SDimitry Andric // If so, use the immediate value rather than the register. 7410b57cec5SDimitry Andric if (Start->isReg()) { 7420b57cec5SDimitry Andric const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg()); 7430b57cec5SDimitry Andric if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi || 7440b57cec5SDimitry Andric StartValInstr->getOpcode() == Hexagon::A2_tfrpi)) 7450b57cec5SDimitry Andric Start = &StartValInstr->getOperand(1); 7460b57cec5SDimitry Andric } 7470b57cec5SDimitry Andric if (End->isReg()) { 7480b57cec5SDimitry Andric const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); 7490b57cec5SDimitry Andric if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi || 7500b57cec5SDimitry Andric EndValInstr->getOpcode() == Hexagon::A2_tfrpi)) 7510b57cec5SDimitry Andric End = &EndValInstr->getOperand(1); 7520b57cec5SDimitry Andric } 7530b57cec5SDimitry Andric 7540b57cec5SDimitry Andric if (!Start->isReg() && !Start->isImm()) 7550b57cec5SDimitry Andric return nullptr; 7560b57cec5SDimitry Andric if (!End->isReg() && !End->isImm()) 7570b57cec5SDimitry Andric return nullptr; 7580b57cec5SDimitry Andric 7590b57cec5SDimitry Andric bool CmpLess = Cmp & Comparison::L; 7600b57cec5SDimitry Andric bool CmpGreater = Cmp & Comparison::G; 7610b57cec5SDimitry Andric bool CmpHasEqual = Cmp & Comparison::EQ; 7620b57cec5SDimitry Andric 7630b57cec5SDimitry Andric // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds. 7640b57cec5SDimitry Andric if (CmpLess && IVBump < 0) 7650b57cec5SDimitry Andric // Loop going while iv is "less" with the iv value going down. Must wrap. 7660b57cec5SDimitry Andric return nullptr; 7670b57cec5SDimitry Andric 7680b57cec5SDimitry Andric if (CmpGreater && IVBump > 0) 7690b57cec5SDimitry Andric // Loop going while iv is "greater" with the iv value going up. Must wrap. 7700b57cec5SDimitry Andric return nullptr; 7710b57cec5SDimitry Andric 7720b57cec5SDimitry Andric // Phis that may feed into the loop. 7730b57cec5SDimitry Andric LoopFeederMap LoopFeederPhi; 7740b57cec5SDimitry Andric 7750b57cec5SDimitry Andric // Check if the initial value may be zero and can be decremented in the first 7760b57cec5SDimitry Andric // iteration. If the value is zero, the endloop instruction will not decrement 7770b57cec5SDimitry Andric // the loop counter, so we shouldn't generate a hardware loop in this case. 7780b57cec5SDimitry Andric if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop, 7790b57cec5SDimitry Andric LoopFeederPhi)) 7800b57cec5SDimitry Andric return nullptr; 7810b57cec5SDimitry Andric 7820b57cec5SDimitry Andric if (Start->isImm() && End->isImm()) { 7830b57cec5SDimitry Andric // Both, start and end are immediates. 7840b57cec5SDimitry Andric int64_t StartV = Start->getImm(); 7850b57cec5SDimitry Andric int64_t EndV = End->getImm(); 7860b57cec5SDimitry Andric int64_t Dist = EndV - StartV; 7870b57cec5SDimitry Andric if (Dist == 0) 7880b57cec5SDimitry Andric return nullptr; 7890b57cec5SDimitry Andric 7900b57cec5SDimitry Andric bool Exact = (Dist % IVBump) == 0; 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric if (Cmp == Comparison::NE) { 7930b57cec5SDimitry Andric if (!Exact) 7940b57cec5SDimitry Andric return nullptr; 7950b57cec5SDimitry Andric if ((Dist < 0) ^ (IVBump < 0)) 7960b57cec5SDimitry Andric return nullptr; 7970b57cec5SDimitry Andric } 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric // For comparisons that include the final value (i.e. include equality 8000b57cec5SDimitry Andric // with the final value), we need to increase the distance by 1. 8010b57cec5SDimitry Andric if (CmpHasEqual) 8020b57cec5SDimitry Andric Dist = Dist > 0 ? Dist+1 : Dist-1; 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric // For the loop to iterate, CmpLess should imply Dist > 0. Similarly, 8050b57cec5SDimitry Andric // CmpGreater should imply Dist < 0. These conditions could actually 8060b57cec5SDimitry Andric // fail, for example, in unreachable code (which may still appear to be 8070b57cec5SDimitry Andric // reachable in the CFG). 8080b57cec5SDimitry Andric if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0)) 8090b57cec5SDimitry Andric return nullptr; 8100b57cec5SDimitry Andric 8110b57cec5SDimitry Andric // "Normalized" distance, i.e. with the bump set to +-1. 8120b57cec5SDimitry Andric int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump 8130b57cec5SDimitry Andric : (-Dist + (-IVBump - 1)) / (-IVBump); 8140b57cec5SDimitry Andric assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign."); 8150b57cec5SDimitry Andric 8160b57cec5SDimitry Andric uint64_t Count = Dist1; 8170b57cec5SDimitry Andric 8180b57cec5SDimitry Andric if (Count > 0xFFFFFFFFULL) 8190b57cec5SDimitry Andric return nullptr; 8200b57cec5SDimitry Andric 8210b57cec5SDimitry Andric return new CountValue(CountValue::CV_Immediate, Count); 8220b57cec5SDimitry Andric } 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric // A general case: Start and End are some values, but the actual 8250b57cec5SDimitry Andric // iteration count may not be available. If it is not, insert 8260b57cec5SDimitry Andric // a computation of it into the preheader. 8270b57cec5SDimitry Andric 8280b57cec5SDimitry Andric // If the induction variable bump is not a power of 2, quit. 8290b57cec5SDimitry Andric // Othwerise we'd need a general integer division. 8300b57cec5SDimitry Andric if (!isPowerOf2_64(std::abs(IVBump))) 8310b57cec5SDimitry Andric return nullptr; 8320b57cec5SDimitry Andric 8330b57cec5SDimitry Andric MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader); 8340b57cec5SDimitry Andric assert (PH && "Should have a preheader by now"); 8350b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator(); 8360b57cec5SDimitry Andric DebugLoc DL; 8370b57cec5SDimitry Andric if (InsertPos != PH->end()) 8380b57cec5SDimitry Andric DL = InsertPos->getDebugLoc(); 8390b57cec5SDimitry Andric 8400b57cec5SDimitry Andric // If Start is an immediate and End is a register, the trip count 8410b57cec5SDimitry Andric // will be "reg - imm". Hexagon's "subtract immediate" instruction 8420b57cec5SDimitry Andric // is actually "reg + -imm". 8430b57cec5SDimitry Andric 8440b57cec5SDimitry Andric // If the loop IV is going downwards, i.e. if the bump is negative, 8450b57cec5SDimitry Andric // then the iteration count (computed as End-Start) will need to be 8460b57cec5SDimitry Andric // negated. To avoid the negation, just swap Start and End. 8470b57cec5SDimitry Andric if (IVBump < 0) { 8480b57cec5SDimitry Andric std::swap(Start, End); 8490b57cec5SDimitry Andric IVBump = -IVBump; 8500b57cec5SDimitry Andric } 8510b57cec5SDimitry Andric // Cmp may now have a wrong direction, e.g. LEs may now be GEs. 8520b57cec5SDimitry Andric // Signedness, and "including equality" are preserved. 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm) 8550b57cec5SDimitry Andric bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg) 8560b57cec5SDimitry Andric 8570b57cec5SDimitry Andric int64_t StartV = 0, EndV = 0; 8580b57cec5SDimitry Andric if (Start->isImm()) 8590b57cec5SDimitry Andric StartV = Start->getImm(); 8600b57cec5SDimitry Andric if (End->isImm()) 8610b57cec5SDimitry Andric EndV = End->getImm(); 8620b57cec5SDimitry Andric 8630b57cec5SDimitry Andric int64_t AdjV = 0; 8640b57cec5SDimitry Andric // To compute the iteration count, we would need this computation: 8650b57cec5SDimitry Andric // Count = (End - Start + (IVBump-1)) / IVBump 8660b57cec5SDimitry Andric // or, when CmpHasEqual: 8670b57cec5SDimitry Andric // Count = (End - Start + (IVBump-1)+1) / IVBump 8680b57cec5SDimitry Andric // The "IVBump-1" part is the adjustment (AdjV). We can avoid 8690b57cec5SDimitry Andric // generating an instruction specifically to add it if we can adjust 8700b57cec5SDimitry Andric // the immediate values for Start or End. 8710b57cec5SDimitry Andric 8720b57cec5SDimitry Andric if (CmpHasEqual) { 8730b57cec5SDimitry Andric // Need to add 1 to the total iteration count. 8740b57cec5SDimitry Andric if (Start->isImm()) 8750b57cec5SDimitry Andric StartV--; 8760b57cec5SDimitry Andric else if (End->isImm()) 8770b57cec5SDimitry Andric EndV++; 8780b57cec5SDimitry Andric else 8790b57cec5SDimitry Andric AdjV += 1; 8800b57cec5SDimitry Andric } 8810b57cec5SDimitry Andric 8820b57cec5SDimitry Andric if (Cmp != Comparison::NE) { 8830b57cec5SDimitry Andric if (Start->isImm()) 8840b57cec5SDimitry Andric StartV -= (IVBump-1); 8850b57cec5SDimitry Andric else if (End->isImm()) 8860b57cec5SDimitry Andric EndV += (IVBump-1); 8870b57cec5SDimitry Andric else 8880b57cec5SDimitry Andric AdjV += (IVBump-1); 8890b57cec5SDimitry Andric } 8900b57cec5SDimitry Andric 891bdd1243dSDimitry Andric Register R = 0; 892bdd1243dSDimitry Andric unsigned SR = 0; 8930b57cec5SDimitry Andric if (Start->isReg()) { 8940b57cec5SDimitry Andric R = Start->getReg(); 8950b57cec5SDimitry Andric SR = Start->getSubReg(); 8960b57cec5SDimitry Andric } else { 8970b57cec5SDimitry Andric R = End->getReg(); 8980b57cec5SDimitry Andric SR = End->getSubReg(); 8990b57cec5SDimitry Andric } 9000b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(R); 9010b57cec5SDimitry Andric // Hardware loops cannot handle 64-bit registers. If it's a double 9020b57cec5SDimitry Andric // register, it has to have a subregister. 9030b57cec5SDimitry Andric if (!SR && RC == &Hexagon::DoubleRegsRegClass) 9040b57cec5SDimitry Andric return nullptr; 9050b57cec5SDimitry Andric const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; 9060b57cec5SDimitry Andric 9070b57cec5SDimitry Andric // Compute DistR (register with the distance between Start and End). 908bdd1243dSDimitry Andric Register DistR; 909bdd1243dSDimitry Andric unsigned DistSR; 9100b57cec5SDimitry Andric 9110b57cec5SDimitry Andric // Avoid special case, where the start value is an imm(0). 9120b57cec5SDimitry Andric if (Start->isImm() && StartV == 0) { 9130b57cec5SDimitry Andric DistR = End->getReg(); 9140b57cec5SDimitry Andric DistSR = End->getSubReg(); 9150b57cec5SDimitry Andric } else { 9160b57cec5SDimitry Andric const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) : 9170b57cec5SDimitry Andric (RegToImm ? TII->get(Hexagon::A2_subri) : 9180b57cec5SDimitry Andric TII->get(Hexagon::A2_addi)); 9190b57cec5SDimitry Andric if (RegToReg || RegToImm) { 9208bcb0991SDimitry Andric Register SubR = MRI->createVirtualRegister(IntRC); 9210b57cec5SDimitry Andric MachineInstrBuilder SubIB = 9220b57cec5SDimitry Andric BuildMI(*PH, InsertPos, DL, SubD, SubR); 9230b57cec5SDimitry Andric 9240b57cec5SDimitry Andric if (RegToReg) 9250b57cec5SDimitry Andric SubIB.addReg(End->getReg(), 0, End->getSubReg()) 9260b57cec5SDimitry Andric .addReg(Start->getReg(), 0, Start->getSubReg()); 9270b57cec5SDimitry Andric else 9280b57cec5SDimitry Andric SubIB.addImm(EndV) 9290b57cec5SDimitry Andric .addReg(Start->getReg(), 0, Start->getSubReg()); 9300b57cec5SDimitry Andric DistR = SubR; 9310b57cec5SDimitry Andric } else { 9320b57cec5SDimitry Andric // If the loop has been unrolled, we should use the original loop count 9330b57cec5SDimitry Andric // instead of recalculating the value. This will avoid additional 9340b57cec5SDimitry Andric // 'Add' instruction. 9350b57cec5SDimitry Andric const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); 9360b57cec5SDimitry Andric if (EndValInstr->getOpcode() == Hexagon::A2_addi && 9370b57cec5SDimitry Andric EndValInstr->getOperand(1).getSubReg() == 0 && 9380b57cec5SDimitry Andric EndValInstr->getOperand(2).getImm() == StartV) { 9390b57cec5SDimitry Andric DistR = EndValInstr->getOperand(1).getReg(); 9400b57cec5SDimitry Andric } else { 9418bcb0991SDimitry Andric Register SubR = MRI->createVirtualRegister(IntRC); 9420b57cec5SDimitry Andric MachineInstrBuilder SubIB = 9430b57cec5SDimitry Andric BuildMI(*PH, InsertPos, DL, SubD, SubR); 9440b57cec5SDimitry Andric SubIB.addReg(End->getReg(), 0, End->getSubReg()) 9450b57cec5SDimitry Andric .addImm(-StartV); 9460b57cec5SDimitry Andric DistR = SubR; 9470b57cec5SDimitry Andric } 9480b57cec5SDimitry Andric } 9490b57cec5SDimitry Andric DistSR = 0; 9500b57cec5SDimitry Andric } 9510b57cec5SDimitry Andric 9520b57cec5SDimitry Andric // From DistR, compute AdjR (register with the adjusted distance). 953bdd1243dSDimitry Andric Register AdjR; 954bdd1243dSDimitry Andric unsigned AdjSR; 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric if (AdjV == 0) { 9570b57cec5SDimitry Andric AdjR = DistR; 9580b57cec5SDimitry Andric AdjSR = DistSR; 9590b57cec5SDimitry Andric } else { 9600b57cec5SDimitry Andric // Generate CountR = ADD DistR, AdjVal 9618bcb0991SDimitry Andric Register AddR = MRI->createVirtualRegister(IntRC); 9620b57cec5SDimitry Andric MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi); 9630b57cec5SDimitry Andric BuildMI(*PH, InsertPos, DL, AddD, AddR) 9640b57cec5SDimitry Andric .addReg(DistR, 0, DistSR) 9650b57cec5SDimitry Andric .addImm(AdjV); 9660b57cec5SDimitry Andric 9670b57cec5SDimitry Andric AdjR = AddR; 9680b57cec5SDimitry Andric AdjSR = 0; 9690b57cec5SDimitry Andric } 9700b57cec5SDimitry Andric 9710b57cec5SDimitry Andric // From AdjR, compute CountR (register with the final count). 972bdd1243dSDimitry Andric Register CountR; 973bdd1243dSDimitry Andric unsigned CountSR; 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric if (IVBump == 1) { 9760b57cec5SDimitry Andric CountR = AdjR; 9770b57cec5SDimitry Andric CountSR = AdjSR; 9780b57cec5SDimitry Andric } else { 9790b57cec5SDimitry Andric // The IV bump is a power of two. Log_2(IV bump) is the shift amount. 9800b57cec5SDimitry Andric unsigned Shift = Log2_32(IVBump); 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric // Generate NormR = LSR DistR, Shift. 9838bcb0991SDimitry Andric Register LsrR = MRI->createVirtualRegister(IntRC); 9840b57cec5SDimitry Andric const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r); 9850b57cec5SDimitry Andric BuildMI(*PH, InsertPos, DL, LsrD, LsrR) 9860b57cec5SDimitry Andric .addReg(AdjR, 0, AdjSR) 9870b57cec5SDimitry Andric .addImm(Shift); 9880b57cec5SDimitry Andric 9890b57cec5SDimitry Andric CountR = LsrR; 9900b57cec5SDimitry Andric CountSR = 0; 9910b57cec5SDimitry Andric } 9920b57cec5SDimitry Andric 9930b57cec5SDimitry Andric return new CountValue(CountValue::CV_Register, CountR, CountSR); 9940b57cec5SDimitry Andric } 9950b57cec5SDimitry Andric 9960b57cec5SDimitry Andric /// Return true if the operation is invalid within hardware loop. 9970b57cec5SDimitry Andric bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, 9980b57cec5SDimitry Andric bool IsInnerHWLoop) const { 9990b57cec5SDimitry Andric // Call is not allowed because the callee may use a hardware loop except for 10000b57cec5SDimitry Andric // the case when the call never returns. 10010b57cec5SDimitry Andric if (MI->getDesc().isCall()) 10020b57cec5SDimitry Andric return !TII->doesNotReturn(*MI); 10030b57cec5SDimitry Andric 10040b57cec5SDimitry Andric // Check if the instruction defines a hardware loop register. 10050b57cec5SDimitry Andric using namespace Hexagon; 10060b57cec5SDimitry Andric 1007bdd1243dSDimitry Andric static const Register Regs01[] = { LC0, SA0, LC1, SA1 }; 1008bdd1243dSDimitry Andric static const Register Regs1[] = { LC1, SA1 }; 1009*0fca6ea1SDimitry Andric auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01) : ArrayRef(Regs1); 1010bdd1243dSDimitry Andric for (Register R : CheckRegs) 10110b57cec5SDimitry Andric if (MI->modifiesRegister(R, TRI)) 10120b57cec5SDimitry Andric return true; 10130b57cec5SDimitry Andric 10140b57cec5SDimitry Andric return false; 10150b57cec5SDimitry Andric } 10160b57cec5SDimitry Andric 10170b57cec5SDimitry Andric /// Return true if the loop contains an instruction that inhibits 10180b57cec5SDimitry Andric /// the use of the hardware loop instruction. 10190b57cec5SDimitry Andric bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, 10200b57cec5SDimitry Andric bool IsInnerHWLoop) const { 10210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nhw_loop head, " 10220b57cec5SDimitry Andric << printMBBReference(**L->block_begin())); 10230b57cec5SDimitry Andric for (MachineBasicBlock *MBB : L->getBlocks()) { 10244824e7fdSDimitry Andric for (const MachineInstr &MI : *MBB) { 10254824e7fdSDimitry Andric if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) { 10260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:"; 10274824e7fdSDimitry Andric MI.dump();); 10280b57cec5SDimitry Andric return true; 10290b57cec5SDimitry Andric } 10300b57cec5SDimitry Andric } 10310b57cec5SDimitry Andric } 10320b57cec5SDimitry Andric return false; 10330b57cec5SDimitry Andric } 10340b57cec5SDimitry Andric 10350b57cec5SDimitry Andric /// Returns true if the instruction is dead. This was essentially 10360b57cec5SDimitry Andric /// copied from DeadMachineInstructionElim::isDead, but with special cases 10370b57cec5SDimitry Andric /// for inline asm, physical registers and instructions with side effects 10380b57cec5SDimitry Andric /// removed. 10390b57cec5SDimitry Andric bool HexagonHardwareLoops::isDead(const MachineInstr *MI, 10400b57cec5SDimitry Andric SmallVectorImpl<MachineInstr *> &DeadPhis) const { 10410b57cec5SDimitry Andric // Examine each operand. 10424824e7fdSDimitry Andric for (const MachineOperand &MO : MI->operands()) { 10430b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef()) 10440b57cec5SDimitry Andric continue; 10450b57cec5SDimitry Andric 10468bcb0991SDimitry Andric Register Reg = MO.getReg(); 10470b57cec5SDimitry Andric if (MRI->use_nodbg_empty(Reg)) 10480b57cec5SDimitry Andric continue; 10490b57cec5SDimitry Andric 10500b57cec5SDimitry Andric using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator; 10510b57cec5SDimitry Andric 10520b57cec5SDimitry Andric // This instruction has users, but if the only user is the phi node for the 10530b57cec5SDimitry Andric // parent block, and the only use of that phi node is this instruction, then 10540b57cec5SDimitry Andric // this instruction is dead: both it (and the phi node) can be removed. 10550b57cec5SDimitry Andric use_nodbg_iterator I = MRI->use_nodbg_begin(Reg); 10560b57cec5SDimitry Andric use_nodbg_iterator End = MRI->use_nodbg_end(); 10570b57cec5SDimitry Andric if (std::next(I) != End || !I->getParent()->isPHI()) 10580b57cec5SDimitry Andric return false; 10590b57cec5SDimitry Andric 10600b57cec5SDimitry Andric MachineInstr *OnePhi = I->getParent(); 106106c3fb27SDimitry Andric for (const MachineOperand &OPO : OnePhi->operands()) { 10620b57cec5SDimitry Andric if (!OPO.isReg() || !OPO.isDef()) 10630b57cec5SDimitry Andric continue; 10640b57cec5SDimitry Andric 10658bcb0991SDimitry Andric Register OPReg = OPO.getReg(); 10660b57cec5SDimitry Andric use_nodbg_iterator nextJ; 10670b57cec5SDimitry Andric for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg); 10680b57cec5SDimitry Andric J != End; J = nextJ) { 10690b57cec5SDimitry Andric nextJ = std::next(J); 10700b57cec5SDimitry Andric MachineOperand &Use = *J; 10710b57cec5SDimitry Andric MachineInstr *UseMI = Use.getParent(); 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric // If the phi node has a user that is not MI, bail. 10740b57cec5SDimitry Andric if (MI != UseMI) 10750b57cec5SDimitry Andric return false; 10760b57cec5SDimitry Andric } 10770b57cec5SDimitry Andric } 10780b57cec5SDimitry Andric DeadPhis.push_back(OnePhi); 10790b57cec5SDimitry Andric } 10800b57cec5SDimitry Andric 10810b57cec5SDimitry Andric // If there are no defs with uses, the instruction is dead. 10820b57cec5SDimitry Andric return true; 10830b57cec5SDimitry Andric } 10840b57cec5SDimitry Andric 10850b57cec5SDimitry Andric void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { 10860b57cec5SDimitry Andric // This procedure was essentially copied from DeadMachineInstructionElim. 10870b57cec5SDimitry Andric 10880b57cec5SDimitry Andric SmallVector<MachineInstr*, 1> DeadPhis; 10890b57cec5SDimitry Andric if (isDead(MI, DeadPhis)) { 10900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI); 10910b57cec5SDimitry Andric 10920b57cec5SDimitry Andric // It is possible that some DBG_VALUE instructions refer to this 10930b57cec5SDimitry Andric // instruction. Examine each def operand for such references; 10940b57cec5SDimitry Andric // if found, mark the DBG_VALUE as undef (but don't delete it). 10954824e7fdSDimitry Andric for (const MachineOperand &MO : MI->operands()) { 10960b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef()) 10970b57cec5SDimitry Andric continue; 10988bcb0991SDimitry Andric Register Reg = MO.getReg(); 1099349cc55cSDimitry Andric // We use make_early_inc_range here because setReg below invalidates the 1100349cc55cSDimitry Andric // iterator. 1101349cc55cSDimitry Andric for (MachineOperand &MO : 1102349cc55cSDimitry Andric llvm::make_early_inc_range(MRI->use_operands(Reg))) { 1103349cc55cSDimitry Andric MachineInstr *UseMI = MO.getParent(); 11040b57cec5SDimitry Andric if (UseMI == MI) 11050b57cec5SDimitry Andric continue; 1106349cc55cSDimitry Andric if (MO.isDebug()) 1107349cc55cSDimitry Andric MO.setReg(0U); 11080b57cec5SDimitry Andric } 11090b57cec5SDimitry Andric } 11100b57cec5SDimitry Andric 11110b57cec5SDimitry Andric MI->eraseFromParent(); 11120b57cec5SDimitry Andric for (unsigned i = 0; i < DeadPhis.size(); ++i) 11130b57cec5SDimitry Andric DeadPhis[i]->eraseFromParent(); 11140b57cec5SDimitry Andric } 11150b57cec5SDimitry Andric } 11160b57cec5SDimitry Andric 11170b57cec5SDimitry Andric /// Check if the loop is a candidate for converting to a hardware 11180b57cec5SDimitry Andric /// loop. If so, then perform the transformation. 11190b57cec5SDimitry Andric /// 11200b57cec5SDimitry Andric /// This function works on innermost loops first. A loop can be converted 11210b57cec5SDimitry Andric /// if it is a counting loop; either a register value or an immediate. 11220b57cec5SDimitry Andric /// 11230b57cec5SDimitry Andric /// The code makes several assumptions about the representation of the loop 11240b57cec5SDimitry Andric /// in llvm. 11250b57cec5SDimitry Andric bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, 11260b57cec5SDimitry Andric bool &RecL0used, 11270b57cec5SDimitry Andric bool &RecL1used) { 11284824e7fdSDimitry Andric // This is just to confirm basic correctness. 11290b57cec5SDimitry Andric assert(L->getHeader() && "Loop without a header?"); 11300b57cec5SDimitry Andric 11310b57cec5SDimitry Andric bool Changed = false; 11320b57cec5SDimitry Andric bool L0Used = false; 11330b57cec5SDimitry Andric bool L1Used = false; 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric // Process nested loops first. 113604eeddc0SDimitry Andric for (MachineLoop *I : *L) { 113704eeddc0SDimitry Andric Changed |= convertToHardwareLoop(I, RecL0used, RecL1used); 11380b57cec5SDimitry Andric L0Used |= RecL0used; 11390b57cec5SDimitry Andric L1Used |= RecL1used; 11400b57cec5SDimitry Andric } 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric // If a nested loop has been converted, then we can't convert this loop. 11430b57cec5SDimitry Andric if (Changed && L0Used && L1Used) 11440b57cec5SDimitry Andric return Changed; 11450b57cec5SDimitry Andric 11460b57cec5SDimitry Andric unsigned LOOP_i; 11470b57cec5SDimitry Andric unsigned LOOP_r; 11480b57cec5SDimitry Andric unsigned ENDLOOP; 11490b57cec5SDimitry Andric 11500b57cec5SDimitry Andric // Flag used to track loopN instruction: 11510b57cec5SDimitry Andric // 1 - Hardware loop is being generated for the inner most loop. 11520b57cec5SDimitry Andric // 0 - Hardware loop is being generated for the outer loop. 11530b57cec5SDimitry Andric unsigned IsInnerHWLoop = 1; 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric if (L0Used) { 11560b57cec5SDimitry Andric LOOP_i = Hexagon::J2_loop1i; 11570b57cec5SDimitry Andric LOOP_r = Hexagon::J2_loop1r; 11580b57cec5SDimitry Andric ENDLOOP = Hexagon::ENDLOOP1; 11590b57cec5SDimitry Andric IsInnerHWLoop = 0; 11600b57cec5SDimitry Andric } else { 11610b57cec5SDimitry Andric LOOP_i = Hexagon::J2_loop0i; 11620b57cec5SDimitry Andric LOOP_r = Hexagon::J2_loop0r; 11630b57cec5SDimitry Andric ENDLOOP = Hexagon::ENDLOOP0; 11640b57cec5SDimitry Andric } 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric #ifndef NDEBUG 11670b57cec5SDimitry Andric // Stop trying after reaching the limit (if any). 11680b57cec5SDimitry Andric int Limit = HWLoopLimit; 11690b57cec5SDimitry Andric if (Limit >= 0) { 11700b57cec5SDimitry Andric if (Counter >= HWLoopLimit) 11710b57cec5SDimitry Andric return false; 11720b57cec5SDimitry Andric Counter++; 11730b57cec5SDimitry Andric } 11740b57cec5SDimitry Andric #endif 11750b57cec5SDimitry Andric 11760b57cec5SDimitry Andric // Does the loop contain any invalid instructions? 11770b57cec5SDimitry Andric if (containsInvalidInstruction(L, IsInnerHWLoop)) 11780b57cec5SDimitry Andric return false; 11790b57cec5SDimitry Andric 11800b57cec5SDimitry Andric MachineBasicBlock *LastMBB = L->findLoopControlBlock(); 11810b57cec5SDimitry Andric // Don't generate hw loop if the loop has more than one exit. 11820b57cec5SDimitry Andric if (!LastMBB) 11830b57cec5SDimitry Andric return false; 11840b57cec5SDimitry Andric 11850b57cec5SDimitry Andric MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); 11860b57cec5SDimitry Andric if (LastI == LastMBB->end()) 11870b57cec5SDimitry Andric return false; 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andric // Is the induction variable bump feeding the latch condition? 11900b57cec5SDimitry Andric if (!fixupInductionVariable(L)) 11910b57cec5SDimitry Andric return false; 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric // Ensure the loop has a preheader: the loop instruction will be 11940b57cec5SDimitry Andric // placed there. 11950b57cec5SDimitry Andric MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader); 11960b57cec5SDimitry Andric if (!Preheader) { 11970b57cec5SDimitry Andric Preheader = createPreheaderForLoop(L); 11980b57cec5SDimitry Andric if (!Preheader) 11990b57cec5SDimitry Andric return false; 12000b57cec5SDimitry Andric } 12010b57cec5SDimitry Andric 12020b57cec5SDimitry Andric MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator(); 12030b57cec5SDimitry Andric 12040b57cec5SDimitry Andric SmallVector<MachineInstr*, 2> OldInsts; 12050b57cec5SDimitry Andric // Are we able to determine the trip count for the loop? 12060b57cec5SDimitry Andric CountValue *TripCount = getLoopTripCount(L, OldInsts); 12070b57cec5SDimitry Andric if (!TripCount) 12080b57cec5SDimitry Andric return false; 12090b57cec5SDimitry Andric 12100b57cec5SDimitry Andric // Is the trip count available in the preheader? 12110b57cec5SDimitry Andric if (TripCount->isReg()) { 12120b57cec5SDimitry Andric // There will be a use of the register inserted into the preheader, 12130b57cec5SDimitry Andric // so make sure that the register is actually defined at that point. 12140b57cec5SDimitry Andric MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg()); 12150b57cec5SDimitry Andric MachineBasicBlock *BBDef = TCDef->getParent(); 12160b57cec5SDimitry Andric if (!MDT->dominates(BBDef, Preheader)) 12170b57cec5SDimitry Andric return false; 12180b57cec5SDimitry Andric } 12190b57cec5SDimitry Andric 12200b57cec5SDimitry Andric // Determine the loop start. 12210b57cec5SDimitry Andric MachineBasicBlock *TopBlock = L->getTopBlock(); 12220b57cec5SDimitry Andric MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 12230b57cec5SDimitry Andric MachineBasicBlock *LoopStart = nullptr; 12240b57cec5SDimitry Andric if (ExitingBlock != L->getLoopLatch()) { 12250b57cec5SDimitry Andric MachineBasicBlock *TB = nullptr, *FB = nullptr; 12260b57cec5SDimitry Andric SmallVector<MachineOperand, 2> Cond; 12270b57cec5SDimitry Andric 12280b57cec5SDimitry Andric if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false)) 12290b57cec5SDimitry Andric return false; 12300b57cec5SDimitry Andric 12310b57cec5SDimitry Andric if (L->contains(TB)) 12320b57cec5SDimitry Andric LoopStart = TB; 12330b57cec5SDimitry Andric else if (L->contains(FB)) 12340b57cec5SDimitry Andric LoopStart = FB; 12350b57cec5SDimitry Andric else 12360b57cec5SDimitry Andric return false; 12370b57cec5SDimitry Andric } 12380b57cec5SDimitry Andric else 12390b57cec5SDimitry Andric LoopStart = TopBlock; 12400b57cec5SDimitry Andric 12410b57cec5SDimitry Andric // Convert the loop to a hardware loop. 12420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); 12430b57cec5SDimitry Andric DebugLoc DL; 12440b57cec5SDimitry Andric if (InsertPos != Preheader->end()) 12450b57cec5SDimitry Andric DL = InsertPos->getDebugLoc(); 12460b57cec5SDimitry Andric 12470b57cec5SDimitry Andric if (TripCount->isReg()) { 12480b57cec5SDimitry Andric // Create a copy of the loop count register. 12498bcb0991SDimitry Andric Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 12500b57cec5SDimitry Andric BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg) 12510b57cec5SDimitry Andric .addReg(TripCount->getReg(), 0, TripCount->getSubReg()); 12520b57cec5SDimitry Andric // Add the Loop instruction to the beginning of the loop. 12530b57cec5SDimitry Andric BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart) 12540b57cec5SDimitry Andric .addReg(CountReg); 12550b57cec5SDimitry Andric } else { 12560b57cec5SDimitry Andric assert(TripCount->isImm() && "Expecting immediate value for trip count"); 12570b57cec5SDimitry Andric // Add the Loop immediate instruction to the beginning of the loop, 12580b57cec5SDimitry Andric // if the immediate fits in the instructions. Otherwise, we need to 12590b57cec5SDimitry Andric // create a new virtual register. 12600b57cec5SDimitry Andric int64_t CountImm = TripCount->getImm(); 12610b57cec5SDimitry Andric if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) { 12628bcb0991SDimitry Andric Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); 12630b57cec5SDimitry Andric BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg) 12640b57cec5SDimitry Andric .addImm(CountImm); 12650b57cec5SDimitry Andric BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)) 12660b57cec5SDimitry Andric .addMBB(LoopStart).addReg(CountReg); 12670b57cec5SDimitry Andric } else 12680b57cec5SDimitry Andric BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i)) 12690b57cec5SDimitry Andric .addMBB(LoopStart).addImm(CountImm); 12700b57cec5SDimitry Andric } 12710b57cec5SDimitry Andric 1272bdd1243dSDimitry Andric // Make sure the loop start always has a reference in the CFG. 1273bdd1243dSDimitry Andric LoopStart->setMachineBlockAddressTaken(); 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric // Replace the loop branch with an endloop instruction. 12760b57cec5SDimitry Andric DebugLoc LastIDL = LastI->getDebugLoc(); 12770b57cec5SDimitry Andric BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart); 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric // The loop ends with either: 12800b57cec5SDimitry Andric // - a conditional branch followed by an unconditional branch, or 12810b57cec5SDimitry Andric // - a conditional branch to the loop start. 12820b57cec5SDimitry Andric if (LastI->getOpcode() == Hexagon::J2_jumpt || 12830b57cec5SDimitry Andric LastI->getOpcode() == Hexagon::J2_jumpf) { 12840b57cec5SDimitry Andric // Delete one and change/add an uncond. branch to out of the loop. 12850b57cec5SDimitry Andric MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); 12860b57cec5SDimitry Andric LastI = LastMBB->erase(LastI); 12870b57cec5SDimitry Andric if (!L->contains(BranchTarget)) { 12880b57cec5SDimitry Andric if (LastI != LastMBB->end()) 12890b57cec5SDimitry Andric LastI = LastMBB->erase(LastI); 12900b57cec5SDimitry Andric SmallVector<MachineOperand, 0> Cond; 12910b57cec5SDimitry Andric TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); 12920b57cec5SDimitry Andric } 12930b57cec5SDimitry Andric } else { 12940b57cec5SDimitry Andric // Conditional branch to loop start; just delete it. 12950b57cec5SDimitry Andric LastMBB->erase(LastI); 12960b57cec5SDimitry Andric } 12970b57cec5SDimitry Andric delete TripCount; 12980b57cec5SDimitry Andric 12990b57cec5SDimitry Andric // The induction operation and the comparison may now be 13000b57cec5SDimitry Andric // unneeded. If these are unneeded, then remove them. 13010b57cec5SDimitry Andric for (unsigned i = 0; i < OldInsts.size(); ++i) 13020b57cec5SDimitry Andric removeIfDead(OldInsts[i]); 13030b57cec5SDimitry Andric 13040b57cec5SDimitry Andric ++NumHWLoops; 13050b57cec5SDimitry Andric 13060b57cec5SDimitry Andric // Set RecL1used and RecL0used only after hardware loop has been 13070b57cec5SDimitry Andric // successfully generated. Doing it earlier can cause wrong loop instruction 13080b57cec5SDimitry Andric // to be used. 13090b57cec5SDimitry Andric if (L0Used) // Loop0 was already used. So, the correct loop must be loop1. 13100b57cec5SDimitry Andric RecL1used = true; 13110b57cec5SDimitry Andric else 13120b57cec5SDimitry Andric RecL0used = true; 13130b57cec5SDimitry Andric 13140b57cec5SDimitry Andric return true; 13150b57cec5SDimitry Andric } 13160b57cec5SDimitry Andric 13170b57cec5SDimitry Andric bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI, 13180b57cec5SDimitry Andric MachineInstr *CmpI) { 13190b57cec5SDimitry Andric assert (BumpI != CmpI && "Bump and compare in the same instruction?"); 13200b57cec5SDimitry Andric 13210b57cec5SDimitry Andric MachineBasicBlock *BB = BumpI->getParent(); 13220b57cec5SDimitry Andric if (CmpI->getParent() != BB) 13230b57cec5SDimitry Andric return false; 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric using instr_iterator = MachineBasicBlock::instr_iterator; 13260b57cec5SDimitry Andric 13270b57cec5SDimitry Andric // Check if things are in order to begin with. 13280b57cec5SDimitry Andric for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I) 13290b57cec5SDimitry Andric if (&*I == CmpI) 13300b57cec5SDimitry Andric return true; 13310b57cec5SDimitry Andric 13320b57cec5SDimitry Andric // Out of order. 13338bcb0991SDimitry Andric Register PredR = CmpI->getOperand(0).getReg(); 13340b57cec5SDimitry Andric bool FoundBump = false; 13350b57cec5SDimitry Andric instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt); 13360b57cec5SDimitry Andric for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) { 13370b57cec5SDimitry Andric MachineInstr *In = &*I; 13380b57cec5SDimitry Andric for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) { 13390b57cec5SDimitry Andric MachineOperand &MO = In->getOperand(i); 13400b57cec5SDimitry Andric if (MO.isReg() && MO.isUse()) { 13410b57cec5SDimitry Andric if (MO.getReg() == PredR) // Found an intervening use of PredR. 13420b57cec5SDimitry Andric return false; 13430b57cec5SDimitry Andric } 13440b57cec5SDimitry Andric } 13450b57cec5SDimitry Andric 13460b57cec5SDimitry Andric if (In == BumpI) { 13470b57cec5SDimitry Andric BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator()); 13480b57cec5SDimitry Andric FoundBump = true; 13490b57cec5SDimitry Andric break; 13500b57cec5SDimitry Andric } 13510b57cec5SDimitry Andric } 13520b57cec5SDimitry Andric assert (FoundBump && "Cannot determine instruction order"); 13530b57cec5SDimitry Andric return FoundBump; 13540b57cec5SDimitry Andric } 13550b57cec5SDimitry Andric 13560b57cec5SDimitry Andric /// This function is required to break recursion. Visiting phis in a loop may 13570b57cec5SDimitry Andric /// result in recursion during compilation. We break the recursion by making 13580b57cec5SDimitry Andric /// sure that we visit a MachineOperand and its definition in a 13590b57cec5SDimitry Andric /// MachineInstruction only once. If we attempt to visit more than once, then 13600b57cec5SDimitry Andric /// there is recursion, and will return false. 13610b57cec5SDimitry Andric bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, 13620b57cec5SDimitry Andric MachineInstr *MI, 13630b57cec5SDimitry Andric const MachineOperand *MO, 13640b57cec5SDimitry Andric LoopFeederMap &LoopFeederPhi) const { 13650b57cec5SDimitry Andric if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) { 13660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nhw_loop head, " 13670b57cec5SDimitry Andric << printMBBReference(**L->block_begin())); 13680b57cec5SDimitry Andric // Ignore all BBs that form Loop. 1369fe6060f1SDimitry Andric if (llvm::is_contained(L->getBlocks(), A)) 13700b57cec5SDimitry Andric return false; 13710b57cec5SDimitry Andric MachineInstr *Def = MRI->getVRegDef(MO->getReg()); 13720b57cec5SDimitry Andric LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def)); 13730b57cec5SDimitry Andric return true; 13740b57cec5SDimitry Andric } else 13750b57cec5SDimitry Andric // Already visited node. 13760b57cec5SDimitry Andric return false; 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric /// Return true if a Phi may generate a value that can underflow. 13800b57cec5SDimitry Andric /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand. 13810b57cec5SDimitry Andric bool HexagonHardwareLoops::phiMayWrapOrUnderflow( 13820b57cec5SDimitry Andric MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB, 13830b57cec5SDimitry Andric MachineLoop *L, LoopFeederMap &LoopFeederPhi) const { 13840b57cec5SDimitry Andric assert(Phi->isPHI() && "Expecting a Phi."); 13850b57cec5SDimitry Andric // Walk through each Phi, and its used operands. Make sure that 13860b57cec5SDimitry Andric // if there is recursion in Phi, we won't generate hardware loops. 13870b57cec5SDimitry Andric for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2) 13880b57cec5SDimitry Andric if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi)) 13890b57cec5SDimitry Andric if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal, 13900b57cec5SDimitry Andric Phi->getParent(), L, LoopFeederPhi)) 13910b57cec5SDimitry Andric return true; 13920b57cec5SDimitry Andric return false; 13930b57cec5SDimitry Andric } 13940b57cec5SDimitry Andric 13950b57cec5SDimitry Andric /// Return true if the induction variable can underflow in the first iteration. 13960b57cec5SDimitry Andric /// An example, is an initial unsigned value that is 0 and is decrement in the 13970b57cec5SDimitry Andric /// first itertion of a do-while loop. In this case, we cannot generate a 13980b57cec5SDimitry Andric /// hardware loop because the endloop instruction does not decrement the loop 13990b57cec5SDimitry Andric /// counter if it is <= 1. We only need to perform this analysis if the 14000b57cec5SDimitry Andric /// initial value is a register. 14010b57cec5SDimitry Andric /// 14020b57cec5SDimitry Andric /// This function assumes the initial value may underfow unless proven 14030b57cec5SDimitry Andric /// otherwise. If the type is signed, then we don't care because signed 14040b57cec5SDimitry Andric /// underflow is undefined. We attempt to prove the initial value is not 14050b57cec5SDimitry Andric /// zero by perfoming a crude analysis of the loop counter. This function 14060b57cec5SDimitry Andric /// checks if the initial value is used in any comparison prior to the loop 14070b57cec5SDimitry Andric /// and, if so, assumes the comparison is a range check. This is inexact, 14080b57cec5SDimitry Andric /// but will catch the simple cases. 14090b57cec5SDimitry Andric bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow( 14100b57cec5SDimitry Andric const MachineOperand *InitVal, const MachineOperand *EndVal, 14110b57cec5SDimitry Andric MachineBasicBlock *MBB, MachineLoop *L, 14120b57cec5SDimitry Andric LoopFeederMap &LoopFeederPhi) const { 14130b57cec5SDimitry Andric // Only check register values since they are unknown. 14140b57cec5SDimitry Andric if (!InitVal->isReg()) 14150b57cec5SDimitry Andric return false; 14160b57cec5SDimitry Andric 14170b57cec5SDimitry Andric if (!EndVal->isImm()) 14180b57cec5SDimitry Andric return false; 14190b57cec5SDimitry Andric 14200b57cec5SDimitry Andric // A register value that is assigned an immediate is a known value, and it 14210b57cec5SDimitry Andric // won't underflow in the first iteration. 14220b57cec5SDimitry Andric int64_t Imm; 14230b57cec5SDimitry Andric if (checkForImmediate(*InitVal, Imm)) 14240b57cec5SDimitry Andric return (EndVal->getImm() == Imm); 14250b57cec5SDimitry Andric 14268bcb0991SDimitry Andric Register Reg = InitVal->getReg(); 14270b57cec5SDimitry Andric 14280b57cec5SDimitry Andric // We don't know the value of a physical register. 1429e8d8bef9SDimitry Andric if (!Reg.isVirtual()) 14300b57cec5SDimitry Andric return true; 14310b57cec5SDimitry Andric 14320b57cec5SDimitry Andric MachineInstr *Def = MRI->getVRegDef(Reg); 14330b57cec5SDimitry Andric if (!Def) 14340b57cec5SDimitry Andric return true; 14350b57cec5SDimitry Andric 14360b57cec5SDimitry Andric // If the initial value is a Phi or copy and the operands may not underflow, 14370b57cec5SDimitry Andric // then the definition cannot be underflow either. 14380b57cec5SDimitry Andric if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(), 14390b57cec5SDimitry Andric L, LoopFeederPhi)) 14400b57cec5SDimitry Andric return false; 14410b57cec5SDimitry Andric if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)), 14420b57cec5SDimitry Andric EndVal, Def->getParent(), 14430b57cec5SDimitry Andric L, LoopFeederPhi)) 14440b57cec5SDimitry Andric return false; 14450b57cec5SDimitry Andric 14460b57cec5SDimitry Andric // Iterate over the uses of the initial value. If the initial value is used 14470b57cec5SDimitry Andric // in a compare, then we assume this is a range check that ensures the loop 14480b57cec5SDimitry Andric // doesn't underflow. This is not an exact test and should be improved. 14490b57cec5SDimitry Andric for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg), 14500b57cec5SDimitry Andric E = MRI->use_instr_nodbg_end(); I != E; ++I) { 14510b57cec5SDimitry Andric MachineInstr *MI = &*I; 14525ffd83dbSDimitry Andric Register CmpReg1, CmpReg2; 1453349cc55cSDimitry Andric int64_t CmpMask = 0, CmpValue = 0; 14540b57cec5SDimitry Andric 14550b57cec5SDimitry Andric if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue)) 14560b57cec5SDimitry Andric continue; 14570b57cec5SDimitry Andric 14580b57cec5SDimitry Andric MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 14590b57cec5SDimitry Andric SmallVector<MachineOperand, 2> Cond; 14600b57cec5SDimitry Andric if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false)) 14610b57cec5SDimitry Andric continue; 14620b57cec5SDimitry Andric 14630b57cec5SDimitry Andric Comparison::Kind Cmp = 14640b57cec5SDimitry Andric getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0); 14650b57cec5SDimitry Andric if (Cmp == 0) 14660b57cec5SDimitry Andric continue; 14670b57cec5SDimitry Andric if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB)) 14680b57cec5SDimitry Andric Cmp = Comparison::getNegatedComparison(Cmp); 14690b57cec5SDimitry Andric if (CmpReg2 != 0 && CmpReg2 == Reg) 14700b57cec5SDimitry Andric Cmp = Comparison::getSwappedComparison(Cmp); 14710b57cec5SDimitry Andric 14720b57cec5SDimitry Andric // Signed underflow is undefined. 14730b57cec5SDimitry Andric if (Comparison::isSigned(Cmp)) 14740b57cec5SDimitry Andric return false; 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric // Check if there is a comparison of the initial value. If the initial value 14770b57cec5SDimitry Andric // is greater than or not equal to another value, then assume this is a 14780b57cec5SDimitry Andric // range check. 14790b57cec5SDimitry Andric if ((Cmp & Comparison::G) || Cmp == Comparison::NE) 14800b57cec5SDimitry Andric return false; 14810b57cec5SDimitry Andric } 14820b57cec5SDimitry Andric 14830b57cec5SDimitry Andric // OK - this is a hack that needs to be improved. We really need to analyze 14840b57cec5SDimitry Andric // the instructions performed on the initial value. This works on the simplest 14850b57cec5SDimitry Andric // cases only. 14860b57cec5SDimitry Andric if (!Def->isCopy() && !Def->isPHI()) 14870b57cec5SDimitry Andric return false; 14880b57cec5SDimitry Andric 14890b57cec5SDimitry Andric return true; 14900b57cec5SDimitry Andric } 14910b57cec5SDimitry Andric 14920b57cec5SDimitry Andric bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO, 14930b57cec5SDimitry Andric int64_t &Val) const { 14940b57cec5SDimitry Andric if (MO.isImm()) { 14950b57cec5SDimitry Andric Val = MO.getImm(); 14960b57cec5SDimitry Andric return true; 14970b57cec5SDimitry Andric } 14980b57cec5SDimitry Andric if (!MO.isReg()) 14990b57cec5SDimitry Andric return false; 15000b57cec5SDimitry Andric 15010b57cec5SDimitry Andric // MO is a register. Check whether it is defined as an immediate value, 15020b57cec5SDimitry Andric // and if so, get the value of it in TV. That value will then need to be 15030b57cec5SDimitry Andric // processed to handle potential subregisters in MO. 15040b57cec5SDimitry Andric int64_t TV; 15050b57cec5SDimitry Andric 15068bcb0991SDimitry Andric Register R = MO.getReg(); 1507e8d8bef9SDimitry Andric if (!R.isVirtual()) 15080b57cec5SDimitry Andric return false; 15090b57cec5SDimitry Andric MachineInstr *DI = MRI->getVRegDef(R); 15100b57cec5SDimitry Andric unsigned DOpc = DI->getOpcode(); 15110b57cec5SDimitry Andric switch (DOpc) { 15120b57cec5SDimitry Andric case TargetOpcode::COPY: 15130b57cec5SDimitry Andric case Hexagon::A2_tfrsi: 15140b57cec5SDimitry Andric case Hexagon::A2_tfrpi: 15150b57cec5SDimitry Andric case Hexagon::CONST32: 15160b57cec5SDimitry Andric case Hexagon::CONST64: 15170b57cec5SDimitry Andric // Call recursively to avoid an extra check whether operand(1) is 15180b57cec5SDimitry Andric // indeed an immediate (it could be a global address, for example), 15190b57cec5SDimitry Andric // plus we can handle COPY at the same time. 15200b57cec5SDimitry Andric if (!checkForImmediate(DI->getOperand(1), TV)) 15210b57cec5SDimitry Andric return false; 15220b57cec5SDimitry Andric break; 15230b57cec5SDimitry Andric case Hexagon::A2_combineii: 15240b57cec5SDimitry Andric case Hexagon::A4_combineir: 15250b57cec5SDimitry Andric case Hexagon::A4_combineii: 15260b57cec5SDimitry Andric case Hexagon::A4_combineri: 15270b57cec5SDimitry Andric case Hexagon::A2_combinew: { 15280b57cec5SDimitry Andric const MachineOperand &S1 = DI->getOperand(1); 15290b57cec5SDimitry Andric const MachineOperand &S2 = DI->getOperand(2); 15300b57cec5SDimitry Andric int64_t V1, V2; 15310b57cec5SDimitry Andric if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2)) 15320b57cec5SDimitry Andric return false; 15330b57cec5SDimitry Andric TV = V2 | (static_cast<uint64_t>(V1) << 32); 15340b57cec5SDimitry Andric break; 15350b57cec5SDimitry Andric } 15360b57cec5SDimitry Andric case TargetOpcode::REG_SEQUENCE: { 15370b57cec5SDimitry Andric const MachineOperand &S1 = DI->getOperand(1); 15380b57cec5SDimitry Andric const MachineOperand &S3 = DI->getOperand(3); 15390b57cec5SDimitry Andric int64_t V1, V3; 15400b57cec5SDimitry Andric if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3)) 15410b57cec5SDimitry Andric return false; 15420b57cec5SDimitry Andric unsigned Sub2 = DI->getOperand(2).getImm(); 15430b57cec5SDimitry Andric unsigned Sub4 = DI->getOperand(4).getImm(); 15440b57cec5SDimitry Andric if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi) 15450b57cec5SDimitry Andric TV = V1 | (V3 << 32); 15460b57cec5SDimitry Andric else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo) 15470b57cec5SDimitry Andric TV = V3 | (V1 << 32); 15480b57cec5SDimitry Andric else 15490b57cec5SDimitry Andric llvm_unreachable("Unexpected form of REG_SEQUENCE"); 15500b57cec5SDimitry Andric break; 15510b57cec5SDimitry Andric } 15520b57cec5SDimitry Andric 15530b57cec5SDimitry Andric default: 15540b57cec5SDimitry Andric return false; 15550b57cec5SDimitry Andric } 15560b57cec5SDimitry Andric 15570b57cec5SDimitry Andric // By now, we should have successfully obtained the immediate value defining 15580b57cec5SDimitry Andric // the register referenced in MO. Handle a potential use of a subregister. 15590b57cec5SDimitry Andric switch (MO.getSubReg()) { 15600b57cec5SDimitry Andric case Hexagon::isub_lo: 15610b57cec5SDimitry Andric Val = TV & 0xFFFFFFFFULL; 15620b57cec5SDimitry Andric break; 15630b57cec5SDimitry Andric case Hexagon::isub_hi: 15640b57cec5SDimitry Andric Val = (TV >> 32) & 0xFFFFFFFFULL; 15650b57cec5SDimitry Andric break; 15660b57cec5SDimitry Andric default: 15670b57cec5SDimitry Andric Val = TV; 15680b57cec5SDimitry Andric break; 15690b57cec5SDimitry Andric } 15700b57cec5SDimitry Andric return true; 15710b57cec5SDimitry Andric } 15720b57cec5SDimitry Andric 15730b57cec5SDimitry Andric void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) { 15740b57cec5SDimitry Andric if (MO.isImm()) { 15750b57cec5SDimitry Andric MO.setImm(Val); 15760b57cec5SDimitry Andric return; 15770b57cec5SDimitry Andric } 15780b57cec5SDimitry Andric 15790b57cec5SDimitry Andric assert(MO.isReg()); 15808bcb0991SDimitry Andric Register R = MO.getReg(); 15810b57cec5SDimitry Andric MachineInstr *DI = MRI->getVRegDef(R); 15820b57cec5SDimitry Andric 15830b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(R); 15848bcb0991SDimitry Andric Register NewR = MRI->createVirtualRegister(RC); 15850b57cec5SDimitry Andric MachineBasicBlock &B = *DI->getParent(); 15860b57cec5SDimitry Andric DebugLoc DL = DI->getDebugLoc(); 15870b57cec5SDimitry Andric BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val); 15880b57cec5SDimitry Andric MO.setReg(NewR); 15890b57cec5SDimitry Andric } 15900b57cec5SDimitry Andric 15910b57cec5SDimitry Andric bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { 15920b57cec5SDimitry Andric MachineBasicBlock *Header = L->getHeader(); 15930b57cec5SDimitry Andric MachineBasicBlock *Latch = L->getLoopLatch(); 15940b57cec5SDimitry Andric MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 15950b57cec5SDimitry Andric 15960b57cec5SDimitry Andric if (!(Header && Latch && ExitingBlock)) 15970b57cec5SDimitry Andric return false; 15980b57cec5SDimitry Andric 15990b57cec5SDimitry Andric // These data structures follow the same concept as the corresponding 16000b57cec5SDimitry Andric // ones in findInductionRegister (where some comments are). 1601bdd1243dSDimitry Andric using RegisterBump = std::pair<Register, int64_t>; 1602bdd1243dSDimitry Andric using RegisterInduction = std::pair<Register, RegisterBump>; 16030b57cec5SDimitry Andric using RegisterInductionSet = std::set<RegisterInduction>; 16040b57cec5SDimitry Andric 16050b57cec5SDimitry Andric // Register candidates for induction variables, with their associated bumps. 16060b57cec5SDimitry Andric RegisterInductionSet IndRegs; 16070b57cec5SDimitry Andric 16080b57cec5SDimitry Andric // Look for induction patterns: 16090b57cec5SDimitry Andric // %1 = PHI ..., [ latch, %2 ] 16100b57cec5SDimitry Andric // %2 = ADD %1, imm 16110b57cec5SDimitry Andric using instr_iterator = MachineBasicBlock::instr_iterator; 16120b57cec5SDimitry Andric 16130b57cec5SDimitry Andric for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 16140b57cec5SDimitry Andric I != E && I->isPHI(); ++I) { 16150b57cec5SDimitry Andric MachineInstr *Phi = &*I; 16160b57cec5SDimitry Andric 16170b57cec5SDimitry Andric // Have a PHI instruction. 16180b57cec5SDimitry Andric for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) { 16190b57cec5SDimitry Andric if (Phi->getOperand(i+1).getMBB() != Latch) 16200b57cec5SDimitry Andric continue; 16210b57cec5SDimitry Andric 16228bcb0991SDimitry Andric Register PhiReg = Phi->getOperand(i).getReg(); 16230b57cec5SDimitry Andric MachineInstr *DI = MRI->getVRegDef(PhiReg); 16240b57cec5SDimitry Andric 16250b57cec5SDimitry Andric if (DI->getDesc().isAdd()) { 16260b57cec5SDimitry Andric // If the register operand to the add/sub is the PHI we are looking 16270b57cec5SDimitry Andric // at, this meets the induction pattern. 16288bcb0991SDimitry Andric Register IndReg = DI->getOperand(1).getReg(); 16290b57cec5SDimitry Andric MachineOperand &Opnd2 = DI->getOperand(2); 16300b57cec5SDimitry Andric int64_t V; 16310b57cec5SDimitry Andric if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) { 16328bcb0991SDimitry Andric Register UpdReg = DI->getOperand(0).getReg(); 16330b57cec5SDimitry Andric IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V))); 16340b57cec5SDimitry Andric } 16350b57cec5SDimitry Andric } 16360b57cec5SDimitry Andric } // for (i) 16370b57cec5SDimitry Andric } // for (instr) 16380b57cec5SDimitry Andric 16390b57cec5SDimitry Andric if (IndRegs.empty()) 16400b57cec5SDimitry Andric return false; 16410b57cec5SDimitry Andric 16420b57cec5SDimitry Andric MachineBasicBlock *TB = nullptr, *FB = nullptr; 16430b57cec5SDimitry Andric SmallVector<MachineOperand,2> Cond; 16445ffd83dbSDimitry Andric // analyzeBranch returns true if it fails to analyze branch. 16450b57cec5SDimitry Andric bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false); 16460b57cec5SDimitry Andric if (NotAnalyzed || Cond.empty()) 16470b57cec5SDimitry Andric return false; 16480b57cec5SDimitry Andric 16490b57cec5SDimitry Andric if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) { 16500b57cec5SDimitry Andric MachineBasicBlock *LTB = nullptr, *LFB = nullptr; 16510b57cec5SDimitry Andric SmallVector<MachineOperand,2> LCond; 16520b57cec5SDimitry Andric bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false); 16530b57cec5SDimitry Andric if (NotAnalyzed) 16540b57cec5SDimitry Andric return false; 16550b57cec5SDimitry Andric 16560b57cec5SDimitry Andric // Since latch is not the exiting block, the latch branch should be an 16570b57cec5SDimitry Andric // unconditional branch to the loop header. 16580b57cec5SDimitry Andric if (TB == Latch) 16590b57cec5SDimitry Andric TB = (LTB == Header) ? LTB : LFB; 16600b57cec5SDimitry Andric else 16610b57cec5SDimitry Andric FB = (LTB == Header) ? LTB : LFB; 16620b57cec5SDimitry Andric } 16630b57cec5SDimitry Andric if (TB != Header) { 16640b57cec5SDimitry Andric if (FB != Header) { 16650b57cec5SDimitry Andric // The latch/exit block does not go back to the header. 16660b57cec5SDimitry Andric return false; 16670b57cec5SDimitry Andric } 16680b57cec5SDimitry Andric // FB is the header (i.e., uncond. jump to branch header) 16690b57cec5SDimitry Andric // In this case, the LoopBody -> TB should not be a back edge otherwise 16700b57cec5SDimitry Andric // it could result in an infinite loop after conversion to hw_loop. 16710b57cec5SDimitry Andric // This case can happen when the Latch has two jumps like this: 16720b57cec5SDimitry Andric // Jmp_c OuterLoopHeader <-- TB 16730b57cec5SDimitry Andric // Jmp InnerLoopHeader <-- FB 16740b57cec5SDimitry Andric if (MDT->dominates(TB, FB)) 16750b57cec5SDimitry Andric return false; 16760b57cec5SDimitry Andric } 16770b57cec5SDimitry Andric 16780b57cec5SDimitry Andric // Expecting a predicate register as a condition. It won't be a hardware 16790b57cec5SDimitry Andric // predicate register at this point yet, just a vreg. 16805ffd83dbSDimitry Andric // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0) 16810b57cec5SDimitry Andric // into Cond, followed by the predicate register. For non-negated branches 16820b57cec5SDimitry Andric // it's just the register. 16830b57cec5SDimitry Andric unsigned CSz = Cond.size(); 16840b57cec5SDimitry Andric if (CSz != 1 && CSz != 2) 16850b57cec5SDimitry Andric return false; 16860b57cec5SDimitry Andric 16870b57cec5SDimitry Andric if (!Cond[CSz-1].isReg()) 16880b57cec5SDimitry Andric return false; 16890b57cec5SDimitry Andric 16908bcb0991SDimitry Andric Register P = Cond[CSz - 1].getReg(); 16910b57cec5SDimitry Andric MachineInstr *PredDef = MRI->getVRegDef(P); 16920b57cec5SDimitry Andric 16930b57cec5SDimitry Andric if (!PredDef->isCompare()) 16940b57cec5SDimitry Andric return false; 16950b57cec5SDimitry Andric 1696bdd1243dSDimitry Andric SmallSet<Register,2> CmpRegs; 16970b57cec5SDimitry Andric MachineOperand *CmpImmOp = nullptr; 16980b57cec5SDimitry Andric 16990b57cec5SDimitry Andric // Go over all operands to the compare and look for immediate and register 17000b57cec5SDimitry Andric // operands. Assume that if the compare has a single register use and a 17010b57cec5SDimitry Andric // single immediate operand, then the register is being compared with the 17020b57cec5SDimitry Andric // immediate value. 170306c3fb27SDimitry Andric for (MachineOperand &MO : PredDef->operands()) { 17040b57cec5SDimitry Andric if (MO.isReg()) { 17050b57cec5SDimitry Andric // Skip all implicit references. In one case there was: 17060b57cec5SDimitry Andric // %140 = FCMPUGT32_rr %138, %139, implicit %usr 17070b57cec5SDimitry Andric if (MO.isImplicit()) 17080b57cec5SDimitry Andric continue; 17090b57cec5SDimitry Andric if (MO.isUse()) { 17100b57cec5SDimitry Andric if (!isImmediate(MO)) { 17110b57cec5SDimitry Andric CmpRegs.insert(MO.getReg()); 17120b57cec5SDimitry Andric continue; 17130b57cec5SDimitry Andric } 17140b57cec5SDimitry Andric // Consider the register to be the "immediate" operand. 17150b57cec5SDimitry Andric if (CmpImmOp) 17160b57cec5SDimitry Andric return false; 17170b57cec5SDimitry Andric CmpImmOp = &MO; 17180b57cec5SDimitry Andric } 17190b57cec5SDimitry Andric } else if (MO.isImm()) { 17200b57cec5SDimitry Andric if (CmpImmOp) // A second immediate argument? Confusing. Bail out. 17210b57cec5SDimitry Andric return false; 17220b57cec5SDimitry Andric CmpImmOp = &MO; 17230b57cec5SDimitry Andric } 17240b57cec5SDimitry Andric } 17250b57cec5SDimitry Andric 17260b57cec5SDimitry Andric if (CmpRegs.empty()) 17270b57cec5SDimitry Andric return false; 17280b57cec5SDimitry Andric 17290b57cec5SDimitry Andric // Check if the compared register follows the order we want. Fix if needed. 17300b57cec5SDimitry Andric for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end(); 17310b57cec5SDimitry Andric I != E; ++I) { 17320b57cec5SDimitry Andric // This is a success. If the register used in the comparison is one that 17330b57cec5SDimitry Andric // we have identified as a bumped (updated) induction register, there is 17340b57cec5SDimitry Andric // nothing to do. 17350b57cec5SDimitry Andric if (CmpRegs.count(I->first)) 17360b57cec5SDimitry Andric return true; 17370b57cec5SDimitry Andric 17380b57cec5SDimitry Andric // Otherwise, if the register being compared comes out of a PHI node, 17390b57cec5SDimitry Andric // and has been recognized as following the induction pattern, and is 17400b57cec5SDimitry Andric // compared against an immediate, we can fix it. 17410b57cec5SDimitry Andric const RegisterBump &RB = I->second; 17420b57cec5SDimitry Andric if (CmpRegs.count(RB.first)) { 17430b57cec5SDimitry Andric if (!CmpImmOp) { 17440b57cec5SDimitry Andric // If both operands to the compare instruction are registers, see if 17450b57cec5SDimitry Andric // it can be changed to use induction register as one of the operands. 17460b57cec5SDimitry Andric MachineInstr *IndI = nullptr; 17470b57cec5SDimitry Andric MachineInstr *nonIndI = nullptr; 17480b57cec5SDimitry Andric MachineOperand *IndMO = nullptr; 17490b57cec5SDimitry Andric MachineOperand *nonIndMO = nullptr; 17500b57cec5SDimitry Andric 17510b57cec5SDimitry Andric for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { 17520b57cec5SDimitry Andric MachineOperand &MO = PredDef->getOperand(i); 17530b57cec5SDimitry Andric if (MO.isReg() && MO.getReg() == RB.first) { 17540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n DefMI(" << i 17550b57cec5SDimitry Andric << ") = " << *(MRI->getVRegDef(I->first))); 17560b57cec5SDimitry Andric if (IndI) 17570b57cec5SDimitry Andric return false; 17580b57cec5SDimitry Andric 17590b57cec5SDimitry Andric IndI = MRI->getVRegDef(I->first); 17600b57cec5SDimitry Andric IndMO = &MO; 17610b57cec5SDimitry Andric } else if (MO.isReg()) { 17620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n DefMI(" << i 17630b57cec5SDimitry Andric << ") = " << *(MRI->getVRegDef(MO.getReg()))); 17640b57cec5SDimitry Andric if (nonIndI) 17650b57cec5SDimitry Andric return false; 17660b57cec5SDimitry Andric 17670b57cec5SDimitry Andric nonIndI = MRI->getVRegDef(MO.getReg()); 17680b57cec5SDimitry Andric nonIndMO = &MO; 17690b57cec5SDimitry Andric } 17700b57cec5SDimitry Andric } 17710b57cec5SDimitry Andric if (IndI && nonIndI && 17720b57cec5SDimitry Andric nonIndI->getOpcode() == Hexagon::A2_addi && 17730b57cec5SDimitry Andric nonIndI->getOperand(2).isImm() && 17740b57cec5SDimitry Andric nonIndI->getOperand(2).getImm() == - RB.second) { 17750b57cec5SDimitry Andric bool Order = orderBumpCompare(IndI, PredDef); 17760b57cec5SDimitry Andric if (Order) { 17770b57cec5SDimitry Andric IndMO->setReg(I->first); 17780b57cec5SDimitry Andric nonIndMO->setReg(nonIndI->getOperand(1).getReg()); 17790b57cec5SDimitry Andric return true; 17800b57cec5SDimitry Andric } 17810b57cec5SDimitry Andric } 17820b57cec5SDimitry Andric return false; 17830b57cec5SDimitry Andric } 17840b57cec5SDimitry Andric 17850b57cec5SDimitry Andric // It is not valid to do this transformation on an unsigned comparison 17860b57cec5SDimitry Andric // because it may underflow. 17870b57cec5SDimitry Andric Comparison::Kind Cmp = 17880b57cec5SDimitry Andric getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0); 17890b57cec5SDimitry Andric if (!Cmp || Comparison::isUnsigned(Cmp)) 17900b57cec5SDimitry Andric return false; 17910b57cec5SDimitry Andric 17920b57cec5SDimitry Andric // If the register is being compared against an immediate, try changing 17930b57cec5SDimitry Andric // the compare instruction to use induction register and adjust the 17940b57cec5SDimitry Andric // immediate operand. 17950b57cec5SDimitry Andric int64_t CmpImm = getImmediate(*CmpImmOp); 17960b57cec5SDimitry Andric int64_t V = RB.second; 17970b57cec5SDimitry Andric // Handle Overflow (64-bit). 17980b57cec5SDimitry Andric if (((V > 0) && (CmpImm > INT64_MAX - V)) || 17990b57cec5SDimitry Andric ((V < 0) && (CmpImm < INT64_MIN - V))) 18000b57cec5SDimitry Andric return false; 18010b57cec5SDimitry Andric CmpImm += V; 18020b57cec5SDimitry Andric // Most comparisons of register against an immediate value allow 18030b57cec5SDimitry Andric // the immediate to be constant-extended. There are some exceptions 18040b57cec5SDimitry Andric // though. Make sure the new combination will work. 180504eeddc0SDimitry Andric if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) && 180604eeddc0SDimitry Andric !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false)) 18070b57cec5SDimitry Andric return false; 18080b57cec5SDimitry Andric 18090b57cec5SDimitry Andric // Make sure that the compare happens after the bump. Otherwise, 18100b57cec5SDimitry Andric // after the fixup, the compare would use a yet-undefined register. 18110b57cec5SDimitry Andric MachineInstr *BumpI = MRI->getVRegDef(I->first); 18120b57cec5SDimitry Andric bool Order = orderBumpCompare(BumpI, PredDef); 18130b57cec5SDimitry Andric if (!Order) 18140b57cec5SDimitry Andric return false; 18150b57cec5SDimitry Andric 18160b57cec5SDimitry Andric // Finally, fix the compare instruction. 18170b57cec5SDimitry Andric setImmediate(*CmpImmOp, CmpImm); 181806c3fb27SDimitry Andric for (MachineOperand &MO : PredDef->operands()) { 18190b57cec5SDimitry Andric if (MO.isReg() && MO.getReg() == RB.first) { 18200b57cec5SDimitry Andric MO.setReg(I->first); 18210b57cec5SDimitry Andric return true; 18220b57cec5SDimitry Andric } 18230b57cec5SDimitry Andric } 18240b57cec5SDimitry Andric } 18250b57cec5SDimitry Andric } 18260b57cec5SDimitry Andric 18270b57cec5SDimitry Andric return false; 18280b57cec5SDimitry Andric } 18290b57cec5SDimitry Andric 18300b57cec5SDimitry Andric /// createPreheaderForLoop - Create a preheader for a given loop. 18310b57cec5SDimitry Andric MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( 18320b57cec5SDimitry Andric MachineLoop *L) { 18330b57cec5SDimitry Andric if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader)) 18340b57cec5SDimitry Andric return TmpPH; 18350b57cec5SDimitry Andric if (!HWCreatePreheader) 18360b57cec5SDimitry Andric return nullptr; 18370b57cec5SDimitry Andric 18380b57cec5SDimitry Andric MachineBasicBlock *Header = L->getHeader(); 18390b57cec5SDimitry Andric MachineBasicBlock *Latch = L->getLoopLatch(); 18400b57cec5SDimitry Andric MachineBasicBlock *ExitingBlock = L->findLoopControlBlock(); 18410b57cec5SDimitry Andric MachineFunction *MF = Header->getParent(); 18420b57cec5SDimitry Andric DebugLoc DL; 18430b57cec5SDimitry Andric 18440b57cec5SDimitry Andric #ifndef NDEBUG 18450b57cec5SDimitry Andric if ((!PHFn.empty()) && (PHFn != MF->getName())) 18460b57cec5SDimitry Andric return nullptr; 18470b57cec5SDimitry Andric #endif 18480b57cec5SDimitry Andric 18490b57cec5SDimitry Andric if (!Latch || !ExitingBlock || Header->hasAddressTaken()) 18500b57cec5SDimitry Andric return nullptr; 18510b57cec5SDimitry Andric 18520b57cec5SDimitry Andric using instr_iterator = MachineBasicBlock::instr_iterator; 18530b57cec5SDimitry Andric 18540b57cec5SDimitry Andric // Verify that all existing predecessors have analyzable branches 18550b57cec5SDimitry Andric // (or no branches at all). 18560b57cec5SDimitry Andric using MBBVector = std::vector<MachineBasicBlock *>; 18570b57cec5SDimitry Andric 18580b57cec5SDimitry Andric MBBVector Preds(Header->pred_begin(), Header->pred_end()); 18590b57cec5SDimitry Andric SmallVector<MachineOperand,2> Tmp1; 18600b57cec5SDimitry Andric MachineBasicBlock *TB = nullptr, *FB = nullptr; 18610b57cec5SDimitry Andric 18620b57cec5SDimitry Andric if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) 18630b57cec5SDimitry Andric return nullptr; 18640b57cec5SDimitry Andric 18654824e7fdSDimitry Andric for (MachineBasicBlock *PB : Preds) { 18660b57cec5SDimitry Andric bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false); 18670b57cec5SDimitry Andric if (NotAnalyzed) 18680b57cec5SDimitry Andric return nullptr; 18690b57cec5SDimitry Andric } 18700b57cec5SDimitry Andric 18710b57cec5SDimitry Andric MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock(); 18720b57cec5SDimitry Andric MF->insert(Header->getIterator(), NewPH); 18730b57cec5SDimitry Andric 18740b57cec5SDimitry Andric if (Header->pred_size() > 2) { 18750b57cec5SDimitry Andric // Ensure that the header has only two predecessors: the preheader and 18760b57cec5SDimitry Andric // the loop latch. Any additional predecessors of the header should 18770b57cec5SDimitry Andric // join at the newly created preheader. Inspect all PHI nodes from the 18780b57cec5SDimitry Andric // header and create appropriate corresponding PHI nodes in the preheader. 18790b57cec5SDimitry Andric 18800b57cec5SDimitry Andric for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 18810b57cec5SDimitry Andric I != E && I->isPHI(); ++I) { 18820b57cec5SDimitry Andric MachineInstr *PN = &*I; 18830b57cec5SDimitry Andric 18840b57cec5SDimitry Andric const MCInstrDesc &PD = TII->get(TargetOpcode::PHI); 18850b57cec5SDimitry Andric MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL); 18860b57cec5SDimitry Andric NewPH->insert(NewPH->end(), NewPN); 18870b57cec5SDimitry Andric 18888bcb0991SDimitry Andric Register PR = PN->getOperand(0).getReg(); 18890b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(PR); 18908bcb0991SDimitry Andric Register NewPR = MRI->createVirtualRegister(RC); 18910b57cec5SDimitry Andric NewPN->addOperand(MachineOperand::CreateReg(NewPR, true)); 18920b57cec5SDimitry Andric 18930b57cec5SDimitry Andric // Copy all non-latch operands of a header's PHI node to the newly 18940b57cec5SDimitry Andric // created PHI node in the preheader. 18950b57cec5SDimitry Andric for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 18968bcb0991SDimitry Andric Register PredR = PN->getOperand(i).getReg(); 18970b57cec5SDimitry Andric unsigned PredRSub = PN->getOperand(i).getSubReg(); 18980b57cec5SDimitry Andric MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 18990b57cec5SDimitry Andric if (PredB == Latch) 19000b57cec5SDimitry Andric continue; 19010b57cec5SDimitry Andric 19020b57cec5SDimitry Andric MachineOperand MO = MachineOperand::CreateReg(PredR, false); 19030b57cec5SDimitry Andric MO.setSubReg(PredRSub); 19040b57cec5SDimitry Andric NewPN->addOperand(MO); 19050b57cec5SDimitry Andric NewPN->addOperand(MachineOperand::CreateMBB(PredB)); 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric 19080b57cec5SDimitry Andric // Remove copied operands from the old PHI node and add the value 19090b57cec5SDimitry Andric // coming from the preheader's PHI. 19100b57cec5SDimitry Andric for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { 19110b57cec5SDimitry Andric MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB(); 19120b57cec5SDimitry Andric if (PredB != Latch) { 191381ad6265SDimitry Andric PN->removeOperand(i+1); 191481ad6265SDimitry Andric PN->removeOperand(i); 19150b57cec5SDimitry Andric } 19160b57cec5SDimitry Andric } 19170b57cec5SDimitry Andric PN->addOperand(MachineOperand::CreateReg(NewPR, false)); 19180b57cec5SDimitry Andric PN->addOperand(MachineOperand::CreateMBB(NewPH)); 19190b57cec5SDimitry Andric } 19200b57cec5SDimitry Andric } else { 19210b57cec5SDimitry Andric assert(Header->pred_size() == 2); 19220b57cec5SDimitry Andric 19230b57cec5SDimitry Andric // The header has only two predecessors, but the non-latch predecessor 19240b57cec5SDimitry Andric // is not a preheader (e.g. it has other successors, etc.) 19250b57cec5SDimitry Andric // In such a case we don't need any extra PHI nodes in the new preheader, 19260b57cec5SDimitry Andric // all we need is to adjust existing PHIs in the header to now refer to 19270b57cec5SDimitry Andric // the new preheader. 19280b57cec5SDimitry Andric for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); 19290b57cec5SDimitry Andric I != E && I->isPHI(); ++I) { 19300b57cec5SDimitry Andric MachineInstr *PN = &*I; 19310b57cec5SDimitry Andric for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) { 19320b57cec5SDimitry Andric MachineOperand &MO = PN->getOperand(i+1); 19330b57cec5SDimitry Andric if (MO.getMBB() != Latch) 19340b57cec5SDimitry Andric MO.setMBB(NewPH); 19350b57cec5SDimitry Andric } 19360b57cec5SDimitry Andric } 19370b57cec5SDimitry Andric } 19380b57cec5SDimitry Andric 19390b57cec5SDimitry Andric // "Reroute" the CFG edges to link in the new preheader. 19400b57cec5SDimitry Andric // If any of the predecessors falls through to the header, insert a branch 19410b57cec5SDimitry Andric // to the new preheader in that place. 19420b57cec5SDimitry Andric SmallVector<MachineOperand,1> Tmp2; 19430b57cec5SDimitry Andric SmallVector<MachineOperand,1> EmptyCond; 19440b57cec5SDimitry Andric 19450b57cec5SDimitry Andric TB = FB = nullptr; 19460b57cec5SDimitry Andric 19474824e7fdSDimitry Andric for (MachineBasicBlock *PB : Preds) { 19480b57cec5SDimitry Andric if (PB != Latch) { 19490b57cec5SDimitry Andric Tmp2.clear(); 19500b57cec5SDimitry Andric bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false); 19510b57cec5SDimitry Andric (void)NotAnalyzed; // suppress compiler warning 19520b57cec5SDimitry Andric assert (!NotAnalyzed && "Should be analyzable!"); 19530b57cec5SDimitry Andric if (TB != Header && (Tmp2.empty() || FB != Header)) 19540b57cec5SDimitry Andric TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL); 19550b57cec5SDimitry Andric PB->ReplaceUsesOfBlockWith(Header, NewPH); 19560b57cec5SDimitry Andric } 19570b57cec5SDimitry Andric } 19580b57cec5SDimitry Andric 19590b57cec5SDimitry Andric // It can happen that the latch block will fall through into the header. 19600b57cec5SDimitry Andric // Insert an unconditional branch to the header. 19610b57cec5SDimitry Andric TB = FB = nullptr; 19620b57cec5SDimitry Andric bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false); 19630b57cec5SDimitry Andric (void)LatchNotAnalyzed; // suppress compiler warning 19640b57cec5SDimitry Andric assert (!LatchNotAnalyzed && "Should be analyzable!"); 19650b57cec5SDimitry Andric if (!TB && !FB) 19660b57cec5SDimitry Andric TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL); 19670b57cec5SDimitry Andric 19680b57cec5SDimitry Andric // Finally, the branch from the preheader to the header. 19690b57cec5SDimitry Andric TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL); 19700b57cec5SDimitry Andric NewPH->addSuccessor(Header); 19710b57cec5SDimitry Andric 19720b57cec5SDimitry Andric MachineLoop *ParentLoop = L->getParentLoop(); 19730b57cec5SDimitry Andric if (ParentLoop) 1974*0fca6ea1SDimitry Andric ParentLoop->addBasicBlockToLoop(NewPH, *MLI); 19750b57cec5SDimitry Andric 19760b57cec5SDimitry Andric // Update the dominator information with the new preheader. 19770b57cec5SDimitry Andric if (MDT) { 19780b57cec5SDimitry Andric if (MachineDomTreeNode *HN = MDT->getNode(Header)) { 19790b57cec5SDimitry Andric if (MachineDomTreeNode *DHN = HN->getIDom()) { 19800b57cec5SDimitry Andric MDT->addNewBlock(NewPH, DHN->getBlock()); 19810b57cec5SDimitry Andric MDT->changeImmediateDominator(Header, NewPH); 19820b57cec5SDimitry Andric } 19830b57cec5SDimitry Andric } 19840b57cec5SDimitry Andric } 19850b57cec5SDimitry Andric 19860b57cec5SDimitry Andric return NewPH; 19870b57cec5SDimitry Andric } 1988