xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This Pass handles loop interchange transform.
100b57cec5SDimitry Andric // This pass interchanges loops to provide a more cache-friendly memory access
110b57cec5SDimitry Andric // patterns.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
15e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/LoopInterchange.h"
160b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
170b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
180b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
190b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
2181ad6265SDimitry Andric #include "llvm/Analysis/LoopCacheAnalysis.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
23fe6060f1SDimitry Andric #include "llvm/Analysis/LoopNestAnalysis.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
270b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
280b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
290b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
300b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
310b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
320b57cec5SDimitry Andric #include "llvm/IR/Function.h"
330b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
340b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
350b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
360b57cec5SDimitry Andric #include "llvm/IR/User.h"
370b57cec5SDimitry Andric #include "llvm/IR/Value.h"
380b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
390b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
400b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
410b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
420b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
43bdd1243dSDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h"
440b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
450b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
460b57cec5SDimitry Andric #include <cassert>
470b57cec5SDimitry Andric #include <utility>
480b57cec5SDimitry Andric #include <vector>
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric using namespace llvm;
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric #define DEBUG_TYPE "loop-interchange"
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric STATISTIC(LoopsInterchanged, "Number of loops interchanged");
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric static cl::opt<int> LoopInterchangeCostThreshold(
570b57cec5SDimitry Andric     "loop-interchange-threshold", cl::init(0), cl::Hidden,
580b57cec5SDimitry Andric     cl::desc("Interchange if you gain more than this number"));
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric namespace {
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric using LoopVector = SmallVector<Loop *, 8>;
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric // TODO: Check if we can use a sparse matrix here.
650b57cec5SDimitry Andric using CharMatrix = std::vector<std::vector<char>>;
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric } // end anonymous namespace
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric // Maximum number of dependencies that can be handled in the dependency matrix.
700b57cec5SDimitry Andric static const unsigned MaxMemInstrCount = 100;
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric // Maximum loop depth supported.
730b57cec5SDimitry Andric static const unsigned MaxLoopNestDepth = 10;
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES
760b57cec5SDimitry Andric static void printDepMatrix(CharMatrix &DepMatrix) {
770b57cec5SDimitry Andric   for (auto &Row : DepMatrix) {
780b57cec5SDimitry Andric     for (auto D : Row)
790b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << D << " ");
800b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "\n");
810b57cec5SDimitry Andric   }
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric #endif
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
86bdd1243dSDimitry Andric                                      Loop *L, DependenceInfo *DI,
87bdd1243dSDimitry Andric                                      ScalarEvolution *SE) {
880b57cec5SDimitry Andric   using ValueVector = SmallVector<Value *, 16>;
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   ValueVector MemInstr;
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric   // For each block.
930b57cec5SDimitry Andric   for (BasicBlock *BB : L->blocks()) {
940b57cec5SDimitry Andric     // Scan the BB and collect legal loads and stores.
950b57cec5SDimitry Andric     for (Instruction &I : *BB) {
960b57cec5SDimitry Andric       if (!isa<Instruction>(I))
970b57cec5SDimitry Andric         return false;
980b57cec5SDimitry Andric       if (auto *Ld = dyn_cast<LoadInst>(&I)) {
990b57cec5SDimitry Andric         if (!Ld->isSimple())
1000b57cec5SDimitry Andric           return false;
1010b57cec5SDimitry Andric         MemInstr.push_back(&I);
1020b57cec5SDimitry Andric       } else if (auto *St = dyn_cast<StoreInst>(&I)) {
1030b57cec5SDimitry Andric         if (!St->isSimple())
1040b57cec5SDimitry Andric           return false;
1050b57cec5SDimitry Andric         MemInstr.push_back(&I);
1060b57cec5SDimitry Andric       }
1070b57cec5SDimitry Andric     }
1080b57cec5SDimitry Andric   }
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
1110b57cec5SDimitry Andric                     << " Loads and Stores to analyze\n");
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric   ValueVector::iterator I, IE, J, JE;
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
1160b57cec5SDimitry Andric     for (J = I, JE = MemInstr.end(); J != JE; ++J) {
1170b57cec5SDimitry Andric       std::vector<char> Dep;
1180b57cec5SDimitry Andric       Instruction *Src = cast<Instruction>(*I);
1190b57cec5SDimitry Andric       Instruction *Dst = cast<Instruction>(*J);
1200b57cec5SDimitry Andric       // Ignore Input dependencies.
1210b57cec5SDimitry Andric       if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
1220b57cec5SDimitry Andric         continue;
1230b57cec5SDimitry Andric       // Track Output, Flow, and Anti dependencies.
1240b57cec5SDimitry Andric       if (auto D = DI->depends(Src, Dst, true)) {
1250b57cec5SDimitry Andric         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
126bdd1243dSDimitry Andric         // If the direction vector is negative, normalize it to
127bdd1243dSDimitry Andric         // make it non-negative.
128bdd1243dSDimitry Andric         if (D->normalize(SE))
129bdd1243dSDimitry Andric           LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
1300b57cec5SDimitry Andric         LLVM_DEBUG(StringRef DepType =
1310b57cec5SDimitry Andric                        D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
1320b57cec5SDimitry Andric                    dbgs() << "Found " << DepType
1330b57cec5SDimitry Andric                           << " dependency between Src and Dst\n"
1340b57cec5SDimitry Andric                           << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
1350b57cec5SDimitry Andric         unsigned Levels = D->getLevels();
1360b57cec5SDimitry Andric         char Direction;
1370b57cec5SDimitry Andric         for (unsigned II = 1; II <= Levels; ++II) {
138bdd1243dSDimitry Andric           if (D->isScalar(II)) {
1390b57cec5SDimitry Andric             Direction = 'S';
1400b57cec5SDimitry Andric             Dep.push_back(Direction);
1410b57cec5SDimitry Andric           } else {
1420b57cec5SDimitry Andric             unsigned Dir = D->getDirection(II);
1430b57cec5SDimitry Andric             if (Dir == Dependence::DVEntry::LT ||
1440b57cec5SDimitry Andric                 Dir == Dependence::DVEntry::LE)
1450b57cec5SDimitry Andric               Direction = '<';
1460b57cec5SDimitry Andric             else if (Dir == Dependence::DVEntry::GT ||
1470b57cec5SDimitry Andric                      Dir == Dependence::DVEntry::GE)
1480b57cec5SDimitry Andric               Direction = '>';
1490b57cec5SDimitry Andric             else if (Dir == Dependence::DVEntry::EQ)
1500b57cec5SDimitry Andric               Direction = '=';
1510b57cec5SDimitry Andric             else
1520b57cec5SDimitry Andric               Direction = '*';
1530b57cec5SDimitry Andric             Dep.push_back(Direction);
1540b57cec5SDimitry Andric           }
1550b57cec5SDimitry Andric         }
1560b57cec5SDimitry Andric         while (Dep.size() != Level) {
1570b57cec5SDimitry Andric           Dep.push_back('I');
1580b57cec5SDimitry Andric         }
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric         DepMatrix.push_back(Dep);
1610b57cec5SDimitry Andric         if (DepMatrix.size() > MaxMemInstrCount) {
1620b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
1630b57cec5SDimitry Andric                             << " dependencies inside loop\n");
1640b57cec5SDimitry Andric           return false;
1650b57cec5SDimitry Andric         }
1660b57cec5SDimitry Andric       }
1670b57cec5SDimitry Andric     }
1680b57cec5SDimitry Andric   }
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric   return true;
1710b57cec5SDimitry Andric }
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric // A loop is moved from index 'from' to an index 'to'. Update the Dependence
1740b57cec5SDimitry Andric // matrix by exchanging the two columns.
1750b57cec5SDimitry Andric static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
1760b57cec5SDimitry Andric                                     unsigned ToIndx) {
177fe6060f1SDimitry Andric   for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
178fe6060f1SDimitry Andric     std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric 
181bdd1243dSDimitry Andric // After interchanging, check if the direction vector is valid.
1820b57cec5SDimitry Andric // [Theorem] A permutation of the loops in a perfect nest is legal if and only
1830b57cec5SDimitry Andric // if the direction matrix, after the same permutation is applied to its
1840b57cec5SDimitry Andric // columns, has no ">" direction as the leftmost non-"=" direction in any row.
185bdd1243dSDimitry Andric static bool isLexicographicallyPositive(std::vector<char> &DV) {
18606c3fb27SDimitry Andric   for (unsigned char Direction : DV) {
187bdd1243dSDimitry Andric     if (Direction == '<')
188bdd1243dSDimitry Andric       return true;
189bdd1243dSDimitry Andric     if (Direction == '>' || Direction == '*')
190bdd1243dSDimitry Andric       return false;
191bdd1243dSDimitry Andric   }
192bdd1243dSDimitry Andric   return true;
193bdd1243dSDimitry Andric }
194bdd1243dSDimitry Andric 
195bdd1243dSDimitry Andric // Checks if it is legal to interchange 2 loops.
1960b57cec5SDimitry Andric static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
1970b57cec5SDimitry Andric                                       unsigned InnerLoopId,
1980b57cec5SDimitry Andric                                       unsigned OuterLoopId) {
1990b57cec5SDimitry Andric   unsigned NumRows = DepMatrix.size();
200bdd1243dSDimitry Andric   std::vector<char> Cur;
2010b57cec5SDimitry Andric   // For each row check if it is valid to interchange.
2020b57cec5SDimitry Andric   for (unsigned Row = 0; Row < NumRows; ++Row) {
203bdd1243dSDimitry Andric     // Create temporary DepVector check its lexicographical order
204bdd1243dSDimitry Andric     // before and after swapping OuterLoop vs InnerLoop
205bdd1243dSDimitry Andric     Cur = DepMatrix[Row];
206bdd1243dSDimitry Andric     if (!isLexicographicallyPositive(Cur))
2070b57cec5SDimitry Andric       return false;
208bdd1243dSDimitry Andric     std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
209bdd1243dSDimitry Andric     if (!isLexicographicallyPositive(Cur))
2100b57cec5SDimitry Andric       return false;
2110b57cec5SDimitry Andric   }
2120b57cec5SDimitry Andric   return true;
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric 
21581ad6265SDimitry Andric static void populateWorklist(Loop &L, LoopVector &LoopList) {
2160b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
2170b57cec5SDimitry Andric                     << L.getHeader()->getParent()->getName() << " Loop: %"
2180b57cec5SDimitry Andric                     << L.getHeader()->getName() << '\n');
21981ad6265SDimitry Andric   assert(LoopList.empty() && "LoopList should initially be empty!");
2200b57cec5SDimitry Andric   Loop *CurrentLoop = &L;
2210b57cec5SDimitry Andric   const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
2220b57cec5SDimitry Andric   while (!Vec->empty()) {
2230b57cec5SDimitry Andric     // The current loop has multiple subloops in it hence it is not tightly
2240b57cec5SDimitry Andric     // nested.
2250b57cec5SDimitry Andric     // Discard all loops above it added into Worklist.
22681ad6265SDimitry Andric     if (Vec->size() != 1) {
22781ad6265SDimitry Andric       LoopList = {};
22881ad6265SDimitry Andric       return;
22981ad6265SDimitry Andric     }
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric     LoopList.push_back(CurrentLoop);
2320b57cec5SDimitry Andric     CurrentLoop = Vec->front();
2330b57cec5SDimitry Andric     Vec = &CurrentLoop->getSubLoops();
2340b57cec5SDimitry Andric   }
2350b57cec5SDimitry Andric   LoopList.push_back(CurrentLoop);
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric namespace {
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric /// LoopInterchangeLegality checks if it is legal to interchange the loop.
2410b57cec5SDimitry Andric class LoopInterchangeLegality {
2420b57cec5SDimitry Andric public:
2430b57cec5SDimitry Andric   LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
2440b57cec5SDimitry Andric                           OptimizationRemarkEmitter *ORE)
2450b57cec5SDimitry Andric       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric   /// Check if the loops can be interchanged.
2480b57cec5SDimitry Andric   bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
2490b57cec5SDimitry Andric                            CharMatrix &DepMatrix);
2500b57cec5SDimitry Andric 
25104eeddc0SDimitry Andric   /// Discover induction PHIs in the header of \p L. Induction
25204eeddc0SDimitry Andric   /// PHIs are added to \p Inductions.
25304eeddc0SDimitry Andric   bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
25404eeddc0SDimitry Andric 
2550b57cec5SDimitry Andric   /// Check if the loop structure is understood. We do not handle triangular
2560b57cec5SDimitry Andric   /// loops for now.
25704eeddc0SDimitry Andric   bool isLoopStructureUnderstood();
2580b57cec5SDimitry Andric 
2590b57cec5SDimitry Andric   bool currentLimitations();
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric   const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
2620b57cec5SDimitry Andric     return OuterInnerReductions;
2630b57cec5SDimitry Andric   }
2640b57cec5SDimitry Andric 
26504eeddc0SDimitry Andric   const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
26604eeddc0SDimitry Andric     return InnerLoopInductions;
26704eeddc0SDimitry Andric   }
26804eeddc0SDimitry Andric 
2690b57cec5SDimitry Andric private:
2700b57cec5SDimitry Andric   bool tightlyNested(Loop *Outer, Loop *Inner);
2710b57cec5SDimitry Andric   bool containsUnsafeInstructions(BasicBlock *BB);
2720b57cec5SDimitry Andric 
2730b57cec5SDimitry Andric   /// Discover induction and reduction PHIs in the header of \p L. Induction
2740b57cec5SDimitry Andric   /// PHIs are added to \p Inductions, reductions are added to
2750b57cec5SDimitry Andric   /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
2760b57cec5SDimitry Andric   /// to be passed as \p InnerLoop.
2770b57cec5SDimitry Andric   bool findInductionAndReductions(Loop *L,
2780b57cec5SDimitry Andric                                   SmallVector<PHINode *, 8> &Inductions,
2790b57cec5SDimitry Andric                                   Loop *InnerLoop);
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   Loop *OuterLoop;
2820b57cec5SDimitry Andric   Loop *InnerLoop;
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric   ScalarEvolution *SE;
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric   /// Interface to emit optimization remarks.
2870b57cec5SDimitry Andric   OptimizationRemarkEmitter *ORE;
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric   /// Set of reduction PHIs taking part of a reduction across the inner and
2900b57cec5SDimitry Andric   /// outer loop.
2910b57cec5SDimitry Andric   SmallPtrSet<PHINode *, 4> OuterInnerReductions;
29204eeddc0SDimitry Andric 
29304eeddc0SDimitry Andric   /// Set of inner loop induction PHIs
29404eeddc0SDimitry Andric   SmallVector<PHINode *, 8> InnerLoopInductions;
2950b57cec5SDimitry Andric };
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric /// LoopInterchangeProfitability checks if it is profitable to interchange the
2980b57cec5SDimitry Andric /// loop.
2990b57cec5SDimitry Andric class LoopInterchangeProfitability {
3000b57cec5SDimitry Andric public:
3010b57cec5SDimitry Andric   LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
3020b57cec5SDimitry Andric                                OptimizationRemarkEmitter *ORE)
3030b57cec5SDimitry Andric       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric   /// Check if the loop interchange is profitable.
30681ad6265SDimitry Andric   bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
30781ad6265SDimitry Andric                     unsigned InnerLoopId, unsigned OuterLoopId,
30881ad6265SDimitry Andric                     CharMatrix &DepMatrix,
309bdd1243dSDimitry Andric                     const DenseMap<const Loop *, unsigned> &CostMap,
310bdd1243dSDimitry Andric                     std::unique_ptr<CacheCost> &CC);
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric private:
3130b57cec5SDimitry Andric   int getInstrOrderCost();
314bdd1243dSDimitry Andric   std::optional<bool> isProfitablePerLoopCacheAnalysis(
315bdd1243dSDimitry Andric       const DenseMap<const Loop *, unsigned> &CostMap,
316bdd1243dSDimitry Andric       std::unique_ptr<CacheCost> &CC);
317bdd1243dSDimitry Andric   std::optional<bool> isProfitablePerInstrOrderCost();
318bdd1243dSDimitry Andric   std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
319bdd1243dSDimitry Andric                                                    unsigned OuterLoopId,
320bdd1243dSDimitry Andric                                                    CharMatrix &DepMatrix);
3210b57cec5SDimitry Andric   Loop *OuterLoop;
3220b57cec5SDimitry Andric   Loop *InnerLoop;
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric   /// Scev analysis.
3250b57cec5SDimitry Andric   ScalarEvolution *SE;
3260b57cec5SDimitry Andric 
3270b57cec5SDimitry Andric   /// Interface to emit optimization remarks.
3280b57cec5SDimitry Andric   OptimizationRemarkEmitter *ORE;
3290b57cec5SDimitry Andric };
3300b57cec5SDimitry Andric 
3310b57cec5SDimitry Andric /// LoopInterchangeTransform interchanges the loop.
3320b57cec5SDimitry Andric class LoopInterchangeTransform {
3330b57cec5SDimitry Andric public:
3340b57cec5SDimitry Andric   LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
3350b57cec5SDimitry Andric                            LoopInfo *LI, DominatorTree *DT,
3360b57cec5SDimitry Andric                            const LoopInterchangeLegality &LIL)
337fe6060f1SDimitry Andric       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   /// Interchange OuterLoop and InnerLoop.
3400b57cec5SDimitry Andric   bool transform();
3410b57cec5SDimitry Andric   void restructureLoops(Loop *NewInner, Loop *NewOuter,
3420b57cec5SDimitry Andric                         BasicBlock *OrigInnerPreHeader,
3430b57cec5SDimitry Andric                         BasicBlock *OrigOuterPreHeader);
3440b57cec5SDimitry Andric   void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric private:
3470b57cec5SDimitry Andric   bool adjustLoopLinks();
3480b57cec5SDimitry Andric   bool adjustLoopBranches();
3490b57cec5SDimitry Andric 
3500b57cec5SDimitry Andric   Loop *OuterLoop;
3510b57cec5SDimitry Andric   Loop *InnerLoop;
3520b57cec5SDimitry Andric 
3530b57cec5SDimitry Andric   /// Scev analysis.
3540b57cec5SDimitry Andric   ScalarEvolution *SE;
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric   LoopInfo *LI;
3570b57cec5SDimitry Andric   DominatorTree *DT;
3580b57cec5SDimitry Andric 
3590b57cec5SDimitry Andric   const LoopInterchangeLegality &LIL;
3600b57cec5SDimitry Andric };
3610b57cec5SDimitry Andric 
362e8d8bef9SDimitry Andric struct LoopInterchange {
3630b57cec5SDimitry Andric   ScalarEvolution *SE = nullptr;
3640b57cec5SDimitry Andric   LoopInfo *LI = nullptr;
3650b57cec5SDimitry Andric   DependenceInfo *DI = nullptr;
3660b57cec5SDimitry Andric   DominatorTree *DT = nullptr;
36781ad6265SDimitry Andric   std::unique_ptr<CacheCost> CC = nullptr;
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric   /// Interface to emit optimization remarks.
3700b57cec5SDimitry Andric   OptimizationRemarkEmitter *ORE;
3710b57cec5SDimitry Andric 
372e8d8bef9SDimitry Andric   LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
37381ad6265SDimitry Andric                   DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
37481ad6265SDimitry Andric                   OptimizationRemarkEmitter *ORE)
37581ad6265SDimitry Andric       : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
3760b57cec5SDimitry Andric 
377e8d8bef9SDimitry Andric   bool run(Loop *L) {
378e8d8bef9SDimitry Andric     if (L->getParentLoop())
3790b57cec5SDimitry Andric       return false;
38081ad6265SDimitry Andric     SmallVector<Loop *, 8> LoopList;
38181ad6265SDimitry Andric     populateWorklist(*L, LoopList);
38281ad6265SDimitry Andric     return processLoopList(LoopList);
3830b57cec5SDimitry Andric   }
3840b57cec5SDimitry Andric 
385fe6060f1SDimitry Andric   bool run(LoopNest &LN) {
38681ad6265SDimitry Andric     SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
387fe6060f1SDimitry Andric     for (unsigned I = 1; I < LoopList.size(); ++I)
388fe6060f1SDimitry Andric       if (LoopList[I]->getParentLoop() != LoopList[I - 1])
389fe6060f1SDimitry Andric         return false;
390fe6060f1SDimitry Andric     return processLoopList(LoopList);
391fe6060f1SDimitry Andric   }
392fe6060f1SDimitry Andric 
393fe6060f1SDimitry Andric   bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
3940b57cec5SDimitry Andric     for (Loop *L : LoopList) {
3950b57cec5SDimitry Andric       const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
396e8d8bef9SDimitry Andric       if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
3970b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
3980b57cec5SDimitry Andric         return false;
3990b57cec5SDimitry Andric       }
4000b57cec5SDimitry Andric       if (L->getNumBackEdges() != 1) {
4010b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
4020b57cec5SDimitry Andric         return false;
4030b57cec5SDimitry Andric       }
4040b57cec5SDimitry Andric       if (!L->getExitingBlock()) {
4050b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
4060b57cec5SDimitry Andric         return false;
4070b57cec5SDimitry Andric       }
4080b57cec5SDimitry Andric     }
4090b57cec5SDimitry Andric     return true;
4100b57cec5SDimitry Andric   }
4110b57cec5SDimitry Andric 
412fe6060f1SDimitry Andric   unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
4130b57cec5SDimitry Andric     // TODO: Add a better heuristic to select the loop to be interchanged based
4140b57cec5SDimitry Andric     // on the dependence matrix. Currently we select the innermost loop.
4150b57cec5SDimitry Andric     return LoopList.size() - 1;
4160b57cec5SDimitry Andric   }
4170b57cec5SDimitry Andric 
41881ad6265SDimitry Andric   bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
4190b57cec5SDimitry Andric     bool Changed = false;
4200b57cec5SDimitry Andric     unsigned LoopNestDepth = LoopList.size();
4210b57cec5SDimitry Andric     if (LoopNestDepth < 2) {
4220b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
4230b57cec5SDimitry Andric       return false;
4240b57cec5SDimitry Andric     }
4250b57cec5SDimitry Andric     if (LoopNestDepth > MaxLoopNestDepth) {
4260b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
4270b57cec5SDimitry Andric                         << MaxLoopNestDepth << "\n");
4280b57cec5SDimitry Andric       return false;
4290b57cec5SDimitry Andric     }
4300b57cec5SDimitry Andric     if (!isComputableLoopNest(LoopList)) {
4310b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
4320b57cec5SDimitry Andric       return false;
4330b57cec5SDimitry Andric     }
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
4360b57cec5SDimitry Andric                       << "\n");
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric     CharMatrix DependencyMatrix;
4390b57cec5SDimitry Andric     Loop *OuterMostLoop = *(LoopList.begin());
4400b57cec5SDimitry Andric     if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
441bdd1243dSDimitry Andric                                   OuterMostLoop, DI, SE)) {
4420b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
4430b57cec5SDimitry Andric       return false;
4440b57cec5SDimitry Andric     }
4450b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES
4460b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
4470b57cec5SDimitry Andric     printDepMatrix(DependencyMatrix);
4480b57cec5SDimitry Andric #endif
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric     // Get the Outermost loop exit.
4510b57cec5SDimitry Andric     BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
4520b57cec5SDimitry Andric     if (!LoopNestExit) {
4530b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
4540b57cec5SDimitry Andric       return false;
4550b57cec5SDimitry Andric     }
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric     unsigned SelecLoopId = selectLoopForInterchange(LoopList);
45881ad6265SDimitry Andric     // Obtain the loop vector returned from loop cache analysis beforehand,
45981ad6265SDimitry Andric     // and put each <Loop, index> pair into a map for constant time query
46081ad6265SDimitry Andric     // later. Indices in loop vector reprsent the optimal order of the
46181ad6265SDimitry Andric     // corresponding loop, e.g., given a loopnest with depth N, index 0
46281ad6265SDimitry Andric     // indicates the loop should be placed as the outermost loop and index N
46381ad6265SDimitry Andric     // indicates the loop should be placed as the innermost loop.
46481ad6265SDimitry Andric     //
46581ad6265SDimitry Andric     // For the old pass manager CacheCost would be null.
46681ad6265SDimitry Andric     DenseMap<const Loop *, unsigned> CostMap;
46781ad6265SDimitry Andric     if (CC != nullptr) {
46881ad6265SDimitry Andric       const auto &LoopCosts = CC->getLoopCosts();
46981ad6265SDimitry Andric       for (unsigned i = 0; i < LoopCosts.size(); i++) {
47081ad6265SDimitry Andric         CostMap[LoopCosts[i].first] = i;
47181ad6265SDimitry Andric       }
47281ad6265SDimitry Andric     }
47381ad6265SDimitry Andric     // We try to achieve the globally optimal memory access for the loopnest,
47481ad6265SDimitry Andric     // and do interchange based on a bubble-sort fasion. We start from
47581ad6265SDimitry Andric     // the innermost loop, move it outwards to the best possible position
47681ad6265SDimitry Andric     // and repeat this process.
47781ad6265SDimitry Andric     for (unsigned j = SelecLoopId; j > 0; j--) {
47881ad6265SDimitry Andric       bool ChangedPerIter = false;
47981ad6265SDimitry Andric       for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
48081ad6265SDimitry Andric         bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
48181ad6265SDimitry Andric                                         DependencyMatrix, CostMap);
4820b57cec5SDimitry Andric         if (!Interchanged)
48381ad6265SDimitry Andric           continue;
48481ad6265SDimitry Andric         // Loops interchanged, update LoopList accordingly.
48581ad6265SDimitry Andric         std::swap(LoopList[i - 1], LoopList[i]);
4860b57cec5SDimitry Andric         // Update the DependencyMatrix
4870b57cec5SDimitry Andric         interChangeDependencies(DependencyMatrix, i, i - 1);
4880b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES
4890b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
4900b57cec5SDimitry Andric         printDepMatrix(DependencyMatrix);
4910b57cec5SDimitry Andric #endif
49281ad6265SDimitry Andric         ChangedPerIter |= Interchanged;
4930b57cec5SDimitry Andric         Changed |= Interchanged;
4940b57cec5SDimitry Andric       }
49581ad6265SDimitry Andric       // Early abort if there was no interchange during an entire round of
49681ad6265SDimitry Andric       // moving loops outwards.
49781ad6265SDimitry Andric       if (!ChangedPerIter)
49881ad6265SDimitry Andric         break;
49981ad6265SDimitry Andric     }
5000b57cec5SDimitry Andric     return Changed;
5010b57cec5SDimitry Andric   }
5020b57cec5SDimitry Andric 
503fe6060f1SDimitry Andric   bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
504fe6060f1SDimitry Andric                    unsigned OuterLoopId,
50581ad6265SDimitry Andric                    std::vector<std::vector<char>> &DependencyMatrix,
50681ad6265SDimitry Andric                    const DenseMap<const Loop *, unsigned> &CostMap) {
5070b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
5080b57cec5SDimitry Andric                       << " and OuterLoopId = " << OuterLoopId << "\n");
5090b57cec5SDimitry Andric     LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
5100b57cec5SDimitry Andric     if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
5110b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
5120b57cec5SDimitry Andric       return false;
5130b57cec5SDimitry Andric     }
5140b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
5150b57cec5SDimitry Andric     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
51681ad6265SDimitry Andric     if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
517bdd1243dSDimitry Andric                           DependencyMatrix, CostMap, CC)) {
5180b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
5190b57cec5SDimitry Andric       return false;
5200b57cec5SDimitry Andric     }
5210b57cec5SDimitry Andric 
5220b57cec5SDimitry Andric     ORE->emit([&]() {
5230b57cec5SDimitry Andric       return OptimizationRemark(DEBUG_TYPE, "Interchanged",
5240b57cec5SDimitry Andric                                 InnerLoop->getStartLoc(),
5250b57cec5SDimitry Andric                                 InnerLoop->getHeader())
5260b57cec5SDimitry Andric              << "Loop interchanged with enclosing loop.";
5270b57cec5SDimitry Andric     });
5280b57cec5SDimitry Andric 
529fe6060f1SDimitry Andric     LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
5300b57cec5SDimitry Andric     LIT.transform();
5310b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
5320b57cec5SDimitry Andric     LoopsInterchanged++;
5335ffd83dbSDimitry Andric 
534bdd1243dSDimitry Andric     llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
5350b57cec5SDimitry Andric     return true;
5360b57cec5SDimitry Andric   }
5370b57cec5SDimitry Andric };
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric } // end anonymous namespace
5400b57cec5SDimitry Andric 
5410b57cec5SDimitry Andric bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
5420b57cec5SDimitry Andric   return any_of(*BB, [](const Instruction &I) {
5430b57cec5SDimitry Andric     return I.mayHaveSideEffects() || I.mayReadFromMemory();
5440b57cec5SDimitry Andric   });
5450b57cec5SDimitry Andric }
5460b57cec5SDimitry Andric 
5470b57cec5SDimitry Andric bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
5480b57cec5SDimitry Andric   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
5490b57cec5SDimitry Andric   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
5500b57cec5SDimitry Andric   BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric   // A perfectly nested loop will not have any branch in between the outer and
5550b57cec5SDimitry Andric   // inner block i.e. outer header will branch to either inner preheader and
5560b57cec5SDimitry Andric   // outerloop latch.
5570b57cec5SDimitry Andric   BranchInst *OuterLoopHeaderBI =
5580b57cec5SDimitry Andric       dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
5590b57cec5SDimitry Andric   if (!OuterLoopHeaderBI)
5600b57cec5SDimitry Andric     return false;
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric   for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
5630b57cec5SDimitry Andric     if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
5640b57cec5SDimitry Andric         Succ != OuterLoopLatch)
5650b57cec5SDimitry Andric       return false;
5660b57cec5SDimitry Andric 
5670b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
5680b57cec5SDimitry Andric   // We do not have any basic block in between now make sure the outer header
5690b57cec5SDimitry Andric   // and outer loop latch doesn't contain any unsafe instructions.
5700b57cec5SDimitry Andric   if (containsUnsafeInstructions(OuterLoopHeader) ||
5710b57cec5SDimitry Andric       containsUnsafeInstructions(OuterLoopLatch))
5720b57cec5SDimitry Andric     return false;
5730b57cec5SDimitry Andric 
574e8d8bef9SDimitry Andric   // Also make sure the inner loop preheader does not contain any unsafe
575e8d8bef9SDimitry Andric   // instructions. Note that all instructions in the preheader will be moved to
576e8d8bef9SDimitry Andric   // the outer loop header when interchanging.
577e8d8bef9SDimitry Andric   if (InnerLoopPreHeader != OuterLoopHeader &&
578e8d8bef9SDimitry Andric       containsUnsafeInstructions(InnerLoopPreHeader))
579e8d8bef9SDimitry Andric     return false;
580e8d8bef9SDimitry Andric 
581fe6060f1SDimitry Andric   BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
582fe6060f1SDimitry Andric   // Ensure the inner loop exit block flows to the outer loop latch possibly
583fe6060f1SDimitry Andric   // through empty blocks.
584fe6060f1SDimitry Andric   const BasicBlock &SuccInner =
585fe6060f1SDimitry Andric       LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
586fe6060f1SDimitry Andric   if (&SuccInner != OuterLoopLatch) {
587fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
588fe6060f1SDimitry Andric                       << " does not lead to the outer loop latch.\n";);
589fe6060f1SDimitry Andric     return false;
590fe6060f1SDimitry Andric   }
591fe6060f1SDimitry Andric   // The inner loop exit block does flow to the outer loop latch and not some
592fe6060f1SDimitry Andric   // other BBs, now make sure it contains safe instructions, since it will be
593fe6060f1SDimitry Andric   // moved into the (new) inner loop after interchange.
594fe6060f1SDimitry Andric   if (containsUnsafeInstructions(InnerLoopExit))
595fe6060f1SDimitry Andric     return false;
596fe6060f1SDimitry Andric 
5970b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
5980b57cec5SDimitry Andric   // We have a perfect loop nest.
5990b57cec5SDimitry Andric   return true;
6000b57cec5SDimitry Andric }
6010b57cec5SDimitry Andric 
60204eeddc0SDimitry Andric bool LoopInterchangeLegality::isLoopStructureUnderstood() {
6030b57cec5SDimitry Andric   BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
60404eeddc0SDimitry Andric   for (PHINode *InnerInduction : InnerLoopInductions) {
60504eeddc0SDimitry Andric     unsigned Num = InnerInduction->getNumOperands();
6060b57cec5SDimitry Andric     for (unsigned i = 0; i < Num; ++i) {
6070b57cec5SDimitry Andric       Value *Val = InnerInduction->getOperand(i);
6080b57cec5SDimitry Andric       if (isa<Constant>(Val))
6090b57cec5SDimitry Andric         continue;
6100b57cec5SDimitry Andric       Instruction *I = dyn_cast<Instruction>(Val);
6110b57cec5SDimitry Andric       if (!I)
6120b57cec5SDimitry Andric         return false;
6130b57cec5SDimitry Andric       // TODO: Handle triangular loops.
6140b57cec5SDimitry Andric       // e.g. for(int i=0;i<N;i++)
6150b57cec5SDimitry Andric       //        for(int j=i;j<N;j++)
6160b57cec5SDimitry Andric       unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
6170b57cec5SDimitry Andric       if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
6180b57cec5SDimitry Andric               InnerLoopPreheader &&
6190b57cec5SDimitry Andric           !OuterLoop->isLoopInvariant(I)) {
6200b57cec5SDimitry Andric         return false;
6210b57cec5SDimitry Andric       }
6220b57cec5SDimitry Andric     }
62304eeddc0SDimitry Andric   }
624fe6060f1SDimitry Andric 
625fe6060f1SDimitry Andric   // TODO: Handle triangular loops of another form.
626fe6060f1SDimitry Andric   // e.g. for(int i=0;i<N;i++)
627fe6060f1SDimitry Andric   //        for(int j=0;j<i;j++)
628fe6060f1SDimitry Andric   // or,
629fe6060f1SDimitry Andric   //      for(int i=0;i<N;i++)
630fe6060f1SDimitry Andric   //        for(int j=0;j*i<N;j++)
631fe6060f1SDimitry Andric   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
632fe6060f1SDimitry Andric   BranchInst *InnerLoopLatchBI =
633fe6060f1SDimitry Andric       dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
634fe6060f1SDimitry Andric   if (!InnerLoopLatchBI->isConditional())
635fe6060f1SDimitry Andric     return false;
636fe6060f1SDimitry Andric   if (CmpInst *InnerLoopCmp =
637fe6060f1SDimitry Andric           dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
638fe6060f1SDimitry Andric     Value *Op0 = InnerLoopCmp->getOperand(0);
639fe6060f1SDimitry Andric     Value *Op1 = InnerLoopCmp->getOperand(1);
640fe6060f1SDimitry Andric 
641fe6060f1SDimitry Andric     // LHS and RHS of the inner loop exit condition, e.g.,
642fe6060f1SDimitry Andric     // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
643fe6060f1SDimitry Andric     Value *Left = nullptr;
644fe6060f1SDimitry Andric     Value *Right = nullptr;
645fe6060f1SDimitry Andric 
646fe6060f1SDimitry Andric     // Check if V only involves inner loop induction variable.
647fe6060f1SDimitry Andric     // Return true if V is InnerInduction, or a cast from
648fe6060f1SDimitry Andric     // InnerInduction, or a binary operator that involves
649fe6060f1SDimitry Andric     // InnerInduction and a constant.
65004eeddc0SDimitry Andric     std::function<bool(Value *)> IsPathToInnerIndVar;
65104eeddc0SDimitry Andric     IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
65204eeddc0SDimitry Andric       if (llvm::is_contained(InnerLoopInductions, V))
653fe6060f1SDimitry Andric         return true;
654fe6060f1SDimitry Andric       if (isa<Constant>(V))
655fe6060f1SDimitry Andric         return true;
65604eeddc0SDimitry Andric       const Instruction *I = dyn_cast<Instruction>(V);
657fe6060f1SDimitry Andric       if (!I)
658fe6060f1SDimitry Andric         return false;
659fe6060f1SDimitry Andric       if (isa<CastInst>(I))
66004eeddc0SDimitry Andric         return IsPathToInnerIndVar(I->getOperand(0));
661fe6060f1SDimitry Andric       if (isa<BinaryOperator>(I))
66204eeddc0SDimitry Andric         return IsPathToInnerIndVar(I->getOperand(0)) &&
66304eeddc0SDimitry Andric                IsPathToInnerIndVar(I->getOperand(1));
664fe6060f1SDimitry Andric       return false;
665fe6060f1SDimitry Andric     };
666fe6060f1SDimitry Andric 
66704eeddc0SDimitry Andric     // In case of multiple inner loop indvars, it is okay if LHS and RHS
66804eeddc0SDimitry Andric     // are both inner indvar related variables.
66904eeddc0SDimitry Andric     if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
67004eeddc0SDimitry Andric       return true;
67104eeddc0SDimitry Andric 
67204eeddc0SDimitry Andric     // Otherwise we check if the cmp instruction compares an inner indvar
67304eeddc0SDimitry Andric     // related variable (Left) with a outer loop invariant (Right).
67404eeddc0SDimitry Andric     if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
675fe6060f1SDimitry Andric       Left = Op0;
676fe6060f1SDimitry Andric       Right = Op1;
67704eeddc0SDimitry Andric     } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
678fe6060f1SDimitry Andric       Left = Op1;
679fe6060f1SDimitry Andric       Right = Op0;
680fe6060f1SDimitry Andric     }
681fe6060f1SDimitry Andric 
682fe6060f1SDimitry Andric     if (Left == nullptr)
683fe6060f1SDimitry Andric       return false;
684fe6060f1SDimitry Andric 
685fe6060f1SDimitry Andric     const SCEV *S = SE->getSCEV(Right);
686fe6060f1SDimitry Andric     if (!SE->isLoopInvariant(S, OuterLoop))
687fe6060f1SDimitry Andric       return false;
688fe6060f1SDimitry Andric   }
689fe6060f1SDimitry Andric 
6900b57cec5SDimitry Andric   return true;
6910b57cec5SDimitry Andric }
6920b57cec5SDimitry Andric 
6930b57cec5SDimitry Andric // If SV is a LCSSA PHI node with a single incoming value, return the incoming
6940b57cec5SDimitry Andric // value.
6950b57cec5SDimitry Andric static Value *followLCSSA(Value *SV) {
6960b57cec5SDimitry Andric   PHINode *PHI = dyn_cast<PHINode>(SV);
6970b57cec5SDimitry Andric   if (!PHI)
6980b57cec5SDimitry Andric     return SV;
6990b57cec5SDimitry Andric 
7000b57cec5SDimitry Andric   if (PHI->getNumIncomingValues() != 1)
7010b57cec5SDimitry Andric     return SV;
7020b57cec5SDimitry Andric   return followLCSSA(PHI->getIncomingValue(0));
7030b57cec5SDimitry Andric }
7040b57cec5SDimitry Andric 
7050b57cec5SDimitry Andric // Check V's users to see if it is involved in a reduction in L.
7060b57cec5SDimitry Andric static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
707e8d8bef9SDimitry Andric   // Reduction variables cannot be constants.
708e8d8bef9SDimitry Andric   if (isa<Constant>(V))
709e8d8bef9SDimitry Andric     return nullptr;
710e8d8bef9SDimitry Andric 
7110b57cec5SDimitry Andric   for (Value *User : V->users()) {
7120b57cec5SDimitry Andric     if (PHINode *PHI = dyn_cast<PHINode>(User)) {
7130b57cec5SDimitry Andric       if (PHI->getNumIncomingValues() == 1)
7140b57cec5SDimitry Andric         continue;
7150b57cec5SDimitry Andric       RecurrenceDescriptor RD;
71681ad6265SDimitry Andric       if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
71781ad6265SDimitry Andric         // Detect floating point reduction only when it can be reordered.
71881ad6265SDimitry Andric         if (RD.getExactFPMathInst() != nullptr)
71981ad6265SDimitry Andric           return nullptr;
7200b57cec5SDimitry Andric         return PHI;
72181ad6265SDimitry Andric       }
7220b57cec5SDimitry Andric       return nullptr;
7230b57cec5SDimitry Andric     }
7240b57cec5SDimitry Andric   }
7250b57cec5SDimitry Andric 
7260b57cec5SDimitry Andric   return nullptr;
7270b57cec5SDimitry Andric }
7280b57cec5SDimitry Andric 
7290b57cec5SDimitry Andric bool LoopInterchangeLegality::findInductionAndReductions(
7300b57cec5SDimitry Andric     Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
7310b57cec5SDimitry Andric   if (!L->getLoopLatch() || !L->getLoopPredecessor())
7320b57cec5SDimitry Andric     return false;
7330b57cec5SDimitry Andric   for (PHINode &PHI : L->getHeader()->phis()) {
7340b57cec5SDimitry Andric     InductionDescriptor ID;
7350b57cec5SDimitry Andric     if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
7360b57cec5SDimitry Andric       Inductions.push_back(&PHI);
7370b57cec5SDimitry Andric     else {
7380b57cec5SDimitry Andric       // PHIs in inner loops need to be part of a reduction in the outer loop,
7390b57cec5SDimitry Andric       // discovered when checking the PHIs of the outer loop earlier.
7400b57cec5SDimitry Andric       if (!InnerLoop) {
7415ffd83dbSDimitry Andric         if (!OuterInnerReductions.count(&PHI)) {
7420b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
7430b57cec5SDimitry Andric                                "across the outer loop.\n");
7440b57cec5SDimitry Andric           return false;
7450b57cec5SDimitry Andric         }
7460b57cec5SDimitry Andric       } else {
7470b57cec5SDimitry Andric         assert(PHI.getNumIncomingValues() == 2 &&
7480b57cec5SDimitry Andric                "Phis in loop header should have exactly 2 incoming values");
7490b57cec5SDimitry Andric         // Check if we have a PHI node in the outer loop that has a reduction
7500b57cec5SDimitry Andric         // result from the inner loop as an incoming value.
7510b57cec5SDimitry Andric         Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
7520b57cec5SDimitry Andric         PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
7530b57cec5SDimitry Andric         if (!InnerRedPhi ||
754e8d8bef9SDimitry Andric             !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
7550b57cec5SDimitry Andric           LLVM_DEBUG(
7560b57cec5SDimitry Andric               dbgs()
7570b57cec5SDimitry Andric               << "Failed to recognize PHI as an induction or reduction.\n");
7580b57cec5SDimitry Andric           return false;
7590b57cec5SDimitry Andric         }
7600b57cec5SDimitry Andric         OuterInnerReductions.insert(&PHI);
7610b57cec5SDimitry Andric         OuterInnerReductions.insert(InnerRedPhi);
7620b57cec5SDimitry Andric       }
7630b57cec5SDimitry Andric     }
7640b57cec5SDimitry Andric   }
7650b57cec5SDimitry Andric   return true;
7660b57cec5SDimitry Andric }
7670b57cec5SDimitry Andric 
7680b57cec5SDimitry Andric // This function indicates the current limitations in the transform as a result
7690b57cec5SDimitry Andric // of which we do not proceed.
7700b57cec5SDimitry Andric bool LoopInterchangeLegality::currentLimitations() {
7710b57cec5SDimitry Andric   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
7720b57cec5SDimitry Andric 
7730b57cec5SDimitry Andric   // transform currently expects the loop latches to also be the exiting
7740b57cec5SDimitry Andric   // blocks.
7750b57cec5SDimitry Andric   if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
7760b57cec5SDimitry Andric       OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
7770b57cec5SDimitry Andric       !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
7780b57cec5SDimitry Andric       !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
7790b57cec5SDimitry Andric     LLVM_DEBUG(
7800b57cec5SDimitry Andric         dbgs() << "Loops where the latch is not the exiting block are not"
7810b57cec5SDimitry Andric                << " supported currently.\n");
7820b57cec5SDimitry Andric     ORE->emit([&]() {
7830b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
7840b57cec5SDimitry Andric                                       OuterLoop->getStartLoc(),
7850b57cec5SDimitry Andric                                       OuterLoop->getHeader())
7860b57cec5SDimitry Andric              << "Loops where the latch is not the exiting block cannot be"
7870b57cec5SDimitry Andric                 " interchange currently.";
7880b57cec5SDimitry Andric     });
7890b57cec5SDimitry Andric     return true;
7900b57cec5SDimitry Andric   }
7910b57cec5SDimitry Andric 
7920b57cec5SDimitry Andric   SmallVector<PHINode *, 8> Inductions;
7930b57cec5SDimitry Andric   if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
7940b57cec5SDimitry Andric     LLVM_DEBUG(
7950b57cec5SDimitry Andric         dbgs() << "Only outer loops with induction or reduction PHI nodes "
7960b57cec5SDimitry Andric                << "are supported currently.\n");
7970b57cec5SDimitry Andric     ORE->emit([&]() {
7980b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
7990b57cec5SDimitry Andric                                       OuterLoop->getStartLoc(),
8000b57cec5SDimitry Andric                                       OuterLoop->getHeader())
8010b57cec5SDimitry Andric              << "Only outer loops with induction or reduction PHI nodes can be"
8020b57cec5SDimitry Andric                 " interchanged currently.";
8030b57cec5SDimitry Andric     });
8040b57cec5SDimitry Andric     return true;
8050b57cec5SDimitry Andric   }
8060b57cec5SDimitry Andric 
8070b57cec5SDimitry Andric   Inductions.clear();
808bdd1243dSDimitry Andric   // For multi-level loop nests, make sure that all phi nodes for inner loops
809bdd1243dSDimitry Andric   // at all levels can be recognized as a induction or reduction phi. Bail out
810bdd1243dSDimitry Andric   // if a phi node at a certain nesting level cannot be properly recognized.
811bdd1243dSDimitry Andric   Loop *CurLevelLoop = OuterLoop;
812bdd1243dSDimitry Andric   while (!CurLevelLoop->getSubLoops().empty()) {
813bdd1243dSDimitry Andric     // We already made sure that the loop nest is tightly nested.
814bdd1243dSDimitry Andric     CurLevelLoop = CurLevelLoop->getSubLoops().front();
815bdd1243dSDimitry Andric     if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
8160b57cec5SDimitry Andric       LLVM_DEBUG(
8170b57cec5SDimitry Andric           dbgs() << "Only inner loops with induction or reduction PHI nodes "
8180b57cec5SDimitry Andric                 << "are supported currently.\n");
8190b57cec5SDimitry Andric       ORE->emit([&]() {
8200b57cec5SDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
821bdd1243dSDimitry Andric                                         CurLevelLoop->getStartLoc(),
822bdd1243dSDimitry Andric                                         CurLevelLoop->getHeader())
8230b57cec5SDimitry Andric               << "Only inner loops with induction or reduction PHI nodes can be"
8240b57cec5SDimitry Andric                   " interchange currently.";
8250b57cec5SDimitry Andric       });
8260b57cec5SDimitry Andric       return true;
8270b57cec5SDimitry Andric     }
828bdd1243dSDimitry Andric   }
8290b57cec5SDimitry Andric 
8300b57cec5SDimitry Andric   // TODO: Triangular loops are not handled for now.
83104eeddc0SDimitry Andric   if (!isLoopStructureUnderstood()) {
8320b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
8330b57cec5SDimitry Andric     ORE->emit([&]() {
8340b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
8350b57cec5SDimitry Andric                                       InnerLoop->getStartLoc(),
8360b57cec5SDimitry Andric                                       InnerLoop->getHeader())
8370b57cec5SDimitry Andric              << "Inner loop structure not understood currently.";
8380b57cec5SDimitry Andric     });
8390b57cec5SDimitry Andric     return true;
8400b57cec5SDimitry Andric   }
8410b57cec5SDimitry Andric 
8420b57cec5SDimitry Andric   return false;
8430b57cec5SDimitry Andric }
8440b57cec5SDimitry Andric 
84504eeddc0SDimitry Andric bool LoopInterchangeLegality::findInductions(
84604eeddc0SDimitry Andric     Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
84704eeddc0SDimitry Andric   for (PHINode &PHI : L->getHeader()->phis()) {
84804eeddc0SDimitry Andric     InductionDescriptor ID;
84904eeddc0SDimitry Andric     if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
85004eeddc0SDimitry Andric       Inductions.push_back(&PHI);
85104eeddc0SDimitry Andric   }
85204eeddc0SDimitry Andric   return !Inductions.empty();
85304eeddc0SDimitry Andric }
85404eeddc0SDimitry Andric 
855480093f4SDimitry Andric // We currently only support LCSSA PHI nodes in the inner loop exit, if their
856480093f4SDimitry Andric // users are either reduction PHIs or PHIs outside the outer loop (which means
857480093f4SDimitry Andric // the we are only interested in the final value after the loop).
858480093f4SDimitry Andric static bool
859480093f4SDimitry Andric areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
860480093f4SDimitry Andric                               SmallPtrSetImpl<PHINode *> &Reductions) {
861480093f4SDimitry Andric   BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
862480093f4SDimitry Andric   for (PHINode &PHI : InnerExit->phis()) {
863480093f4SDimitry Andric     // Reduction lcssa phi will have only 1 incoming block that from loop latch.
864480093f4SDimitry Andric     if (PHI.getNumIncomingValues() > 1)
865480093f4SDimitry Andric       return false;
866480093f4SDimitry Andric     if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
867480093f4SDimitry Andric           PHINode *PN = dyn_cast<PHINode>(U);
8685ffd83dbSDimitry Andric           return !PN ||
8695ffd83dbSDimitry Andric                  (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
870480093f4SDimitry Andric         })) {
871480093f4SDimitry Andric       return false;
872480093f4SDimitry Andric     }
873480093f4SDimitry Andric   }
874480093f4SDimitry Andric   return true;
875480093f4SDimitry Andric }
876480093f4SDimitry Andric 
8770b57cec5SDimitry Andric // We currently support LCSSA PHI nodes in the outer loop exit, if their
8780b57cec5SDimitry Andric // incoming values do not come from the outer loop latch or if the
8790b57cec5SDimitry Andric // outer loop latch has a single predecessor. In that case, the value will
8800b57cec5SDimitry Andric // be available if both the inner and outer loop conditions are true, which
8810b57cec5SDimitry Andric // will still be true after interchanging. If we have multiple predecessor,
8820b57cec5SDimitry Andric // that may not be the case, e.g. because the outer loop latch may be executed
8830b57cec5SDimitry Andric // if the inner loop is not executed.
884480093f4SDimitry Andric static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
8850b57cec5SDimitry Andric   BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
8860b57cec5SDimitry Andric   for (PHINode &PHI : LoopNestExit->phis()) {
8870b57cec5SDimitry Andric     for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
8880b57cec5SDimitry Andric       Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
8890b57cec5SDimitry Andric       if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
8900b57cec5SDimitry Andric         continue;
8910b57cec5SDimitry Andric 
8920b57cec5SDimitry Andric       // The incoming value is defined in the outer loop latch. Currently we
8930b57cec5SDimitry Andric       // only support that in case the outer loop latch has a single predecessor.
8940b57cec5SDimitry Andric       // This guarantees that the outer loop latch is executed if and only if
8950b57cec5SDimitry Andric       // the inner loop is executed (because tightlyNested() guarantees that the
8960b57cec5SDimitry Andric       // outer loop header only branches to the inner loop or the outer loop
8970b57cec5SDimitry Andric       // latch).
8980b57cec5SDimitry Andric       // FIXME: We could weaken this logic and allow multiple predecessors,
8990b57cec5SDimitry Andric       //        if the values are produced outside the loop latch. We would need
9000b57cec5SDimitry Andric       //        additional logic to update the PHI nodes in the exit block as
9010b57cec5SDimitry Andric       //        well.
9020b57cec5SDimitry Andric       if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
9030b57cec5SDimitry Andric         return false;
9040b57cec5SDimitry Andric     }
9050b57cec5SDimitry Andric   }
9060b57cec5SDimitry Andric   return true;
9070b57cec5SDimitry Andric }
9080b57cec5SDimitry Andric 
909fe6060f1SDimitry Andric // In case of multi-level nested loops, it may occur that lcssa phis exist in
910fe6060f1SDimitry Andric // the latch of InnerLoop, i.e., when defs of the incoming values are further
911fe6060f1SDimitry Andric // inside the loopnest. Sometimes those incoming values are not available
912fe6060f1SDimitry Andric // after interchange, since the original inner latch will become the new outer
913fe6060f1SDimitry Andric // latch which may have predecessor paths that do not include those incoming
914fe6060f1SDimitry Andric // values.
915fe6060f1SDimitry Andric // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
916fe6060f1SDimitry Andric // multi-level loop nests.
917fe6060f1SDimitry Andric static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
918fe6060f1SDimitry Andric   if (InnerLoop->getSubLoops().empty())
919fe6060f1SDimitry Andric     return true;
920fe6060f1SDimitry Andric   // If the original outer latch has only one predecessor, then values defined
921fe6060f1SDimitry Andric   // further inside the looploop, e.g., in the innermost loop, will be available
922fe6060f1SDimitry Andric   // at the new outer latch after interchange.
923fe6060f1SDimitry Andric   if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
924fe6060f1SDimitry Andric     return true;
925fe6060f1SDimitry Andric 
926fe6060f1SDimitry Andric   // The outer latch has more than one predecessors, i.e., the inner
927fe6060f1SDimitry Andric   // exit and the inner header.
928fe6060f1SDimitry Andric   // PHI nodes in the inner latch are lcssa phis where the incoming values
929fe6060f1SDimitry Andric   // are defined further inside the loopnest. Check if those phis are used
930fe6060f1SDimitry Andric   // in the original inner latch. If that is the case then bail out since
931fe6060f1SDimitry Andric   // those incoming values may not be available at the new outer latch.
932fe6060f1SDimitry Andric   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
933fe6060f1SDimitry Andric   for (PHINode &PHI : InnerLoopLatch->phis()) {
934fe6060f1SDimitry Andric     for (auto *U : PHI.users()) {
935fe6060f1SDimitry Andric       Instruction *UI = cast<Instruction>(U);
936fe6060f1SDimitry Andric       if (InnerLoopLatch == UI->getParent())
937fe6060f1SDimitry Andric         return false;
938fe6060f1SDimitry Andric     }
939fe6060f1SDimitry Andric   }
940fe6060f1SDimitry Andric   return true;
941fe6060f1SDimitry Andric }
942fe6060f1SDimitry Andric 
9430b57cec5SDimitry Andric bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
9440b57cec5SDimitry Andric                                                   unsigned OuterLoopId,
9450b57cec5SDimitry Andric                                                   CharMatrix &DepMatrix) {
9460b57cec5SDimitry Andric   if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
9470b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
9480b57cec5SDimitry Andric                       << " and OuterLoopId = " << OuterLoopId
9490b57cec5SDimitry Andric                       << " due to dependence\n");
9500b57cec5SDimitry Andric     ORE->emit([&]() {
9510b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
9520b57cec5SDimitry Andric                                       InnerLoop->getStartLoc(),
9530b57cec5SDimitry Andric                                       InnerLoop->getHeader())
9540b57cec5SDimitry Andric              << "Cannot interchange loops due to dependences.";
9550b57cec5SDimitry Andric     });
9560b57cec5SDimitry Andric     return false;
9570b57cec5SDimitry Andric   }
9580b57cec5SDimitry Andric   // Check if outer and inner loop contain legal instructions only.
9590b57cec5SDimitry Andric   for (auto *BB : OuterLoop->blocks())
9600b57cec5SDimitry Andric     for (Instruction &I : BB->instructionsWithoutDebug())
9610b57cec5SDimitry Andric       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
9620b57cec5SDimitry Andric         // readnone functions do not prevent interchanging.
96304eeddc0SDimitry Andric         if (CI->onlyWritesMemory())
9640b57cec5SDimitry Andric           continue;
9650b57cec5SDimitry Andric         LLVM_DEBUG(
9660b57cec5SDimitry Andric             dbgs() << "Loops with call instructions cannot be interchanged "
9670b57cec5SDimitry Andric                    << "safely.");
9680b57cec5SDimitry Andric         ORE->emit([&]() {
9690b57cec5SDimitry Andric           return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
9700b57cec5SDimitry Andric                                           CI->getDebugLoc(),
9710b57cec5SDimitry Andric                                           CI->getParent())
9720b57cec5SDimitry Andric                  << "Cannot interchange loops due to call instruction.";
9730b57cec5SDimitry Andric         });
9740b57cec5SDimitry Andric 
9750b57cec5SDimitry Andric         return false;
9760b57cec5SDimitry Andric       }
9770b57cec5SDimitry Andric 
97804eeddc0SDimitry Andric   if (!findInductions(InnerLoop, InnerLoopInductions)) {
979*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
98004eeddc0SDimitry Andric     return false;
98104eeddc0SDimitry Andric   }
98204eeddc0SDimitry Andric 
983fe6060f1SDimitry Andric   if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
984fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
985fe6060f1SDimitry Andric     ORE->emit([&]() {
986fe6060f1SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
987fe6060f1SDimitry Andric                                       InnerLoop->getStartLoc(),
988fe6060f1SDimitry Andric                                       InnerLoop->getHeader())
989fe6060f1SDimitry Andric              << "Cannot interchange loops because unsupported PHI nodes found "
990fe6060f1SDimitry Andric                 "in inner loop latch.";
991fe6060f1SDimitry Andric     });
992fe6060f1SDimitry Andric     return false;
993fe6060f1SDimitry Andric   }
994fe6060f1SDimitry Andric 
9950b57cec5SDimitry Andric   // TODO: The loops could not be interchanged due to current limitations in the
9960b57cec5SDimitry Andric   // transform module.
9970b57cec5SDimitry Andric   if (currentLimitations()) {
9980b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
9990b57cec5SDimitry Andric     return false;
10000b57cec5SDimitry Andric   }
10010b57cec5SDimitry Andric 
10020b57cec5SDimitry Andric   // Check if the loops are tightly nested.
10030b57cec5SDimitry Andric   if (!tightlyNested(OuterLoop, InnerLoop)) {
10040b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
10050b57cec5SDimitry Andric     ORE->emit([&]() {
10060b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
10070b57cec5SDimitry Andric                                       InnerLoop->getStartLoc(),
10080b57cec5SDimitry Andric                                       InnerLoop->getHeader())
10090b57cec5SDimitry Andric              << "Cannot interchange loops because they are not tightly "
10100b57cec5SDimitry Andric                 "nested.";
10110b57cec5SDimitry Andric     });
10120b57cec5SDimitry Andric     return false;
10130b57cec5SDimitry Andric   }
10140b57cec5SDimitry Andric 
1015480093f4SDimitry Andric   if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1016480093f4SDimitry Andric                                      OuterInnerReductions)) {
1017480093f4SDimitry Andric     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1018480093f4SDimitry Andric     ORE->emit([&]() {
1019480093f4SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1020480093f4SDimitry Andric                                       InnerLoop->getStartLoc(),
1021480093f4SDimitry Andric                                       InnerLoop->getHeader())
1022480093f4SDimitry Andric              << "Found unsupported PHI node in loop exit.";
1023480093f4SDimitry Andric     });
1024480093f4SDimitry Andric     return false;
1025480093f4SDimitry Andric   }
1026480093f4SDimitry Andric 
1027480093f4SDimitry Andric   if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
10280b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
10290b57cec5SDimitry Andric     ORE->emit([&]() {
10300b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
10310b57cec5SDimitry Andric                                       OuterLoop->getStartLoc(),
10320b57cec5SDimitry Andric                                       OuterLoop->getHeader())
10330b57cec5SDimitry Andric              << "Found unsupported PHI node in loop exit.";
10340b57cec5SDimitry Andric     });
10350b57cec5SDimitry Andric     return false;
10360b57cec5SDimitry Andric   }
10370b57cec5SDimitry Andric 
10380b57cec5SDimitry Andric   return true;
10390b57cec5SDimitry Andric }
10400b57cec5SDimitry Andric 
10410b57cec5SDimitry Andric int LoopInterchangeProfitability::getInstrOrderCost() {
10420b57cec5SDimitry Andric   unsigned GoodOrder, BadOrder;
10430b57cec5SDimitry Andric   BadOrder = GoodOrder = 0;
10440b57cec5SDimitry Andric   for (BasicBlock *BB : InnerLoop->blocks()) {
10450b57cec5SDimitry Andric     for (Instruction &Ins : *BB) {
10460b57cec5SDimitry Andric       if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
10470b57cec5SDimitry Andric         unsigned NumOp = GEP->getNumOperands();
10480b57cec5SDimitry Andric         bool FoundInnerInduction = false;
10490b57cec5SDimitry Andric         bool FoundOuterInduction = false;
10500b57cec5SDimitry Andric         for (unsigned i = 0; i < NumOp; ++i) {
1051e8d8bef9SDimitry Andric           // Skip operands that are not SCEV-able.
1052e8d8bef9SDimitry Andric           if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1053e8d8bef9SDimitry Andric             continue;
1054e8d8bef9SDimitry Andric 
10550b57cec5SDimitry Andric           const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
10560b57cec5SDimitry Andric           const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
10570b57cec5SDimitry Andric           if (!AR)
10580b57cec5SDimitry Andric             continue;
10590b57cec5SDimitry Andric 
10600b57cec5SDimitry Andric           // If we find the inner induction after an outer induction e.g.
10610b57cec5SDimitry Andric           // for(int i=0;i<N;i++)
10620b57cec5SDimitry Andric           //   for(int j=0;j<N;j++)
10630b57cec5SDimitry Andric           //     A[i][j] = A[i-1][j-1]+k;
10640b57cec5SDimitry Andric           // then it is a good order.
10650b57cec5SDimitry Andric           if (AR->getLoop() == InnerLoop) {
10660b57cec5SDimitry Andric             // We found an InnerLoop induction after OuterLoop induction. It is
10670b57cec5SDimitry Andric             // a good order.
10680b57cec5SDimitry Andric             FoundInnerInduction = true;
10690b57cec5SDimitry Andric             if (FoundOuterInduction) {
10700b57cec5SDimitry Andric               GoodOrder++;
10710b57cec5SDimitry Andric               break;
10720b57cec5SDimitry Andric             }
10730b57cec5SDimitry Andric           }
10740b57cec5SDimitry Andric           // If we find the outer induction after an inner induction e.g.
10750b57cec5SDimitry Andric           // for(int i=0;i<N;i++)
10760b57cec5SDimitry Andric           //   for(int j=0;j<N;j++)
10770b57cec5SDimitry Andric           //     A[j][i] = A[j-1][i-1]+k;
10780b57cec5SDimitry Andric           // then it is a bad order.
10790b57cec5SDimitry Andric           if (AR->getLoop() == OuterLoop) {
10800b57cec5SDimitry Andric             // We found an OuterLoop induction after InnerLoop induction. It is
10810b57cec5SDimitry Andric             // a bad order.
10820b57cec5SDimitry Andric             FoundOuterInduction = true;
10830b57cec5SDimitry Andric             if (FoundInnerInduction) {
10840b57cec5SDimitry Andric               BadOrder++;
10850b57cec5SDimitry Andric               break;
10860b57cec5SDimitry Andric             }
10870b57cec5SDimitry Andric           }
10880b57cec5SDimitry Andric         }
10890b57cec5SDimitry Andric       }
10900b57cec5SDimitry Andric     }
10910b57cec5SDimitry Andric   }
10920b57cec5SDimitry Andric   return GoodOrder - BadOrder;
10930b57cec5SDimitry Andric }
10940b57cec5SDimitry Andric 
1095bdd1243dSDimitry Andric std::optional<bool>
1096bdd1243dSDimitry Andric LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1097bdd1243dSDimitry Andric     const DenseMap<const Loop *, unsigned> &CostMap,
1098bdd1243dSDimitry Andric     std::unique_ptr<CacheCost> &CC) {
109981ad6265SDimitry Andric   // This is the new cost model returned from loop cache analysis.
110081ad6265SDimitry Andric   // A smaller index means the loop should be placed an outer loop, and vice
110181ad6265SDimitry Andric   // versa.
110206c3fb27SDimitry Andric   if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) {
110381ad6265SDimitry Andric     unsigned InnerIndex = 0, OuterIndex = 0;
110481ad6265SDimitry Andric     InnerIndex = CostMap.find(InnerLoop)->second;
110581ad6265SDimitry Andric     OuterIndex = CostMap.find(OuterLoop)->second;
110681ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
110781ad6265SDimitry Andric                       << ", OuterIndex = " << OuterIndex << "\n");
110881ad6265SDimitry Andric     if (InnerIndex < OuterIndex)
1109bdd1243dSDimitry Andric       return std::optional<bool>(true);
1110bdd1243dSDimitry Andric     assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1111bdd1243dSDimitry Andric                                        "numbers to each loop");
1112bdd1243dSDimitry Andric     if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1113bdd1243dSDimitry Andric       return std::nullopt;
1114bdd1243dSDimitry Andric     return std::optional<bool>(false);
1115bdd1243dSDimitry Andric   }
1116bdd1243dSDimitry Andric   return std::nullopt;
1117bdd1243dSDimitry Andric }
1118bdd1243dSDimitry Andric 
1119bdd1243dSDimitry Andric std::optional<bool>
1120bdd1243dSDimitry Andric LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
112181ad6265SDimitry Andric   // Legacy cost model: this is rough cost estimation algorithm. It counts the
112281ad6265SDimitry Andric   // good and bad order of induction variables in the instruction and allows
112381ad6265SDimitry Andric   // reordering if number of bad orders is more than good.
11240b57cec5SDimitry Andric   int Cost = getInstrOrderCost();
11250b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1126bdd1243dSDimitry Andric   if (Cost < 0 && Cost < LoopInterchangeCostThreshold)
1127bdd1243dSDimitry Andric     return std::optional<bool>(true);
1128bdd1243dSDimitry Andric 
1129bdd1243dSDimitry Andric   return std::nullopt;
113081ad6265SDimitry Andric }
11310b57cec5SDimitry Andric 
1132bdd1243dSDimitry Andric std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1133bdd1243dSDimitry Andric     unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1134bdd1243dSDimitry Andric   for (auto &Row : DepMatrix) {
1135bdd1243dSDimitry Andric     // If the inner loop is loop independent or doesn't carry any dependency
1136bdd1243dSDimitry Andric     // it is not profitable to move this to outer position, since we are
1137bdd1243dSDimitry Andric     // likely able to do inner loop vectorization already.
1138bdd1243dSDimitry Andric     if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
1139bdd1243dSDimitry Andric       return std::optional<bool>(false);
11400b57cec5SDimitry Andric 
1141bdd1243dSDimitry Andric     // If the outer loop is not loop independent it is not profitable to move
1142bdd1243dSDimitry Andric     // this to inner position, since doing so would not enable inner loop
1143bdd1243dSDimitry Andric     // parallelism.
1144bdd1243dSDimitry Andric     if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=')
1145bdd1243dSDimitry Andric       return std::optional<bool>(false);
1146bdd1243dSDimitry Andric   }
1147bdd1243dSDimitry Andric   // If inner loop has dependence and outer loop is loop independent then it
1148bdd1243dSDimitry Andric   // is/ profitable to interchange to enable inner loop parallelism.
1149bdd1243dSDimitry Andric   // If there are no dependences, interchanging will not improve anything.
1150bdd1243dSDimitry Andric   return std::optional<bool>(!DepMatrix.empty());
1151bdd1243dSDimitry Andric }
1152bdd1243dSDimitry Andric 
1153bdd1243dSDimitry Andric bool LoopInterchangeProfitability::isProfitable(
1154bdd1243dSDimitry Andric     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1155bdd1243dSDimitry Andric     unsigned OuterLoopId, CharMatrix &DepMatrix,
1156bdd1243dSDimitry Andric     const DenseMap<const Loop *, unsigned> &CostMap,
1157bdd1243dSDimitry Andric     std::unique_ptr<CacheCost> &CC) {
1158bdd1243dSDimitry Andric   // isProfitable() is structured to avoid endless loop interchange.
1159bdd1243dSDimitry Andric   // If loop cache analysis could decide the profitability then,
1160bdd1243dSDimitry Andric   // profitability check will stop and return the analysis result.
1161bdd1243dSDimitry Andric   // If cache analysis failed to analyze the loopnest (e.g.,
1162bdd1243dSDimitry Andric   // due to delinearization issues) then only check whether it is
1163bdd1243dSDimitry Andric   // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1164bdd1243dSDimitry Andric   // analysis the profitability then only, isProfitableForVectorization
1165bdd1243dSDimitry Andric   // will decide.
1166bdd1243dSDimitry Andric   std::optional<bool> shouldInterchange =
1167bdd1243dSDimitry Andric       isProfitablePerLoopCacheAnalysis(CostMap, CC);
1168bdd1243dSDimitry Andric   if (!shouldInterchange.has_value()) {
1169bdd1243dSDimitry Andric     shouldInterchange = isProfitablePerInstrOrderCost();
1170bdd1243dSDimitry Andric     if (!shouldInterchange.has_value())
1171bdd1243dSDimitry Andric       shouldInterchange =
1172bdd1243dSDimitry Andric           isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1173bdd1243dSDimitry Andric   }
1174bdd1243dSDimitry Andric   if (!shouldInterchange.has_value()) {
11750b57cec5SDimitry Andric     ORE->emit([&]() {
11760b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
11770b57cec5SDimitry Andric                                       InnerLoop->getStartLoc(),
11780b57cec5SDimitry Andric                                       InnerLoop->getHeader())
1179bdd1243dSDimitry Andric              << "Insufficient information to calculate the cost of loop for "
1180bdd1243dSDimitry Andric                 "interchange.";
11810b57cec5SDimitry Andric     });
11820b57cec5SDimitry Andric     return false;
1183bdd1243dSDimitry Andric   } else if (!shouldInterchange.value()) {
1184bdd1243dSDimitry Andric     ORE->emit([&]() {
1185bdd1243dSDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1186bdd1243dSDimitry Andric                                       InnerLoop->getStartLoc(),
1187bdd1243dSDimitry Andric                                       InnerLoop->getHeader())
1188bdd1243dSDimitry Andric              << "Interchanging loops is not considered to improve cache "
1189bdd1243dSDimitry Andric                 "locality nor vectorization.";
1190bdd1243dSDimitry Andric     });
1191bdd1243dSDimitry Andric     return false;
1192bdd1243dSDimitry Andric   }
1193bdd1243dSDimitry Andric   return true;
11940b57cec5SDimitry Andric }
11950b57cec5SDimitry Andric 
11960b57cec5SDimitry Andric void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
11970b57cec5SDimitry Andric                                                Loop *InnerLoop) {
11980b57cec5SDimitry Andric   for (Loop *L : *OuterLoop)
11990b57cec5SDimitry Andric     if (L == InnerLoop) {
12000b57cec5SDimitry Andric       OuterLoop->removeChildLoop(L);
12010b57cec5SDimitry Andric       return;
12020b57cec5SDimitry Andric     }
12030b57cec5SDimitry Andric   llvm_unreachable("Couldn't find loop");
12040b57cec5SDimitry Andric }
12050b57cec5SDimitry Andric 
12060b57cec5SDimitry Andric /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
12070b57cec5SDimitry Andric /// new inner and outer loop after interchanging: NewInner is the original
12080b57cec5SDimitry Andric /// outer loop and NewOuter is the original inner loop.
12090b57cec5SDimitry Andric ///
12100b57cec5SDimitry Andric /// Before interchanging, we have the following structure
12110b57cec5SDimitry Andric /// Outer preheader
12120b57cec5SDimitry Andric //  Outer header
12130b57cec5SDimitry Andric //    Inner preheader
12140b57cec5SDimitry Andric //    Inner header
12150b57cec5SDimitry Andric //      Inner body
12160b57cec5SDimitry Andric //      Inner latch
12170b57cec5SDimitry Andric //   outer bbs
12180b57cec5SDimitry Andric //   Outer latch
12190b57cec5SDimitry Andric //
12200b57cec5SDimitry Andric // After interchanging:
12210b57cec5SDimitry Andric // Inner preheader
12220b57cec5SDimitry Andric // Inner header
12230b57cec5SDimitry Andric //   Outer preheader
12240b57cec5SDimitry Andric //   Outer header
12250b57cec5SDimitry Andric //     Inner body
12260b57cec5SDimitry Andric //     outer bbs
12270b57cec5SDimitry Andric //     Outer latch
12280b57cec5SDimitry Andric //   Inner latch
12290b57cec5SDimitry Andric void LoopInterchangeTransform::restructureLoops(
12300b57cec5SDimitry Andric     Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
12310b57cec5SDimitry Andric     BasicBlock *OrigOuterPreHeader) {
12320b57cec5SDimitry Andric   Loop *OuterLoopParent = OuterLoop->getParentLoop();
12330b57cec5SDimitry Andric   // The original inner loop preheader moves from the new inner loop to
12340b57cec5SDimitry Andric   // the parent loop, if there is one.
12350b57cec5SDimitry Andric   NewInner->removeBlockFromLoop(OrigInnerPreHeader);
12360b57cec5SDimitry Andric   LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
12370b57cec5SDimitry Andric 
12380b57cec5SDimitry Andric   // Switch the loop levels.
12390b57cec5SDimitry Andric   if (OuterLoopParent) {
12400b57cec5SDimitry Andric     // Remove the loop from its parent loop.
12410b57cec5SDimitry Andric     removeChildLoop(OuterLoopParent, NewInner);
12420b57cec5SDimitry Andric     removeChildLoop(NewInner, NewOuter);
12430b57cec5SDimitry Andric     OuterLoopParent->addChildLoop(NewOuter);
12440b57cec5SDimitry Andric   } else {
12450b57cec5SDimitry Andric     removeChildLoop(NewInner, NewOuter);
12460b57cec5SDimitry Andric     LI->changeTopLevelLoop(NewInner, NewOuter);
12470b57cec5SDimitry Andric   }
1248e8d8bef9SDimitry Andric   while (!NewOuter->isInnermost())
12490b57cec5SDimitry Andric     NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
12500b57cec5SDimitry Andric   NewOuter->addChildLoop(NewInner);
12510b57cec5SDimitry Andric 
12520b57cec5SDimitry Andric   // BBs from the original inner loop.
12530b57cec5SDimitry Andric   SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
12540b57cec5SDimitry Andric 
12550b57cec5SDimitry Andric   // Add BBs from the original outer loop to the original inner loop (excluding
12560b57cec5SDimitry Andric   // BBs already in inner loop)
12570b57cec5SDimitry Andric   for (BasicBlock *BB : NewInner->blocks())
12580b57cec5SDimitry Andric     if (LI->getLoopFor(BB) == NewInner)
12590b57cec5SDimitry Andric       NewOuter->addBlockEntry(BB);
12600b57cec5SDimitry Andric 
12610b57cec5SDimitry Andric   // Now remove inner loop header and latch from the new inner loop and move
12620b57cec5SDimitry Andric   // other BBs (the loop body) to the new inner loop.
12630b57cec5SDimitry Andric   BasicBlock *OuterHeader = NewOuter->getHeader();
12640b57cec5SDimitry Andric   BasicBlock *OuterLatch = NewOuter->getLoopLatch();
12650b57cec5SDimitry Andric   for (BasicBlock *BB : OrigInnerBBs) {
12660b57cec5SDimitry Andric     // Nothing will change for BBs in child loops.
12670b57cec5SDimitry Andric     if (LI->getLoopFor(BB) != NewOuter)
12680b57cec5SDimitry Andric       continue;
12690b57cec5SDimitry Andric     // Remove the new outer loop header and latch from the new inner loop.
12700b57cec5SDimitry Andric     if (BB == OuterHeader || BB == OuterLatch)
12710b57cec5SDimitry Andric       NewInner->removeBlockFromLoop(BB);
12720b57cec5SDimitry Andric     else
12730b57cec5SDimitry Andric       LI->changeLoopFor(BB, NewInner);
12740b57cec5SDimitry Andric   }
12750b57cec5SDimitry Andric 
12760b57cec5SDimitry Andric   // The preheader of the original outer loop becomes part of the new
12770b57cec5SDimitry Andric   // outer loop.
12780b57cec5SDimitry Andric   NewOuter->addBlockEntry(OrigOuterPreHeader);
12790b57cec5SDimitry Andric   LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
12800b57cec5SDimitry Andric 
12810b57cec5SDimitry Andric   // Tell SE that we move the loops around.
12820b57cec5SDimitry Andric   SE->forgetLoop(NewOuter);
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric 
12850b57cec5SDimitry Andric bool LoopInterchangeTransform::transform() {
12860b57cec5SDimitry Andric   bool Transformed = false;
12870b57cec5SDimitry Andric 
12880b57cec5SDimitry Andric   if (InnerLoop->getSubLoops().empty()) {
12890b57cec5SDimitry Andric     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
12908bcb0991SDimitry Andric     LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
129104eeddc0SDimitry Andric     auto &InductionPHIs = LIL.getInnerLoopInductions();
129204eeddc0SDimitry Andric     if (InductionPHIs.empty()) {
12930b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
12940b57cec5SDimitry Andric       return false;
12950b57cec5SDimitry Andric     }
12960b57cec5SDimitry Andric 
129704eeddc0SDimitry Andric     SmallVector<Instruction *, 8> InnerIndexVarList;
129804eeddc0SDimitry Andric     for (PHINode *CurInductionPHI : InductionPHIs) {
129904eeddc0SDimitry Andric       if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
130004eeddc0SDimitry Andric         InnerIndexVarList.push_back(
130104eeddc0SDimitry Andric             dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
13020b57cec5SDimitry Andric       else
130304eeddc0SDimitry Andric         InnerIndexVarList.push_back(
130404eeddc0SDimitry Andric             dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
130504eeddc0SDimitry Andric     }
13060b57cec5SDimitry Andric 
13078bcb0991SDimitry Andric     // Create a new latch block for the inner loop. We split at the
13088bcb0991SDimitry Andric     // current latch's terminator and then move the condition and all
13098bcb0991SDimitry Andric     // operands that are not either loop-invariant or the induction PHI into the
13108bcb0991SDimitry Andric     // new latch block.
13118bcb0991SDimitry Andric     BasicBlock *NewLatch =
13128bcb0991SDimitry Andric         SplitBlock(InnerLoop->getLoopLatch(),
13138bcb0991SDimitry Andric                    InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
13148bcb0991SDimitry Andric 
13158bcb0991SDimitry Andric     SmallSetVector<Instruction *, 4> WorkList;
13168bcb0991SDimitry Andric     unsigned i = 0;
131704eeddc0SDimitry Andric     auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
13188bcb0991SDimitry Andric       for (; i < WorkList.size(); i++) {
13198bcb0991SDimitry Andric         // Duplicate instruction and move it the new latch. Update uses that
13208bcb0991SDimitry Andric         // have been moved.
13218bcb0991SDimitry Andric         Instruction *NewI = WorkList[i]->clone();
13228bcb0991SDimitry Andric         NewI->insertBefore(NewLatch->getFirstNonPHI());
13238bcb0991SDimitry Andric         assert(!NewI->mayHaveSideEffects() &&
13248bcb0991SDimitry Andric                "Moving instructions with side-effects may change behavior of "
13258bcb0991SDimitry Andric                "the loop nest!");
1326fe6060f1SDimitry Andric         for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
13278bcb0991SDimitry Andric           Instruction *UserI = cast<Instruction>(U.getUser());
13288bcb0991SDimitry Andric           if (!InnerLoop->contains(UserI->getParent()) ||
132904eeddc0SDimitry Andric               UserI->getParent() == NewLatch ||
133004eeddc0SDimitry Andric               llvm::is_contained(InductionPHIs, UserI))
13318bcb0991SDimitry Andric             U.set(NewI);
13328bcb0991SDimitry Andric         }
13338bcb0991SDimitry Andric         // Add operands of moved instruction to the worklist, except if they are
13348bcb0991SDimitry Andric         // outside the inner loop or are the induction PHI.
13358bcb0991SDimitry Andric         for (Value *Op : WorkList[i]->operands()) {
13368bcb0991SDimitry Andric           Instruction *OpI = dyn_cast<Instruction>(Op);
13378bcb0991SDimitry Andric           if (!OpI ||
13388bcb0991SDimitry Andric               this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
133904eeddc0SDimitry Andric               llvm::is_contained(InductionPHIs, OpI))
13408bcb0991SDimitry Andric             continue;
13418bcb0991SDimitry Andric           WorkList.insert(OpI);
13428bcb0991SDimitry Andric         }
13438bcb0991SDimitry Andric       }
13448bcb0991SDimitry Andric     };
13458bcb0991SDimitry Andric 
13468bcb0991SDimitry Andric     // FIXME: Should we interchange when we have a constant condition?
13478bcb0991SDimitry Andric     Instruction *CondI = dyn_cast<Instruction>(
13488bcb0991SDimitry Andric         cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
13498bcb0991SDimitry Andric             ->getCondition());
13508bcb0991SDimitry Andric     if (CondI)
13518bcb0991SDimitry Andric       WorkList.insert(CondI);
13528bcb0991SDimitry Andric     MoveInstructions();
135304eeddc0SDimitry Andric     for (Instruction *InnerIndexVar : InnerIndexVarList)
13548bcb0991SDimitry Andric       WorkList.insert(cast<Instruction>(InnerIndexVar));
13558bcb0991SDimitry Andric     MoveInstructions();
1356bdd1243dSDimitry Andric   }
13570b57cec5SDimitry Andric 
1358bdd1243dSDimitry Andric   // Ensure the inner loop phi nodes have a separate basic block.
13590b57cec5SDimitry Andric   BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1360bdd1243dSDimitry Andric   if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
13610b57cec5SDimitry Andric     SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
13620b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
13630b57cec5SDimitry Andric   }
13640b57cec5SDimitry Andric 
1365e8d8bef9SDimitry Andric   // Instructions in the original inner loop preheader may depend on values
1366e8d8bef9SDimitry Andric   // defined in the outer loop header. Move them there, because the original
1367e8d8bef9SDimitry Andric   // inner loop preheader will become the entry into the interchanged loop nest.
1368e8d8bef9SDimitry Andric   // Currently we move all instructions and rely on LICM to move invariant
1369e8d8bef9SDimitry Andric   // instructions outside the loop nest.
1370e8d8bef9SDimitry Andric   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1371e8d8bef9SDimitry Andric   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1372e8d8bef9SDimitry Andric   if (InnerLoopPreHeader != OuterLoopHeader) {
1373e8d8bef9SDimitry Andric     SmallPtrSet<Instruction *, 4> NeedsMoving;
1374e8d8bef9SDimitry Andric     for (Instruction &I :
1375e8d8bef9SDimitry Andric          make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1376e8d8bef9SDimitry Andric                                          std::prev(InnerLoopPreHeader->end()))))
13775f757f3fSDimitry Andric       I.moveBeforePreserving(OuterLoopHeader->getTerminator());
1378e8d8bef9SDimitry Andric   }
1379e8d8bef9SDimitry Andric 
13800b57cec5SDimitry Andric   Transformed |= adjustLoopLinks();
13810b57cec5SDimitry Andric   if (!Transformed) {
13820b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
13830b57cec5SDimitry Andric     return false;
13840b57cec5SDimitry Andric   }
13850b57cec5SDimitry Andric 
13860b57cec5SDimitry Andric   return true;
13870b57cec5SDimitry Andric }
13880b57cec5SDimitry Andric 
13890b57cec5SDimitry Andric /// \brief Move all instructions except the terminator from FromBB right before
13900b57cec5SDimitry Andric /// InsertBefore
13910b57cec5SDimitry Andric static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1392bdd1243dSDimitry Andric   BasicBlock *ToBB = InsertBefore->getParent();
13930b57cec5SDimitry Andric 
1394bdd1243dSDimitry Andric   ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
13950b57cec5SDimitry Andric                FromBB->getTerminator()->getIterator());
13960b57cec5SDimitry Andric }
13970b57cec5SDimitry Andric 
13985ffd83dbSDimitry Andric /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
13995ffd83dbSDimitry Andric static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
14005ffd83dbSDimitry Andric   // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
14015ffd83dbSDimitry Andric   // from BB1 afterwards.
14025ffd83dbSDimitry Andric   auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
14035ffd83dbSDimitry Andric   SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
14045ffd83dbSDimitry Andric   for (Instruction *I : TempInstrs)
14055ffd83dbSDimitry Andric     I->removeFromParent();
14065ffd83dbSDimitry Andric 
14075ffd83dbSDimitry Andric   // Move instructions from BB2 to BB1.
14085ffd83dbSDimitry Andric   moveBBContents(BB2, BB1->getTerminator());
14095ffd83dbSDimitry Andric 
14105ffd83dbSDimitry Andric   // Move instructions from TempInstrs to BB2.
14115ffd83dbSDimitry Andric   for (Instruction *I : TempInstrs)
14125ffd83dbSDimitry Andric     I->insertBefore(BB2->getTerminator());
14135ffd83dbSDimitry Andric }
14145ffd83dbSDimitry Andric 
1415480093f4SDimitry Andric // Update BI to jump to NewBB instead of OldBB. Records updates to the
1416480093f4SDimitry Andric // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1417480093f4SDimitry Andric // \p OldBB  is exactly once in BI's successor list.
14180b57cec5SDimitry Andric static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
14190b57cec5SDimitry Andric                             BasicBlock *NewBB,
1420480093f4SDimitry Andric                             std::vector<DominatorTree::UpdateType> &DTUpdates,
1421480093f4SDimitry Andric                             bool MustUpdateOnce = true) {
1422480093f4SDimitry Andric   assert((!MustUpdateOnce ||
1423480093f4SDimitry Andric           llvm::count_if(successors(BI),
1424480093f4SDimitry Andric                          [OldBB](BasicBlock *BB) {
1425480093f4SDimitry Andric                            return BB == OldBB;
1426480093f4SDimitry Andric                          }) == 1) && "BI must jump to OldBB exactly once.");
1427480093f4SDimitry Andric   bool Changed = false;
1428480093f4SDimitry Andric   for (Use &Op : BI->operands())
1429480093f4SDimitry Andric     if (Op == OldBB) {
1430480093f4SDimitry Andric       Op.set(NewBB);
1431480093f4SDimitry Andric       Changed = true;
1432480093f4SDimitry Andric     }
14330b57cec5SDimitry Andric 
1434480093f4SDimitry Andric   if (Changed) {
14350b57cec5SDimitry Andric     DTUpdates.push_back(
14360b57cec5SDimitry Andric         {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
14370b57cec5SDimitry Andric     DTUpdates.push_back(
14380b57cec5SDimitry Andric         {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
14390b57cec5SDimitry Andric   }
1440480093f4SDimitry Andric   assert(Changed && "Expected a successor to be updated");
14410b57cec5SDimitry Andric }
14420b57cec5SDimitry Andric 
14430b57cec5SDimitry Andric // Move Lcssa PHIs to the right place.
14440b57cec5SDimitry Andric static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
14450b57cec5SDimitry Andric                           BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1446480093f4SDimitry Andric                           BasicBlock *OuterLatch, BasicBlock *OuterExit,
1447480093f4SDimitry Andric                           Loop *InnerLoop, LoopInfo *LI) {
14480b57cec5SDimitry Andric 
14490b57cec5SDimitry Andric   // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
14500b57cec5SDimitry Andric   // defined either in the header or latch. Those blocks will become header and
14510b57cec5SDimitry Andric   // latch of the new outer loop, and the only possible users can PHI nodes
14520b57cec5SDimitry Andric   // in the exit block of the loop nest or the outer loop header (reduction
14530b57cec5SDimitry Andric   // PHIs, in that case, the incoming value must be defined in the inner loop
14540b57cec5SDimitry Andric   // header). We can just substitute the user with the incoming value and remove
14550b57cec5SDimitry Andric   // the PHI.
14560b57cec5SDimitry Andric   for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
14570b57cec5SDimitry Andric     assert(P.getNumIncomingValues() == 1 &&
14580b57cec5SDimitry Andric            "Only loops with a single exit are supported!");
14590b57cec5SDimitry Andric 
14600b57cec5SDimitry Andric     // Incoming values are guaranteed be instructions currently.
14610b57cec5SDimitry Andric     auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
146281ad6265SDimitry Andric     // In case of multi-level nested loops, follow LCSSA to find the incoming
146381ad6265SDimitry Andric     // value defined from the innermost loop.
146481ad6265SDimitry Andric     auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
14650b57cec5SDimitry Andric     // Skip phis with incoming values from the inner loop body, excluding the
14660b57cec5SDimitry Andric     // header and latch.
146781ad6265SDimitry Andric     if (IncIInnerMost->getParent() != InnerLatch &&
146881ad6265SDimitry Andric         IncIInnerMost->getParent() != InnerHeader)
14690b57cec5SDimitry Andric       continue;
14700b57cec5SDimitry Andric 
14710b57cec5SDimitry Andric     assert(all_of(P.users(),
14720b57cec5SDimitry Andric                   [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
14730b57cec5SDimitry Andric                     return (cast<PHINode>(U)->getParent() == OuterHeader &&
14740b57cec5SDimitry Andric                             IncI->getParent() == InnerHeader) ||
14750b57cec5SDimitry Andric                            cast<PHINode>(U)->getParent() == OuterExit;
14760b57cec5SDimitry Andric                   }) &&
14770b57cec5SDimitry Andric            "Can only replace phis iff the uses are in the loop nest exit or "
14780b57cec5SDimitry Andric            "the incoming value is defined in the inner header (it will "
14790b57cec5SDimitry Andric            "dominate all loop blocks after interchanging)");
14800b57cec5SDimitry Andric     P.replaceAllUsesWith(IncI);
14810b57cec5SDimitry Andric     P.eraseFromParent();
14820b57cec5SDimitry Andric   }
14830b57cec5SDimitry Andric 
14840b57cec5SDimitry Andric   SmallVector<PHINode *, 8> LcssaInnerExit;
14850b57cec5SDimitry Andric   for (PHINode &P : InnerExit->phis())
14860b57cec5SDimitry Andric     LcssaInnerExit.push_back(&P);
14870b57cec5SDimitry Andric 
14880b57cec5SDimitry Andric   SmallVector<PHINode *, 8> LcssaInnerLatch;
14890b57cec5SDimitry Andric   for (PHINode &P : InnerLatch->phis())
14900b57cec5SDimitry Andric     LcssaInnerLatch.push_back(&P);
14910b57cec5SDimitry Andric 
14920b57cec5SDimitry Andric   // Lcssa PHIs for values used outside the inner loop are in InnerExit.
14930b57cec5SDimitry Andric   // If a PHI node has users outside of InnerExit, it has a use outside the
14940b57cec5SDimitry Andric   // interchanged loop and we have to preserve it. We move these to
14950b57cec5SDimitry Andric   // InnerLatch, which will become the new exit block for the innermost
14960b57cec5SDimitry Andric   // loop after interchanging.
14970b57cec5SDimitry Andric   for (PHINode *P : LcssaInnerExit)
14980b57cec5SDimitry Andric     P->moveBefore(InnerLatch->getFirstNonPHI());
14990b57cec5SDimitry Andric 
15000b57cec5SDimitry Andric   // If the inner loop latch contains LCSSA PHIs, those come from a child loop
15010b57cec5SDimitry Andric   // and we have to move them to the new inner latch.
15020b57cec5SDimitry Andric   for (PHINode *P : LcssaInnerLatch)
15030b57cec5SDimitry Andric     P->moveBefore(InnerExit->getFirstNonPHI());
15040b57cec5SDimitry Andric 
15050b57cec5SDimitry Andric   // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1506480093f4SDimitry Andric   // incoming values defined in the outer loop, we have to add a new PHI
15070b57cec5SDimitry Andric   // in the inner loop latch, which became the exit block of the outer loop,
15080b57cec5SDimitry Andric   // after interchanging.
15090b57cec5SDimitry Andric   if (OuterExit) {
15100b57cec5SDimitry Andric     for (PHINode &P : OuterExit->phis()) {
15110b57cec5SDimitry Andric       if (P.getNumIncomingValues() != 1)
15120b57cec5SDimitry Andric         continue;
1513480093f4SDimitry Andric       // Skip Phis with incoming values defined in the inner loop. Those should
15140b57cec5SDimitry Andric       // already have been updated.
15150b57cec5SDimitry Andric       auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1516480093f4SDimitry Andric       if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
15170b57cec5SDimitry Andric         continue;
15180b57cec5SDimitry Andric 
15190b57cec5SDimitry Andric       PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
15200b57cec5SDimitry Andric       NewPhi->setIncomingValue(0, P.getIncomingValue(0));
15210b57cec5SDimitry Andric       NewPhi->setIncomingBlock(0, OuterLatch);
1522fe6060f1SDimitry Andric       // We might have incoming edges from other BBs, i.e., the original outer
1523fe6060f1SDimitry Andric       // header.
1524fe6060f1SDimitry Andric       for (auto *Pred : predecessors(InnerLatch)) {
1525fe6060f1SDimitry Andric         if (Pred == OuterLatch)
1526fe6060f1SDimitry Andric           continue;
1527fe6060f1SDimitry Andric         NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1528fe6060f1SDimitry Andric       }
15290b57cec5SDimitry Andric       NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
15300b57cec5SDimitry Andric       P.setIncomingValue(0, NewPhi);
15310b57cec5SDimitry Andric     }
15320b57cec5SDimitry Andric   }
15330b57cec5SDimitry Andric 
15340b57cec5SDimitry Andric   // Now adjust the incoming blocks for the LCSSA PHIs.
15350b57cec5SDimitry Andric   // For PHIs moved from Inner's exit block, we need to replace Inner's latch
15360b57cec5SDimitry Andric   // with the new latch.
15370b57cec5SDimitry Andric   InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
15380b57cec5SDimitry Andric }
15390b57cec5SDimitry Andric 
15400b57cec5SDimitry Andric bool LoopInterchangeTransform::adjustLoopBranches() {
15410b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
15420b57cec5SDimitry Andric   std::vector<DominatorTree::UpdateType> DTUpdates;
15430b57cec5SDimitry Andric 
15440b57cec5SDimitry Andric   BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
15450b57cec5SDimitry Andric   BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
15460b57cec5SDimitry Andric 
15470b57cec5SDimitry Andric   assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
15480b57cec5SDimitry Andric          InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
15490b57cec5SDimitry Andric          InnerLoopPreHeader && "Guaranteed by loop-simplify form");
15500b57cec5SDimitry Andric   // Ensure that both preheaders do not contain PHI nodes and have single
15510b57cec5SDimitry Andric   // predecessors. This allows us to move them easily. We use
15520b57cec5SDimitry Andric   // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
15530b57cec5SDimitry Andric   // preheaders do not satisfy those conditions.
15540b57cec5SDimitry Andric   if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
15550b57cec5SDimitry Andric       !OuterLoopPreHeader->getUniquePredecessor())
15560b57cec5SDimitry Andric     OuterLoopPreHeader =
15570b57cec5SDimitry Andric         InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
15580b57cec5SDimitry Andric   if (InnerLoopPreHeader == OuterLoop->getHeader())
15590b57cec5SDimitry Andric     InnerLoopPreHeader =
15600b57cec5SDimitry Andric         InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
15610b57cec5SDimitry Andric 
15620b57cec5SDimitry Andric   // Adjust the loop preheader
15630b57cec5SDimitry Andric   BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
15640b57cec5SDimitry Andric   BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
15650b57cec5SDimitry Andric   BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
15660b57cec5SDimitry Andric   BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
15670b57cec5SDimitry Andric   BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
15680b57cec5SDimitry Andric   BasicBlock *InnerLoopLatchPredecessor =
15690b57cec5SDimitry Andric       InnerLoopLatch->getUniquePredecessor();
15700b57cec5SDimitry Andric   BasicBlock *InnerLoopLatchSuccessor;
15710b57cec5SDimitry Andric   BasicBlock *OuterLoopLatchSuccessor;
15720b57cec5SDimitry Andric 
15730b57cec5SDimitry Andric   BranchInst *OuterLoopLatchBI =
15740b57cec5SDimitry Andric       dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
15750b57cec5SDimitry Andric   BranchInst *InnerLoopLatchBI =
15760b57cec5SDimitry Andric       dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
15770b57cec5SDimitry Andric   BranchInst *OuterLoopHeaderBI =
15780b57cec5SDimitry Andric       dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
15790b57cec5SDimitry Andric   BranchInst *InnerLoopHeaderBI =
15800b57cec5SDimitry Andric       dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
15810b57cec5SDimitry Andric 
15820b57cec5SDimitry Andric   if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
15830b57cec5SDimitry Andric       !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
15840b57cec5SDimitry Andric       !InnerLoopHeaderBI)
15850b57cec5SDimitry Andric     return false;
15860b57cec5SDimitry Andric 
15870b57cec5SDimitry Andric   BranchInst *InnerLoopLatchPredecessorBI =
15880b57cec5SDimitry Andric       dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
15890b57cec5SDimitry Andric   BranchInst *OuterLoopPredecessorBI =
15900b57cec5SDimitry Andric       dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
15910b57cec5SDimitry Andric 
15920b57cec5SDimitry Andric   if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
15930b57cec5SDimitry Andric     return false;
15940b57cec5SDimitry Andric   BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
15950b57cec5SDimitry Andric   if (!InnerLoopHeaderSuccessor)
15960b57cec5SDimitry Andric     return false;
15970b57cec5SDimitry Andric 
1598480093f4SDimitry Andric   // Adjust Loop Preheader and headers.
1599480093f4SDimitry Andric   // The branches in the outer loop predecessor and the outer loop header can
1600480093f4SDimitry Andric   // be unconditional branches or conditional branches with duplicates. Consider
1601480093f4SDimitry Andric   // this when updating the successors.
16020b57cec5SDimitry Andric   updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1603480093f4SDimitry Andric                   InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1604480093f4SDimitry Andric   // The outer loop header might or might not branch to the outer latch.
1605480093f4SDimitry Andric   // We are guaranteed to branch to the inner loop preheader.
1606fe6060f1SDimitry Andric   if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1607fe6060f1SDimitry Andric     // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1608fe6060f1SDimitry Andric     updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1609fe6060f1SDimitry Andric                     DTUpdates,
1610480093f4SDimitry Andric                     /*MustUpdateOnce=*/false);
1611fe6060f1SDimitry Andric   }
16120b57cec5SDimitry Andric   updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1613480093f4SDimitry Andric                   InnerLoopHeaderSuccessor, DTUpdates,
1614480093f4SDimitry Andric                   /*MustUpdateOnce=*/false);
16150b57cec5SDimitry Andric 
16160b57cec5SDimitry Andric   // Adjust reduction PHI's now that the incoming block has changed.
16170b57cec5SDimitry Andric   InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
16180b57cec5SDimitry Andric                                                OuterLoopHeader);
16190b57cec5SDimitry Andric 
16200b57cec5SDimitry Andric   updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
16210b57cec5SDimitry Andric                   OuterLoopPreHeader, DTUpdates);
16220b57cec5SDimitry Andric 
16230b57cec5SDimitry Andric   // -------------Adjust loop latches-----------
16240b57cec5SDimitry Andric   if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
16250b57cec5SDimitry Andric     InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
16260b57cec5SDimitry Andric   else
16270b57cec5SDimitry Andric     InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
16280b57cec5SDimitry Andric 
16290b57cec5SDimitry Andric   updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
16300b57cec5SDimitry Andric                   InnerLoopLatchSuccessor, DTUpdates);
16310b57cec5SDimitry Andric 
16320b57cec5SDimitry Andric   if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
16330b57cec5SDimitry Andric     OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
16340b57cec5SDimitry Andric   else
16350b57cec5SDimitry Andric     OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
16360b57cec5SDimitry Andric 
16370b57cec5SDimitry Andric   updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
16380b57cec5SDimitry Andric                   OuterLoopLatchSuccessor, DTUpdates);
16390b57cec5SDimitry Andric   updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
16400b57cec5SDimitry Andric                   DTUpdates);
16410b57cec5SDimitry Andric 
16420b57cec5SDimitry Andric   DT->applyUpdates(DTUpdates);
16430b57cec5SDimitry Andric   restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
16440b57cec5SDimitry Andric                    OuterLoopPreHeader);
16450b57cec5SDimitry Andric 
16460b57cec5SDimitry Andric   moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1647480093f4SDimitry Andric                 OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1648480093f4SDimitry Andric                 InnerLoop, LI);
16490b57cec5SDimitry Andric   // For PHIs in the exit block of the outer loop, outer's latch has been
16500b57cec5SDimitry Andric   // replaced by Inners'.
16510b57cec5SDimitry Andric   OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
16520b57cec5SDimitry Andric 
1653fe6060f1SDimitry Andric   auto &OuterInnerReductions = LIL.getOuterInnerReductions();
16540b57cec5SDimitry Andric   // Now update the reduction PHIs in the inner and outer loop headers.
16550b57cec5SDimitry Andric   SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1656349cc55cSDimitry Andric   for (PHINode &PHI : InnerLoopHeader->phis())
1657349cc55cSDimitry Andric     if (OuterInnerReductions.contains(&PHI))
165804eeddc0SDimitry Andric       InnerLoopPHIs.push_back(&PHI);
165904eeddc0SDimitry Andric 
1660349cc55cSDimitry Andric   for (PHINode &PHI : OuterLoopHeader->phis())
1661349cc55cSDimitry Andric     if (OuterInnerReductions.contains(&PHI))
166204eeddc0SDimitry Andric       OuterLoopPHIs.push_back(&PHI);
16630b57cec5SDimitry Andric 
16640b57cec5SDimitry Andric   // Now move the remaining reduction PHIs from outer to inner loop header and
16650b57cec5SDimitry Andric   // vice versa. The PHI nodes must be part of a reduction across the inner and
16660b57cec5SDimitry Andric   // outer loop and all the remains to do is and updating the incoming blocks.
16670b57cec5SDimitry Andric   for (PHINode *PHI : OuterLoopPHIs) {
166804eeddc0SDimitry Andric     LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
16690b57cec5SDimitry Andric     PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
16705ffd83dbSDimitry Andric     assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
16710b57cec5SDimitry Andric   }
16720b57cec5SDimitry Andric   for (PHINode *PHI : InnerLoopPHIs) {
167304eeddc0SDimitry Andric     LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
16740b57cec5SDimitry Andric     PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
16755ffd83dbSDimitry Andric     assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
16760b57cec5SDimitry Andric   }
16770b57cec5SDimitry Andric 
16780b57cec5SDimitry Andric   // Update the incoming blocks for moved PHI nodes.
16790b57cec5SDimitry Andric   OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
16800b57cec5SDimitry Andric   OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
16810b57cec5SDimitry Andric   InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
16820b57cec5SDimitry Andric   InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
16830b57cec5SDimitry Andric 
1684e8d8bef9SDimitry Andric   // Values defined in the outer loop header could be used in the inner loop
1685e8d8bef9SDimitry Andric   // latch. In that case, we need to create LCSSA phis for them, because after
1686e8d8bef9SDimitry Andric   // interchanging they will be defined in the new inner loop and used in the
1687e8d8bef9SDimitry Andric   // new outer loop.
1688e8d8bef9SDimitry Andric   SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1689e8d8bef9SDimitry Andric   for (Instruction &I :
1690e8d8bef9SDimitry Andric        make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1691e8d8bef9SDimitry Andric     MayNeedLCSSAPhis.push_back(&I);
169206c3fb27SDimitry Andric   formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
1693e8d8bef9SDimitry Andric 
16940b57cec5SDimitry Andric   return true;
16950b57cec5SDimitry Andric }
16960b57cec5SDimitry Andric 
16970b57cec5SDimitry Andric bool LoopInterchangeTransform::adjustLoopLinks() {
16980b57cec5SDimitry Andric   // Adjust all branches in the inner and outer loop.
16990b57cec5SDimitry Andric   bool Changed = adjustLoopBranches();
17005ffd83dbSDimitry Andric   if (Changed) {
17015ffd83dbSDimitry Andric     // We have interchanged the preheaders so we need to interchange the data in
17025ffd83dbSDimitry Andric     // the preheaders as well. This is because the content of the inner
17035ffd83dbSDimitry Andric     // preheader was previously executed inside the outer loop.
17045ffd83dbSDimitry Andric     BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
17055ffd83dbSDimitry Andric     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
17065ffd83dbSDimitry Andric     swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
17075ffd83dbSDimitry Andric   }
17080b57cec5SDimitry Andric   return Changed;
17090b57cec5SDimitry Andric }
17100b57cec5SDimitry Andric 
1711fe6060f1SDimitry Andric PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1712fe6060f1SDimitry Andric                                            LoopAnalysisManager &AM,
1713e8d8bef9SDimitry Andric                                            LoopStandardAnalysisResults &AR,
1714e8d8bef9SDimitry Andric                                            LPMUpdater &U) {
1715fe6060f1SDimitry Andric   Function &F = *LN.getParent();
1716e8d8bef9SDimitry Andric 
1717e8d8bef9SDimitry Andric   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
171881ad6265SDimitry Andric   std::unique_ptr<CacheCost> CC =
171981ad6265SDimitry Andric       CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1720e8d8bef9SDimitry Andric   OptimizationRemarkEmitter ORE(&F);
172181ad6265SDimitry Andric   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1722e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
1723bdd1243dSDimitry Andric   U.markLoopNestChanged(true);
1724e8d8bef9SDimitry Andric   return getLoopPassPreservedAnalyses();
1725e8d8bef9SDimitry Andric }
1726