10b57cec5SDimitry Andric //===- LoopInterchange.cpp - Loop interchange pass-------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This Pass handles loop interchange transform. 100b57cec5SDimitry Andric // This pass interchanges loops to provide a more cache-friendly memory access 110b57cec5SDimitry Andric // patterns. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 15e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/LoopInterchange.h" 160b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 170b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 180b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 190b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h" 2181ad6265SDimitry Andric #include "llvm/Analysis/LoopCacheAnalysis.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 23fe6060f1SDimitry Andric #include "llvm/Analysis/LoopNestAnalysis.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 280b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 290b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 300b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 310b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 320b57cec5SDimitry Andric #include "llvm/IR/Function.h" 330b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 340b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 350b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 360b57cec5SDimitry Andric #include "llvm/IR/User.h" 370b57cec5SDimitry Andric #include "llvm/IR/Value.h" 380b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 390b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 400b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 410b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 420b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 43bdd1243dSDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h" 440b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 450b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 460b57cec5SDimitry Andric #include <cassert> 470b57cec5SDimitry Andric #include <utility> 480b57cec5SDimitry Andric #include <vector> 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric using namespace llvm; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric #define DEBUG_TYPE "loop-interchange" 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric STATISTIC(LoopsInterchanged, "Number of loops interchanged"); 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric static cl::opt<int> LoopInterchangeCostThreshold( 570b57cec5SDimitry Andric "loop-interchange-threshold", cl::init(0), cl::Hidden, 580b57cec5SDimitry Andric cl::desc("Interchange if you gain more than this number")); 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric namespace { 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric using LoopVector = SmallVector<Loop *, 8>; 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric // TODO: Check if we can use a sparse matrix here. 650b57cec5SDimitry Andric using CharMatrix = std::vector<std::vector<char>>; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric } // end anonymous namespace 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric // Maximum number of dependencies that can be handled in the dependency matrix. 700b57cec5SDimitry Andric static const unsigned MaxMemInstrCount = 100; 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric // Maximum loop depth supported. 730b57cec5SDimitry Andric static const unsigned MaxLoopNestDepth = 10; 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES 760b57cec5SDimitry Andric static void printDepMatrix(CharMatrix &DepMatrix) { 770b57cec5SDimitry Andric for (auto &Row : DepMatrix) { 780b57cec5SDimitry Andric for (auto D : Row) 790b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << D << " "); 800b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric #endif 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, 86bdd1243dSDimitry Andric Loop *L, DependenceInfo *DI, 87bdd1243dSDimitry Andric ScalarEvolution *SE) { 880b57cec5SDimitry Andric using ValueVector = SmallVector<Value *, 16>; 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric ValueVector MemInstr; 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric // For each block. 930b57cec5SDimitry Andric for (BasicBlock *BB : L->blocks()) { 940b57cec5SDimitry Andric // Scan the BB and collect legal loads and stores. 950b57cec5SDimitry Andric for (Instruction &I : *BB) { 960b57cec5SDimitry Andric if (!isa<Instruction>(I)) 970b57cec5SDimitry Andric return false; 980b57cec5SDimitry Andric if (auto *Ld = dyn_cast<LoadInst>(&I)) { 990b57cec5SDimitry Andric if (!Ld->isSimple()) 1000b57cec5SDimitry Andric return false; 1010b57cec5SDimitry Andric MemInstr.push_back(&I); 1020b57cec5SDimitry Andric } else if (auto *St = dyn_cast<StoreInst>(&I)) { 1030b57cec5SDimitry Andric if (!St->isSimple()) 1040b57cec5SDimitry Andric return false; 1050b57cec5SDimitry Andric MemInstr.push_back(&I); 1060b57cec5SDimitry Andric } 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric } 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << MemInstr.size() 1110b57cec5SDimitry Andric << " Loads and Stores to analyze\n"); 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric ValueVector::iterator I, IE, J, JE; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { 1160b57cec5SDimitry Andric for (J = I, JE = MemInstr.end(); J != JE; ++J) { 1170b57cec5SDimitry Andric std::vector<char> Dep; 1180b57cec5SDimitry Andric Instruction *Src = cast<Instruction>(*I); 1190b57cec5SDimitry Andric Instruction *Dst = cast<Instruction>(*J); 1200b57cec5SDimitry Andric // Ignore Input dependencies. 1210b57cec5SDimitry Andric if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) 1220b57cec5SDimitry Andric continue; 1230b57cec5SDimitry Andric // Track Output, Flow, and Anti dependencies. 1240b57cec5SDimitry Andric if (auto D = DI->depends(Src, Dst, true)) { 1250b57cec5SDimitry Andric assert(D->isOrdered() && "Expected an output, flow or anti dep."); 126bdd1243dSDimitry Andric // If the direction vector is negative, normalize it to 127bdd1243dSDimitry Andric // make it non-negative. 128bdd1243dSDimitry Andric if (D->normalize(SE)) 129bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n"); 1300b57cec5SDimitry Andric LLVM_DEBUG(StringRef DepType = 1310b57cec5SDimitry Andric D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; 1320b57cec5SDimitry Andric dbgs() << "Found " << DepType 1330b57cec5SDimitry Andric << " dependency between Src and Dst\n" 1340b57cec5SDimitry Andric << " Src:" << *Src << "\n Dst:" << *Dst << '\n'); 1350b57cec5SDimitry Andric unsigned Levels = D->getLevels(); 1360b57cec5SDimitry Andric char Direction; 1370b57cec5SDimitry Andric for (unsigned II = 1; II <= Levels; ++II) { 138bdd1243dSDimitry Andric if (D->isScalar(II)) { 1390b57cec5SDimitry Andric Direction = 'S'; 1400b57cec5SDimitry Andric Dep.push_back(Direction); 1410b57cec5SDimitry Andric } else { 1420b57cec5SDimitry Andric unsigned Dir = D->getDirection(II); 1430b57cec5SDimitry Andric if (Dir == Dependence::DVEntry::LT || 1440b57cec5SDimitry Andric Dir == Dependence::DVEntry::LE) 1450b57cec5SDimitry Andric Direction = '<'; 1460b57cec5SDimitry Andric else if (Dir == Dependence::DVEntry::GT || 1470b57cec5SDimitry Andric Dir == Dependence::DVEntry::GE) 1480b57cec5SDimitry Andric Direction = '>'; 1490b57cec5SDimitry Andric else if (Dir == Dependence::DVEntry::EQ) 1500b57cec5SDimitry Andric Direction = '='; 1510b57cec5SDimitry Andric else 1520b57cec5SDimitry Andric Direction = '*'; 1530b57cec5SDimitry Andric Dep.push_back(Direction); 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric } 1560b57cec5SDimitry Andric while (Dep.size() != Level) { 1570b57cec5SDimitry Andric Dep.push_back('I'); 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric DepMatrix.push_back(Dep); 1610b57cec5SDimitry Andric if (DepMatrix.size() > MaxMemInstrCount) { 1620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount 1630b57cec5SDimitry Andric << " dependencies inside loop\n"); 1640b57cec5SDimitry Andric return false; 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric return true; 1710b57cec5SDimitry Andric } 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric // A loop is moved from index 'from' to an index 'to'. Update the Dependence 1740b57cec5SDimitry Andric // matrix by exchanging the two columns. 1750b57cec5SDimitry Andric static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, 1760b57cec5SDimitry Andric unsigned ToIndx) { 177fe6060f1SDimitry Andric for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I) 178fe6060f1SDimitry Andric std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]); 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 181bdd1243dSDimitry Andric // After interchanging, check if the direction vector is valid. 1820b57cec5SDimitry Andric // [Theorem] A permutation of the loops in a perfect nest is legal if and only 1830b57cec5SDimitry Andric // if the direction matrix, after the same permutation is applied to its 1840b57cec5SDimitry Andric // columns, has no ">" direction as the leftmost non-"=" direction in any row. 185bdd1243dSDimitry Andric static bool isLexicographicallyPositive(std::vector<char> &DV) { 18606c3fb27SDimitry Andric for (unsigned char Direction : DV) { 187bdd1243dSDimitry Andric if (Direction == '<') 188bdd1243dSDimitry Andric return true; 189bdd1243dSDimitry Andric if (Direction == '>' || Direction == '*') 190bdd1243dSDimitry Andric return false; 191bdd1243dSDimitry Andric } 192bdd1243dSDimitry Andric return true; 193bdd1243dSDimitry Andric } 194bdd1243dSDimitry Andric 195bdd1243dSDimitry Andric // Checks if it is legal to interchange 2 loops. 1960b57cec5SDimitry Andric static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, 1970b57cec5SDimitry Andric unsigned InnerLoopId, 1980b57cec5SDimitry Andric unsigned OuterLoopId) { 1990b57cec5SDimitry Andric unsigned NumRows = DepMatrix.size(); 200bdd1243dSDimitry Andric std::vector<char> Cur; 2010b57cec5SDimitry Andric // For each row check if it is valid to interchange. 2020b57cec5SDimitry Andric for (unsigned Row = 0; Row < NumRows; ++Row) { 203bdd1243dSDimitry Andric // Create temporary DepVector check its lexicographical order 204bdd1243dSDimitry Andric // before and after swapping OuterLoop vs InnerLoop 205bdd1243dSDimitry Andric Cur = DepMatrix[Row]; 206bdd1243dSDimitry Andric if (!isLexicographicallyPositive(Cur)) 2070b57cec5SDimitry Andric return false; 208bdd1243dSDimitry Andric std::swap(Cur[InnerLoopId], Cur[OuterLoopId]); 209bdd1243dSDimitry Andric if (!isLexicographicallyPositive(Cur)) 2100b57cec5SDimitry Andric return false; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric return true; 2130b57cec5SDimitry Andric } 2140b57cec5SDimitry Andric 21581ad6265SDimitry Andric static void populateWorklist(Loop &L, LoopVector &LoopList) { 2160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: " 2170b57cec5SDimitry Andric << L.getHeader()->getParent()->getName() << " Loop: %" 2180b57cec5SDimitry Andric << L.getHeader()->getName() << '\n'); 21981ad6265SDimitry Andric assert(LoopList.empty() && "LoopList should initially be empty!"); 2200b57cec5SDimitry Andric Loop *CurrentLoop = &L; 2210b57cec5SDimitry Andric const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); 2220b57cec5SDimitry Andric while (!Vec->empty()) { 2230b57cec5SDimitry Andric // The current loop has multiple subloops in it hence it is not tightly 2240b57cec5SDimitry Andric // nested. 2250b57cec5SDimitry Andric // Discard all loops above it added into Worklist. 22681ad6265SDimitry Andric if (Vec->size() != 1) { 22781ad6265SDimitry Andric LoopList = {}; 22881ad6265SDimitry Andric return; 22981ad6265SDimitry Andric } 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric LoopList.push_back(CurrentLoop); 2320b57cec5SDimitry Andric CurrentLoop = Vec->front(); 2330b57cec5SDimitry Andric Vec = &CurrentLoop->getSubLoops(); 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric LoopList.push_back(CurrentLoop); 2360b57cec5SDimitry Andric } 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric namespace { 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric /// LoopInterchangeLegality checks if it is legal to interchange the loop. 2410b57cec5SDimitry Andric class LoopInterchangeLegality { 2420b57cec5SDimitry Andric public: 2430b57cec5SDimitry Andric LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, 2440b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE) 2450b57cec5SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric /// Check if the loops can be interchanged. 2480b57cec5SDimitry Andric bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, 2490b57cec5SDimitry Andric CharMatrix &DepMatrix); 2500b57cec5SDimitry Andric 25104eeddc0SDimitry Andric /// Discover induction PHIs in the header of \p L. Induction 25204eeddc0SDimitry Andric /// PHIs are added to \p Inductions. 25304eeddc0SDimitry Andric bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions); 25404eeddc0SDimitry Andric 2550b57cec5SDimitry Andric /// Check if the loop structure is understood. We do not handle triangular 2560b57cec5SDimitry Andric /// loops for now. 25704eeddc0SDimitry Andric bool isLoopStructureUnderstood(); 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric bool currentLimitations(); 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const { 2620b57cec5SDimitry Andric return OuterInnerReductions; 2630b57cec5SDimitry Andric } 2640b57cec5SDimitry Andric 26504eeddc0SDimitry Andric const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const { 26604eeddc0SDimitry Andric return InnerLoopInductions; 26704eeddc0SDimitry Andric } 26804eeddc0SDimitry Andric 2690b57cec5SDimitry Andric private: 2700b57cec5SDimitry Andric bool tightlyNested(Loop *Outer, Loop *Inner); 2710b57cec5SDimitry Andric bool containsUnsafeInstructions(BasicBlock *BB); 2720b57cec5SDimitry Andric 2730b57cec5SDimitry Andric /// Discover induction and reduction PHIs in the header of \p L. Induction 2740b57cec5SDimitry Andric /// PHIs are added to \p Inductions, reductions are added to 2750b57cec5SDimitry Andric /// OuterInnerReductions. When the outer loop is passed, the inner loop needs 2760b57cec5SDimitry Andric /// to be passed as \p InnerLoop. 2770b57cec5SDimitry Andric bool findInductionAndReductions(Loop *L, 2780b57cec5SDimitry Andric SmallVector<PHINode *, 8> &Inductions, 2790b57cec5SDimitry Andric Loop *InnerLoop); 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric Loop *OuterLoop; 2820b57cec5SDimitry Andric Loop *InnerLoop; 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric ScalarEvolution *SE; 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric /// Interface to emit optimization remarks. 2870b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE; 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric /// Set of reduction PHIs taking part of a reduction across the inner and 2900b57cec5SDimitry Andric /// outer loop. 2910b57cec5SDimitry Andric SmallPtrSet<PHINode *, 4> OuterInnerReductions; 29204eeddc0SDimitry Andric 29304eeddc0SDimitry Andric /// Set of inner loop induction PHIs 29404eeddc0SDimitry Andric SmallVector<PHINode *, 8> InnerLoopInductions; 2950b57cec5SDimitry Andric }; 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric /// LoopInterchangeProfitability checks if it is profitable to interchange the 2980b57cec5SDimitry Andric /// loop. 2990b57cec5SDimitry Andric class LoopInterchangeProfitability { 3000b57cec5SDimitry Andric public: 3010b57cec5SDimitry Andric LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, 3020b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE) 3030b57cec5SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric /// Check if the loop interchange is profitable. 30681ad6265SDimitry Andric bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop, 30781ad6265SDimitry Andric unsigned InnerLoopId, unsigned OuterLoopId, 30881ad6265SDimitry Andric CharMatrix &DepMatrix, 309bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap, 310bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC); 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric private: 3130b57cec5SDimitry Andric int getInstrOrderCost(); 314bdd1243dSDimitry Andric std::optional<bool> isProfitablePerLoopCacheAnalysis( 315bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap, 316bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC); 317bdd1243dSDimitry Andric std::optional<bool> isProfitablePerInstrOrderCost(); 318bdd1243dSDimitry Andric std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId, 319bdd1243dSDimitry Andric unsigned OuterLoopId, 320bdd1243dSDimitry Andric CharMatrix &DepMatrix); 3210b57cec5SDimitry Andric Loop *OuterLoop; 3220b57cec5SDimitry Andric Loop *InnerLoop; 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric /// Scev analysis. 3250b57cec5SDimitry Andric ScalarEvolution *SE; 3260b57cec5SDimitry Andric 3270b57cec5SDimitry Andric /// Interface to emit optimization remarks. 3280b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE; 3290b57cec5SDimitry Andric }; 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric /// LoopInterchangeTransform interchanges the loop. 3320b57cec5SDimitry Andric class LoopInterchangeTransform { 3330b57cec5SDimitry Andric public: 3340b57cec5SDimitry Andric LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, 3350b57cec5SDimitry Andric LoopInfo *LI, DominatorTree *DT, 3360b57cec5SDimitry Andric const LoopInterchangeLegality &LIL) 337fe6060f1SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {} 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric /// Interchange OuterLoop and InnerLoop. 3400b57cec5SDimitry Andric bool transform(); 3410b57cec5SDimitry Andric void restructureLoops(Loop *NewInner, Loop *NewOuter, 3420b57cec5SDimitry Andric BasicBlock *OrigInnerPreHeader, 3430b57cec5SDimitry Andric BasicBlock *OrigOuterPreHeader); 3440b57cec5SDimitry Andric void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric private: 3470b57cec5SDimitry Andric bool adjustLoopLinks(); 3480b57cec5SDimitry Andric bool adjustLoopBranches(); 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric Loop *OuterLoop; 3510b57cec5SDimitry Andric Loop *InnerLoop; 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric /// Scev analysis. 3540b57cec5SDimitry Andric ScalarEvolution *SE; 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric LoopInfo *LI; 3570b57cec5SDimitry Andric DominatorTree *DT; 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric const LoopInterchangeLegality &LIL; 3600b57cec5SDimitry Andric }; 3610b57cec5SDimitry Andric 362e8d8bef9SDimitry Andric struct LoopInterchange { 3630b57cec5SDimitry Andric ScalarEvolution *SE = nullptr; 3640b57cec5SDimitry Andric LoopInfo *LI = nullptr; 3650b57cec5SDimitry Andric DependenceInfo *DI = nullptr; 3660b57cec5SDimitry Andric DominatorTree *DT = nullptr; 36781ad6265SDimitry Andric std::unique_ptr<CacheCost> CC = nullptr; 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric /// Interface to emit optimization remarks. 3700b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE; 3710b57cec5SDimitry Andric 372e8d8bef9SDimitry Andric LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI, 37381ad6265SDimitry Andric DominatorTree *DT, std::unique_ptr<CacheCost> &CC, 37481ad6265SDimitry Andric OptimizationRemarkEmitter *ORE) 37581ad6265SDimitry Andric : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {} 3760b57cec5SDimitry Andric 377e8d8bef9SDimitry Andric bool run(Loop *L) { 378e8d8bef9SDimitry Andric if (L->getParentLoop()) 3790b57cec5SDimitry Andric return false; 38081ad6265SDimitry Andric SmallVector<Loop *, 8> LoopList; 38181ad6265SDimitry Andric populateWorklist(*L, LoopList); 38281ad6265SDimitry Andric return processLoopList(LoopList); 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric 385fe6060f1SDimitry Andric bool run(LoopNest &LN) { 38681ad6265SDimitry Andric SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end()); 387fe6060f1SDimitry Andric for (unsigned I = 1; I < LoopList.size(); ++I) 388fe6060f1SDimitry Andric if (LoopList[I]->getParentLoop() != LoopList[I - 1]) 389fe6060f1SDimitry Andric return false; 390fe6060f1SDimitry Andric return processLoopList(LoopList); 391fe6060f1SDimitry Andric } 392fe6060f1SDimitry Andric 393fe6060f1SDimitry Andric bool isComputableLoopNest(ArrayRef<Loop *> LoopList) { 3940b57cec5SDimitry Andric for (Loop *L : LoopList) { 3950b57cec5SDimitry Andric const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); 396e8d8bef9SDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCountOuter)) { 3970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n"); 3980b57cec5SDimitry Andric return false; 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric if (L->getNumBackEdges() != 1) { 4010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); 4020b57cec5SDimitry Andric return false; 4030b57cec5SDimitry Andric } 4040b57cec5SDimitry Andric if (!L->getExitingBlock()) { 4050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n"); 4060b57cec5SDimitry Andric return false; 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric return true; 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 412fe6060f1SDimitry Andric unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) { 4130b57cec5SDimitry Andric // TODO: Add a better heuristic to select the loop to be interchanged based 4140b57cec5SDimitry Andric // on the dependence matrix. Currently we select the innermost loop. 4150b57cec5SDimitry Andric return LoopList.size() - 1; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 41881ad6265SDimitry Andric bool processLoopList(SmallVectorImpl<Loop *> &LoopList) { 4190b57cec5SDimitry Andric bool Changed = false; 4200b57cec5SDimitry Andric unsigned LoopNestDepth = LoopList.size(); 4210b57cec5SDimitry Andric if (LoopNestDepth < 2) { 4220b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); 4230b57cec5SDimitry Andric return false; 4240b57cec5SDimitry Andric } 4250b57cec5SDimitry Andric if (LoopNestDepth > MaxLoopNestDepth) { 4260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than " 4270b57cec5SDimitry Andric << MaxLoopNestDepth << "\n"); 4280b57cec5SDimitry Andric return false; 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric if (!isComputableLoopNest(LoopList)) { 4310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); 4320b57cec5SDimitry Andric return false; 4330b57cec5SDimitry Andric } 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth 4360b57cec5SDimitry Andric << "\n"); 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric CharMatrix DependencyMatrix; 4390b57cec5SDimitry Andric Loop *OuterMostLoop = *(LoopList.begin()); 4400b57cec5SDimitry Andric if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth, 441bdd1243dSDimitry Andric OuterMostLoop, DI, SE)) { 4420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n"); 4430b57cec5SDimitry Andric return false; 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES 4460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Dependence before interchange\n"); 4470b57cec5SDimitry Andric printDepMatrix(DependencyMatrix); 4480b57cec5SDimitry Andric #endif 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric // Get the Outermost loop exit. 4510b57cec5SDimitry Andric BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock(); 4520b57cec5SDimitry Andric if (!LoopNestExit) { 4530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block"); 4540b57cec5SDimitry Andric return false; 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric unsigned SelecLoopId = selectLoopForInterchange(LoopList); 45881ad6265SDimitry Andric // Obtain the loop vector returned from loop cache analysis beforehand, 45981ad6265SDimitry Andric // and put each <Loop, index> pair into a map for constant time query 46081ad6265SDimitry Andric // later. Indices in loop vector reprsent the optimal order of the 46181ad6265SDimitry Andric // corresponding loop, e.g., given a loopnest with depth N, index 0 46281ad6265SDimitry Andric // indicates the loop should be placed as the outermost loop and index N 46381ad6265SDimitry Andric // indicates the loop should be placed as the innermost loop. 46481ad6265SDimitry Andric // 46581ad6265SDimitry Andric // For the old pass manager CacheCost would be null. 46681ad6265SDimitry Andric DenseMap<const Loop *, unsigned> CostMap; 46781ad6265SDimitry Andric if (CC != nullptr) { 46881ad6265SDimitry Andric const auto &LoopCosts = CC->getLoopCosts(); 46981ad6265SDimitry Andric for (unsigned i = 0; i < LoopCosts.size(); i++) { 47081ad6265SDimitry Andric CostMap[LoopCosts[i].first] = i; 47181ad6265SDimitry Andric } 47281ad6265SDimitry Andric } 47381ad6265SDimitry Andric // We try to achieve the globally optimal memory access for the loopnest, 47481ad6265SDimitry Andric // and do interchange based on a bubble-sort fasion. We start from 47581ad6265SDimitry Andric // the innermost loop, move it outwards to the best possible position 47681ad6265SDimitry Andric // and repeat this process. 47781ad6265SDimitry Andric for (unsigned j = SelecLoopId; j > 0; j--) { 47881ad6265SDimitry Andric bool ChangedPerIter = false; 47981ad6265SDimitry Andric for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) { 48081ad6265SDimitry Andric bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1, 48181ad6265SDimitry Andric DependencyMatrix, CostMap); 4820b57cec5SDimitry Andric if (!Interchanged) 48381ad6265SDimitry Andric continue; 48481ad6265SDimitry Andric // Loops interchanged, update LoopList accordingly. 48581ad6265SDimitry Andric std::swap(LoopList[i - 1], LoopList[i]); 4860b57cec5SDimitry Andric // Update the DependencyMatrix 4870b57cec5SDimitry Andric interChangeDependencies(DependencyMatrix, i, i - 1); 4880b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES 4890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Dependence after interchange\n"); 4900b57cec5SDimitry Andric printDepMatrix(DependencyMatrix); 4910b57cec5SDimitry Andric #endif 49281ad6265SDimitry Andric ChangedPerIter |= Interchanged; 4930b57cec5SDimitry Andric Changed |= Interchanged; 4940b57cec5SDimitry Andric } 49581ad6265SDimitry Andric // Early abort if there was no interchange during an entire round of 49681ad6265SDimitry Andric // moving loops outwards. 49781ad6265SDimitry Andric if (!ChangedPerIter) 49881ad6265SDimitry Andric break; 49981ad6265SDimitry Andric } 5000b57cec5SDimitry Andric return Changed; 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric 503fe6060f1SDimitry Andric bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, 504fe6060f1SDimitry Andric unsigned OuterLoopId, 50581ad6265SDimitry Andric std::vector<std::vector<char>> &DependencyMatrix, 50681ad6265SDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap) { 5070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId 5080b57cec5SDimitry Andric << " and OuterLoopId = " << OuterLoopId << "\n"); 5090b57cec5SDimitry Andric LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); 5100b57cec5SDimitry Andric if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { 5110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n"); 5120b57cec5SDimitry Andric return false; 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); 5150b57cec5SDimitry Andric LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); 51681ad6265SDimitry Andric if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId, 517bdd1243dSDimitry Andric DependencyMatrix, CostMap, CC)) { 5180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); 5190b57cec5SDimitry Andric return false; 5200b57cec5SDimitry Andric } 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric ORE->emit([&]() { 5230b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Interchanged", 5240b57cec5SDimitry Andric InnerLoop->getStartLoc(), 5250b57cec5SDimitry Andric InnerLoop->getHeader()) 5260b57cec5SDimitry Andric << "Loop interchanged with enclosing loop."; 5270b57cec5SDimitry Andric }); 5280b57cec5SDimitry Andric 529fe6060f1SDimitry Andric LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL); 5300b57cec5SDimitry Andric LIT.transform(); 5310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops interchanged.\n"); 5320b57cec5SDimitry Andric LoopsInterchanged++; 5335ffd83dbSDimitry Andric 534bdd1243dSDimitry Andric llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE); 5350b57cec5SDimitry Andric return true; 5360b57cec5SDimitry Andric } 5370b57cec5SDimitry Andric }; 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric } // end anonymous namespace 5400b57cec5SDimitry Andric 5410b57cec5SDimitry Andric bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) { 5420b57cec5SDimitry Andric return any_of(*BB, [](const Instruction &I) { 5430b57cec5SDimitry Andric return I.mayHaveSideEffects() || I.mayReadFromMemory(); 5440b57cec5SDimitry Andric }); 5450b57cec5SDimitry Andric } 5460b57cec5SDimitry Andric 5470b57cec5SDimitry Andric bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { 5480b57cec5SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 5490b57cec5SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 5500b57cec5SDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n"); 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric // A perfectly nested loop will not have any branch in between the outer and 5550b57cec5SDimitry Andric // inner block i.e. outer header will branch to either inner preheader and 5560b57cec5SDimitry Andric // outerloop latch. 5570b57cec5SDimitry Andric BranchInst *OuterLoopHeaderBI = 5580b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); 5590b57cec5SDimitry Andric if (!OuterLoopHeaderBI) 5600b57cec5SDimitry Andric return false; 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric for (BasicBlock *Succ : successors(OuterLoopHeaderBI)) 5630b57cec5SDimitry Andric if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() && 5640b57cec5SDimitry Andric Succ != OuterLoopLatch) 5650b57cec5SDimitry Andric return false; 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); 5680b57cec5SDimitry Andric // We do not have any basic block in between now make sure the outer header 5690b57cec5SDimitry Andric // and outer loop latch doesn't contain any unsafe instructions. 5700b57cec5SDimitry Andric if (containsUnsafeInstructions(OuterLoopHeader) || 5710b57cec5SDimitry Andric containsUnsafeInstructions(OuterLoopLatch)) 5720b57cec5SDimitry Andric return false; 5730b57cec5SDimitry Andric 574e8d8bef9SDimitry Andric // Also make sure the inner loop preheader does not contain any unsafe 575e8d8bef9SDimitry Andric // instructions. Note that all instructions in the preheader will be moved to 576e8d8bef9SDimitry Andric // the outer loop header when interchanging. 577e8d8bef9SDimitry Andric if (InnerLoopPreHeader != OuterLoopHeader && 578e8d8bef9SDimitry Andric containsUnsafeInstructions(InnerLoopPreHeader)) 579e8d8bef9SDimitry Andric return false; 580e8d8bef9SDimitry Andric 581fe6060f1SDimitry Andric BasicBlock *InnerLoopExit = InnerLoop->getExitBlock(); 582fe6060f1SDimitry Andric // Ensure the inner loop exit block flows to the outer loop latch possibly 583fe6060f1SDimitry Andric // through empty blocks. 584fe6060f1SDimitry Andric const BasicBlock &SuccInner = 585fe6060f1SDimitry Andric LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch); 586fe6060f1SDimitry Andric if (&SuccInner != OuterLoopLatch) { 587fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit 588fe6060f1SDimitry Andric << " does not lead to the outer loop latch.\n";); 589fe6060f1SDimitry Andric return false; 590fe6060f1SDimitry Andric } 591fe6060f1SDimitry Andric // The inner loop exit block does flow to the outer loop latch and not some 592fe6060f1SDimitry Andric // other BBs, now make sure it contains safe instructions, since it will be 593fe6060f1SDimitry Andric // moved into the (new) inner loop after interchange. 594fe6060f1SDimitry Andric if (containsUnsafeInstructions(InnerLoopExit)) 595fe6060f1SDimitry Andric return false; 596fe6060f1SDimitry Andric 5970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n"); 5980b57cec5SDimitry Andric // We have a perfect loop nest. 5990b57cec5SDimitry Andric return true; 6000b57cec5SDimitry Andric } 6010b57cec5SDimitry Andric 60204eeddc0SDimitry Andric bool LoopInterchangeLegality::isLoopStructureUnderstood() { 6030b57cec5SDimitry Andric BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); 60404eeddc0SDimitry Andric for (PHINode *InnerInduction : InnerLoopInductions) { 60504eeddc0SDimitry Andric unsigned Num = InnerInduction->getNumOperands(); 6060b57cec5SDimitry Andric for (unsigned i = 0; i < Num; ++i) { 6070b57cec5SDimitry Andric Value *Val = InnerInduction->getOperand(i); 6080b57cec5SDimitry Andric if (isa<Constant>(Val)) 6090b57cec5SDimitry Andric continue; 6100b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(Val); 6110b57cec5SDimitry Andric if (!I) 6120b57cec5SDimitry Andric return false; 6130b57cec5SDimitry Andric // TODO: Handle triangular loops. 6140b57cec5SDimitry Andric // e.g. for(int i=0;i<N;i++) 6150b57cec5SDimitry Andric // for(int j=i;j<N;j++) 6160b57cec5SDimitry Andric unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); 6170b57cec5SDimitry Andric if (InnerInduction->getIncomingBlock(IncomBlockIndx) == 6180b57cec5SDimitry Andric InnerLoopPreheader && 6190b57cec5SDimitry Andric !OuterLoop->isLoopInvariant(I)) { 6200b57cec5SDimitry Andric return false; 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric } 62304eeddc0SDimitry Andric } 624fe6060f1SDimitry Andric 625fe6060f1SDimitry Andric // TODO: Handle triangular loops of another form. 626fe6060f1SDimitry Andric // e.g. for(int i=0;i<N;i++) 627fe6060f1SDimitry Andric // for(int j=0;j<i;j++) 628fe6060f1SDimitry Andric // or, 629fe6060f1SDimitry Andric // for(int i=0;i<N;i++) 630fe6060f1SDimitry Andric // for(int j=0;j*i<N;j++) 631fe6060f1SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 632fe6060f1SDimitry Andric BranchInst *InnerLoopLatchBI = 633fe6060f1SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); 634fe6060f1SDimitry Andric if (!InnerLoopLatchBI->isConditional()) 635fe6060f1SDimitry Andric return false; 636fe6060f1SDimitry Andric if (CmpInst *InnerLoopCmp = 637fe6060f1SDimitry Andric dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) { 638fe6060f1SDimitry Andric Value *Op0 = InnerLoopCmp->getOperand(0); 639fe6060f1SDimitry Andric Value *Op1 = InnerLoopCmp->getOperand(1); 640fe6060f1SDimitry Andric 641fe6060f1SDimitry Andric // LHS and RHS of the inner loop exit condition, e.g., 642fe6060f1SDimitry Andric // in "for(int j=0;j<i;j++)", LHS is j and RHS is i. 643fe6060f1SDimitry Andric Value *Left = nullptr; 644fe6060f1SDimitry Andric Value *Right = nullptr; 645fe6060f1SDimitry Andric 646fe6060f1SDimitry Andric // Check if V only involves inner loop induction variable. 647fe6060f1SDimitry Andric // Return true if V is InnerInduction, or a cast from 648fe6060f1SDimitry Andric // InnerInduction, or a binary operator that involves 649fe6060f1SDimitry Andric // InnerInduction and a constant. 65004eeddc0SDimitry Andric std::function<bool(Value *)> IsPathToInnerIndVar; 65104eeddc0SDimitry Andric IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool { 65204eeddc0SDimitry Andric if (llvm::is_contained(InnerLoopInductions, V)) 653fe6060f1SDimitry Andric return true; 654fe6060f1SDimitry Andric if (isa<Constant>(V)) 655fe6060f1SDimitry Andric return true; 65604eeddc0SDimitry Andric const Instruction *I = dyn_cast<Instruction>(V); 657fe6060f1SDimitry Andric if (!I) 658fe6060f1SDimitry Andric return false; 659fe6060f1SDimitry Andric if (isa<CastInst>(I)) 66004eeddc0SDimitry Andric return IsPathToInnerIndVar(I->getOperand(0)); 661fe6060f1SDimitry Andric if (isa<BinaryOperator>(I)) 66204eeddc0SDimitry Andric return IsPathToInnerIndVar(I->getOperand(0)) && 66304eeddc0SDimitry Andric IsPathToInnerIndVar(I->getOperand(1)); 664fe6060f1SDimitry Andric return false; 665fe6060f1SDimitry Andric }; 666fe6060f1SDimitry Andric 66704eeddc0SDimitry Andric // In case of multiple inner loop indvars, it is okay if LHS and RHS 66804eeddc0SDimitry Andric // are both inner indvar related variables. 66904eeddc0SDimitry Andric if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1)) 67004eeddc0SDimitry Andric return true; 67104eeddc0SDimitry Andric 67204eeddc0SDimitry Andric // Otherwise we check if the cmp instruction compares an inner indvar 67304eeddc0SDimitry Andric // related variable (Left) with a outer loop invariant (Right). 67404eeddc0SDimitry Andric if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) { 675fe6060f1SDimitry Andric Left = Op0; 676fe6060f1SDimitry Andric Right = Op1; 67704eeddc0SDimitry Andric } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) { 678fe6060f1SDimitry Andric Left = Op1; 679fe6060f1SDimitry Andric Right = Op0; 680fe6060f1SDimitry Andric } 681fe6060f1SDimitry Andric 682fe6060f1SDimitry Andric if (Left == nullptr) 683fe6060f1SDimitry Andric return false; 684fe6060f1SDimitry Andric 685fe6060f1SDimitry Andric const SCEV *S = SE->getSCEV(Right); 686fe6060f1SDimitry Andric if (!SE->isLoopInvariant(S, OuterLoop)) 687fe6060f1SDimitry Andric return false; 688fe6060f1SDimitry Andric } 689fe6060f1SDimitry Andric 6900b57cec5SDimitry Andric return true; 6910b57cec5SDimitry Andric } 6920b57cec5SDimitry Andric 6930b57cec5SDimitry Andric // If SV is a LCSSA PHI node with a single incoming value, return the incoming 6940b57cec5SDimitry Andric // value. 6950b57cec5SDimitry Andric static Value *followLCSSA(Value *SV) { 6960b57cec5SDimitry Andric PHINode *PHI = dyn_cast<PHINode>(SV); 6970b57cec5SDimitry Andric if (!PHI) 6980b57cec5SDimitry Andric return SV; 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric if (PHI->getNumIncomingValues() != 1) 7010b57cec5SDimitry Andric return SV; 7020b57cec5SDimitry Andric return followLCSSA(PHI->getIncomingValue(0)); 7030b57cec5SDimitry Andric } 7040b57cec5SDimitry Andric 7050b57cec5SDimitry Andric // Check V's users to see if it is involved in a reduction in L. 7060b57cec5SDimitry Andric static PHINode *findInnerReductionPhi(Loop *L, Value *V) { 707e8d8bef9SDimitry Andric // Reduction variables cannot be constants. 708e8d8bef9SDimitry Andric if (isa<Constant>(V)) 709e8d8bef9SDimitry Andric return nullptr; 710e8d8bef9SDimitry Andric 7110b57cec5SDimitry Andric for (Value *User : V->users()) { 7120b57cec5SDimitry Andric if (PHINode *PHI = dyn_cast<PHINode>(User)) { 7130b57cec5SDimitry Andric if (PHI->getNumIncomingValues() == 1) 7140b57cec5SDimitry Andric continue; 7150b57cec5SDimitry Andric RecurrenceDescriptor RD; 71681ad6265SDimitry Andric if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) { 71781ad6265SDimitry Andric // Detect floating point reduction only when it can be reordered. 71881ad6265SDimitry Andric if (RD.getExactFPMathInst() != nullptr) 71981ad6265SDimitry Andric return nullptr; 7200b57cec5SDimitry Andric return PHI; 72181ad6265SDimitry Andric } 7220b57cec5SDimitry Andric return nullptr; 7230b57cec5SDimitry Andric } 7240b57cec5SDimitry Andric } 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric return nullptr; 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric bool LoopInterchangeLegality::findInductionAndReductions( 7300b57cec5SDimitry Andric Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) { 7310b57cec5SDimitry Andric if (!L->getLoopLatch() || !L->getLoopPredecessor()) 7320b57cec5SDimitry Andric return false; 7330b57cec5SDimitry Andric for (PHINode &PHI : L->getHeader()->phis()) { 7340b57cec5SDimitry Andric InductionDescriptor ID; 7350b57cec5SDimitry Andric if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) 7360b57cec5SDimitry Andric Inductions.push_back(&PHI); 7370b57cec5SDimitry Andric else { 7380b57cec5SDimitry Andric // PHIs in inner loops need to be part of a reduction in the outer loop, 7390b57cec5SDimitry Andric // discovered when checking the PHIs of the outer loop earlier. 7400b57cec5SDimitry Andric if (!InnerLoop) { 7415ffd83dbSDimitry Andric if (!OuterInnerReductions.count(&PHI)) { 7420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions " 7430b57cec5SDimitry Andric "across the outer loop.\n"); 7440b57cec5SDimitry Andric return false; 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric } else { 7470b57cec5SDimitry Andric assert(PHI.getNumIncomingValues() == 2 && 7480b57cec5SDimitry Andric "Phis in loop header should have exactly 2 incoming values"); 7490b57cec5SDimitry Andric // Check if we have a PHI node in the outer loop that has a reduction 7500b57cec5SDimitry Andric // result from the inner loop as an incoming value. 7510b57cec5SDimitry Andric Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch())); 7520b57cec5SDimitry Andric PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V); 7530b57cec5SDimitry Andric if (!InnerRedPhi || 754e8d8bef9SDimitry Andric !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) { 7550b57cec5SDimitry Andric LLVM_DEBUG( 7560b57cec5SDimitry Andric dbgs() 7570b57cec5SDimitry Andric << "Failed to recognize PHI as an induction or reduction.\n"); 7580b57cec5SDimitry Andric return false; 7590b57cec5SDimitry Andric } 7600b57cec5SDimitry Andric OuterInnerReductions.insert(&PHI); 7610b57cec5SDimitry Andric OuterInnerReductions.insert(InnerRedPhi); 7620b57cec5SDimitry Andric } 7630b57cec5SDimitry Andric } 7640b57cec5SDimitry Andric } 7650b57cec5SDimitry Andric return true; 7660b57cec5SDimitry Andric } 7670b57cec5SDimitry Andric 7680b57cec5SDimitry Andric // This function indicates the current limitations in the transform as a result 7690b57cec5SDimitry Andric // of which we do not proceed. 7700b57cec5SDimitry Andric bool LoopInterchangeLegality::currentLimitations() { 7710b57cec5SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric // transform currently expects the loop latches to also be the exiting 7740b57cec5SDimitry Andric // blocks. 7750b57cec5SDimitry Andric if (InnerLoop->getExitingBlock() != InnerLoopLatch || 7760b57cec5SDimitry Andric OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() || 7770b57cec5SDimitry Andric !isa<BranchInst>(InnerLoopLatch->getTerminator()) || 7780b57cec5SDimitry Andric !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) { 7790b57cec5SDimitry Andric LLVM_DEBUG( 7800b57cec5SDimitry Andric dbgs() << "Loops where the latch is not the exiting block are not" 7810b57cec5SDimitry Andric << " supported currently.\n"); 7820b57cec5SDimitry Andric ORE->emit([&]() { 7830b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch", 7840b57cec5SDimitry Andric OuterLoop->getStartLoc(), 7850b57cec5SDimitry Andric OuterLoop->getHeader()) 7860b57cec5SDimitry Andric << "Loops where the latch is not the exiting block cannot be" 7870b57cec5SDimitry Andric " interchange currently."; 7880b57cec5SDimitry Andric }); 7890b57cec5SDimitry Andric return true; 7900b57cec5SDimitry Andric } 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric SmallVector<PHINode *, 8> Inductions; 7930b57cec5SDimitry Andric if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) { 7940b57cec5SDimitry Andric LLVM_DEBUG( 7950b57cec5SDimitry Andric dbgs() << "Only outer loops with induction or reduction PHI nodes " 7960b57cec5SDimitry Andric << "are supported currently.\n"); 7970b57cec5SDimitry Andric ORE->emit([&]() { 7980b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter", 7990b57cec5SDimitry Andric OuterLoop->getStartLoc(), 8000b57cec5SDimitry Andric OuterLoop->getHeader()) 8010b57cec5SDimitry Andric << "Only outer loops with induction or reduction PHI nodes can be" 8020b57cec5SDimitry Andric " interchanged currently."; 8030b57cec5SDimitry Andric }); 8040b57cec5SDimitry Andric return true; 8050b57cec5SDimitry Andric } 8060b57cec5SDimitry Andric 8070b57cec5SDimitry Andric Inductions.clear(); 808bdd1243dSDimitry Andric // For multi-level loop nests, make sure that all phi nodes for inner loops 809bdd1243dSDimitry Andric // at all levels can be recognized as a induction or reduction phi. Bail out 810bdd1243dSDimitry Andric // if a phi node at a certain nesting level cannot be properly recognized. 811bdd1243dSDimitry Andric Loop *CurLevelLoop = OuterLoop; 812bdd1243dSDimitry Andric while (!CurLevelLoop->getSubLoops().empty()) { 813bdd1243dSDimitry Andric // We already made sure that the loop nest is tightly nested. 814bdd1243dSDimitry Andric CurLevelLoop = CurLevelLoop->getSubLoops().front(); 815bdd1243dSDimitry Andric if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) { 8160b57cec5SDimitry Andric LLVM_DEBUG( 8170b57cec5SDimitry Andric dbgs() << "Only inner loops with induction or reduction PHI nodes " 8180b57cec5SDimitry Andric << "are supported currently.\n"); 8190b57cec5SDimitry Andric ORE->emit([&]() { 8200b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", 821bdd1243dSDimitry Andric CurLevelLoop->getStartLoc(), 822bdd1243dSDimitry Andric CurLevelLoop->getHeader()) 8230b57cec5SDimitry Andric << "Only inner loops with induction or reduction PHI nodes can be" 8240b57cec5SDimitry Andric " interchange currently."; 8250b57cec5SDimitry Andric }); 8260b57cec5SDimitry Andric return true; 8270b57cec5SDimitry Andric } 828bdd1243dSDimitry Andric } 8290b57cec5SDimitry Andric 8300b57cec5SDimitry Andric // TODO: Triangular loops are not handled for now. 83104eeddc0SDimitry Andric if (!isLoopStructureUnderstood()) { 8320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n"); 8330b57cec5SDimitry Andric ORE->emit([&]() { 8340b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner", 8350b57cec5SDimitry Andric InnerLoop->getStartLoc(), 8360b57cec5SDimitry Andric InnerLoop->getHeader()) 8370b57cec5SDimitry Andric << "Inner loop structure not understood currently."; 8380b57cec5SDimitry Andric }); 8390b57cec5SDimitry Andric return true; 8400b57cec5SDimitry Andric } 8410b57cec5SDimitry Andric 8420b57cec5SDimitry Andric return false; 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric 84504eeddc0SDimitry Andric bool LoopInterchangeLegality::findInductions( 84604eeddc0SDimitry Andric Loop *L, SmallVectorImpl<PHINode *> &Inductions) { 84704eeddc0SDimitry Andric for (PHINode &PHI : L->getHeader()->phis()) { 84804eeddc0SDimitry Andric InductionDescriptor ID; 84904eeddc0SDimitry Andric if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) 85004eeddc0SDimitry Andric Inductions.push_back(&PHI); 85104eeddc0SDimitry Andric } 85204eeddc0SDimitry Andric return !Inductions.empty(); 85304eeddc0SDimitry Andric } 85404eeddc0SDimitry Andric 855480093f4SDimitry Andric // We currently only support LCSSA PHI nodes in the inner loop exit, if their 856480093f4SDimitry Andric // users are either reduction PHIs or PHIs outside the outer loop (which means 857480093f4SDimitry Andric // the we are only interested in the final value after the loop). 858480093f4SDimitry Andric static bool 859480093f4SDimitry Andric areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL, 860480093f4SDimitry Andric SmallPtrSetImpl<PHINode *> &Reductions) { 861480093f4SDimitry Andric BasicBlock *InnerExit = OuterL->getUniqueExitBlock(); 862480093f4SDimitry Andric for (PHINode &PHI : InnerExit->phis()) { 863480093f4SDimitry Andric // Reduction lcssa phi will have only 1 incoming block that from loop latch. 864480093f4SDimitry Andric if (PHI.getNumIncomingValues() > 1) 865480093f4SDimitry Andric return false; 866480093f4SDimitry Andric if (any_of(PHI.users(), [&Reductions, OuterL](User *U) { 867480093f4SDimitry Andric PHINode *PN = dyn_cast<PHINode>(U); 8685ffd83dbSDimitry Andric return !PN || 8695ffd83dbSDimitry Andric (!Reductions.count(PN) && OuterL->contains(PN->getParent())); 870480093f4SDimitry Andric })) { 871480093f4SDimitry Andric return false; 872480093f4SDimitry Andric } 873480093f4SDimitry Andric } 874480093f4SDimitry Andric return true; 875480093f4SDimitry Andric } 876480093f4SDimitry Andric 8770b57cec5SDimitry Andric // We currently support LCSSA PHI nodes in the outer loop exit, if their 8780b57cec5SDimitry Andric // incoming values do not come from the outer loop latch or if the 8790b57cec5SDimitry Andric // outer loop latch has a single predecessor. In that case, the value will 8800b57cec5SDimitry Andric // be available if both the inner and outer loop conditions are true, which 8810b57cec5SDimitry Andric // will still be true after interchanging. If we have multiple predecessor, 8820b57cec5SDimitry Andric // that may not be the case, e.g. because the outer loop latch may be executed 8830b57cec5SDimitry Andric // if the inner loop is not executed. 884480093f4SDimitry Andric static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { 8850b57cec5SDimitry Andric BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); 8860b57cec5SDimitry Andric for (PHINode &PHI : LoopNestExit->phis()) { 8870b57cec5SDimitry Andric for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { 8880b57cec5SDimitry Andric Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i)); 8890b57cec5SDimitry Andric if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) 8900b57cec5SDimitry Andric continue; 8910b57cec5SDimitry Andric 8920b57cec5SDimitry Andric // The incoming value is defined in the outer loop latch. Currently we 8930b57cec5SDimitry Andric // only support that in case the outer loop latch has a single predecessor. 8940b57cec5SDimitry Andric // This guarantees that the outer loop latch is executed if and only if 8950b57cec5SDimitry Andric // the inner loop is executed (because tightlyNested() guarantees that the 8960b57cec5SDimitry Andric // outer loop header only branches to the inner loop or the outer loop 8970b57cec5SDimitry Andric // latch). 8980b57cec5SDimitry Andric // FIXME: We could weaken this logic and allow multiple predecessors, 8990b57cec5SDimitry Andric // if the values are produced outside the loop latch. We would need 9000b57cec5SDimitry Andric // additional logic to update the PHI nodes in the exit block as 9010b57cec5SDimitry Andric // well. 9020b57cec5SDimitry Andric if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) 9030b57cec5SDimitry Andric return false; 9040b57cec5SDimitry Andric } 9050b57cec5SDimitry Andric } 9060b57cec5SDimitry Andric return true; 9070b57cec5SDimitry Andric } 9080b57cec5SDimitry Andric 909fe6060f1SDimitry Andric // In case of multi-level nested loops, it may occur that lcssa phis exist in 910fe6060f1SDimitry Andric // the latch of InnerLoop, i.e., when defs of the incoming values are further 911fe6060f1SDimitry Andric // inside the loopnest. Sometimes those incoming values are not available 912fe6060f1SDimitry Andric // after interchange, since the original inner latch will become the new outer 913fe6060f1SDimitry Andric // latch which may have predecessor paths that do not include those incoming 914fe6060f1SDimitry Andric // values. 915fe6060f1SDimitry Andric // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of 916fe6060f1SDimitry Andric // multi-level loop nests. 917fe6060f1SDimitry Andric static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { 918fe6060f1SDimitry Andric if (InnerLoop->getSubLoops().empty()) 919fe6060f1SDimitry Andric return true; 920fe6060f1SDimitry Andric // If the original outer latch has only one predecessor, then values defined 921fe6060f1SDimitry Andric // further inside the looploop, e.g., in the innermost loop, will be available 922fe6060f1SDimitry Andric // at the new outer latch after interchange. 923fe6060f1SDimitry Andric if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr) 924fe6060f1SDimitry Andric return true; 925fe6060f1SDimitry Andric 926fe6060f1SDimitry Andric // The outer latch has more than one predecessors, i.e., the inner 927fe6060f1SDimitry Andric // exit and the inner header. 928fe6060f1SDimitry Andric // PHI nodes in the inner latch are lcssa phis where the incoming values 929fe6060f1SDimitry Andric // are defined further inside the loopnest. Check if those phis are used 930fe6060f1SDimitry Andric // in the original inner latch. If that is the case then bail out since 931fe6060f1SDimitry Andric // those incoming values may not be available at the new outer latch. 932fe6060f1SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 933fe6060f1SDimitry Andric for (PHINode &PHI : InnerLoopLatch->phis()) { 934fe6060f1SDimitry Andric for (auto *U : PHI.users()) { 935fe6060f1SDimitry Andric Instruction *UI = cast<Instruction>(U); 936fe6060f1SDimitry Andric if (InnerLoopLatch == UI->getParent()) 937fe6060f1SDimitry Andric return false; 938fe6060f1SDimitry Andric } 939fe6060f1SDimitry Andric } 940fe6060f1SDimitry Andric return true; 941fe6060f1SDimitry Andric } 942fe6060f1SDimitry Andric 9430b57cec5SDimitry Andric bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, 9440b57cec5SDimitry Andric unsigned OuterLoopId, 9450b57cec5SDimitry Andric CharMatrix &DepMatrix) { 9460b57cec5SDimitry Andric if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { 9470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId 9480b57cec5SDimitry Andric << " and OuterLoopId = " << OuterLoopId 9490b57cec5SDimitry Andric << " due to dependence\n"); 9500b57cec5SDimitry Andric ORE->emit([&]() { 9510b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence", 9520b57cec5SDimitry Andric InnerLoop->getStartLoc(), 9530b57cec5SDimitry Andric InnerLoop->getHeader()) 9540b57cec5SDimitry Andric << "Cannot interchange loops due to dependences."; 9550b57cec5SDimitry Andric }); 9560b57cec5SDimitry Andric return false; 9570b57cec5SDimitry Andric } 9580b57cec5SDimitry Andric // Check if outer and inner loop contain legal instructions only. 9590b57cec5SDimitry Andric for (auto *BB : OuterLoop->blocks()) 9600b57cec5SDimitry Andric for (Instruction &I : BB->instructionsWithoutDebug()) 9610b57cec5SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I)) { 9620b57cec5SDimitry Andric // readnone functions do not prevent interchanging. 96304eeddc0SDimitry Andric if (CI->onlyWritesMemory()) 9640b57cec5SDimitry Andric continue; 9650b57cec5SDimitry Andric LLVM_DEBUG( 9660b57cec5SDimitry Andric dbgs() << "Loops with call instructions cannot be interchanged " 9670b57cec5SDimitry Andric << "safely."); 9680b57cec5SDimitry Andric ORE->emit([&]() { 9690b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst", 9700b57cec5SDimitry Andric CI->getDebugLoc(), 9710b57cec5SDimitry Andric CI->getParent()) 9720b57cec5SDimitry Andric << "Cannot interchange loops due to call instruction."; 9730b57cec5SDimitry Andric }); 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric return false; 9760b57cec5SDimitry Andric } 9770b57cec5SDimitry Andric 97804eeddc0SDimitry Andric if (!findInductions(InnerLoop, InnerLoopInductions)) { 979*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n"); 98004eeddc0SDimitry Andric return false; 98104eeddc0SDimitry Andric } 98204eeddc0SDimitry Andric 983fe6060f1SDimitry Andric if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) { 984fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"); 985fe6060f1SDimitry Andric ORE->emit([&]() { 986fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI", 987fe6060f1SDimitry Andric InnerLoop->getStartLoc(), 988fe6060f1SDimitry Andric InnerLoop->getHeader()) 989fe6060f1SDimitry Andric << "Cannot interchange loops because unsupported PHI nodes found " 990fe6060f1SDimitry Andric "in inner loop latch."; 991fe6060f1SDimitry Andric }); 992fe6060f1SDimitry Andric return false; 993fe6060f1SDimitry Andric } 994fe6060f1SDimitry Andric 9950b57cec5SDimitry Andric // TODO: The loops could not be interchanged due to current limitations in the 9960b57cec5SDimitry Andric // transform module. 9970b57cec5SDimitry Andric if (currentLimitations()) { 9980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n"); 9990b57cec5SDimitry Andric return false; 10000b57cec5SDimitry Andric } 10010b57cec5SDimitry Andric 10020b57cec5SDimitry Andric // Check if the loops are tightly nested. 10030b57cec5SDimitry Andric if (!tightlyNested(OuterLoop, InnerLoop)) { 10040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops not tightly nested\n"); 10050b57cec5SDimitry Andric ORE->emit([&]() { 10060b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested", 10070b57cec5SDimitry Andric InnerLoop->getStartLoc(), 10080b57cec5SDimitry Andric InnerLoop->getHeader()) 10090b57cec5SDimitry Andric << "Cannot interchange loops because they are not tightly " 10100b57cec5SDimitry Andric "nested."; 10110b57cec5SDimitry Andric }); 10120b57cec5SDimitry Andric return false; 10130b57cec5SDimitry Andric } 10140b57cec5SDimitry Andric 1015480093f4SDimitry Andric if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop, 1016480093f4SDimitry Andric OuterInnerReductions)) { 1017480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n"); 1018480093f4SDimitry Andric ORE->emit([&]() { 1019480093f4SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", 1020480093f4SDimitry Andric InnerLoop->getStartLoc(), 1021480093f4SDimitry Andric InnerLoop->getHeader()) 1022480093f4SDimitry Andric << "Found unsupported PHI node in loop exit."; 1023480093f4SDimitry Andric }); 1024480093f4SDimitry Andric return false; 1025480093f4SDimitry Andric } 1026480093f4SDimitry Andric 1027480093f4SDimitry Andric if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) { 10280b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"); 10290b57cec5SDimitry Andric ORE->emit([&]() { 10300b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", 10310b57cec5SDimitry Andric OuterLoop->getStartLoc(), 10320b57cec5SDimitry Andric OuterLoop->getHeader()) 10330b57cec5SDimitry Andric << "Found unsupported PHI node in loop exit."; 10340b57cec5SDimitry Andric }); 10350b57cec5SDimitry Andric return false; 10360b57cec5SDimitry Andric } 10370b57cec5SDimitry Andric 10380b57cec5SDimitry Andric return true; 10390b57cec5SDimitry Andric } 10400b57cec5SDimitry Andric 10410b57cec5SDimitry Andric int LoopInterchangeProfitability::getInstrOrderCost() { 10420b57cec5SDimitry Andric unsigned GoodOrder, BadOrder; 10430b57cec5SDimitry Andric BadOrder = GoodOrder = 0; 10440b57cec5SDimitry Andric for (BasicBlock *BB : InnerLoop->blocks()) { 10450b57cec5SDimitry Andric for (Instruction &Ins : *BB) { 10460b57cec5SDimitry Andric if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { 10470b57cec5SDimitry Andric unsigned NumOp = GEP->getNumOperands(); 10480b57cec5SDimitry Andric bool FoundInnerInduction = false; 10490b57cec5SDimitry Andric bool FoundOuterInduction = false; 10500b57cec5SDimitry Andric for (unsigned i = 0; i < NumOp; ++i) { 1051e8d8bef9SDimitry Andric // Skip operands that are not SCEV-able. 1052e8d8bef9SDimitry Andric if (!SE->isSCEVable(GEP->getOperand(i)->getType())) 1053e8d8bef9SDimitry Andric continue; 1054e8d8bef9SDimitry Andric 10550b57cec5SDimitry Andric const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); 10560b57cec5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); 10570b57cec5SDimitry Andric if (!AR) 10580b57cec5SDimitry Andric continue; 10590b57cec5SDimitry Andric 10600b57cec5SDimitry Andric // If we find the inner induction after an outer induction e.g. 10610b57cec5SDimitry Andric // for(int i=0;i<N;i++) 10620b57cec5SDimitry Andric // for(int j=0;j<N;j++) 10630b57cec5SDimitry Andric // A[i][j] = A[i-1][j-1]+k; 10640b57cec5SDimitry Andric // then it is a good order. 10650b57cec5SDimitry Andric if (AR->getLoop() == InnerLoop) { 10660b57cec5SDimitry Andric // We found an InnerLoop induction after OuterLoop induction. It is 10670b57cec5SDimitry Andric // a good order. 10680b57cec5SDimitry Andric FoundInnerInduction = true; 10690b57cec5SDimitry Andric if (FoundOuterInduction) { 10700b57cec5SDimitry Andric GoodOrder++; 10710b57cec5SDimitry Andric break; 10720b57cec5SDimitry Andric } 10730b57cec5SDimitry Andric } 10740b57cec5SDimitry Andric // If we find the outer induction after an inner induction e.g. 10750b57cec5SDimitry Andric // for(int i=0;i<N;i++) 10760b57cec5SDimitry Andric // for(int j=0;j<N;j++) 10770b57cec5SDimitry Andric // A[j][i] = A[j-1][i-1]+k; 10780b57cec5SDimitry Andric // then it is a bad order. 10790b57cec5SDimitry Andric if (AR->getLoop() == OuterLoop) { 10800b57cec5SDimitry Andric // We found an OuterLoop induction after InnerLoop induction. It is 10810b57cec5SDimitry Andric // a bad order. 10820b57cec5SDimitry Andric FoundOuterInduction = true; 10830b57cec5SDimitry Andric if (FoundInnerInduction) { 10840b57cec5SDimitry Andric BadOrder++; 10850b57cec5SDimitry Andric break; 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric } 10880b57cec5SDimitry Andric } 10890b57cec5SDimitry Andric } 10900b57cec5SDimitry Andric } 10910b57cec5SDimitry Andric } 10920b57cec5SDimitry Andric return GoodOrder - BadOrder; 10930b57cec5SDimitry Andric } 10940b57cec5SDimitry Andric 1095bdd1243dSDimitry Andric std::optional<bool> 1096bdd1243dSDimitry Andric LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis( 1097bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap, 1098bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC) { 109981ad6265SDimitry Andric // This is the new cost model returned from loop cache analysis. 110081ad6265SDimitry Andric // A smaller index means the loop should be placed an outer loop, and vice 110181ad6265SDimitry Andric // versa. 110206c3fb27SDimitry Andric if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) { 110381ad6265SDimitry Andric unsigned InnerIndex = 0, OuterIndex = 0; 110481ad6265SDimitry Andric InnerIndex = CostMap.find(InnerLoop)->second; 110581ad6265SDimitry Andric OuterIndex = CostMap.find(OuterLoop)->second; 110681ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex 110781ad6265SDimitry Andric << ", OuterIndex = " << OuterIndex << "\n"); 110881ad6265SDimitry Andric if (InnerIndex < OuterIndex) 1109bdd1243dSDimitry Andric return std::optional<bool>(true); 1110bdd1243dSDimitry Andric assert(InnerIndex != OuterIndex && "CostMap should assign unique " 1111bdd1243dSDimitry Andric "numbers to each loop"); 1112bdd1243dSDimitry Andric if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop)) 1113bdd1243dSDimitry Andric return std::nullopt; 1114bdd1243dSDimitry Andric return std::optional<bool>(false); 1115bdd1243dSDimitry Andric } 1116bdd1243dSDimitry Andric return std::nullopt; 1117bdd1243dSDimitry Andric } 1118bdd1243dSDimitry Andric 1119bdd1243dSDimitry Andric std::optional<bool> 1120bdd1243dSDimitry Andric LoopInterchangeProfitability::isProfitablePerInstrOrderCost() { 112181ad6265SDimitry Andric // Legacy cost model: this is rough cost estimation algorithm. It counts the 112281ad6265SDimitry Andric // good and bad order of induction variables in the instruction and allows 112381ad6265SDimitry Andric // reordering if number of bad orders is more than good. 11240b57cec5SDimitry Andric int Cost = getInstrOrderCost(); 11250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n"); 1126bdd1243dSDimitry Andric if (Cost < 0 && Cost < LoopInterchangeCostThreshold) 1127bdd1243dSDimitry Andric return std::optional<bool>(true); 1128bdd1243dSDimitry Andric 1129bdd1243dSDimitry Andric return std::nullopt; 113081ad6265SDimitry Andric } 11310b57cec5SDimitry Andric 1132bdd1243dSDimitry Andric std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization( 1133bdd1243dSDimitry Andric unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { 1134bdd1243dSDimitry Andric for (auto &Row : DepMatrix) { 1135bdd1243dSDimitry Andric // If the inner loop is loop independent or doesn't carry any dependency 1136bdd1243dSDimitry Andric // it is not profitable to move this to outer position, since we are 1137bdd1243dSDimitry Andric // likely able to do inner loop vectorization already. 1138bdd1243dSDimitry Andric if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=') 1139bdd1243dSDimitry Andric return std::optional<bool>(false); 11400b57cec5SDimitry Andric 1141bdd1243dSDimitry Andric // If the outer loop is not loop independent it is not profitable to move 1142bdd1243dSDimitry Andric // this to inner position, since doing so would not enable inner loop 1143bdd1243dSDimitry Andric // parallelism. 1144bdd1243dSDimitry Andric if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=') 1145bdd1243dSDimitry Andric return std::optional<bool>(false); 1146bdd1243dSDimitry Andric } 1147bdd1243dSDimitry Andric // If inner loop has dependence and outer loop is loop independent then it 1148bdd1243dSDimitry Andric // is/ profitable to interchange to enable inner loop parallelism. 1149bdd1243dSDimitry Andric // If there are no dependences, interchanging will not improve anything. 1150bdd1243dSDimitry Andric return std::optional<bool>(!DepMatrix.empty()); 1151bdd1243dSDimitry Andric } 1152bdd1243dSDimitry Andric 1153bdd1243dSDimitry Andric bool LoopInterchangeProfitability::isProfitable( 1154bdd1243dSDimitry Andric const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId, 1155bdd1243dSDimitry Andric unsigned OuterLoopId, CharMatrix &DepMatrix, 1156bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap, 1157bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC) { 1158bdd1243dSDimitry Andric // isProfitable() is structured to avoid endless loop interchange. 1159bdd1243dSDimitry Andric // If loop cache analysis could decide the profitability then, 1160bdd1243dSDimitry Andric // profitability check will stop and return the analysis result. 1161bdd1243dSDimitry Andric // If cache analysis failed to analyze the loopnest (e.g., 1162bdd1243dSDimitry Andric // due to delinearization issues) then only check whether it is 1163bdd1243dSDimitry Andric // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to 1164bdd1243dSDimitry Andric // analysis the profitability then only, isProfitableForVectorization 1165bdd1243dSDimitry Andric // will decide. 1166bdd1243dSDimitry Andric std::optional<bool> shouldInterchange = 1167bdd1243dSDimitry Andric isProfitablePerLoopCacheAnalysis(CostMap, CC); 1168bdd1243dSDimitry Andric if (!shouldInterchange.has_value()) { 1169bdd1243dSDimitry Andric shouldInterchange = isProfitablePerInstrOrderCost(); 1170bdd1243dSDimitry Andric if (!shouldInterchange.has_value()) 1171bdd1243dSDimitry Andric shouldInterchange = 1172bdd1243dSDimitry Andric isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix); 1173bdd1243dSDimitry Andric } 1174bdd1243dSDimitry Andric if (!shouldInterchange.has_value()) { 11750b57cec5SDimitry Andric ORE->emit([&]() { 11760b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", 11770b57cec5SDimitry Andric InnerLoop->getStartLoc(), 11780b57cec5SDimitry Andric InnerLoop->getHeader()) 1179bdd1243dSDimitry Andric << "Insufficient information to calculate the cost of loop for " 1180bdd1243dSDimitry Andric "interchange."; 11810b57cec5SDimitry Andric }); 11820b57cec5SDimitry Andric return false; 1183bdd1243dSDimitry Andric } else if (!shouldInterchange.value()) { 1184bdd1243dSDimitry Andric ORE->emit([&]() { 1185bdd1243dSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", 1186bdd1243dSDimitry Andric InnerLoop->getStartLoc(), 1187bdd1243dSDimitry Andric InnerLoop->getHeader()) 1188bdd1243dSDimitry Andric << "Interchanging loops is not considered to improve cache " 1189bdd1243dSDimitry Andric "locality nor vectorization."; 1190bdd1243dSDimitry Andric }); 1191bdd1243dSDimitry Andric return false; 1192bdd1243dSDimitry Andric } 1193bdd1243dSDimitry Andric return true; 11940b57cec5SDimitry Andric } 11950b57cec5SDimitry Andric 11960b57cec5SDimitry Andric void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, 11970b57cec5SDimitry Andric Loop *InnerLoop) { 11980b57cec5SDimitry Andric for (Loop *L : *OuterLoop) 11990b57cec5SDimitry Andric if (L == InnerLoop) { 12000b57cec5SDimitry Andric OuterLoop->removeChildLoop(L); 12010b57cec5SDimitry Andric return; 12020b57cec5SDimitry Andric } 12030b57cec5SDimitry Andric llvm_unreachable("Couldn't find loop"); 12040b57cec5SDimitry Andric } 12050b57cec5SDimitry Andric 12060b57cec5SDimitry Andric /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the 12070b57cec5SDimitry Andric /// new inner and outer loop after interchanging: NewInner is the original 12080b57cec5SDimitry Andric /// outer loop and NewOuter is the original inner loop. 12090b57cec5SDimitry Andric /// 12100b57cec5SDimitry Andric /// Before interchanging, we have the following structure 12110b57cec5SDimitry Andric /// Outer preheader 12120b57cec5SDimitry Andric // Outer header 12130b57cec5SDimitry Andric // Inner preheader 12140b57cec5SDimitry Andric // Inner header 12150b57cec5SDimitry Andric // Inner body 12160b57cec5SDimitry Andric // Inner latch 12170b57cec5SDimitry Andric // outer bbs 12180b57cec5SDimitry Andric // Outer latch 12190b57cec5SDimitry Andric // 12200b57cec5SDimitry Andric // After interchanging: 12210b57cec5SDimitry Andric // Inner preheader 12220b57cec5SDimitry Andric // Inner header 12230b57cec5SDimitry Andric // Outer preheader 12240b57cec5SDimitry Andric // Outer header 12250b57cec5SDimitry Andric // Inner body 12260b57cec5SDimitry Andric // outer bbs 12270b57cec5SDimitry Andric // Outer latch 12280b57cec5SDimitry Andric // Inner latch 12290b57cec5SDimitry Andric void LoopInterchangeTransform::restructureLoops( 12300b57cec5SDimitry Andric Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader, 12310b57cec5SDimitry Andric BasicBlock *OrigOuterPreHeader) { 12320b57cec5SDimitry Andric Loop *OuterLoopParent = OuterLoop->getParentLoop(); 12330b57cec5SDimitry Andric // The original inner loop preheader moves from the new inner loop to 12340b57cec5SDimitry Andric // the parent loop, if there is one. 12350b57cec5SDimitry Andric NewInner->removeBlockFromLoop(OrigInnerPreHeader); 12360b57cec5SDimitry Andric LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent); 12370b57cec5SDimitry Andric 12380b57cec5SDimitry Andric // Switch the loop levels. 12390b57cec5SDimitry Andric if (OuterLoopParent) { 12400b57cec5SDimitry Andric // Remove the loop from its parent loop. 12410b57cec5SDimitry Andric removeChildLoop(OuterLoopParent, NewInner); 12420b57cec5SDimitry Andric removeChildLoop(NewInner, NewOuter); 12430b57cec5SDimitry Andric OuterLoopParent->addChildLoop(NewOuter); 12440b57cec5SDimitry Andric } else { 12450b57cec5SDimitry Andric removeChildLoop(NewInner, NewOuter); 12460b57cec5SDimitry Andric LI->changeTopLevelLoop(NewInner, NewOuter); 12470b57cec5SDimitry Andric } 1248e8d8bef9SDimitry Andric while (!NewOuter->isInnermost()) 12490b57cec5SDimitry Andric NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); 12500b57cec5SDimitry Andric NewOuter->addChildLoop(NewInner); 12510b57cec5SDimitry Andric 12520b57cec5SDimitry Andric // BBs from the original inner loop. 12530b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks()); 12540b57cec5SDimitry Andric 12550b57cec5SDimitry Andric // Add BBs from the original outer loop to the original inner loop (excluding 12560b57cec5SDimitry Andric // BBs already in inner loop) 12570b57cec5SDimitry Andric for (BasicBlock *BB : NewInner->blocks()) 12580b57cec5SDimitry Andric if (LI->getLoopFor(BB) == NewInner) 12590b57cec5SDimitry Andric NewOuter->addBlockEntry(BB); 12600b57cec5SDimitry Andric 12610b57cec5SDimitry Andric // Now remove inner loop header and latch from the new inner loop and move 12620b57cec5SDimitry Andric // other BBs (the loop body) to the new inner loop. 12630b57cec5SDimitry Andric BasicBlock *OuterHeader = NewOuter->getHeader(); 12640b57cec5SDimitry Andric BasicBlock *OuterLatch = NewOuter->getLoopLatch(); 12650b57cec5SDimitry Andric for (BasicBlock *BB : OrigInnerBBs) { 12660b57cec5SDimitry Andric // Nothing will change for BBs in child loops. 12670b57cec5SDimitry Andric if (LI->getLoopFor(BB) != NewOuter) 12680b57cec5SDimitry Andric continue; 12690b57cec5SDimitry Andric // Remove the new outer loop header and latch from the new inner loop. 12700b57cec5SDimitry Andric if (BB == OuterHeader || BB == OuterLatch) 12710b57cec5SDimitry Andric NewInner->removeBlockFromLoop(BB); 12720b57cec5SDimitry Andric else 12730b57cec5SDimitry Andric LI->changeLoopFor(BB, NewInner); 12740b57cec5SDimitry Andric } 12750b57cec5SDimitry Andric 12760b57cec5SDimitry Andric // The preheader of the original outer loop becomes part of the new 12770b57cec5SDimitry Andric // outer loop. 12780b57cec5SDimitry Andric NewOuter->addBlockEntry(OrigOuterPreHeader); 12790b57cec5SDimitry Andric LI->changeLoopFor(OrigOuterPreHeader, NewOuter); 12800b57cec5SDimitry Andric 12810b57cec5SDimitry Andric // Tell SE that we move the loops around. 12820b57cec5SDimitry Andric SE->forgetLoop(NewOuter); 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric bool LoopInterchangeTransform::transform() { 12860b57cec5SDimitry Andric bool Transformed = false; 12870b57cec5SDimitry Andric 12880b57cec5SDimitry Andric if (InnerLoop->getSubLoops().empty()) { 12890b57cec5SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 12908bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); 129104eeddc0SDimitry Andric auto &InductionPHIs = LIL.getInnerLoopInductions(); 129204eeddc0SDimitry Andric if (InductionPHIs.empty()) { 12930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); 12940b57cec5SDimitry Andric return false; 12950b57cec5SDimitry Andric } 12960b57cec5SDimitry Andric 129704eeddc0SDimitry Andric SmallVector<Instruction *, 8> InnerIndexVarList; 129804eeddc0SDimitry Andric for (PHINode *CurInductionPHI : InductionPHIs) { 129904eeddc0SDimitry Andric if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) 130004eeddc0SDimitry Andric InnerIndexVarList.push_back( 130104eeddc0SDimitry Andric dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1))); 13020b57cec5SDimitry Andric else 130304eeddc0SDimitry Andric InnerIndexVarList.push_back( 130404eeddc0SDimitry Andric dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0))); 130504eeddc0SDimitry Andric } 13060b57cec5SDimitry Andric 13078bcb0991SDimitry Andric // Create a new latch block for the inner loop. We split at the 13088bcb0991SDimitry Andric // current latch's terminator and then move the condition and all 13098bcb0991SDimitry Andric // operands that are not either loop-invariant or the induction PHI into the 13108bcb0991SDimitry Andric // new latch block. 13118bcb0991SDimitry Andric BasicBlock *NewLatch = 13128bcb0991SDimitry Andric SplitBlock(InnerLoop->getLoopLatch(), 13138bcb0991SDimitry Andric InnerLoop->getLoopLatch()->getTerminator(), DT, LI); 13148bcb0991SDimitry Andric 13158bcb0991SDimitry Andric SmallSetVector<Instruction *, 4> WorkList; 13168bcb0991SDimitry Andric unsigned i = 0; 131704eeddc0SDimitry Andric auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() { 13188bcb0991SDimitry Andric for (; i < WorkList.size(); i++) { 13198bcb0991SDimitry Andric // Duplicate instruction and move it the new latch. Update uses that 13208bcb0991SDimitry Andric // have been moved. 13218bcb0991SDimitry Andric Instruction *NewI = WorkList[i]->clone(); 13228bcb0991SDimitry Andric NewI->insertBefore(NewLatch->getFirstNonPHI()); 13238bcb0991SDimitry Andric assert(!NewI->mayHaveSideEffects() && 13248bcb0991SDimitry Andric "Moving instructions with side-effects may change behavior of " 13258bcb0991SDimitry Andric "the loop nest!"); 1326fe6060f1SDimitry Andric for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) { 13278bcb0991SDimitry Andric Instruction *UserI = cast<Instruction>(U.getUser()); 13288bcb0991SDimitry Andric if (!InnerLoop->contains(UserI->getParent()) || 132904eeddc0SDimitry Andric UserI->getParent() == NewLatch || 133004eeddc0SDimitry Andric llvm::is_contained(InductionPHIs, UserI)) 13318bcb0991SDimitry Andric U.set(NewI); 13328bcb0991SDimitry Andric } 13338bcb0991SDimitry Andric // Add operands of moved instruction to the worklist, except if they are 13348bcb0991SDimitry Andric // outside the inner loop or are the induction PHI. 13358bcb0991SDimitry Andric for (Value *Op : WorkList[i]->operands()) { 13368bcb0991SDimitry Andric Instruction *OpI = dyn_cast<Instruction>(Op); 13378bcb0991SDimitry Andric if (!OpI || 13388bcb0991SDimitry Andric this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop || 133904eeddc0SDimitry Andric llvm::is_contained(InductionPHIs, OpI)) 13408bcb0991SDimitry Andric continue; 13418bcb0991SDimitry Andric WorkList.insert(OpI); 13428bcb0991SDimitry Andric } 13438bcb0991SDimitry Andric } 13448bcb0991SDimitry Andric }; 13458bcb0991SDimitry Andric 13468bcb0991SDimitry Andric // FIXME: Should we interchange when we have a constant condition? 13478bcb0991SDimitry Andric Instruction *CondI = dyn_cast<Instruction>( 13488bcb0991SDimitry Andric cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator()) 13498bcb0991SDimitry Andric ->getCondition()); 13508bcb0991SDimitry Andric if (CondI) 13518bcb0991SDimitry Andric WorkList.insert(CondI); 13528bcb0991SDimitry Andric MoveInstructions(); 135304eeddc0SDimitry Andric for (Instruction *InnerIndexVar : InnerIndexVarList) 13548bcb0991SDimitry Andric WorkList.insert(cast<Instruction>(InnerIndexVar)); 13558bcb0991SDimitry Andric MoveInstructions(); 1356bdd1243dSDimitry Andric } 13570b57cec5SDimitry Andric 1358bdd1243dSDimitry Andric // Ensure the inner loop phi nodes have a separate basic block. 13590b57cec5SDimitry Andric BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 1360bdd1243dSDimitry Andric if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) { 13610b57cec5SDimitry Andric SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); 13620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n"); 13630b57cec5SDimitry Andric } 13640b57cec5SDimitry Andric 1365e8d8bef9SDimitry Andric // Instructions in the original inner loop preheader may depend on values 1366e8d8bef9SDimitry Andric // defined in the outer loop header. Move them there, because the original 1367e8d8bef9SDimitry Andric // inner loop preheader will become the entry into the interchanged loop nest. 1368e8d8bef9SDimitry Andric // Currently we move all instructions and rely on LICM to move invariant 1369e8d8bef9SDimitry Andric // instructions outside the loop nest. 1370e8d8bef9SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 1371e8d8bef9SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 1372e8d8bef9SDimitry Andric if (InnerLoopPreHeader != OuterLoopHeader) { 1373e8d8bef9SDimitry Andric SmallPtrSet<Instruction *, 4> NeedsMoving; 1374e8d8bef9SDimitry Andric for (Instruction &I : 1375e8d8bef9SDimitry Andric make_early_inc_range(make_range(InnerLoopPreHeader->begin(), 1376e8d8bef9SDimitry Andric std::prev(InnerLoopPreHeader->end())))) 13775f757f3fSDimitry Andric I.moveBeforePreserving(OuterLoopHeader->getTerminator()); 1378e8d8bef9SDimitry Andric } 1379e8d8bef9SDimitry Andric 13800b57cec5SDimitry Andric Transformed |= adjustLoopLinks(); 13810b57cec5SDimitry Andric if (!Transformed) { 13820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n"); 13830b57cec5SDimitry Andric return false; 13840b57cec5SDimitry Andric } 13850b57cec5SDimitry Andric 13860b57cec5SDimitry Andric return true; 13870b57cec5SDimitry Andric } 13880b57cec5SDimitry Andric 13890b57cec5SDimitry Andric /// \brief Move all instructions except the terminator from FromBB right before 13900b57cec5SDimitry Andric /// InsertBefore 13910b57cec5SDimitry Andric static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { 1392bdd1243dSDimitry Andric BasicBlock *ToBB = InsertBefore->getParent(); 13930b57cec5SDimitry Andric 1394bdd1243dSDimitry Andric ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(), 13950b57cec5SDimitry Andric FromBB->getTerminator()->getIterator()); 13960b57cec5SDimitry Andric } 13970b57cec5SDimitry Andric 13985ffd83dbSDimitry Andric /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact. 13995ffd83dbSDimitry Andric static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) { 14005ffd83dbSDimitry Andric // Save all non-terminator instructions of BB1 into TempInstrs and unlink them 14015ffd83dbSDimitry Andric // from BB1 afterwards. 14025ffd83dbSDimitry Andric auto Iter = map_range(*BB1, [](Instruction &I) { return &I; }); 14035ffd83dbSDimitry Andric SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end())); 14045ffd83dbSDimitry Andric for (Instruction *I : TempInstrs) 14055ffd83dbSDimitry Andric I->removeFromParent(); 14065ffd83dbSDimitry Andric 14075ffd83dbSDimitry Andric // Move instructions from BB2 to BB1. 14085ffd83dbSDimitry Andric moveBBContents(BB2, BB1->getTerminator()); 14095ffd83dbSDimitry Andric 14105ffd83dbSDimitry Andric // Move instructions from TempInstrs to BB2. 14115ffd83dbSDimitry Andric for (Instruction *I : TempInstrs) 14125ffd83dbSDimitry Andric I->insertBefore(BB2->getTerminator()); 14135ffd83dbSDimitry Andric } 14145ffd83dbSDimitry Andric 1415480093f4SDimitry Andric // Update BI to jump to NewBB instead of OldBB. Records updates to the 1416480093f4SDimitry Andric // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that 1417480093f4SDimitry Andric // \p OldBB is exactly once in BI's successor list. 14180b57cec5SDimitry Andric static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, 14190b57cec5SDimitry Andric BasicBlock *NewBB, 1420480093f4SDimitry Andric std::vector<DominatorTree::UpdateType> &DTUpdates, 1421480093f4SDimitry Andric bool MustUpdateOnce = true) { 1422480093f4SDimitry Andric assert((!MustUpdateOnce || 1423480093f4SDimitry Andric llvm::count_if(successors(BI), 1424480093f4SDimitry Andric [OldBB](BasicBlock *BB) { 1425480093f4SDimitry Andric return BB == OldBB; 1426480093f4SDimitry Andric }) == 1) && "BI must jump to OldBB exactly once."); 1427480093f4SDimitry Andric bool Changed = false; 1428480093f4SDimitry Andric for (Use &Op : BI->operands()) 1429480093f4SDimitry Andric if (Op == OldBB) { 1430480093f4SDimitry Andric Op.set(NewBB); 1431480093f4SDimitry Andric Changed = true; 1432480093f4SDimitry Andric } 14330b57cec5SDimitry Andric 1434480093f4SDimitry Andric if (Changed) { 14350b57cec5SDimitry Andric DTUpdates.push_back( 14360b57cec5SDimitry Andric {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); 14370b57cec5SDimitry Andric DTUpdates.push_back( 14380b57cec5SDimitry Andric {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); 14390b57cec5SDimitry Andric } 1440480093f4SDimitry Andric assert(Changed && "Expected a successor to be updated"); 14410b57cec5SDimitry Andric } 14420b57cec5SDimitry Andric 14430b57cec5SDimitry Andric // Move Lcssa PHIs to the right place. 14440b57cec5SDimitry Andric static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader, 14450b57cec5SDimitry Andric BasicBlock *InnerLatch, BasicBlock *OuterHeader, 1446480093f4SDimitry Andric BasicBlock *OuterLatch, BasicBlock *OuterExit, 1447480093f4SDimitry Andric Loop *InnerLoop, LoopInfo *LI) { 14480b57cec5SDimitry Andric 14490b57cec5SDimitry Andric // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are 14500b57cec5SDimitry Andric // defined either in the header or latch. Those blocks will become header and 14510b57cec5SDimitry Andric // latch of the new outer loop, and the only possible users can PHI nodes 14520b57cec5SDimitry Andric // in the exit block of the loop nest or the outer loop header (reduction 14530b57cec5SDimitry Andric // PHIs, in that case, the incoming value must be defined in the inner loop 14540b57cec5SDimitry Andric // header). We can just substitute the user with the incoming value and remove 14550b57cec5SDimitry Andric // the PHI. 14560b57cec5SDimitry Andric for (PHINode &P : make_early_inc_range(InnerExit->phis())) { 14570b57cec5SDimitry Andric assert(P.getNumIncomingValues() == 1 && 14580b57cec5SDimitry Andric "Only loops with a single exit are supported!"); 14590b57cec5SDimitry Andric 14600b57cec5SDimitry Andric // Incoming values are guaranteed be instructions currently. 14610b57cec5SDimitry Andric auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch)); 146281ad6265SDimitry Andric // In case of multi-level nested loops, follow LCSSA to find the incoming 146381ad6265SDimitry Andric // value defined from the innermost loop. 146481ad6265SDimitry Andric auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI)); 14650b57cec5SDimitry Andric // Skip phis with incoming values from the inner loop body, excluding the 14660b57cec5SDimitry Andric // header and latch. 146781ad6265SDimitry Andric if (IncIInnerMost->getParent() != InnerLatch && 146881ad6265SDimitry Andric IncIInnerMost->getParent() != InnerHeader) 14690b57cec5SDimitry Andric continue; 14700b57cec5SDimitry Andric 14710b57cec5SDimitry Andric assert(all_of(P.users(), 14720b57cec5SDimitry Andric [OuterHeader, OuterExit, IncI, InnerHeader](User *U) { 14730b57cec5SDimitry Andric return (cast<PHINode>(U)->getParent() == OuterHeader && 14740b57cec5SDimitry Andric IncI->getParent() == InnerHeader) || 14750b57cec5SDimitry Andric cast<PHINode>(U)->getParent() == OuterExit; 14760b57cec5SDimitry Andric }) && 14770b57cec5SDimitry Andric "Can only replace phis iff the uses are in the loop nest exit or " 14780b57cec5SDimitry Andric "the incoming value is defined in the inner header (it will " 14790b57cec5SDimitry Andric "dominate all loop blocks after interchanging)"); 14800b57cec5SDimitry Andric P.replaceAllUsesWith(IncI); 14810b57cec5SDimitry Andric P.eraseFromParent(); 14820b57cec5SDimitry Andric } 14830b57cec5SDimitry Andric 14840b57cec5SDimitry Andric SmallVector<PHINode *, 8> LcssaInnerExit; 14850b57cec5SDimitry Andric for (PHINode &P : InnerExit->phis()) 14860b57cec5SDimitry Andric LcssaInnerExit.push_back(&P); 14870b57cec5SDimitry Andric 14880b57cec5SDimitry Andric SmallVector<PHINode *, 8> LcssaInnerLatch; 14890b57cec5SDimitry Andric for (PHINode &P : InnerLatch->phis()) 14900b57cec5SDimitry Andric LcssaInnerLatch.push_back(&P); 14910b57cec5SDimitry Andric 14920b57cec5SDimitry Andric // Lcssa PHIs for values used outside the inner loop are in InnerExit. 14930b57cec5SDimitry Andric // If a PHI node has users outside of InnerExit, it has a use outside the 14940b57cec5SDimitry Andric // interchanged loop and we have to preserve it. We move these to 14950b57cec5SDimitry Andric // InnerLatch, which will become the new exit block for the innermost 14960b57cec5SDimitry Andric // loop after interchanging. 14970b57cec5SDimitry Andric for (PHINode *P : LcssaInnerExit) 14980b57cec5SDimitry Andric P->moveBefore(InnerLatch->getFirstNonPHI()); 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric // If the inner loop latch contains LCSSA PHIs, those come from a child loop 15010b57cec5SDimitry Andric // and we have to move them to the new inner latch. 15020b57cec5SDimitry Andric for (PHINode *P : LcssaInnerLatch) 15030b57cec5SDimitry Andric P->moveBefore(InnerExit->getFirstNonPHI()); 15040b57cec5SDimitry Andric 15050b57cec5SDimitry Andric // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have 1506480093f4SDimitry Andric // incoming values defined in the outer loop, we have to add a new PHI 15070b57cec5SDimitry Andric // in the inner loop latch, which became the exit block of the outer loop, 15080b57cec5SDimitry Andric // after interchanging. 15090b57cec5SDimitry Andric if (OuterExit) { 15100b57cec5SDimitry Andric for (PHINode &P : OuterExit->phis()) { 15110b57cec5SDimitry Andric if (P.getNumIncomingValues() != 1) 15120b57cec5SDimitry Andric continue; 1513480093f4SDimitry Andric // Skip Phis with incoming values defined in the inner loop. Those should 15140b57cec5SDimitry Andric // already have been updated. 15150b57cec5SDimitry Andric auto I = dyn_cast<Instruction>(P.getIncomingValue(0)); 1516480093f4SDimitry Andric if (!I || LI->getLoopFor(I->getParent()) == InnerLoop) 15170b57cec5SDimitry Andric continue; 15180b57cec5SDimitry Andric 15190b57cec5SDimitry Andric PHINode *NewPhi = dyn_cast<PHINode>(P.clone()); 15200b57cec5SDimitry Andric NewPhi->setIncomingValue(0, P.getIncomingValue(0)); 15210b57cec5SDimitry Andric NewPhi->setIncomingBlock(0, OuterLatch); 1522fe6060f1SDimitry Andric // We might have incoming edges from other BBs, i.e., the original outer 1523fe6060f1SDimitry Andric // header. 1524fe6060f1SDimitry Andric for (auto *Pred : predecessors(InnerLatch)) { 1525fe6060f1SDimitry Andric if (Pred == OuterLatch) 1526fe6060f1SDimitry Andric continue; 1527fe6060f1SDimitry Andric NewPhi->addIncoming(P.getIncomingValue(0), Pred); 1528fe6060f1SDimitry Andric } 15290b57cec5SDimitry Andric NewPhi->insertBefore(InnerLatch->getFirstNonPHI()); 15300b57cec5SDimitry Andric P.setIncomingValue(0, NewPhi); 15310b57cec5SDimitry Andric } 15320b57cec5SDimitry Andric } 15330b57cec5SDimitry Andric 15340b57cec5SDimitry Andric // Now adjust the incoming blocks for the LCSSA PHIs. 15350b57cec5SDimitry Andric // For PHIs moved from Inner's exit block, we need to replace Inner's latch 15360b57cec5SDimitry Andric // with the new latch. 15370b57cec5SDimitry Andric InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch); 15380b57cec5SDimitry Andric } 15390b57cec5SDimitry Andric 15400b57cec5SDimitry Andric bool LoopInterchangeTransform::adjustLoopBranches() { 15410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n"); 15420b57cec5SDimitry Andric std::vector<DominatorTree::UpdateType> DTUpdates; 15430b57cec5SDimitry Andric 15440b57cec5SDimitry Andric BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 15450b57cec5SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 15460b57cec5SDimitry Andric 15470b57cec5SDimitry Andric assert(OuterLoopPreHeader != OuterLoop->getHeader() && 15480b57cec5SDimitry Andric InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && 15490b57cec5SDimitry Andric InnerLoopPreHeader && "Guaranteed by loop-simplify form"); 15500b57cec5SDimitry Andric // Ensure that both preheaders do not contain PHI nodes and have single 15510b57cec5SDimitry Andric // predecessors. This allows us to move them easily. We use 15520b57cec5SDimitry Andric // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing 15530b57cec5SDimitry Andric // preheaders do not satisfy those conditions. 15540b57cec5SDimitry Andric if (isa<PHINode>(OuterLoopPreHeader->begin()) || 15550b57cec5SDimitry Andric !OuterLoopPreHeader->getUniquePredecessor()) 15560b57cec5SDimitry Andric OuterLoopPreHeader = 15570b57cec5SDimitry Andric InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true); 15580b57cec5SDimitry Andric if (InnerLoopPreHeader == OuterLoop->getHeader()) 15590b57cec5SDimitry Andric InnerLoopPreHeader = 15600b57cec5SDimitry Andric InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true); 15610b57cec5SDimitry Andric 15620b57cec5SDimitry Andric // Adjust the loop preheader 15630b57cec5SDimitry Andric BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); 15640b57cec5SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); 15650b57cec5SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); 15660b57cec5SDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); 15670b57cec5SDimitry Andric BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); 15680b57cec5SDimitry Andric BasicBlock *InnerLoopLatchPredecessor = 15690b57cec5SDimitry Andric InnerLoopLatch->getUniquePredecessor(); 15700b57cec5SDimitry Andric BasicBlock *InnerLoopLatchSuccessor; 15710b57cec5SDimitry Andric BasicBlock *OuterLoopLatchSuccessor; 15720b57cec5SDimitry Andric 15730b57cec5SDimitry Andric BranchInst *OuterLoopLatchBI = 15740b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); 15750b57cec5SDimitry Andric BranchInst *InnerLoopLatchBI = 15760b57cec5SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); 15770b57cec5SDimitry Andric BranchInst *OuterLoopHeaderBI = 15780b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); 15790b57cec5SDimitry Andric BranchInst *InnerLoopHeaderBI = 15800b57cec5SDimitry Andric dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); 15810b57cec5SDimitry Andric 15820b57cec5SDimitry Andric if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || 15830b57cec5SDimitry Andric !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || 15840b57cec5SDimitry Andric !InnerLoopHeaderBI) 15850b57cec5SDimitry Andric return false; 15860b57cec5SDimitry Andric 15870b57cec5SDimitry Andric BranchInst *InnerLoopLatchPredecessorBI = 15880b57cec5SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); 15890b57cec5SDimitry Andric BranchInst *OuterLoopPredecessorBI = 15900b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); 15910b57cec5SDimitry Andric 15920b57cec5SDimitry Andric if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) 15930b57cec5SDimitry Andric return false; 15940b57cec5SDimitry Andric BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); 15950b57cec5SDimitry Andric if (!InnerLoopHeaderSuccessor) 15960b57cec5SDimitry Andric return false; 15970b57cec5SDimitry Andric 1598480093f4SDimitry Andric // Adjust Loop Preheader and headers. 1599480093f4SDimitry Andric // The branches in the outer loop predecessor and the outer loop header can 1600480093f4SDimitry Andric // be unconditional branches or conditional branches with duplicates. Consider 1601480093f4SDimitry Andric // this when updating the successors. 16020b57cec5SDimitry Andric updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, 1603480093f4SDimitry Andric InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); 1604480093f4SDimitry Andric // The outer loop header might or might not branch to the outer latch. 1605480093f4SDimitry Andric // We are guaranteed to branch to the inner loop preheader. 1606fe6060f1SDimitry Andric if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) { 1607fe6060f1SDimitry Andric // In this case the outerLoopHeader should branch to the InnerLoopLatch. 1608fe6060f1SDimitry Andric updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch, 1609fe6060f1SDimitry Andric DTUpdates, 1610480093f4SDimitry Andric /*MustUpdateOnce=*/false); 1611fe6060f1SDimitry Andric } 16120b57cec5SDimitry Andric updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, 1613480093f4SDimitry Andric InnerLoopHeaderSuccessor, DTUpdates, 1614480093f4SDimitry Andric /*MustUpdateOnce=*/false); 16150b57cec5SDimitry Andric 16160b57cec5SDimitry Andric // Adjust reduction PHI's now that the incoming block has changed. 16170b57cec5SDimitry Andric InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader, 16180b57cec5SDimitry Andric OuterLoopHeader); 16190b57cec5SDimitry Andric 16200b57cec5SDimitry Andric updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor, 16210b57cec5SDimitry Andric OuterLoopPreHeader, DTUpdates); 16220b57cec5SDimitry Andric 16230b57cec5SDimitry Andric // -------------Adjust loop latches----------- 16240b57cec5SDimitry Andric if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) 16250b57cec5SDimitry Andric InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); 16260b57cec5SDimitry Andric else 16270b57cec5SDimitry Andric InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); 16280b57cec5SDimitry Andric 16290b57cec5SDimitry Andric updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, 16300b57cec5SDimitry Andric InnerLoopLatchSuccessor, DTUpdates); 16310b57cec5SDimitry Andric 16320b57cec5SDimitry Andric if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) 16330b57cec5SDimitry Andric OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); 16340b57cec5SDimitry Andric else 16350b57cec5SDimitry Andric OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); 16360b57cec5SDimitry Andric 16370b57cec5SDimitry Andric updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor, 16380b57cec5SDimitry Andric OuterLoopLatchSuccessor, DTUpdates); 16390b57cec5SDimitry Andric updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, 16400b57cec5SDimitry Andric DTUpdates); 16410b57cec5SDimitry Andric 16420b57cec5SDimitry Andric DT->applyUpdates(DTUpdates); 16430b57cec5SDimitry Andric restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader, 16440b57cec5SDimitry Andric OuterLoopPreHeader); 16450b57cec5SDimitry Andric 16460b57cec5SDimitry Andric moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch, 1647480093f4SDimitry Andric OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(), 1648480093f4SDimitry Andric InnerLoop, LI); 16490b57cec5SDimitry Andric // For PHIs in the exit block of the outer loop, outer's latch has been 16500b57cec5SDimitry Andric // replaced by Inners'. 16510b57cec5SDimitry Andric OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); 16520b57cec5SDimitry Andric 1653fe6060f1SDimitry Andric auto &OuterInnerReductions = LIL.getOuterInnerReductions(); 16540b57cec5SDimitry Andric // Now update the reduction PHIs in the inner and outer loop headers. 16550b57cec5SDimitry Andric SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; 1656349cc55cSDimitry Andric for (PHINode &PHI : InnerLoopHeader->phis()) 1657349cc55cSDimitry Andric if (OuterInnerReductions.contains(&PHI)) 165804eeddc0SDimitry Andric InnerLoopPHIs.push_back(&PHI); 165904eeddc0SDimitry Andric 1660349cc55cSDimitry Andric for (PHINode &PHI : OuterLoopHeader->phis()) 1661349cc55cSDimitry Andric if (OuterInnerReductions.contains(&PHI)) 166204eeddc0SDimitry Andric OuterLoopPHIs.push_back(&PHI); 16630b57cec5SDimitry Andric 16640b57cec5SDimitry Andric // Now move the remaining reduction PHIs from outer to inner loop header and 16650b57cec5SDimitry Andric // vice versa. The PHI nodes must be part of a reduction across the inner and 16660b57cec5SDimitry Andric // outer loop and all the remains to do is and updating the incoming blocks. 16670b57cec5SDimitry Andric for (PHINode *PHI : OuterLoopPHIs) { 166804eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();); 16690b57cec5SDimitry Andric PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); 16705ffd83dbSDimitry Andric assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node"); 16710b57cec5SDimitry Andric } 16720b57cec5SDimitry Andric for (PHINode *PHI : InnerLoopPHIs) { 167304eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();); 16740b57cec5SDimitry Andric PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); 16755ffd83dbSDimitry Andric assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node"); 16760b57cec5SDimitry Andric } 16770b57cec5SDimitry Andric 16780b57cec5SDimitry Andric // Update the incoming blocks for moved PHI nodes. 16790b57cec5SDimitry Andric OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader); 16800b57cec5SDimitry Andric OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch); 16810b57cec5SDimitry Andric InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader); 16820b57cec5SDimitry Andric InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); 16830b57cec5SDimitry Andric 1684e8d8bef9SDimitry Andric // Values defined in the outer loop header could be used in the inner loop 1685e8d8bef9SDimitry Andric // latch. In that case, we need to create LCSSA phis for them, because after 1686e8d8bef9SDimitry Andric // interchanging they will be defined in the new inner loop and used in the 1687e8d8bef9SDimitry Andric // new outer loop. 1688e8d8bef9SDimitry Andric SmallVector<Instruction *, 4> MayNeedLCSSAPhis; 1689e8d8bef9SDimitry Andric for (Instruction &I : 1690e8d8bef9SDimitry Andric make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end()))) 1691e8d8bef9SDimitry Andric MayNeedLCSSAPhis.push_back(&I); 169206c3fb27SDimitry Andric formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE); 1693e8d8bef9SDimitry Andric 16940b57cec5SDimitry Andric return true; 16950b57cec5SDimitry Andric } 16960b57cec5SDimitry Andric 16970b57cec5SDimitry Andric bool LoopInterchangeTransform::adjustLoopLinks() { 16980b57cec5SDimitry Andric // Adjust all branches in the inner and outer loop. 16990b57cec5SDimitry Andric bool Changed = adjustLoopBranches(); 17005ffd83dbSDimitry Andric if (Changed) { 17015ffd83dbSDimitry Andric // We have interchanged the preheaders so we need to interchange the data in 17025ffd83dbSDimitry Andric // the preheaders as well. This is because the content of the inner 17035ffd83dbSDimitry Andric // preheader was previously executed inside the outer loop. 17045ffd83dbSDimitry Andric BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); 17055ffd83dbSDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); 17065ffd83dbSDimitry Andric swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader); 17075ffd83dbSDimitry Andric } 17080b57cec5SDimitry Andric return Changed; 17090b57cec5SDimitry Andric } 17100b57cec5SDimitry Andric 1711fe6060f1SDimitry Andric PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, 1712fe6060f1SDimitry Andric LoopAnalysisManager &AM, 1713e8d8bef9SDimitry Andric LoopStandardAnalysisResults &AR, 1714e8d8bef9SDimitry Andric LPMUpdater &U) { 1715fe6060f1SDimitry Andric Function &F = *LN.getParent(); 1716e8d8bef9SDimitry Andric 1717e8d8bef9SDimitry Andric DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); 171881ad6265SDimitry Andric std::unique_ptr<CacheCost> CC = 171981ad6265SDimitry Andric CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); 1720e8d8bef9SDimitry Andric OptimizationRemarkEmitter ORE(&F); 172181ad6265SDimitry Andric if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) 1722e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 1723bdd1243dSDimitry Andric U.markLoopNestChanged(true); 1724e8d8bef9SDimitry Andric return getLoopPassPreservedAnalyses(); 1725e8d8bef9SDimitry Andric } 1726