10b57cec5SDimitry Andric //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements loop unroll and jam as a routine, much like 100b57cec5SDimitry Andric // LoopUnroll.cpp implements loop unroll. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 145ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h" 155ffd83dbSDimitry Andric #include "llvm/ADT/DenseMap.h" 165ffd83dbSDimitry Andric #include "llvm/ADT/STLExtras.h" 170b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 185ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 190b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 205ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h" 215ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h" 245ffd83dbSDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 255ffd83dbSDimitry Andric #include "llvm/Analysis/LoopInfo.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h" 275ffd83dbSDimitry Andric #include "llvm/Analysis/MustExecute.h" 280b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 300b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 310b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 325ffd83dbSDimitry Andric #include "llvm/IR/DebugLoc.h" 335ffd83dbSDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 340b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 355ffd83dbSDimitry Andric #include "llvm/IR/Function.h" 365ffd83dbSDimitry Andric #include "llvm/IR/Instruction.h" 375ffd83dbSDimitry Andric #include "llvm/IR/Instructions.h" 380b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 395ffd83dbSDimitry Andric #include "llvm/IR/User.h" 405ffd83dbSDimitry Andric #include "llvm/IR/Value.h" 415ffd83dbSDimitry Andric #include "llvm/IR/ValueHandle.h" 425ffd83dbSDimitry Andric #include "llvm/IR/ValueMap.h" 435ffd83dbSDimitry Andric #include "llvm/Support/Casting.h" 440b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 455ffd83dbSDimitry Andric #include "llvm/Support/ErrorHandling.h" 465ffd83dbSDimitry Andric #include "llvm/Support/GenericDomTree.h" 470b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 480b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 490b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 500b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 510b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h" 525ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 535ffd83dbSDimitry Andric #include <assert.h> 545ffd83dbSDimitry Andric #include <memory> 555ffd83dbSDimitry Andric #include <type_traits> 565ffd83dbSDimitry Andric #include <vector> 575ffd83dbSDimitry Andric 580b57cec5SDimitry Andric using namespace llvm; 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll-and-jam" 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed"); 630b57cec5SDimitry Andric STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed"); 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric // Partition blocks in an outer/inner loop pair into blocks before and after 680b57cec5SDimitry Andric // the loop 695ffd83dbSDimitry Andric static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks, 705ffd83dbSDimitry Andric BasicBlockSet &AftBlocks, DominatorTree &DT) { 715ffd83dbSDimitry Andric Loop *SubLoop = L.getSubLoops()[0]; 720b57cec5SDimitry Andric BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); 730b57cec5SDimitry Andric 745ffd83dbSDimitry Andric for (BasicBlock *BB : L.blocks()) { 750b57cec5SDimitry Andric if (!SubLoop->contains(BB)) { 765ffd83dbSDimitry Andric if (DT.dominates(SubLoopLatch, BB)) 770b57cec5SDimitry Andric AftBlocks.insert(BB); 780b57cec5SDimitry Andric else 790b57cec5SDimitry Andric ForeBlocks.insert(BB); 800b57cec5SDimitry Andric } 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric // Check that all blocks in ForeBlocks together dominate the subloop 840b57cec5SDimitry Andric // TODO: This might ideally be done better with a dominator/postdominators. 850b57cec5SDimitry Andric BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader(); 860b57cec5SDimitry Andric for (BasicBlock *BB : ForeBlocks) { 870b57cec5SDimitry Andric if (BB == SubLoopPreHeader) 880b57cec5SDimitry Andric continue; 890b57cec5SDimitry Andric Instruction *TI = BB->getTerminator(); 905ffd83dbSDimitry Andric for (BasicBlock *Succ : successors(TI)) 915ffd83dbSDimitry Andric if (!ForeBlocks.count(Succ)) 920b57cec5SDimitry Andric return false; 930b57cec5SDimitry Andric } 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric return true; 960b57cec5SDimitry Andric } 970b57cec5SDimitry Andric 985ffd83dbSDimitry Andric /// Partition blocks in a loop nest into blocks before and after each inner 995ffd83dbSDimitry Andric /// loop. 1005ffd83dbSDimitry Andric static bool partitionOuterLoopBlocks( 1015ffd83dbSDimitry Andric Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks, 1025ffd83dbSDimitry Andric DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap, 1035ffd83dbSDimitry Andric DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, DominatorTree &DT) { 1045ffd83dbSDimitry Andric JamLoopBlocks.insert(JamLoop.block_begin(), JamLoop.block_end()); 1055ffd83dbSDimitry Andric 1065ffd83dbSDimitry Andric for (Loop *L : Root.getLoopsInPreorder()) { 1075ffd83dbSDimitry Andric if (L == &JamLoop) 1085ffd83dbSDimitry Andric break; 1095ffd83dbSDimitry Andric 1105ffd83dbSDimitry Andric if (!partitionLoopBlocks(*L, ForeBlocksMap[L], AftBlocksMap[L], DT)) 1115ffd83dbSDimitry Andric return false; 1125ffd83dbSDimitry Andric } 1135ffd83dbSDimitry Andric 1145ffd83dbSDimitry Andric return true; 1155ffd83dbSDimitry Andric } 1165ffd83dbSDimitry Andric 1175ffd83dbSDimitry Andric // TODO Remove when UnrollAndJamLoop changed to support unroll and jamming more 1185ffd83dbSDimitry Andric // than 2 levels loop. 1195ffd83dbSDimitry Andric static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop, 1205ffd83dbSDimitry Andric BasicBlockSet &ForeBlocks, 1215ffd83dbSDimitry Andric BasicBlockSet &SubLoopBlocks, 1225ffd83dbSDimitry Andric BasicBlockSet &AftBlocks, 1235ffd83dbSDimitry Andric DominatorTree *DT) { 1245ffd83dbSDimitry Andric SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end()); 1255ffd83dbSDimitry Andric return partitionLoopBlocks(*L, ForeBlocks, AftBlocks, *DT); 1265ffd83dbSDimitry Andric } 1275ffd83dbSDimitry Andric 1280b57cec5SDimitry Andric // Looks at the phi nodes in Header for values coming from Latch. For these 1290b57cec5SDimitry Andric // instructions and all their operands calls Visit on them, keeping going for 1300b57cec5SDimitry Andric // all the operands in AftBlocks. Returns false if Visit returns false, 1310b57cec5SDimitry Andric // otherwise returns true. This is used to process the instructions in the 1320b57cec5SDimitry Andric // Aft blocks that need to be moved before the subloop. It is used in two 1330b57cec5SDimitry Andric // places. One to check that the required set of instructions can be moved 1340b57cec5SDimitry Andric // before the loop. Then to collect the instructions to actually move in 1350b57cec5SDimitry Andric // moveHeaderPhiOperandsToForeBlocks. 1360b57cec5SDimitry Andric template <typename T> 1370b57cec5SDimitry Andric static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch, 1380b57cec5SDimitry Andric BasicBlockSet &AftBlocks, T Visit) { 139fe6060f1SDimitry Andric SmallPtrSet<Instruction *, 8> VisitedInstr; 1400b57cec5SDimitry Andric 141bdd1243dSDimitry Andric std::function<bool(Instruction * I)> ProcessInstr = [&](Instruction *I) { 142bdd1243dSDimitry Andric if (VisitedInstr.count(I)) 143bdd1243dSDimitry Andric return true; 144bdd1243dSDimitry Andric 145fe6060f1SDimitry Andric VisitedInstr.insert(I); 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric if (AftBlocks.count(I->getParent())) 1480b57cec5SDimitry Andric for (auto &U : I->operands()) 1490b57cec5SDimitry Andric if (Instruction *II = dyn_cast<Instruction>(U)) 150bdd1243dSDimitry Andric if (!ProcessInstr(II)) 151bdd1243dSDimitry Andric return false; 152bdd1243dSDimitry Andric 153bdd1243dSDimitry Andric return Visit(I); 154bdd1243dSDimitry Andric }; 155bdd1243dSDimitry Andric 156bdd1243dSDimitry Andric for (auto &Phi : Header->phis()) { 157bdd1243dSDimitry Andric Value *V = Phi.getIncomingValueForBlock(Latch); 158bdd1243dSDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V)) 159bdd1243dSDimitry Andric if (!ProcessInstr(I)) 160bdd1243dSDimitry Andric return false; 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric return true; 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric // Move the phi operands of Header from Latch out of AftBlocks to InsertLoc. 1670b57cec5SDimitry Andric static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header, 1680b57cec5SDimitry Andric BasicBlock *Latch, 1690b57cec5SDimitry Andric Instruction *InsertLoc, 1700b57cec5SDimitry Andric BasicBlockSet &AftBlocks) { 1710b57cec5SDimitry Andric // We need to ensure we move the instructions in the correct order, 1720b57cec5SDimitry Andric // starting with the earliest required instruction and moving forward. 1730b57cec5SDimitry Andric processHeaderPhiOperands(Header, Latch, AftBlocks, 174bdd1243dSDimitry Andric [&AftBlocks, &InsertLoc](Instruction *I) { 1750b57cec5SDimitry Andric if (AftBlocks.count(I->getParent())) 176bdd1243dSDimitry Andric I->moveBefore(InsertLoc); 1770b57cec5SDimitry Andric return true; 1780b57cec5SDimitry Andric }); 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric /* 1820b57cec5SDimitry Andric This method performs Unroll and Jam. For a simple loop like: 1830b57cec5SDimitry Andric for (i = ..) 1840b57cec5SDimitry Andric Fore(i) 1850b57cec5SDimitry Andric for (j = ..) 1860b57cec5SDimitry Andric SubLoop(i, j) 1870b57cec5SDimitry Andric Aft(i) 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric Instead of doing normal inner or outer unrolling, we do: 1900b57cec5SDimitry Andric for (i = .., i+=2) 1910b57cec5SDimitry Andric Fore(i) 1920b57cec5SDimitry Andric Fore(i+1) 1930b57cec5SDimitry Andric for (j = ..) 1940b57cec5SDimitry Andric SubLoop(i, j) 1950b57cec5SDimitry Andric SubLoop(i+1, j) 1960b57cec5SDimitry Andric Aft(i) 1970b57cec5SDimitry Andric Aft(i+1) 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric So the outer loop is essetially unrolled and then the inner loops are fused 2000b57cec5SDimitry Andric ("jammed") together into a single loop. This can increase speed when there 2010b57cec5SDimitry Andric are loads in SubLoop that are invariant to i, as they become shared between 2020b57cec5SDimitry Andric the now jammed inner loops. 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric We do this by spliting the blocks in the loop into Fore, Subloop and Aft. 2050b57cec5SDimitry Andric Fore blocks are those before the inner loop, Aft are those after. Normal 2060b57cec5SDimitry Andric Unroll code is used to copy each of these sets of blocks and the results are 2070b57cec5SDimitry Andric combined together into the final form above. 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric isSafeToUnrollAndJam should be used prior to calling this to make sure the 2100b57cec5SDimitry Andric unrolling will be valid. Checking profitablility is also advisable. 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric If EpilogueLoop is non-null, it receives the epilogue loop (if it was 2130b57cec5SDimitry Andric necessary to create one and not fully unrolled). 2140b57cec5SDimitry Andric */ 2155ffd83dbSDimitry Andric LoopUnrollResult 2165ffd83dbSDimitry Andric llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, 2175ffd83dbSDimitry Andric unsigned TripMultiple, bool UnrollRemainder, 2185ffd83dbSDimitry Andric LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 2195ffd83dbSDimitry Andric AssumptionCache *AC, const TargetTransformInfo *TTI, 2205ffd83dbSDimitry Andric OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) { 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric // When we enter here we should have already checked that it is safe 2230b57cec5SDimitry Andric BasicBlock *Header = L->getHeader(); 224480093f4SDimitry Andric assert(Header && "No header."); 2250b57cec5SDimitry Andric assert(L->getSubLoops().size() == 1); 2260b57cec5SDimitry Andric Loop *SubLoop = *L->begin(); 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric // Don't enter the unroll code if there is nothing to do. 2290b57cec5SDimitry Andric if (TripCount == 0 && Count < 2) { 2300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n"); 2310b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric assert(Count > 0); 2350b57cec5SDimitry Andric assert(TripMultiple > 0); 2360b57cec5SDimitry Andric assert(TripCount == 0 || TripCount % TripMultiple == 0); 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric // Are we eliminating the loop control altogether? 2390b57cec5SDimitry Andric bool CompletelyUnroll = (Count == TripCount); 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric // We use the runtime remainder in cases where we don't know trip multiple 242fe6060f1SDimitry Andric if (TripMultiple % Count != 0) { 2430b57cec5SDimitry Andric if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, 2440b57cec5SDimitry Andric /*UseEpilogRemainder*/ true, 2450b57cec5SDimitry Andric UnrollRemainder, /*ForgetAllSCEV*/ false, 2465ffd83dbSDimitry Andric LI, SE, DT, AC, TTI, true, EpilogueLoop)) { 2470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " 2480b57cec5SDimitry Andric "generated when assuming runtime trip count\n"); 2490b57cec5SDimitry Andric return LoopUnrollResult::Unmodified; 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric // Notify ScalarEvolution that the loop will be substantially changed, 2540b57cec5SDimitry Andric // if not outright eliminated. 2550b57cec5SDimitry Andric if (SE) { 2560b57cec5SDimitry Andric SE->forgetLoop(L); 257bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 2580b57cec5SDimitry Andric } 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric using namespace ore; 2610b57cec5SDimitry Andric // Report the unrolling decision. 2620b57cec5SDimitry Andric if (CompletelyUnroll) { 2630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %" 2640b57cec5SDimitry Andric << Header->getName() << " with trip count " << TripCount 2650b57cec5SDimitry Andric << "!\n"); 2660b57cec5SDimitry Andric ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), 2670b57cec5SDimitry Andric L->getHeader()) 2680b57cec5SDimitry Andric << "completely unroll and jammed loop with " 2690b57cec5SDimitry Andric << NV("UnrollCount", TripCount) << " iterations"); 2700b57cec5SDimitry Andric } else { 2710b57cec5SDimitry Andric auto DiagBuilder = [&]() { 2720b57cec5SDimitry Andric OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), 2730b57cec5SDimitry Andric L->getHeader()); 2740b57cec5SDimitry Andric return Diag << "unroll and jammed loop by a factor of " 2750b57cec5SDimitry Andric << NV("UnrollCount", Count); 2760b57cec5SDimitry Andric }; 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName() 2790b57cec5SDimitry Andric << " by " << Count); 2800b57cec5SDimitry Andric if (TripMultiple != 1) { 2810b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); 2820b57cec5SDimitry Andric ORE->emit([&]() { 2830b57cec5SDimitry Andric return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) 2840b57cec5SDimitry Andric << " trips per branch"; 2850b57cec5SDimitry Andric }); 2860b57cec5SDimitry Andric } else { 2870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " with run-time trip count"); 2880b57cec5SDimitry Andric ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; }); 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "!\n"); 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader(); 2940b57cec5SDimitry Andric BasicBlock *LatchBlock = L->getLoopLatch(); 295480093f4SDimitry Andric assert(Preheader && "No preheader"); 296480093f4SDimitry Andric assert(LatchBlock && "No latch block"); 2970b57cec5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); 2980b57cec5SDimitry Andric assert(BI && !BI->isUnconditional()); 2990b57cec5SDimitry Andric bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); 3000b57cec5SDimitry Andric BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); 3010b57cec5SDimitry Andric bool SubLoopContinueOnTrue = SubLoop->contains( 3020b57cec5SDimitry Andric SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0)); 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric // Partition blocks in an outer/inner loop pair into blocks before and after 3050b57cec5SDimitry Andric // the loop 3060b57cec5SDimitry Andric BasicBlockSet SubLoopBlocks; 3070b57cec5SDimitry Andric BasicBlockSet ForeBlocks; 3080b57cec5SDimitry Andric BasicBlockSet AftBlocks; 3090b57cec5SDimitry Andric partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks, 3100b57cec5SDimitry Andric DT); 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric // We keep track of the entering/first and exiting/last block of each of 3130b57cec5SDimitry Andric // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of 3140b57cec5SDimitry Andric // blocks easier. 3150b57cec5SDimitry Andric std::vector<BasicBlock *> ForeBlocksFirst; 3160b57cec5SDimitry Andric std::vector<BasicBlock *> ForeBlocksLast; 3170b57cec5SDimitry Andric std::vector<BasicBlock *> SubLoopBlocksFirst; 3180b57cec5SDimitry Andric std::vector<BasicBlock *> SubLoopBlocksLast; 3190b57cec5SDimitry Andric std::vector<BasicBlock *> AftBlocksFirst; 3200b57cec5SDimitry Andric std::vector<BasicBlock *> AftBlocksLast; 3210b57cec5SDimitry Andric ForeBlocksFirst.push_back(Header); 3220b57cec5SDimitry Andric ForeBlocksLast.push_back(SubLoop->getLoopPreheader()); 3230b57cec5SDimitry Andric SubLoopBlocksFirst.push_back(SubLoop->getHeader()); 3240b57cec5SDimitry Andric SubLoopBlocksLast.push_back(SubLoop->getExitingBlock()); 3250b57cec5SDimitry Andric AftBlocksFirst.push_back(SubLoop->getExitBlock()); 3260b57cec5SDimitry Andric AftBlocksLast.push_back(L->getExitingBlock()); 3270b57cec5SDimitry Andric // Maps Blocks[0] -> Blocks[It] 3280b57cec5SDimitry Andric ValueToValueMapTy LastValueMap; 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric // Move any instructions from fore phi operands from AftBlocks into Fore. 3310b57cec5SDimitry Andric moveHeaderPhiOperandsToForeBlocks( 3325ffd83dbSDimitry Andric Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks); 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric // The current on-the-fly SSA update requires blocks to be processed in 3350b57cec5SDimitry Andric // reverse postorder so that LastValueMap contains the correct value at each 3360b57cec5SDimitry Andric // exit. 3370b57cec5SDimitry Andric LoopBlocksDFS DFS(L); 3380b57cec5SDimitry Andric DFS.perform(LI); 3390b57cec5SDimitry Andric // Stash the DFS iterators before adding blocks to the loop. 3400b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); 3410b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); 3420b57cec5SDimitry Andric 343fe6060f1SDimitry Andric // When a FSDiscriminator is enabled, we don't need to add the multiply 344fe6060f1SDimitry Andric // factors to the discriminators. 345bdd1243dSDimitry Andric if (Header->getParent()->shouldEmitDebugInfoForProfiling() && 346bdd1243dSDimitry Andric !EnableFSDiscriminator) 3470b57cec5SDimitry Andric for (BasicBlock *BB : L->getBlocks()) 3480b57cec5SDimitry Andric for (Instruction &I : *BB) 34906c3fb27SDimitry Andric if (!I.isDebugOrPseudoInst()) 3500b57cec5SDimitry Andric if (const DILocation *DIL = I.getDebugLoc()) { 3510b57cec5SDimitry Andric auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count); 3520b57cec5SDimitry Andric if (NewDIL) 35381ad6265SDimitry Andric I.setDebugLoc(*NewDIL); 3540b57cec5SDimitry Andric else 3550b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 3560b57cec5SDimitry Andric << "Failed to create new discriminator: " 3570b57cec5SDimitry Andric << DIL->getFilename() << " Line: " << DIL->getLine()); 3580b57cec5SDimitry Andric } 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric // Copy all blocks 3610b57cec5SDimitry Andric for (unsigned It = 1; It != Count; ++It) { 3625ffd83dbSDimitry Andric SmallVector<BasicBlock *, 8> NewBlocks; 3630b57cec5SDimitry Andric // Maps Blocks[It] -> Blocks[It-1] 3640b57cec5SDimitry Andric DenseMap<Value *, Value *> PrevItValueMap; 3655ffd83dbSDimitry Andric SmallDenseMap<const Loop *, Loop *, 4> NewLoops; 3665ffd83dbSDimitry Andric NewLoops[L] = L; 3675ffd83dbSDimitry Andric NewLoops[SubLoop] = SubLoop; 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { 3700b57cec5SDimitry Andric ValueToValueMapTy VMap; 3710b57cec5SDimitry Andric BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); 372bdd1243dSDimitry Andric Header->getParent()->insert(Header->getParent()->end(), New); 3730b57cec5SDimitry Andric 3745ffd83dbSDimitry Andric // Tell LI about New. 3755ffd83dbSDimitry Andric addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); 3760b57cec5SDimitry Andric 3775ffd83dbSDimitry Andric if (ForeBlocks.count(*BB)) { 3780b57cec5SDimitry Andric if (*BB == ForeBlocksFirst[0]) 3790b57cec5SDimitry Andric ForeBlocksFirst.push_back(New); 3800b57cec5SDimitry Andric if (*BB == ForeBlocksLast[0]) 3810b57cec5SDimitry Andric ForeBlocksLast.push_back(New); 3820b57cec5SDimitry Andric } else if (SubLoopBlocks.count(*BB)) { 3830b57cec5SDimitry Andric if (*BB == SubLoopBlocksFirst[0]) 3840b57cec5SDimitry Andric SubLoopBlocksFirst.push_back(New); 3850b57cec5SDimitry Andric if (*BB == SubLoopBlocksLast[0]) 3860b57cec5SDimitry Andric SubLoopBlocksLast.push_back(New); 3870b57cec5SDimitry Andric } else if (AftBlocks.count(*BB)) { 3880b57cec5SDimitry Andric if (*BB == AftBlocksFirst[0]) 3890b57cec5SDimitry Andric AftBlocksFirst.push_back(New); 3900b57cec5SDimitry Andric if (*BB == AftBlocksLast[0]) 3910b57cec5SDimitry Andric AftBlocksLast.push_back(New); 3920b57cec5SDimitry Andric } else { 3930b57cec5SDimitry Andric llvm_unreachable("BB being cloned should be in Fore/Sub/Aft"); 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric // Update our running maps of newest clones 3970b57cec5SDimitry Andric PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]); 3980b57cec5SDimitry Andric LastValueMap[*BB] = New; 3990b57cec5SDimitry Andric for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); 4000b57cec5SDimitry Andric VI != VE; ++VI) { 4010b57cec5SDimitry Andric PrevItValueMap[VI->second] = 4020b57cec5SDimitry Andric const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]); 4030b57cec5SDimitry Andric LastValueMap[VI->first] = VI->second; 4040b57cec5SDimitry Andric } 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric NewBlocks.push_back(New); 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric // Update DomTree: 4090b57cec5SDimitry Andric if (*BB == ForeBlocksFirst[0]) 4100b57cec5SDimitry Andric DT->addNewBlock(New, ForeBlocksLast[It - 1]); 4110b57cec5SDimitry Andric else if (*BB == SubLoopBlocksFirst[0]) 4120b57cec5SDimitry Andric DT->addNewBlock(New, SubLoopBlocksLast[It - 1]); 4130b57cec5SDimitry Andric else if (*BB == AftBlocksFirst[0]) 4140b57cec5SDimitry Andric DT->addNewBlock(New, AftBlocksLast[It - 1]); 4150b57cec5SDimitry Andric else { 4160b57cec5SDimitry Andric // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree 4170b57cec5SDimitry Andric // structure. 4180b57cec5SDimitry Andric auto BBDomNode = DT->getNode(*BB); 4190b57cec5SDimitry Andric auto BBIDom = BBDomNode->getIDom(); 4200b57cec5SDimitry Andric BasicBlock *OriginalBBIDom = BBIDom->getBlock(); 4210b57cec5SDimitry Andric assert(OriginalBBIDom); 4220b57cec5SDimitry Andric assert(LastValueMap[cast<Value>(OriginalBBIDom)]); 4230b57cec5SDimitry Andric DT->addNewBlock( 4240b57cec5SDimitry Andric New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric // Remap all instructions in the most recent iteration 4295ffd83dbSDimitry Andric remapInstructionsInBlocks(NewBlocks, LastValueMap); 4300b57cec5SDimitry Andric for (BasicBlock *NewBlock : NewBlocks) { 4310b57cec5SDimitry Andric for (Instruction &I : *NewBlock) { 432fe6060f1SDimitry Andric if (auto *II = dyn_cast<AssumeInst>(&I)) 4330b57cec5SDimitry Andric AC->registerAssumption(II); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric // Alter the ForeBlocks phi's, pointing them at the latest version of the 4380b57cec5SDimitry Andric // value from the previous iteration's phis 4390b57cec5SDimitry Andric for (PHINode &Phi : ForeBlocksFirst[It]->phis()) { 4400b57cec5SDimitry Andric Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]); 4410b57cec5SDimitry Andric assert(OldValue && "should have incoming edge from Aft[It]"); 4420b57cec5SDimitry Andric Value *NewValue = OldValue; 4430b57cec5SDimitry Andric if (Value *PrevValue = PrevItValueMap[OldValue]) 4440b57cec5SDimitry Andric NewValue = PrevValue; 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric assert(Phi.getNumOperands() == 2); 4470b57cec5SDimitry Andric Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]); 4480b57cec5SDimitry Andric Phi.setIncomingValue(0, NewValue); 4490b57cec5SDimitry Andric Phi.removeIncomingValue(1); 4500b57cec5SDimitry Andric } 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric // Now that all the basic blocks for the unrolled iterations are in place, 4540b57cec5SDimitry Andric // finish up connecting the blocks and phi nodes. At this point LastValueMap 4550b57cec5SDimitry Andric // is the last unrolled iterations values. 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric // Update Phis in BB from OldBB to point to NewBB and use the latest value 4580b57cec5SDimitry Andric // from LastValueMap 4590b57cec5SDimitry Andric auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB, 4600b57cec5SDimitry Andric BasicBlock *NewBB, 4610b57cec5SDimitry Andric ValueToValueMapTy &LastValueMap) { 4620b57cec5SDimitry Andric for (PHINode &Phi : BB->phis()) { 4630b57cec5SDimitry Andric for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) { 4640b57cec5SDimitry Andric if (Phi.getIncomingBlock(b) == OldBB) { 4650b57cec5SDimitry Andric Value *OldValue = Phi.getIncomingValue(b); 4660b57cec5SDimitry Andric if (Value *LastValue = LastValueMap[OldValue]) 4670b57cec5SDimitry Andric Phi.setIncomingValue(b, LastValue); 4680b57cec5SDimitry Andric Phi.setIncomingBlock(b, NewBB); 4690b57cec5SDimitry Andric break; 4700b57cec5SDimitry Andric } 4710b57cec5SDimitry Andric } 4720b57cec5SDimitry Andric } 4730b57cec5SDimitry Andric }; 4740b57cec5SDimitry Andric // Move all the phis from Src into Dest 4750b57cec5SDimitry Andric auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) { 476*0fca6ea1SDimitry Andric BasicBlock::iterator insertPoint = Dest->getFirstNonPHIIt(); 4770b57cec5SDimitry Andric while (PHINode *Phi = dyn_cast<PHINode>(Src->begin())) 478*0fca6ea1SDimitry Andric Phi->moveBefore(*Dest, insertPoint); 4790b57cec5SDimitry Andric }; 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric // Update the PHI values outside the loop to point to the last block 4820b57cec5SDimitry Andric updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(), 4830b57cec5SDimitry Andric LastValueMap); 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric // Update ForeBlocks successors and phi nodes 4860b57cec5SDimitry Andric BranchInst *ForeTerm = 4870b57cec5SDimitry Andric cast<BranchInst>(ForeBlocksLast.back()->getTerminator()); 4885ffd83dbSDimitry Andric assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor"); 4895ffd83dbSDimitry Andric ForeTerm->setSuccessor(0, SubLoopBlocksFirst[0]); 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric if (CompletelyUnroll) { 4920b57cec5SDimitry Andric while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) { 4930b57cec5SDimitry Andric Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader)); 494bdd1243dSDimitry Andric Phi->eraseFromParent(); 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric } else { 4970b57cec5SDimitry Andric // Update the PHI values to point to the last aft block 4980b57cec5SDimitry Andric updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0], 4990b57cec5SDimitry Andric AftBlocksLast.back(), LastValueMap); 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric for (unsigned It = 1; It != Count; It++) { 5030b57cec5SDimitry Andric // Remap ForeBlock successors from previous iteration to this 5040b57cec5SDimitry Andric BranchInst *ForeTerm = 5050b57cec5SDimitry Andric cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator()); 5065ffd83dbSDimitry Andric assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor"); 5075ffd83dbSDimitry Andric ForeTerm->setSuccessor(0, ForeBlocksFirst[It]); 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric 5100b57cec5SDimitry Andric // Subloop successors and phis 5110b57cec5SDimitry Andric BranchInst *SubTerm = 5120b57cec5SDimitry Andric cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator()); 5130b57cec5SDimitry Andric SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]); 5140b57cec5SDimitry Andric SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]); 515e8d8bef9SDimitry Andric SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0], 5160b57cec5SDimitry Andric ForeBlocksLast.back()); 517e8d8bef9SDimitry Andric SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], 5180b57cec5SDimitry Andric SubLoopBlocksLast.back()); 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric for (unsigned It = 1; It != Count; It++) { 5210b57cec5SDimitry Andric // Replace the conditional branch of the previous iteration subloop with an 5220b57cec5SDimitry Andric // unconditional one to this one 5230b57cec5SDimitry Andric BranchInst *SubTerm = 5240b57cec5SDimitry Andric cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator()); 525*0fca6ea1SDimitry Andric BranchInst::Create(SubLoopBlocksFirst[It], SubTerm->getIterator()); 5260b57cec5SDimitry Andric SubTerm->eraseFromParent(); 5270b57cec5SDimitry Andric 528e8d8bef9SDimitry Andric SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It], 5290b57cec5SDimitry Andric ForeBlocksLast.back()); 530e8d8bef9SDimitry Andric SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], 5310b57cec5SDimitry Andric SubLoopBlocksLast.back()); 5320b57cec5SDimitry Andric movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]); 5330b57cec5SDimitry Andric } 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric // Aft blocks successors and phis 5365ffd83dbSDimitry Andric BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator()); 5370b57cec5SDimitry Andric if (CompletelyUnroll) { 538*0fca6ea1SDimitry Andric BranchInst::Create(LoopExit, AftTerm->getIterator()); 5395ffd83dbSDimitry Andric AftTerm->eraseFromParent(); 5400b57cec5SDimitry Andric } else { 5415ffd83dbSDimitry Andric AftTerm->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]); 5425ffd83dbSDimitry Andric assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit && 5435ffd83dbSDimitry Andric "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit"); 5440b57cec5SDimitry Andric } 545e8d8bef9SDimitry Andric AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0], 5460b57cec5SDimitry Andric SubLoopBlocksLast.back()); 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric for (unsigned It = 1; It != Count; It++) { 5490b57cec5SDimitry Andric // Replace the conditional branch of the previous iteration subloop with an 5500b57cec5SDimitry Andric // unconditional one to this one 5510b57cec5SDimitry Andric BranchInst *AftTerm = 5520b57cec5SDimitry Andric cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator()); 553*0fca6ea1SDimitry Andric BranchInst::Create(AftBlocksFirst[It], AftTerm->getIterator()); 5540b57cec5SDimitry Andric AftTerm->eraseFromParent(); 5550b57cec5SDimitry Andric 556e8d8bef9SDimitry Andric AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It], 5570b57cec5SDimitry Andric SubLoopBlocksLast.back()); 5580b57cec5SDimitry Andric movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]); 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric 5618bcb0991SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); 5620b57cec5SDimitry Andric // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the 5630b57cec5SDimitry Andric // new ones required. 5640b57cec5SDimitry Andric if (Count != 1) { 5650b57cec5SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates; 5660b57cec5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0], 5670b57cec5SDimitry Andric SubLoopBlocksFirst[0]); 5680b57cec5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, 5690b57cec5SDimitry Andric SubLoopBlocksLast[0], AftBlocksFirst[0]); 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, 5720b57cec5SDimitry Andric ForeBlocksLast.back(), SubLoopBlocksFirst[0]); 5730b57cec5SDimitry Andric DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert, 5740b57cec5SDimitry Andric SubLoopBlocksLast.back(), AftBlocksFirst[0]); 5758bcb0991SDimitry Andric DTU.applyUpdatesPermissive(DTUpdates); 5760b57cec5SDimitry Andric } 5770b57cec5SDimitry Andric 5780b57cec5SDimitry Andric // Merge adjacent basic blocks, if possible. 5790b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 16> MergeBlocks; 5800b57cec5SDimitry Andric MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); 5810b57cec5SDimitry Andric MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); 5820b57cec5SDimitry Andric MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); 5835ffd83dbSDimitry Andric 5845ffd83dbSDimitry Andric MergeBlockSuccessorsIntoGivenBlocks(MergeBlocks, L, &DTU, LI); 5855ffd83dbSDimitry Andric 5868bcb0991SDimitry Andric // Apply updates to the DomTree. 5878bcb0991SDimitry Andric DT = &DTU.getDomTree(); 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric // At this point, the code is well formed. We now do a quick sweep over the 5900b57cec5SDimitry Andric // inserted code, doing constant propagation and dead code elimination as we 5910b57cec5SDimitry Andric // go. 5925ffd83dbSDimitry Andric simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI); 5935ffd83dbSDimitry Andric simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC, 5945ffd83dbSDimitry Andric TTI); 5950b57cec5SDimitry Andric 5960b57cec5SDimitry Andric NumCompletelyUnrolledAndJammed += CompletelyUnroll; 5970b57cec5SDimitry Andric ++NumUnrolledAndJammed; 5980b57cec5SDimitry Andric 5995ffd83dbSDimitry Andric // Update LoopInfo if the loop is completely removed. 6005ffd83dbSDimitry Andric if (CompletelyUnroll) 6015ffd83dbSDimitry Andric LI->erase(L); 6025ffd83dbSDimitry Andric 6030b57cec5SDimitry Andric #ifndef NDEBUG 6040b57cec5SDimitry Andric // We shouldn't have done anything to break loop simplify form or LCSSA. 6055ffd83dbSDimitry Andric Loop *OutestLoop = SubLoop->getParentLoop() 6065ffd83dbSDimitry Andric ? SubLoop->getParentLoop()->getParentLoop() 6075ffd83dbSDimitry Andric ? SubLoop->getParentLoop()->getParentLoop() 6085ffd83dbSDimitry Andric : SubLoop->getParentLoop() 6095ffd83dbSDimitry Andric : SubLoop; 6105ffd83dbSDimitry Andric assert(DT->verify()); 6115ffd83dbSDimitry Andric LI->verify(*DT); 6120b57cec5SDimitry Andric assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI)); 6130b57cec5SDimitry Andric if (!CompletelyUnroll) 6140b57cec5SDimitry Andric assert(L->isLoopSimplifyForm()); 6150b57cec5SDimitry Andric assert(SubLoop->isLoopSimplifyForm()); 6165ffd83dbSDimitry Andric SE->verify(); 6170b57cec5SDimitry Andric #endif 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled 6200b57cec5SDimitry Andric : LoopUnrollResult::PartiallyUnrolled; 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric 6230b57cec5SDimitry Andric static bool getLoadsAndStores(BasicBlockSet &Blocks, 6245ffd83dbSDimitry Andric SmallVector<Instruction *, 4> &MemInstr) { 6250b57cec5SDimitry Andric // Scan the BBs and collect legal loads and stores. 6260b57cec5SDimitry Andric // Returns false if non-simple loads/stores are found. 6270b57cec5SDimitry Andric for (BasicBlock *BB : Blocks) { 6280b57cec5SDimitry Andric for (Instruction &I : *BB) { 6290b57cec5SDimitry Andric if (auto *Ld = dyn_cast<LoadInst>(&I)) { 6300b57cec5SDimitry Andric if (!Ld->isSimple()) 6310b57cec5SDimitry Andric return false; 6320b57cec5SDimitry Andric MemInstr.push_back(&I); 6330b57cec5SDimitry Andric } else if (auto *St = dyn_cast<StoreInst>(&I)) { 6340b57cec5SDimitry Andric if (!St->isSimple()) 6350b57cec5SDimitry Andric return false; 6360b57cec5SDimitry Andric MemInstr.push_back(&I); 6370b57cec5SDimitry Andric } else if (I.mayReadOrWriteMemory()) { 6380b57cec5SDimitry Andric return false; 6390b57cec5SDimitry Andric } 6400b57cec5SDimitry Andric } 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric return true; 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric 6455ffd83dbSDimitry Andric static bool preservesForwardDependence(Instruction *Src, Instruction *Dst, 6465ffd83dbSDimitry Andric unsigned UnrollLevel, unsigned JamLevel, 6475ffd83dbSDimitry Andric bool Sequentialized, Dependence *D) { 6485ffd83dbSDimitry Andric // UnrollLevel might carry the dependency Src --> Dst 6495ffd83dbSDimitry Andric // Does a different loop after unrolling? 6505ffd83dbSDimitry Andric for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel; 6515ffd83dbSDimitry Andric ++CurLoopDepth) { 6525ffd83dbSDimitry Andric auto JammedDir = D->getDirection(CurLoopDepth); 6535ffd83dbSDimitry Andric if (JammedDir == Dependence::DVEntry::LT) 6545ffd83dbSDimitry Andric return true; 6555ffd83dbSDimitry Andric 6565ffd83dbSDimitry Andric if (JammedDir & Dependence::DVEntry::GT) 6575ffd83dbSDimitry Andric return false; 6585ffd83dbSDimitry Andric } 6595ffd83dbSDimitry Andric 6605ffd83dbSDimitry Andric return true; 6615ffd83dbSDimitry Andric } 6625ffd83dbSDimitry Andric 6635ffd83dbSDimitry Andric static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst, 6645ffd83dbSDimitry Andric unsigned UnrollLevel, unsigned JamLevel, 6655ffd83dbSDimitry Andric bool Sequentialized, Dependence *D) { 6665ffd83dbSDimitry Andric // UnrollLevel might carry the dependency Dst --> Src 6675ffd83dbSDimitry Andric for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel; 6685ffd83dbSDimitry Andric ++CurLoopDepth) { 6695ffd83dbSDimitry Andric auto JammedDir = D->getDirection(CurLoopDepth); 6705ffd83dbSDimitry Andric if (JammedDir == Dependence::DVEntry::GT) 6715ffd83dbSDimitry Andric return true; 6725ffd83dbSDimitry Andric 6735ffd83dbSDimitry Andric if (JammedDir & Dependence::DVEntry::LT) 6745ffd83dbSDimitry Andric return false; 6755ffd83dbSDimitry Andric } 6765ffd83dbSDimitry Andric 6775ffd83dbSDimitry Andric // Backward dependencies are only preserved if not interleaved. 6785ffd83dbSDimitry Andric return Sequentialized; 6795ffd83dbSDimitry Andric } 6805ffd83dbSDimitry Andric 6815ffd83dbSDimitry Andric // Check whether it is semantically safe Src and Dst considering any potential 6825ffd83dbSDimitry Andric // dependency between them. 6835ffd83dbSDimitry Andric // 6845ffd83dbSDimitry Andric // @param UnrollLevel The level of the loop being unrolled 6855ffd83dbSDimitry Andric // @param JamLevel The level of the loop being jammed; if Src and Dst are on 6865ffd83dbSDimitry Andric // different levels, the outermost common loop counts as jammed level 6875ffd83dbSDimitry Andric // 6885ffd83dbSDimitry Andric // @return true if is safe and false if there is a dependency violation. 6895ffd83dbSDimitry Andric static bool checkDependency(Instruction *Src, Instruction *Dst, 6905ffd83dbSDimitry Andric unsigned UnrollLevel, unsigned JamLevel, 6915ffd83dbSDimitry Andric bool Sequentialized, DependenceInfo &DI) { 6925ffd83dbSDimitry Andric assert(UnrollLevel <= JamLevel && 6935ffd83dbSDimitry Andric "Expecting JamLevel to be at least UnrollLevel"); 6945ffd83dbSDimitry Andric 6950b57cec5SDimitry Andric if (Src == Dst) 6965ffd83dbSDimitry Andric return true; 6970b57cec5SDimitry Andric // Ignore Input dependencies. 6980b57cec5SDimitry Andric if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) 6995ffd83dbSDimitry Andric return true; 7000b57cec5SDimitry Andric 7015ffd83dbSDimitry Andric // Check whether unroll-and-jam may violate a dependency. 7025ffd83dbSDimitry Andric // By construction, every dependency will be lexicographically non-negative 7035ffd83dbSDimitry Andric // (if it was, it would violate the current execution order), such as 7045ffd83dbSDimitry Andric // (0,0,>,*,*) 7055ffd83dbSDimitry Andric // Unroll-and-jam changes the GT execution of two executions to the same 7065ffd83dbSDimitry Andric // iteration of the chosen unroll level. That is, a GT dependence becomes a GE 7075ffd83dbSDimitry Andric // dependence (or EQ, if we fully unrolled the loop) at the loop's position: 7085ffd83dbSDimitry Andric // (0,0,>=,*,*) 7095ffd83dbSDimitry Andric // Now, the dependency is not necessarily non-negative anymore, i.e. 7105ffd83dbSDimitry Andric // unroll-and-jam may violate correctness. 7115ffd83dbSDimitry Andric std::unique_ptr<Dependence> D = DI.depends(Src, Dst, true); 7125ffd83dbSDimitry Andric if (!D) 7135ffd83dbSDimitry Andric return true; 7140b57cec5SDimitry Andric assert(D->isOrdered() && "Expected an output, flow or anti dep."); 7150b57cec5SDimitry Andric 7160b57cec5SDimitry Andric if (D->isConfused()) { 7170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Confused dependency between:\n" 7180b57cec5SDimitry Andric << " " << *Src << "\n" 7190b57cec5SDimitry Andric << " " << *Dst << "\n"); 7200b57cec5SDimitry Andric return false; 7210b57cec5SDimitry Andric } 7225ffd83dbSDimitry Andric 7235ffd83dbSDimitry Andric // If outer levels (levels enclosing the loop being unroll-and-jammed) have a 7245ffd83dbSDimitry Andric // non-equal direction, then the locations accessed in the inner levels cannot 7255ffd83dbSDimitry Andric // overlap in memory. We assumes the indexes never overlap into neighboring 7265ffd83dbSDimitry Andric // dimensions. 7275ffd83dbSDimitry Andric for (unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth) 7285ffd83dbSDimitry Andric if (!(D->getDirection(CurLoopDepth) & Dependence::DVEntry::EQ)) 7295ffd83dbSDimitry Andric return true; 7305ffd83dbSDimitry Andric 7315ffd83dbSDimitry Andric auto UnrollDirection = D->getDirection(UnrollLevel); 7325ffd83dbSDimitry Andric 7335ffd83dbSDimitry Andric // If the distance carried by the unrolled loop is 0, then after unrolling 7345ffd83dbSDimitry Andric // that distance will become non-zero resulting in non-overlapping accesses in 7355ffd83dbSDimitry Andric // the inner loops. 7365ffd83dbSDimitry Andric if (UnrollDirection == Dependence::DVEntry::EQ) 7375ffd83dbSDimitry Andric return true; 7385ffd83dbSDimitry Andric 7395ffd83dbSDimitry Andric if (UnrollDirection & Dependence::DVEntry::LT && 7405ffd83dbSDimitry Andric !preservesForwardDependence(Src, Dst, UnrollLevel, JamLevel, 7415ffd83dbSDimitry Andric Sequentialized, D.get())) 7420b57cec5SDimitry Andric return false; 7435ffd83dbSDimitry Andric 7445ffd83dbSDimitry Andric if (UnrollDirection & Dependence::DVEntry::GT && 7455ffd83dbSDimitry Andric !preservesBackwardDependence(Src, Dst, UnrollLevel, JamLevel, 7465ffd83dbSDimitry Andric Sequentialized, D.get())) 7475ffd83dbSDimitry Andric return false; 7485ffd83dbSDimitry Andric 7495ffd83dbSDimitry Andric return true; 7500b57cec5SDimitry Andric } 7515ffd83dbSDimitry Andric 7525ffd83dbSDimitry Andric static bool 7535ffd83dbSDimitry Andric checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks, 7545ffd83dbSDimitry Andric const DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap, 7555ffd83dbSDimitry Andric const DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, 7565ffd83dbSDimitry Andric DependenceInfo &DI, LoopInfo &LI) { 7575ffd83dbSDimitry Andric SmallVector<BasicBlockSet, 8> AllBlocks; 7585ffd83dbSDimitry Andric for (Loop *L : Root.getLoopsInPreorder()) 75906c3fb27SDimitry Andric if (ForeBlocksMap.contains(L)) 7605ffd83dbSDimitry Andric AllBlocks.push_back(ForeBlocksMap.lookup(L)); 7615ffd83dbSDimitry Andric AllBlocks.push_back(SubLoopBlocks); 7625ffd83dbSDimitry Andric for (Loop *L : Root.getLoopsInPreorder()) 76306c3fb27SDimitry Andric if (AftBlocksMap.contains(L)) 7645ffd83dbSDimitry Andric AllBlocks.push_back(AftBlocksMap.lookup(L)); 7655ffd83dbSDimitry Andric 7665ffd83dbSDimitry Andric unsigned LoopDepth = Root.getLoopDepth(); 7675ffd83dbSDimitry Andric SmallVector<Instruction *, 4> EarlierLoadsAndStores; 7685ffd83dbSDimitry Andric SmallVector<Instruction *, 4> CurrentLoadsAndStores; 7695ffd83dbSDimitry Andric for (BasicBlockSet &Blocks : AllBlocks) { 7705ffd83dbSDimitry Andric CurrentLoadsAndStores.clear(); 7715ffd83dbSDimitry Andric if (!getLoadsAndStores(Blocks, CurrentLoadsAndStores)) 7725ffd83dbSDimitry Andric return false; 7735ffd83dbSDimitry Andric 7745ffd83dbSDimitry Andric Loop *CurLoop = LI.getLoopFor((*Blocks.begin())->front().getParent()); 7755ffd83dbSDimitry Andric unsigned CurLoopDepth = CurLoop->getLoopDepth(); 7765ffd83dbSDimitry Andric 7775ffd83dbSDimitry Andric for (auto *Earlier : EarlierLoadsAndStores) { 7785ffd83dbSDimitry Andric Loop *EarlierLoop = LI.getLoopFor(Earlier->getParent()); 7795ffd83dbSDimitry Andric unsigned EarlierDepth = EarlierLoop->getLoopDepth(); 7805ffd83dbSDimitry Andric unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth); 7815ffd83dbSDimitry Andric for (auto *Later : CurrentLoadsAndStores) { 7825ffd83dbSDimitry Andric if (!checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth, false, 7835ffd83dbSDimitry Andric DI)) 7840b57cec5SDimitry Andric return false; 7850b57cec5SDimitry Andric } 7860b57cec5SDimitry Andric } 7875ffd83dbSDimitry Andric 7885ffd83dbSDimitry Andric size_t NumInsts = CurrentLoadsAndStores.size(); 7895ffd83dbSDimitry Andric for (size_t I = 0; I < NumInsts; ++I) { 7905ffd83dbSDimitry Andric for (size_t J = I; J < NumInsts; ++J) { 7915ffd83dbSDimitry Andric if (!checkDependency(CurrentLoadsAndStores[I], CurrentLoadsAndStores[J], 7925ffd83dbSDimitry Andric LoopDepth, CurLoopDepth, true, DI)) 7935ffd83dbSDimitry Andric return false; 7940b57cec5SDimitry Andric } 7950b57cec5SDimitry Andric } 7965ffd83dbSDimitry Andric 7975ffd83dbSDimitry Andric EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(), 7985ffd83dbSDimitry Andric CurrentLoadsAndStores.end()); 7990b57cec5SDimitry Andric } 8000b57cec5SDimitry Andric return true; 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric 8035ffd83dbSDimitry Andric static bool isEligibleLoopForm(const Loop &Root) { 8045ffd83dbSDimitry Andric // Root must have a child. 8055ffd83dbSDimitry Andric if (Root.getSubLoops().size() != 1) 8060b57cec5SDimitry Andric return false; 8070b57cec5SDimitry Andric 8085ffd83dbSDimitry Andric const Loop *L = &Root; 8095ffd83dbSDimitry Andric do { 8105ffd83dbSDimitry Andric // All loops in Root need to be in simplify and rotated form. 8115ffd83dbSDimitry Andric if (!L->isLoopSimplifyForm()) 8125ffd83dbSDimitry Andric return false; 8135ffd83dbSDimitry Andric 8145ffd83dbSDimitry Andric if (!L->isRotatedForm()) 8155ffd83dbSDimitry Andric return false; 8165ffd83dbSDimitry Andric 8175ffd83dbSDimitry Andric if (L->getHeader()->hasAddressTaken()) { 8185ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n"); 8195ffd83dbSDimitry Andric return false; 8205ffd83dbSDimitry Andric } 8215ffd83dbSDimitry Andric 8225ffd83dbSDimitry Andric unsigned SubLoopsSize = L->getSubLoops().size(); 8235ffd83dbSDimitry Andric if (SubLoopsSize == 0) 8245ffd83dbSDimitry Andric return true; 8255ffd83dbSDimitry Andric 8265ffd83dbSDimitry Andric // Only one child is allowed. 8275ffd83dbSDimitry Andric if (SubLoopsSize != 1) 8285ffd83dbSDimitry Andric return false; 8295ffd83dbSDimitry Andric 830fe6060f1SDimitry Andric // Only loops with a single exit block can be unrolled and jammed. 831fe6060f1SDimitry Andric // The function getExitBlock() is used for this check, rather than 832fe6060f1SDimitry Andric // getUniqueExitBlock() to ensure loops with mulitple exit edges are 833fe6060f1SDimitry Andric // disallowed. 834fe6060f1SDimitry Andric if (!L->getExitBlock()) { 835fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit " 836fe6060f1SDimitry Andric "blocks can be unrolled and jammed.\n"); 837fe6060f1SDimitry Andric return false; 838fe6060f1SDimitry Andric } 839fe6060f1SDimitry Andric 840fe6060f1SDimitry Andric // Only loops with a single exiting block can be unrolled and jammed. 841fe6060f1SDimitry Andric if (!L->getExitingBlock()) { 842fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single " 843fe6060f1SDimitry Andric "exiting blocks can be unrolled and jammed.\n"); 844fe6060f1SDimitry Andric return false; 845fe6060f1SDimitry Andric } 846fe6060f1SDimitry Andric 8475ffd83dbSDimitry Andric L = L->getSubLoops()[0]; 8485ffd83dbSDimitry Andric } while (L); 8495ffd83dbSDimitry Andric 8505ffd83dbSDimitry Andric return true; 8515ffd83dbSDimitry Andric } 8525ffd83dbSDimitry Andric 8535ffd83dbSDimitry Andric static Loop *getInnerMostLoop(Loop *L) { 8545ffd83dbSDimitry Andric while (!L->getSubLoops().empty()) 8555ffd83dbSDimitry Andric L = L->getSubLoops()[0]; 8565ffd83dbSDimitry Andric return L; 8570b57cec5SDimitry Andric } 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, 8605ffd83dbSDimitry Andric DependenceInfo &DI, LoopInfo &LI) { 8615ffd83dbSDimitry Andric if (!isEligibleLoopForm(*L)) { 8625ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Ineligible loop form\n"); 8635ffd83dbSDimitry Andric return false; 8645ffd83dbSDimitry Andric } 8655ffd83dbSDimitry Andric 8660b57cec5SDimitry Andric /* We currently handle outer loops like this: 8670b57cec5SDimitry Andric | 8685ffd83dbSDimitry Andric ForeFirst <------\ } 8695ffd83dbSDimitry Andric Blocks | } ForeBlocks of L 8700b57cec5SDimitry Andric ForeLast | } 8710b57cec5SDimitry Andric | | 8725ffd83dbSDimitry Andric ... | 8735ffd83dbSDimitry Andric | | 8745ffd83dbSDimitry Andric ForeFirst <----\ | } 8755ffd83dbSDimitry Andric Blocks | | } ForeBlocks of a inner loop of L 8765ffd83dbSDimitry Andric ForeLast | | } 8775ffd83dbSDimitry Andric | | | 8785ffd83dbSDimitry Andric JamLoopFirst <\ | | } 8795ffd83dbSDimitry Andric Blocks | | | } JamLoopBlocks of the innermost loop 8805ffd83dbSDimitry Andric JamLoopLast -/ | | } 8815ffd83dbSDimitry Andric | | | 8825ffd83dbSDimitry Andric AftFirst | | } 8835ffd83dbSDimitry Andric Blocks | | } AftBlocks of a inner loop of L 8845ffd83dbSDimitry Andric AftLast ------/ | } 8855ffd83dbSDimitry Andric | | 8865ffd83dbSDimitry Andric ... | 8870b57cec5SDimitry Andric | | 8880b57cec5SDimitry Andric AftFirst | } 8895ffd83dbSDimitry Andric Blocks | } AftBlocks of L 8905ffd83dbSDimitry Andric AftLast --------/ } 8910b57cec5SDimitry Andric | 8920b57cec5SDimitry Andric 8930b57cec5SDimitry Andric There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks 8940b57cec5SDimitry Andric and AftBlocks, providing that there is one edge from Fores to SubLoops, 8950b57cec5SDimitry Andric one edge from SubLoops to Afts and a single outer loop exit (from Afts). 8960b57cec5SDimitry Andric In practice we currently limit Aft blocks to a single block, and limit 8970b57cec5SDimitry Andric things further in the profitablility checks of the unroll and jam pass. 8980b57cec5SDimitry Andric 8990b57cec5SDimitry Andric Because of the way we rearrange basic blocks, we also require that 9005ffd83dbSDimitry Andric the Fore blocks of L on all unrolled iterations are safe to move before the 9015ffd83dbSDimitry Andric blocks of the direct child of L of all iterations. So we require that the 9025ffd83dbSDimitry Andric phi node looping operands of ForeHeader can be moved to at least the end of 9035ffd83dbSDimitry Andric ForeEnd, so that we can arrange cloned Fore Blocks before the subloop and 9045ffd83dbSDimitry Andric match up Phi's correctly. 9050b57cec5SDimitry Andric 9065ffd83dbSDimitry Andric i.e. The old order of blocks used to be 9075ffd83dbSDimitry Andric (F1)1 (F2)1 J1_1 J1_2 (A2)1 (A1)1 (F1)2 (F2)2 J2_1 J2_2 (A2)2 (A1)2. 9085ffd83dbSDimitry Andric It needs to be safe to transform this to 9095ffd83dbSDimitry Andric (F1)1 (F1)2 (F2)1 (F2)2 J1_1 J1_2 J2_1 J2_2 (A2)1 (A2)2 (A1)1 (A1)2. 9100b57cec5SDimitry Andric 9110b57cec5SDimitry Andric There are then a number of checks along the lines of no calls, no 9120b57cec5SDimitry Andric exceptions, inner loop IV is consistent, etc. Note that for loops requiring 9130b57cec5SDimitry Andric runtime unrolling, UnrollRuntimeLoopRemainder can also fail in 9140b57cec5SDimitry Andric UnrollAndJamLoop if the trip count cannot be easily calculated. 9150b57cec5SDimitry Andric */ 9160b57cec5SDimitry Andric 9170b57cec5SDimitry Andric // Split blocks into Fore/SubLoop/Aft based on dominators 9185ffd83dbSDimitry Andric Loop *JamLoop = getInnerMostLoop(L); 9190b57cec5SDimitry Andric BasicBlockSet SubLoopBlocks; 9205ffd83dbSDimitry Andric DenseMap<Loop *, BasicBlockSet> ForeBlocksMap; 9215ffd83dbSDimitry Andric DenseMap<Loop *, BasicBlockSet> AftBlocksMap; 9225ffd83dbSDimitry Andric if (!partitionOuterLoopBlocks(*L, *JamLoop, SubLoopBlocks, ForeBlocksMap, 9235ffd83dbSDimitry Andric AftBlocksMap, DT)) { 9240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n"); 9250b57cec5SDimitry Andric return false; 9260b57cec5SDimitry Andric } 9270b57cec5SDimitry Andric 9280b57cec5SDimitry Andric // Aft blocks may need to move instructions to fore blocks, which becomes more 9290b57cec5SDimitry Andric // difficult if there are multiple (potentially conditionally executed) 9300b57cec5SDimitry Andric // blocks. For now we just exclude loops with multiple aft blocks. 9315ffd83dbSDimitry Andric if (AftBlocksMap[L].size() != 1) { 9320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle " 9330b57cec5SDimitry Andric "multiple blocks after the loop\n"); 9340b57cec5SDimitry Andric return false; 9350b57cec5SDimitry Andric } 9360b57cec5SDimitry Andric 9370b57cec5SDimitry Andric // Check inner loop backedge count is consistent on all iterations of the 9380b57cec5SDimitry Andric // outer loop 9395ffd83dbSDimitry Andric if (any_of(L->getLoopsInPreorder(), [&SE](Loop *SubLoop) { 9405ffd83dbSDimitry Andric return !hasIterationCountInvariantInParent(SubLoop, SE); 9415ffd83dbSDimitry Andric })) { 9420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is " 9430b57cec5SDimitry Andric "not consistent on each iteration\n"); 9440b57cec5SDimitry Andric return false; 9450b57cec5SDimitry Andric } 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric // Check the loop safety info for exceptions. 9480b57cec5SDimitry Andric SimpleLoopSafetyInfo LSI; 9490b57cec5SDimitry Andric LSI.computeLoopSafetyInfo(L); 9500b57cec5SDimitry Andric if (LSI.anyBlockMayThrow()) { 9510b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n"); 9520b57cec5SDimitry Andric return false; 9530b57cec5SDimitry Andric } 9540b57cec5SDimitry Andric 9550b57cec5SDimitry Andric // We've ruled out the easy stuff and now need to check that there are no 9560b57cec5SDimitry Andric // interdependencies which may prevent us from moving the: 9570b57cec5SDimitry Andric // ForeBlocks before Subloop and AftBlocks. 9580b57cec5SDimitry Andric // Subloop before AftBlocks. 9590b57cec5SDimitry Andric // ForeBlock phi operands before the subloop 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric // Make sure we can move all instructions we need to before the subloop 9625ffd83dbSDimitry Andric BasicBlock *Header = L->getHeader(); 9635ffd83dbSDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 9645ffd83dbSDimitry Andric BasicBlockSet AftBlocks = AftBlocksMap[L]; 9655ffd83dbSDimitry Andric Loop *SubLoop = L->getSubLoops()[0]; 9660b57cec5SDimitry Andric if (!processHeaderPhiOperands( 9670b57cec5SDimitry Andric Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) { 9680b57cec5SDimitry Andric if (SubLoop->contains(I->getParent())) 9690b57cec5SDimitry Andric return false; 9700b57cec5SDimitry Andric if (AftBlocks.count(I->getParent())) { 9710b57cec5SDimitry Andric // If we hit a phi node in afts we know we are done (probably 9720b57cec5SDimitry Andric // LCSSA) 9730b57cec5SDimitry Andric if (isa<PHINode>(I)) 9740b57cec5SDimitry Andric return false; 9750b57cec5SDimitry Andric // Can't move instructions with side effects or memory 9760b57cec5SDimitry Andric // reads/writes 9770b57cec5SDimitry Andric if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory()) 9780b57cec5SDimitry Andric return false; 9790b57cec5SDimitry Andric } 9800b57cec5SDimitry Andric // Keep going 9810b57cec5SDimitry Andric return true; 9820b57cec5SDimitry Andric })) { 9830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required " 9840b57cec5SDimitry Andric "instructions after subloop to before it\n"); 9850b57cec5SDimitry Andric return false; 9860b57cec5SDimitry Andric } 9870b57cec5SDimitry Andric 9880b57cec5SDimitry Andric // Check for memory dependencies which prohibit the unrolling we are doing. 9890b57cec5SDimitry Andric // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check 9900b57cec5SDimitry Andric // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub. 9915ffd83dbSDimitry Andric if (!checkDependencies(*L, SubLoopBlocks, ForeBlocksMap, AftBlocksMap, DI, 9925ffd83dbSDimitry Andric LI)) { 9930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n"); 9940b57cec5SDimitry Andric return false; 9950b57cec5SDimitry Andric } 9960b57cec5SDimitry Andric 9970b57cec5SDimitry Andric return true; 9980b57cec5SDimitry Andric } 999