xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements loop unroll and jam as a routine, much like
100b57cec5SDimitry Andric // LoopUnroll.cpp implements loop unroll.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
145ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h"
155ffd83dbSDimitry Andric #include "llvm/ADT/DenseMap.h"
165ffd83dbSDimitry Andric #include "llvm/ADT/STLExtras.h"
170b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
185ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
205ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h"
215ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
245ffd83dbSDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
255ffd83dbSDimitry Andric #include "llvm/Analysis/LoopInfo.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
275ffd83dbSDimitry Andric #include "llvm/Analysis/MustExecute.h"
280b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
290b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
300b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
310b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
325ffd83dbSDimitry Andric #include "llvm/IR/DebugLoc.h"
335ffd83dbSDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
340b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
355ffd83dbSDimitry Andric #include "llvm/IR/Function.h"
365ffd83dbSDimitry Andric #include "llvm/IR/Instruction.h"
375ffd83dbSDimitry Andric #include "llvm/IR/Instructions.h"
380b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
395ffd83dbSDimitry Andric #include "llvm/IR/User.h"
405ffd83dbSDimitry Andric #include "llvm/IR/Value.h"
415ffd83dbSDimitry Andric #include "llvm/IR/ValueHandle.h"
425ffd83dbSDimitry Andric #include "llvm/IR/ValueMap.h"
435ffd83dbSDimitry Andric #include "llvm/Support/Casting.h"
440b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
455ffd83dbSDimitry Andric #include "llvm/Support/ErrorHandling.h"
465ffd83dbSDimitry Andric #include "llvm/Support/GenericDomTree.h"
470b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
480b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
490b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
500b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
510b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h"
525ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
535ffd83dbSDimitry Andric #include <assert.h>
545ffd83dbSDimitry Andric #include <memory>
555ffd83dbSDimitry Andric #include <type_traits>
565ffd83dbSDimitry Andric #include <vector>
575ffd83dbSDimitry Andric 
580b57cec5SDimitry Andric using namespace llvm;
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll-and-jam"
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed");
630b57cec5SDimitry Andric STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed");
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet;
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric // Partition blocks in an outer/inner loop pair into blocks before and after
680b57cec5SDimitry Andric // the loop
695ffd83dbSDimitry Andric static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks,
705ffd83dbSDimitry Andric                                 BasicBlockSet &AftBlocks, DominatorTree &DT) {
715ffd83dbSDimitry Andric   Loop *SubLoop = L.getSubLoops()[0];
720b57cec5SDimitry Andric   BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
730b57cec5SDimitry Andric 
745ffd83dbSDimitry Andric   for (BasicBlock *BB : L.blocks()) {
750b57cec5SDimitry Andric     if (!SubLoop->contains(BB)) {
765ffd83dbSDimitry Andric       if (DT.dominates(SubLoopLatch, BB))
770b57cec5SDimitry Andric         AftBlocks.insert(BB);
780b57cec5SDimitry Andric       else
790b57cec5SDimitry Andric         ForeBlocks.insert(BB);
800b57cec5SDimitry Andric     }
810b57cec5SDimitry Andric   }
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric   // Check that all blocks in ForeBlocks together dominate the subloop
840b57cec5SDimitry Andric   // TODO: This might ideally be done better with a dominator/postdominators.
850b57cec5SDimitry Andric   BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader();
860b57cec5SDimitry Andric   for (BasicBlock *BB : ForeBlocks) {
870b57cec5SDimitry Andric     if (BB == SubLoopPreHeader)
880b57cec5SDimitry Andric       continue;
890b57cec5SDimitry Andric     Instruction *TI = BB->getTerminator();
905ffd83dbSDimitry Andric     for (BasicBlock *Succ : successors(TI))
915ffd83dbSDimitry Andric       if (!ForeBlocks.count(Succ))
920b57cec5SDimitry Andric         return false;
930b57cec5SDimitry Andric   }
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric   return true;
960b57cec5SDimitry Andric }
970b57cec5SDimitry Andric 
985ffd83dbSDimitry Andric /// Partition blocks in a loop nest into blocks before and after each inner
995ffd83dbSDimitry Andric /// loop.
1005ffd83dbSDimitry Andric static bool partitionOuterLoopBlocks(
1015ffd83dbSDimitry Andric     Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks,
1025ffd83dbSDimitry Andric     DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
1035ffd83dbSDimitry Andric     DenseMap<Loop *, BasicBlockSet> &AftBlocksMap, DominatorTree &DT) {
1045ffd83dbSDimitry Andric   JamLoopBlocks.insert(JamLoop.block_begin(), JamLoop.block_end());
1055ffd83dbSDimitry Andric 
1065ffd83dbSDimitry Andric   for (Loop *L : Root.getLoopsInPreorder()) {
1075ffd83dbSDimitry Andric     if (L == &JamLoop)
1085ffd83dbSDimitry Andric       break;
1095ffd83dbSDimitry Andric 
1105ffd83dbSDimitry Andric     if (!partitionLoopBlocks(*L, ForeBlocksMap[L], AftBlocksMap[L], DT))
1115ffd83dbSDimitry Andric       return false;
1125ffd83dbSDimitry Andric   }
1135ffd83dbSDimitry Andric 
1145ffd83dbSDimitry Andric   return true;
1155ffd83dbSDimitry Andric }
1165ffd83dbSDimitry Andric 
1175ffd83dbSDimitry Andric // TODO Remove when UnrollAndJamLoop changed to support unroll and jamming more
1185ffd83dbSDimitry Andric // than 2 levels loop.
1195ffd83dbSDimitry Andric static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop,
1205ffd83dbSDimitry Andric                                      BasicBlockSet &ForeBlocks,
1215ffd83dbSDimitry Andric                                      BasicBlockSet &SubLoopBlocks,
1225ffd83dbSDimitry Andric                                      BasicBlockSet &AftBlocks,
1235ffd83dbSDimitry Andric                                      DominatorTree *DT) {
1245ffd83dbSDimitry Andric   SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end());
1255ffd83dbSDimitry Andric   return partitionLoopBlocks(*L, ForeBlocks, AftBlocks, *DT);
1265ffd83dbSDimitry Andric }
1275ffd83dbSDimitry Andric 
1280b57cec5SDimitry Andric // Looks at the phi nodes in Header for values coming from Latch. For these
1290b57cec5SDimitry Andric // instructions and all their operands calls Visit on them, keeping going for
1300b57cec5SDimitry Andric // all the operands in AftBlocks. Returns false if Visit returns false,
1310b57cec5SDimitry Andric // otherwise returns true. This is used to process the instructions in the
1320b57cec5SDimitry Andric // Aft blocks that need to be moved before the subloop. It is used in two
1330b57cec5SDimitry Andric // places. One to check that the required set of instructions can be moved
1340b57cec5SDimitry Andric // before the loop. Then to collect the instructions to actually move in
1350b57cec5SDimitry Andric // moveHeaderPhiOperandsToForeBlocks.
1360b57cec5SDimitry Andric template <typename T>
1370b57cec5SDimitry Andric static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch,
1380b57cec5SDimitry Andric                                      BasicBlockSet &AftBlocks, T Visit) {
139fe6060f1SDimitry Andric   SmallPtrSet<Instruction *, 8> VisitedInstr;
1400b57cec5SDimitry Andric 
141bdd1243dSDimitry Andric   std::function<bool(Instruction * I)> ProcessInstr = [&](Instruction *I) {
142bdd1243dSDimitry Andric     if (VisitedInstr.count(I))
143bdd1243dSDimitry Andric       return true;
144bdd1243dSDimitry Andric 
145fe6060f1SDimitry Andric     VisitedInstr.insert(I);
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric     if (AftBlocks.count(I->getParent()))
1480b57cec5SDimitry Andric       for (auto &U : I->operands())
1490b57cec5SDimitry Andric         if (Instruction *II = dyn_cast<Instruction>(U))
150bdd1243dSDimitry Andric           if (!ProcessInstr(II))
151bdd1243dSDimitry Andric             return false;
152bdd1243dSDimitry Andric 
153bdd1243dSDimitry Andric     return Visit(I);
154bdd1243dSDimitry Andric   };
155bdd1243dSDimitry Andric 
156bdd1243dSDimitry Andric   for (auto &Phi : Header->phis()) {
157bdd1243dSDimitry Andric     Value *V = Phi.getIncomingValueForBlock(Latch);
158bdd1243dSDimitry Andric     if (Instruction *I = dyn_cast<Instruction>(V))
159bdd1243dSDimitry Andric       if (!ProcessInstr(I))
160bdd1243dSDimitry Andric         return false;
1610b57cec5SDimitry Andric   }
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric   return true;
1640b57cec5SDimitry Andric }
1650b57cec5SDimitry Andric 
1660b57cec5SDimitry Andric // Move the phi operands of Header from Latch out of AftBlocks to InsertLoc.
1670b57cec5SDimitry Andric static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header,
1680b57cec5SDimitry Andric                                               BasicBlock *Latch,
1690b57cec5SDimitry Andric                                               Instruction *InsertLoc,
1700b57cec5SDimitry Andric                                               BasicBlockSet &AftBlocks) {
1710b57cec5SDimitry Andric   // We need to ensure we move the instructions in the correct order,
1720b57cec5SDimitry Andric   // starting with the earliest required instruction and moving forward.
1730b57cec5SDimitry Andric   processHeaderPhiOperands(Header, Latch, AftBlocks,
174bdd1243dSDimitry Andric                            [&AftBlocks, &InsertLoc](Instruction *I) {
1750b57cec5SDimitry Andric                              if (AftBlocks.count(I->getParent()))
176bdd1243dSDimitry Andric                                I->moveBefore(InsertLoc);
1770b57cec5SDimitry Andric                              return true;
1780b57cec5SDimitry Andric                            });
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric /*
1820b57cec5SDimitry Andric   This method performs Unroll and Jam. For a simple loop like:
1830b57cec5SDimitry Andric   for (i = ..)
1840b57cec5SDimitry Andric     Fore(i)
1850b57cec5SDimitry Andric     for (j = ..)
1860b57cec5SDimitry Andric       SubLoop(i, j)
1870b57cec5SDimitry Andric     Aft(i)
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   Instead of doing normal inner or outer unrolling, we do:
1900b57cec5SDimitry Andric   for (i = .., i+=2)
1910b57cec5SDimitry Andric     Fore(i)
1920b57cec5SDimitry Andric     Fore(i+1)
1930b57cec5SDimitry Andric     for (j = ..)
1940b57cec5SDimitry Andric       SubLoop(i, j)
1950b57cec5SDimitry Andric       SubLoop(i+1, j)
1960b57cec5SDimitry Andric     Aft(i)
1970b57cec5SDimitry Andric     Aft(i+1)
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   So the outer loop is essetially unrolled and then the inner loops are fused
2000b57cec5SDimitry Andric   ("jammed") together into a single loop. This can increase speed when there
2010b57cec5SDimitry Andric   are loads in SubLoop that are invariant to i, as they become shared between
2020b57cec5SDimitry Andric   the now jammed inner loops.
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   We do this by spliting the blocks in the loop into Fore, Subloop and Aft.
2050b57cec5SDimitry Andric   Fore blocks are those before the inner loop, Aft are those after. Normal
2060b57cec5SDimitry Andric   Unroll code is used to copy each of these sets of blocks and the results are
2070b57cec5SDimitry Andric   combined together into the final form above.
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   isSafeToUnrollAndJam should be used prior to calling this to make sure the
2100b57cec5SDimitry Andric   unrolling will be valid. Checking profitablility is also advisable.
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric   If EpilogueLoop is non-null, it receives the epilogue loop (if it was
2130b57cec5SDimitry Andric   necessary to create one and not fully unrolled).
2140b57cec5SDimitry Andric */
2155ffd83dbSDimitry Andric LoopUnrollResult
2165ffd83dbSDimitry Andric llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
2175ffd83dbSDimitry Andric                        unsigned TripMultiple, bool UnrollRemainder,
2185ffd83dbSDimitry Andric                        LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
2195ffd83dbSDimitry Andric                        AssumptionCache *AC, const TargetTransformInfo *TTI,
2205ffd83dbSDimitry Andric                        OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop) {
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric   // When we enter here we should have already checked that it is safe
2230b57cec5SDimitry Andric   BasicBlock *Header = L->getHeader();
224480093f4SDimitry Andric   assert(Header && "No header.");
2250b57cec5SDimitry Andric   assert(L->getSubLoops().size() == 1);
2260b57cec5SDimitry Andric   Loop *SubLoop = *L->begin();
2270b57cec5SDimitry Andric 
2280b57cec5SDimitry Andric   // Don't enter the unroll code if there is nothing to do.
2290b57cec5SDimitry Andric   if (TripCount == 0 && Count < 2) {
2300b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; almost nothing to do\n");
2310b57cec5SDimitry Andric     return LoopUnrollResult::Unmodified;
2320b57cec5SDimitry Andric   }
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric   assert(Count > 0);
2350b57cec5SDimitry Andric   assert(TripMultiple > 0);
2360b57cec5SDimitry Andric   assert(TripCount == 0 || TripCount % TripMultiple == 0);
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric   // Are we eliminating the loop control altogether?
2390b57cec5SDimitry Andric   bool CompletelyUnroll = (Count == TripCount);
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric   // We use the runtime remainder in cases where we don't know trip multiple
242fe6060f1SDimitry Andric   if (TripMultiple % Count != 0) {
2430b57cec5SDimitry Andric     if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
2440b57cec5SDimitry Andric                                     /*UseEpilogRemainder*/ true,
2450b57cec5SDimitry Andric                                     UnrollRemainder, /*ForgetAllSCEV*/ false,
2465ffd83dbSDimitry Andric                                     LI, SE, DT, AC, TTI, true, EpilogueLoop)) {
2470b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
2480b57cec5SDimitry Andric                            "generated when assuming runtime trip count\n");
2490b57cec5SDimitry Andric       return LoopUnrollResult::Unmodified;
2500b57cec5SDimitry Andric     }
2510b57cec5SDimitry Andric   }
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric   // Notify ScalarEvolution that the loop will be substantially changed,
2540b57cec5SDimitry Andric   // if not outright eliminated.
2550b57cec5SDimitry Andric   if (SE) {
2560b57cec5SDimitry Andric     SE->forgetLoop(L);
257bdd1243dSDimitry Andric     SE->forgetBlockAndLoopDispositions();
2580b57cec5SDimitry Andric   }
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric   using namespace ore;
2610b57cec5SDimitry Andric   // Report the unrolling decision.
2620b57cec5SDimitry Andric   if (CompletelyUnroll) {
2630b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %"
2640b57cec5SDimitry Andric                       << Header->getName() << " with trip count " << TripCount
2650b57cec5SDimitry Andric                       << "!\n");
2660b57cec5SDimitry Andric     ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
2670b57cec5SDimitry Andric                                  L->getHeader())
2680b57cec5SDimitry Andric               << "completely unroll and jammed loop with "
2690b57cec5SDimitry Andric               << NV("UnrollCount", TripCount) << " iterations");
2700b57cec5SDimitry Andric   } else {
2710b57cec5SDimitry Andric     auto DiagBuilder = [&]() {
2720b57cec5SDimitry Andric       OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
2730b57cec5SDimitry Andric                               L->getHeader());
2740b57cec5SDimitry Andric       return Diag << "unroll and jammed loop by a factor of "
2750b57cec5SDimitry Andric                   << NV("UnrollCount", Count);
2760b57cec5SDimitry Andric     };
2770b57cec5SDimitry Andric 
2780b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName()
2790b57cec5SDimitry Andric                       << " by " << Count);
2800b57cec5SDimitry Andric     if (TripMultiple != 1) {
2810b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
2820b57cec5SDimitry Andric       ORE->emit([&]() {
2830b57cec5SDimitry Andric         return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
2840b57cec5SDimitry Andric                              << " trips per branch";
2850b57cec5SDimitry Andric       });
2860b57cec5SDimitry Andric     } else {
2870b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << " with run-time trip count");
2880b57cec5SDimitry Andric       ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; });
2890b57cec5SDimitry Andric     }
2900b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "!\n");
2910b57cec5SDimitry Andric   }
2920b57cec5SDimitry Andric 
2930b57cec5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
2940b57cec5SDimitry Andric   BasicBlock *LatchBlock = L->getLoopLatch();
295480093f4SDimitry Andric   assert(Preheader && "No preheader");
296480093f4SDimitry Andric   assert(LatchBlock && "No latch block");
2970b57cec5SDimitry Andric   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
2980b57cec5SDimitry Andric   assert(BI && !BI->isUnconditional());
2990b57cec5SDimitry Andric   bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
3000b57cec5SDimitry Andric   BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
3010b57cec5SDimitry Andric   bool SubLoopContinueOnTrue = SubLoop->contains(
3020b57cec5SDimitry Andric       SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0));
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric   // Partition blocks in an outer/inner loop pair into blocks before and after
3050b57cec5SDimitry Andric   // the loop
3060b57cec5SDimitry Andric   BasicBlockSet SubLoopBlocks;
3070b57cec5SDimitry Andric   BasicBlockSet ForeBlocks;
3080b57cec5SDimitry Andric   BasicBlockSet AftBlocks;
3090b57cec5SDimitry Andric   partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks,
3100b57cec5SDimitry Andric                            DT);
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric   // We keep track of the entering/first and exiting/last block of each of
3130b57cec5SDimitry Andric   // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of
3140b57cec5SDimitry Andric   // blocks easier.
3150b57cec5SDimitry Andric   std::vector<BasicBlock *> ForeBlocksFirst;
3160b57cec5SDimitry Andric   std::vector<BasicBlock *> ForeBlocksLast;
3170b57cec5SDimitry Andric   std::vector<BasicBlock *> SubLoopBlocksFirst;
3180b57cec5SDimitry Andric   std::vector<BasicBlock *> SubLoopBlocksLast;
3190b57cec5SDimitry Andric   std::vector<BasicBlock *> AftBlocksFirst;
3200b57cec5SDimitry Andric   std::vector<BasicBlock *> AftBlocksLast;
3210b57cec5SDimitry Andric   ForeBlocksFirst.push_back(Header);
3220b57cec5SDimitry Andric   ForeBlocksLast.push_back(SubLoop->getLoopPreheader());
3230b57cec5SDimitry Andric   SubLoopBlocksFirst.push_back(SubLoop->getHeader());
3240b57cec5SDimitry Andric   SubLoopBlocksLast.push_back(SubLoop->getExitingBlock());
3250b57cec5SDimitry Andric   AftBlocksFirst.push_back(SubLoop->getExitBlock());
3260b57cec5SDimitry Andric   AftBlocksLast.push_back(L->getExitingBlock());
3270b57cec5SDimitry Andric   // Maps Blocks[0] -> Blocks[It]
3280b57cec5SDimitry Andric   ValueToValueMapTy LastValueMap;
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric   // Move any instructions from fore phi operands from AftBlocks into Fore.
3310b57cec5SDimitry Andric   moveHeaderPhiOperandsToForeBlocks(
3325ffd83dbSDimitry Andric       Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks);
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   // The current on-the-fly SSA update requires blocks to be processed in
3350b57cec5SDimitry Andric   // reverse postorder so that LastValueMap contains the correct value at each
3360b57cec5SDimitry Andric   // exit.
3370b57cec5SDimitry Andric   LoopBlocksDFS DFS(L);
3380b57cec5SDimitry Andric   DFS.perform(LI);
3390b57cec5SDimitry Andric   // Stash the DFS iterators before adding blocks to the loop.
3400b57cec5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
3410b57cec5SDimitry Andric   LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
3420b57cec5SDimitry Andric 
343fe6060f1SDimitry Andric   // When a FSDiscriminator is enabled, we don't need to add the multiply
344fe6060f1SDimitry Andric   // factors to the discriminators.
345bdd1243dSDimitry Andric   if (Header->getParent()->shouldEmitDebugInfoForProfiling() &&
346bdd1243dSDimitry Andric       !EnableFSDiscriminator)
3470b57cec5SDimitry Andric     for (BasicBlock *BB : L->getBlocks())
3480b57cec5SDimitry Andric       for (Instruction &I : *BB)
34906c3fb27SDimitry Andric         if (!I.isDebugOrPseudoInst())
3500b57cec5SDimitry Andric           if (const DILocation *DIL = I.getDebugLoc()) {
3510b57cec5SDimitry Andric             auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
3520b57cec5SDimitry Andric             if (NewDIL)
35381ad6265SDimitry Andric               I.setDebugLoc(*NewDIL);
3540b57cec5SDimitry Andric             else
3550b57cec5SDimitry Andric               LLVM_DEBUG(dbgs()
3560b57cec5SDimitry Andric                          << "Failed to create new discriminator: "
3570b57cec5SDimitry Andric                          << DIL->getFilename() << " Line: " << DIL->getLine());
3580b57cec5SDimitry Andric           }
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric   // Copy all blocks
3610b57cec5SDimitry Andric   for (unsigned It = 1; It != Count; ++It) {
3625ffd83dbSDimitry Andric     SmallVector<BasicBlock *, 8> NewBlocks;
3630b57cec5SDimitry Andric     // Maps Blocks[It] -> Blocks[It-1]
3640b57cec5SDimitry Andric     DenseMap<Value *, Value *> PrevItValueMap;
3655ffd83dbSDimitry Andric     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
3665ffd83dbSDimitry Andric     NewLoops[L] = L;
3675ffd83dbSDimitry Andric     NewLoops[SubLoop] = SubLoop;
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
3700b57cec5SDimitry Andric       ValueToValueMapTy VMap;
3710b57cec5SDimitry Andric       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
372bdd1243dSDimitry Andric       Header->getParent()->insert(Header->getParent()->end(), New);
3730b57cec5SDimitry Andric 
3745ffd83dbSDimitry Andric       // Tell LI about New.
3755ffd83dbSDimitry Andric       addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
3760b57cec5SDimitry Andric 
3775ffd83dbSDimitry Andric       if (ForeBlocks.count(*BB)) {
3780b57cec5SDimitry Andric         if (*BB == ForeBlocksFirst[0])
3790b57cec5SDimitry Andric           ForeBlocksFirst.push_back(New);
3800b57cec5SDimitry Andric         if (*BB == ForeBlocksLast[0])
3810b57cec5SDimitry Andric           ForeBlocksLast.push_back(New);
3820b57cec5SDimitry Andric       } else if (SubLoopBlocks.count(*BB)) {
3830b57cec5SDimitry Andric         if (*BB == SubLoopBlocksFirst[0])
3840b57cec5SDimitry Andric           SubLoopBlocksFirst.push_back(New);
3850b57cec5SDimitry Andric         if (*BB == SubLoopBlocksLast[0])
3860b57cec5SDimitry Andric           SubLoopBlocksLast.push_back(New);
3870b57cec5SDimitry Andric       } else if (AftBlocks.count(*BB)) {
3880b57cec5SDimitry Andric         if (*BB == AftBlocksFirst[0])
3890b57cec5SDimitry Andric           AftBlocksFirst.push_back(New);
3900b57cec5SDimitry Andric         if (*BB == AftBlocksLast[0])
3910b57cec5SDimitry Andric           AftBlocksLast.push_back(New);
3920b57cec5SDimitry Andric       } else {
3930b57cec5SDimitry Andric         llvm_unreachable("BB being cloned should be in Fore/Sub/Aft");
3940b57cec5SDimitry Andric       }
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric       // Update our running maps of newest clones
3970b57cec5SDimitry Andric       PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
3980b57cec5SDimitry Andric       LastValueMap[*BB] = New;
3990b57cec5SDimitry Andric       for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
4000b57cec5SDimitry Andric            VI != VE; ++VI) {
4010b57cec5SDimitry Andric         PrevItValueMap[VI->second] =
4020b57cec5SDimitry Andric             const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]);
4030b57cec5SDimitry Andric         LastValueMap[VI->first] = VI->second;
4040b57cec5SDimitry Andric       }
4050b57cec5SDimitry Andric 
4060b57cec5SDimitry Andric       NewBlocks.push_back(New);
4070b57cec5SDimitry Andric 
4080b57cec5SDimitry Andric       // Update DomTree:
4090b57cec5SDimitry Andric       if (*BB == ForeBlocksFirst[0])
4100b57cec5SDimitry Andric         DT->addNewBlock(New, ForeBlocksLast[It - 1]);
4110b57cec5SDimitry Andric       else if (*BB == SubLoopBlocksFirst[0])
4120b57cec5SDimitry Andric         DT->addNewBlock(New, SubLoopBlocksLast[It - 1]);
4130b57cec5SDimitry Andric       else if (*BB == AftBlocksFirst[0])
4140b57cec5SDimitry Andric         DT->addNewBlock(New, AftBlocksLast[It - 1]);
4150b57cec5SDimitry Andric       else {
4160b57cec5SDimitry Andric         // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree
4170b57cec5SDimitry Andric         // structure.
4180b57cec5SDimitry Andric         auto BBDomNode = DT->getNode(*BB);
4190b57cec5SDimitry Andric         auto BBIDom = BBDomNode->getIDom();
4200b57cec5SDimitry Andric         BasicBlock *OriginalBBIDom = BBIDom->getBlock();
4210b57cec5SDimitry Andric         assert(OriginalBBIDom);
4220b57cec5SDimitry Andric         assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
4230b57cec5SDimitry Andric         DT->addNewBlock(
4240b57cec5SDimitry Andric             New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
4250b57cec5SDimitry Andric       }
4260b57cec5SDimitry Andric     }
4270b57cec5SDimitry Andric 
4280b57cec5SDimitry Andric     // Remap all instructions in the most recent iteration
4295ffd83dbSDimitry Andric     remapInstructionsInBlocks(NewBlocks, LastValueMap);
4300b57cec5SDimitry Andric     for (BasicBlock *NewBlock : NewBlocks) {
4310b57cec5SDimitry Andric       for (Instruction &I : *NewBlock) {
432fe6060f1SDimitry Andric         if (auto *II = dyn_cast<AssumeInst>(&I))
4330b57cec5SDimitry Andric           AC->registerAssumption(II);
4340b57cec5SDimitry Andric       }
4350b57cec5SDimitry Andric     }
4360b57cec5SDimitry Andric 
4370b57cec5SDimitry Andric     // Alter the ForeBlocks phi's, pointing them at the latest version of the
4380b57cec5SDimitry Andric     // value from the previous iteration's phis
4390b57cec5SDimitry Andric     for (PHINode &Phi : ForeBlocksFirst[It]->phis()) {
4400b57cec5SDimitry Andric       Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
4410b57cec5SDimitry Andric       assert(OldValue && "should have incoming edge from Aft[It]");
4420b57cec5SDimitry Andric       Value *NewValue = OldValue;
4430b57cec5SDimitry Andric       if (Value *PrevValue = PrevItValueMap[OldValue])
4440b57cec5SDimitry Andric         NewValue = PrevValue;
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric       assert(Phi.getNumOperands() == 2);
4470b57cec5SDimitry Andric       Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
4480b57cec5SDimitry Andric       Phi.setIncomingValue(0, NewValue);
4490b57cec5SDimitry Andric       Phi.removeIncomingValue(1);
4500b57cec5SDimitry Andric     }
4510b57cec5SDimitry Andric   }
4520b57cec5SDimitry Andric 
4530b57cec5SDimitry Andric   // Now that all the basic blocks for the unrolled iterations are in place,
4540b57cec5SDimitry Andric   // finish up connecting the blocks and phi nodes. At this point LastValueMap
4550b57cec5SDimitry Andric   // is the last unrolled iterations values.
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric   // Update Phis in BB from OldBB to point to NewBB and use the latest value
4580b57cec5SDimitry Andric   // from LastValueMap
4590b57cec5SDimitry Andric   auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB,
4600b57cec5SDimitry Andric                                      BasicBlock *NewBB,
4610b57cec5SDimitry Andric                                      ValueToValueMapTy &LastValueMap) {
4620b57cec5SDimitry Andric     for (PHINode &Phi : BB->phis()) {
4630b57cec5SDimitry Andric       for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
4640b57cec5SDimitry Andric         if (Phi.getIncomingBlock(b) == OldBB) {
4650b57cec5SDimitry Andric           Value *OldValue = Phi.getIncomingValue(b);
4660b57cec5SDimitry Andric           if (Value *LastValue = LastValueMap[OldValue])
4670b57cec5SDimitry Andric             Phi.setIncomingValue(b, LastValue);
4680b57cec5SDimitry Andric           Phi.setIncomingBlock(b, NewBB);
4690b57cec5SDimitry Andric           break;
4700b57cec5SDimitry Andric         }
4710b57cec5SDimitry Andric       }
4720b57cec5SDimitry Andric     }
4730b57cec5SDimitry Andric   };
4740b57cec5SDimitry Andric   // Move all the phis from Src into Dest
4750b57cec5SDimitry Andric   auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) {
476*0fca6ea1SDimitry Andric     BasicBlock::iterator insertPoint = Dest->getFirstNonPHIIt();
4770b57cec5SDimitry Andric     while (PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
478*0fca6ea1SDimitry Andric       Phi->moveBefore(*Dest, insertPoint);
4790b57cec5SDimitry Andric   };
4800b57cec5SDimitry Andric 
4810b57cec5SDimitry Andric   // Update the PHI values outside the loop to point to the last block
4820b57cec5SDimitry Andric   updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(),
4830b57cec5SDimitry Andric                            LastValueMap);
4840b57cec5SDimitry Andric 
4850b57cec5SDimitry Andric   // Update ForeBlocks successors and phi nodes
4860b57cec5SDimitry Andric   BranchInst *ForeTerm =
4870b57cec5SDimitry Andric       cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
4885ffd83dbSDimitry Andric   assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
4895ffd83dbSDimitry Andric   ForeTerm->setSuccessor(0, SubLoopBlocksFirst[0]);
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric   if (CompletelyUnroll) {
4920b57cec5SDimitry Andric     while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
4930b57cec5SDimitry Andric       Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
494bdd1243dSDimitry Andric       Phi->eraseFromParent();
4950b57cec5SDimitry Andric     }
4960b57cec5SDimitry Andric   } else {
4970b57cec5SDimitry Andric     // Update the PHI values to point to the last aft block
4980b57cec5SDimitry Andric     updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
4990b57cec5SDimitry Andric                              AftBlocksLast.back(), LastValueMap);
5000b57cec5SDimitry Andric   }
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric   for (unsigned It = 1; It != Count; It++) {
5030b57cec5SDimitry Andric     // Remap ForeBlock successors from previous iteration to this
5040b57cec5SDimitry Andric     BranchInst *ForeTerm =
5050b57cec5SDimitry Andric         cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
5065ffd83dbSDimitry Andric     assert(ForeTerm->getNumSuccessors() == 1 && "Expecting one successor");
5075ffd83dbSDimitry Andric     ForeTerm->setSuccessor(0, ForeBlocksFirst[It]);
5080b57cec5SDimitry Andric   }
5090b57cec5SDimitry Andric 
5100b57cec5SDimitry Andric   // Subloop successors and phis
5110b57cec5SDimitry Andric   BranchInst *SubTerm =
5120b57cec5SDimitry Andric       cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
5130b57cec5SDimitry Andric   SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
5140b57cec5SDimitry Andric   SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
515e8d8bef9SDimitry Andric   SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0],
5160b57cec5SDimitry Andric                                             ForeBlocksLast.back());
517e8d8bef9SDimitry Andric   SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
5180b57cec5SDimitry Andric                                             SubLoopBlocksLast.back());
5190b57cec5SDimitry Andric 
5200b57cec5SDimitry Andric   for (unsigned It = 1; It != Count; It++) {
5210b57cec5SDimitry Andric     // Replace the conditional branch of the previous iteration subloop with an
5220b57cec5SDimitry Andric     // unconditional one to this one
5230b57cec5SDimitry Andric     BranchInst *SubTerm =
5240b57cec5SDimitry Andric         cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
525*0fca6ea1SDimitry Andric     BranchInst::Create(SubLoopBlocksFirst[It], SubTerm->getIterator());
5260b57cec5SDimitry Andric     SubTerm->eraseFromParent();
5270b57cec5SDimitry Andric 
528e8d8bef9SDimitry Andric     SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It],
5290b57cec5SDimitry Andric                                                ForeBlocksLast.back());
530e8d8bef9SDimitry Andric     SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
5310b57cec5SDimitry Andric                                                SubLoopBlocksLast.back());
5320b57cec5SDimitry Andric     movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
5330b57cec5SDimitry Andric   }
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric   // Aft blocks successors and phis
5365ffd83dbSDimitry Andric   BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
5370b57cec5SDimitry Andric   if (CompletelyUnroll) {
538*0fca6ea1SDimitry Andric     BranchInst::Create(LoopExit, AftTerm->getIterator());
5395ffd83dbSDimitry Andric     AftTerm->eraseFromParent();
5400b57cec5SDimitry Andric   } else {
5415ffd83dbSDimitry Andric     AftTerm->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
5425ffd83dbSDimitry Andric     assert(AftTerm->getSuccessor(ContinueOnTrue) == LoopExit &&
5435ffd83dbSDimitry Andric            "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
5440b57cec5SDimitry Andric   }
545e8d8bef9SDimitry Andric   AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
5460b57cec5SDimitry Andric                                         SubLoopBlocksLast.back());
5470b57cec5SDimitry Andric 
5480b57cec5SDimitry Andric   for (unsigned It = 1; It != Count; It++) {
5490b57cec5SDimitry Andric     // Replace the conditional branch of the previous iteration subloop with an
5500b57cec5SDimitry Andric     // unconditional one to this one
5510b57cec5SDimitry Andric     BranchInst *AftTerm =
5520b57cec5SDimitry Andric         cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
553*0fca6ea1SDimitry Andric     BranchInst::Create(AftBlocksFirst[It], AftTerm->getIterator());
5540b57cec5SDimitry Andric     AftTerm->eraseFromParent();
5550b57cec5SDimitry Andric 
556e8d8bef9SDimitry Andric     AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
5570b57cec5SDimitry Andric                                            SubLoopBlocksLast.back());
5580b57cec5SDimitry Andric     movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
5590b57cec5SDimitry Andric   }
5600b57cec5SDimitry Andric 
5618bcb0991SDimitry Andric   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
5620b57cec5SDimitry Andric   // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
5630b57cec5SDimitry Andric   // new ones required.
5640b57cec5SDimitry Andric   if (Count != 1) {
5650b57cec5SDimitry Andric     SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
5660b57cec5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
5670b57cec5SDimitry Andric                            SubLoopBlocksFirst[0]);
5680b57cec5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
5690b57cec5SDimitry Andric                            SubLoopBlocksLast[0], AftBlocksFirst[0]);
5700b57cec5SDimitry Andric 
5710b57cec5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
5720b57cec5SDimitry Andric                            ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
5730b57cec5SDimitry Andric     DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
5740b57cec5SDimitry Andric                            SubLoopBlocksLast.back(), AftBlocksFirst[0]);
5758bcb0991SDimitry Andric     DTU.applyUpdatesPermissive(DTUpdates);
5760b57cec5SDimitry Andric   }
5770b57cec5SDimitry Andric 
5780b57cec5SDimitry Andric   // Merge adjacent basic blocks, if possible.
5790b57cec5SDimitry Andric   SmallPtrSet<BasicBlock *, 16> MergeBlocks;
5800b57cec5SDimitry Andric   MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
5810b57cec5SDimitry Andric   MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
5820b57cec5SDimitry Andric   MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
5835ffd83dbSDimitry Andric 
5845ffd83dbSDimitry Andric   MergeBlockSuccessorsIntoGivenBlocks(MergeBlocks, L, &DTU, LI);
5855ffd83dbSDimitry Andric 
5868bcb0991SDimitry Andric   // Apply updates to the DomTree.
5878bcb0991SDimitry Andric   DT = &DTU.getDomTree();
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric   // At this point, the code is well formed.  We now do a quick sweep over the
5900b57cec5SDimitry Andric   // inserted code, doing constant propagation and dead code elimination as we
5910b57cec5SDimitry Andric   // go.
5925ffd83dbSDimitry Andric   simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC, TTI);
5935ffd83dbSDimitry Andric   simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC,
5945ffd83dbSDimitry Andric                           TTI);
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric   NumCompletelyUnrolledAndJammed += CompletelyUnroll;
5970b57cec5SDimitry Andric   ++NumUnrolledAndJammed;
5980b57cec5SDimitry Andric 
5995ffd83dbSDimitry Andric   // Update LoopInfo if the loop is completely removed.
6005ffd83dbSDimitry Andric   if (CompletelyUnroll)
6015ffd83dbSDimitry Andric     LI->erase(L);
6025ffd83dbSDimitry Andric 
6030b57cec5SDimitry Andric #ifndef NDEBUG
6040b57cec5SDimitry Andric   // We shouldn't have done anything to break loop simplify form or LCSSA.
6055ffd83dbSDimitry Andric   Loop *OutestLoop = SubLoop->getParentLoop()
6065ffd83dbSDimitry Andric                          ? SubLoop->getParentLoop()->getParentLoop()
6075ffd83dbSDimitry Andric                                ? SubLoop->getParentLoop()->getParentLoop()
6085ffd83dbSDimitry Andric                                : SubLoop->getParentLoop()
6095ffd83dbSDimitry Andric                          : SubLoop;
6105ffd83dbSDimitry Andric   assert(DT->verify());
6115ffd83dbSDimitry Andric   LI->verify(*DT);
6120b57cec5SDimitry Andric   assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI));
6130b57cec5SDimitry Andric   if (!CompletelyUnroll)
6140b57cec5SDimitry Andric     assert(L->isLoopSimplifyForm());
6150b57cec5SDimitry Andric   assert(SubLoop->isLoopSimplifyForm());
6165ffd83dbSDimitry Andric   SE->verify();
6170b57cec5SDimitry Andric #endif
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric   return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
6200b57cec5SDimitry Andric                           : LoopUnrollResult::PartiallyUnrolled;
6210b57cec5SDimitry Andric }
6220b57cec5SDimitry Andric 
6230b57cec5SDimitry Andric static bool getLoadsAndStores(BasicBlockSet &Blocks,
6245ffd83dbSDimitry Andric                               SmallVector<Instruction *, 4> &MemInstr) {
6250b57cec5SDimitry Andric   // Scan the BBs and collect legal loads and stores.
6260b57cec5SDimitry Andric   // Returns false if non-simple loads/stores are found.
6270b57cec5SDimitry Andric   for (BasicBlock *BB : Blocks) {
6280b57cec5SDimitry Andric     for (Instruction &I : *BB) {
6290b57cec5SDimitry Andric       if (auto *Ld = dyn_cast<LoadInst>(&I)) {
6300b57cec5SDimitry Andric         if (!Ld->isSimple())
6310b57cec5SDimitry Andric           return false;
6320b57cec5SDimitry Andric         MemInstr.push_back(&I);
6330b57cec5SDimitry Andric       } else if (auto *St = dyn_cast<StoreInst>(&I)) {
6340b57cec5SDimitry Andric         if (!St->isSimple())
6350b57cec5SDimitry Andric           return false;
6360b57cec5SDimitry Andric         MemInstr.push_back(&I);
6370b57cec5SDimitry Andric       } else if (I.mayReadOrWriteMemory()) {
6380b57cec5SDimitry Andric         return false;
6390b57cec5SDimitry Andric       }
6400b57cec5SDimitry Andric     }
6410b57cec5SDimitry Andric   }
6420b57cec5SDimitry Andric   return true;
6430b57cec5SDimitry Andric }
6440b57cec5SDimitry Andric 
6455ffd83dbSDimitry Andric static bool preservesForwardDependence(Instruction *Src, Instruction *Dst,
6465ffd83dbSDimitry Andric                                        unsigned UnrollLevel, unsigned JamLevel,
6475ffd83dbSDimitry Andric                                        bool Sequentialized, Dependence *D) {
6485ffd83dbSDimitry Andric   // UnrollLevel might carry the dependency Src --> Dst
6495ffd83dbSDimitry Andric   // Does a different loop after unrolling?
6505ffd83dbSDimitry Andric   for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
6515ffd83dbSDimitry Andric        ++CurLoopDepth) {
6525ffd83dbSDimitry Andric     auto JammedDir = D->getDirection(CurLoopDepth);
6535ffd83dbSDimitry Andric     if (JammedDir == Dependence::DVEntry::LT)
6545ffd83dbSDimitry Andric       return true;
6555ffd83dbSDimitry Andric 
6565ffd83dbSDimitry Andric     if (JammedDir & Dependence::DVEntry::GT)
6575ffd83dbSDimitry Andric       return false;
6585ffd83dbSDimitry Andric   }
6595ffd83dbSDimitry Andric 
6605ffd83dbSDimitry Andric   return true;
6615ffd83dbSDimitry Andric }
6625ffd83dbSDimitry Andric 
6635ffd83dbSDimitry Andric static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst,
6645ffd83dbSDimitry Andric                                         unsigned UnrollLevel, unsigned JamLevel,
6655ffd83dbSDimitry Andric                                         bool Sequentialized, Dependence *D) {
6665ffd83dbSDimitry Andric   // UnrollLevel might carry the dependency Dst --> Src
6675ffd83dbSDimitry Andric   for (unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
6685ffd83dbSDimitry Andric        ++CurLoopDepth) {
6695ffd83dbSDimitry Andric     auto JammedDir = D->getDirection(CurLoopDepth);
6705ffd83dbSDimitry Andric     if (JammedDir == Dependence::DVEntry::GT)
6715ffd83dbSDimitry Andric       return true;
6725ffd83dbSDimitry Andric 
6735ffd83dbSDimitry Andric     if (JammedDir & Dependence::DVEntry::LT)
6745ffd83dbSDimitry Andric       return false;
6755ffd83dbSDimitry Andric   }
6765ffd83dbSDimitry Andric 
6775ffd83dbSDimitry Andric   // Backward dependencies are only preserved if not interleaved.
6785ffd83dbSDimitry Andric   return Sequentialized;
6795ffd83dbSDimitry Andric }
6805ffd83dbSDimitry Andric 
6815ffd83dbSDimitry Andric // Check whether it is semantically safe Src and Dst considering any potential
6825ffd83dbSDimitry Andric // dependency between them.
6835ffd83dbSDimitry Andric //
6845ffd83dbSDimitry Andric // @param UnrollLevel The level of the loop being unrolled
6855ffd83dbSDimitry Andric // @param JamLevel    The level of the loop being jammed; if Src and Dst are on
6865ffd83dbSDimitry Andric // different levels, the outermost common loop counts as jammed level
6875ffd83dbSDimitry Andric //
6885ffd83dbSDimitry Andric // @return true if is safe and false if there is a dependency violation.
6895ffd83dbSDimitry Andric static bool checkDependency(Instruction *Src, Instruction *Dst,
6905ffd83dbSDimitry Andric                             unsigned UnrollLevel, unsigned JamLevel,
6915ffd83dbSDimitry Andric                             bool Sequentialized, DependenceInfo &DI) {
6925ffd83dbSDimitry Andric   assert(UnrollLevel <= JamLevel &&
6935ffd83dbSDimitry Andric          "Expecting JamLevel to be at least UnrollLevel");
6945ffd83dbSDimitry Andric 
6950b57cec5SDimitry Andric   if (Src == Dst)
6965ffd83dbSDimitry Andric     return true;
6970b57cec5SDimitry Andric   // Ignore Input dependencies.
6980b57cec5SDimitry Andric   if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
6995ffd83dbSDimitry Andric     return true;
7000b57cec5SDimitry Andric 
7015ffd83dbSDimitry Andric   // Check whether unroll-and-jam may violate a dependency.
7025ffd83dbSDimitry Andric   // By construction, every dependency will be lexicographically non-negative
7035ffd83dbSDimitry Andric   // (if it was, it would violate the current execution order), such as
7045ffd83dbSDimitry Andric   //   (0,0,>,*,*)
7055ffd83dbSDimitry Andric   // Unroll-and-jam changes the GT execution of two executions to the same
7065ffd83dbSDimitry Andric   // iteration of the chosen unroll level. That is, a GT dependence becomes a GE
7075ffd83dbSDimitry Andric   // dependence (or EQ, if we fully unrolled the loop) at the loop's position:
7085ffd83dbSDimitry Andric   //   (0,0,>=,*,*)
7095ffd83dbSDimitry Andric   // Now, the dependency is not necessarily non-negative anymore, i.e.
7105ffd83dbSDimitry Andric   // unroll-and-jam may violate correctness.
7115ffd83dbSDimitry Andric   std::unique_ptr<Dependence> D = DI.depends(Src, Dst, true);
7125ffd83dbSDimitry Andric   if (!D)
7135ffd83dbSDimitry Andric     return true;
7140b57cec5SDimitry Andric   assert(D->isOrdered() && "Expected an output, flow or anti dep.");
7150b57cec5SDimitry Andric 
7160b57cec5SDimitry Andric   if (D->isConfused()) {
7170b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Confused dependency between:\n"
7180b57cec5SDimitry Andric                       << "  " << *Src << "\n"
7190b57cec5SDimitry Andric                       << "  " << *Dst << "\n");
7200b57cec5SDimitry Andric     return false;
7210b57cec5SDimitry Andric   }
7225ffd83dbSDimitry Andric 
7235ffd83dbSDimitry Andric   // If outer levels (levels enclosing the loop being unroll-and-jammed) have a
7245ffd83dbSDimitry Andric   // non-equal direction, then the locations accessed in the inner levels cannot
7255ffd83dbSDimitry Andric   // overlap in memory. We assumes the indexes never overlap into neighboring
7265ffd83dbSDimitry Andric   // dimensions.
7275ffd83dbSDimitry Andric   for (unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth)
7285ffd83dbSDimitry Andric     if (!(D->getDirection(CurLoopDepth) & Dependence::DVEntry::EQ))
7295ffd83dbSDimitry Andric       return true;
7305ffd83dbSDimitry Andric 
7315ffd83dbSDimitry Andric   auto UnrollDirection = D->getDirection(UnrollLevel);
7325ffd83dbSDimitry Andric 
7335ffd83dbSDimitry Andric   // If the distance carried by the unrolled loop is 0, then after unrolling
7345ffd83dbSDimitry Andric   // that distance will become non-zero resulting in non-overlapping accesses in
7355ffd83dbSDimitry Andric   // the inner loops.
7365ffd83dbSDimitry Andric   if (UnrollDirection == Dependence::DVEntry::EQ)
7375ffd83dbSDimitry Andric     return true;
7385ffd83dbSDimitry Andric 
7395ffd83dbSDimitry Andric   if (UnrollDirection & Dependence::DVEntry::LT &&
7405ffd83dbSDimitry Andric       !preservesForwardDependence(Src, Dst, UnrollLevel, JamLevel,
7415ffd83dbSDimitry Andric                                   Sequentialized, D.get()))
7420b57cec5SDimitry Andric     return false;
7435ffd83dbSDimitry Andric 
7445ffd83dbSDimitry Andric   if (UnrollDirection & Dependence::DVEntry::GT &&
7455ffd83dbSDimitry Andric       !preservesBackwardDependence(Src, Dst, UnrollLevel, JamLevel,
7465ffd83dbSDimitry Andric                                    Sequentialized, D.get()))
7475ffd83dbSDimitry Andric     return false;
7485ffd83dbSDimitry Andric 
7495ffd83dbSDimitry Andric   return true;
7500b57cec5SDimitry Andric }
7515ffd83dbSDimitry Andric 
7525ffd83dbSDimitry Andric static bool
7535ffd83dbSDimitry Andric checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
7545ffd83dbSDimitry Andric                   const DenseMap<Loop *, BasicBlockSet> &ForeBlocksMap,
7555ffd83dbSDimitry Andric                   const DenseMap<Loop *, BasicBlockSet> &AftBlocksMap,
7565ffd83dbSDimitry Andric                   DependenceInfo &DI, LoopInfo &LI) {
7575ffd83dbSDimitry Andric   SmallVector<BasicBlockSet, 8> AllBlocks;
7585ffd83dbSDimitry Andric   for (Loop *L : Root.getLoopsInPreorder())
75906c3fb27SDimitry Andric     if (ForeBlocksMap.contains(L))
7605ffd83dbSDimitry Andric       AllBlocks.push_back(ForeBlocksMap.lookup(L));
7615ffd83dbSDimitry Andric   AllBlocks.push_back(SubLoopBlocks);
7625ffd83dbSDimitry Andric   for (Loop *L : Root.getLoopsInPreorder())
76306c3fb27SDimitry Andric     if (AftBlocksMap.contains(L))
7645ffd83dbSDimitry Andric       AllBlocks.push_back(AftBlocksMap.lookup(L));
7655ffd83dbSDimitry Andric 
7665ffd83dbSDimitry Andric   unsigned LoopDepth = Root.getLoopDepth();
7675ffd83dbSDimitry Andric   SmallVector<Instruction *, 4> EarlierLoadsAndStores;
7685ffd83dbSDimitry Andric   SmallVector<Instruction *, 4> CurrentLoadsAndStores;
7695ffd83dbSDimitry Andric   for (BasicBlockSet &Blocks : AllBlocks) {
7705ffd83dbSDimitry Andric     CurrentLoadsAndStores.clear();
7715ffd83dbSDimitry Andric     if (!getLoadsAndStores(Blocks, CurrentLoadsAndStores))
7725ffd83dbSDimitry Andric       return false;
7735ffd83dbSDimitry Andric 
7745ffd83dbSDimitry Andric     Loop *CurLoop = LI.getLoopFor((*Blocks.begin())->front().getParent());
7755ffd83dbSDimitry Andric     unsigned CurLoopDepth = CurLoop->getLoopDepth();
7765ffd83dbSDimitry Andric 
7775ffd83dbSDimitry Andric     for (auto *Earlier : EarlierLoadsAndStores) {
7785ffd83dbSDimitry Andric       Loop *EarlierLoop = LI.getLoopFor(Earlier->getParent());
7795ffd83dbSDimitry Andric       unsigned EarlierDepth = EarlierLoop->getLoopDepth();
7805ffd83dbSDimitry Andric       unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth);
7815ffd83dbSDimitry Andric       for (auto *Later : CurrentLoadsAndStores) {
7825ffd83dbSDimitry Andric         if (!checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth, false,
7835ffd83dbSDimitry Andric                              DI))
7840b57cec5SDimitry Andric           return false;
7850b57cec5SDimitry Andric       }
7860b57cec5SDimitry Andric     }
7875ffd83dbSDimitry Andric 
7885ffd83dbSDimitry Andric     size_t NumInsts = CurrentLoadsAndStores.size();
7895ffd83dbSDimitry Andric     for (size_t I = 0; I < NumInsts; ++I) {
7905ffd83dbSDimitry Andric       for (size_t J = I; J < NumInsts; ++J) {
7915ffd83dbSDimitry Andric         if (!checkDependency(CurrentLoadsAndStores[I], CurrentLoadsAndStores[J],
7925ffd83dbSDimitry Andric                              LoopDepth, CurLoopDepth, true, DI))
7935ffd83dbSDimitry Andric           return false;
7940b57cec5SDimitry Andric       }
7950b57cec5SDimitry Andric     }
7965ffd83dbSDimitry Andric 
7975ffd83dbSDimitry Andric     EarlierLoadsAndStores.append(CurrentLoadsAndStores.begin(),
7985ffd83dbSDimitry Andric                                  CurrentLoadsAndStores.end());
7990b57cec5SDimitry Andric   }
8000b57cec5SDimitry Andric   return true;
8010b57cec5SDimitry Andric }
8020b57cec5SDimitry Andric 
8035ffd83dbSDimitry Andric static bool isEligibleLoopForm(const Loop &Root) {
8045ffd83dbSDimitry Andric   // Root must have a child.
8055ffd83dbSDimitry Andric   if (Root.getSubLoops().size() != 1)
8060b57cec5SDimitry Andric     return false;
8070b57cec5SDimitry Andric 
8085ffd83dbSDimitry Andric   const Loop *L = &Root;
8095ffd83dbSDimitry Andric   do {
8105ffd83dbSDimitry Andric     // All loops in Root need to be in simplify and rotated form.
8115ffd83dbSDimitry Andric     if (!L->isLoopSimplifyForm())
8125ffd83dbSDimitry Andric       return false;
8135ffd83dbSDimitry Andric 
8145ffd83dbSDimitry Andric     if (!L->isRotatedForm())
8155ffd83dbSDimitry Andric       return false;
8165ffd83dbSDimitry Andric 
8175ffd83dbSDimitry Andric     if (L->getHeader()->hasAddressTaken()) {
8185ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Address taken\n");
8195ffd83dbSDimitry Andric       return false;
8205ffd83dbSDimitry Andric     }
8215ffd83dbSDimitry Andric 
8225ffd83dbSDimitry Andric     unsigned SubLoopsSize = L->getSubLoops().size();
8235ffd83dbSDimitry Andric     if (SubLoopsSize == 0)
8245ffd83dbSDimitry Andric       return true;
8255ffd83dbSDimitry Andric 
8265ffd83dbSDimitry Andric     // Only one child is allowed.
8275ffd83dbSDimitry Andric     if (SubLoopsSize != 1)
8285ffd83dbSDimitry Andric       return false;
8295ffd83dbSDimitry Andric 
830fe6060f1SDimitry Andric     // Only loops with a single exit block can be unrolled and jammed.
831fe6060f1SDimitry Andric     // The function getExitBlock() is used for this check, rather than
832fe6060f1SDimitry Andric     // getUniqueExitBlock() to ensure loops with mulitple exit edges are
833fe6060f1SDimitry Andric     // disallowed.
834fe6060f1SDimitry Andric     if (!L->getExitBlock()) {
835fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single exit "
836fe6060f1SDimitry Andric                            "blocks can be unrolled and jammed.\n");
837fe6060f1SDimitry Andric       return false;
838fe6060f1SDimitry Andric     }
839fe6060f1SDimitry Andric 
840fe6060f1SDimitry Andric     // Only loops with a single exiting block can be unrolled and jammed.
841fe6060f1SDimitry Andric     if (!L->getExitingBlock()) {
842fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; only loops with single "
843fe6060f1SDimitry Andric                            "exiting blocks can be unrolled and jammed.\n");
844fe6060f1SDimitry Andric       return false;
845fe6060f1SDimitry Andric     }
846fe6060f1SDimitry Andric 
8475ffd83dbSDimitry Andric     L = L->getSubLoops()[0];
8485ffd83dbSDimitry Andric   } while (L);
8495ffd83dbSDimitry Andric 
8505ffd83dbSDimitry Andric   return true;
8515ffd83dbSDimitry Andric }
8525ffd83dbSDimitry Andric 
8535ffd83dbSDimitry Andric static Loop *getInnerMostLoop(Loop *L) {
8545ffd83dbSDimitry Andric   while (!L->getSubLoops().empty())
8555ffd83dbSDimitry Andric     L = L->getSubLoops()[0];
8565ffd83dbSDimitry Andric   return L;
8570b57cec5SDimitry Andric }
8580b57cec5SDimitry Andric 
8590b57cec5SDimitry Andric bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
8605ffd83dbSDimitry Andric                                 DependenceInfo &DI, LoopInfo &LI) {
8615ffd83dbSDimitry Andric   if (!isEligibleLoopForm(*L)) {
8625ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Ineligible loop form\n");
8635ffd83dbSDimitry Andric     return false;
8645ffd83dbSDimitry Andric   }
8655ffd83dbSDimitry Andric 
8660b57cec5SDimitry Andric   /* We currently handle outer loops like this:
8670b57cec5SDimitry Andric         |
8685ffd83dbSDimitry Andric     ForeFirst    <------\   }
8695ffd83dbSDimitry Andric      Blocks             |   } ForeBlocks of L
8700b57cec5SDimitry Andric     ForeLast            |   }
8710b57cec5SDimitry Andric         |               |
8725ffd83dbSDimitry Andric        ...              |
8735ffd83dbSDimitry Andric         |               |
8745ffd83dbSDimitry Andric     ForeFirst    <----\ |   }
8755ffd83dbSDimitry Andric      Blocks           | |   } ForeBlocks of a inner loop of L
8765ffd83dbSDimitry Andric     ForeLast          | |   }
8775ffd83dbSDimitry Andric         |             | |
8785ffd83dbSDimitry Andric     JamLoopFirst  <\  | |   }
8795ffd83dbSDimitry Andric      Blocks        |  | |   } JamLoopBlocks of the innermost loop
8805ffd83dbSDimitry Andric     JamLoopLast   -/  | |   }
8815ffd83dbSDimitry Andric         |             | |
8825ffd83dbSDimitry Andric     AftFirst          | |   }
8835ffd83dbSDimitry Andric      Blocks           | |   } AftBlocks of a inner loop of L
8845ffd83dbSDimitry Andric     AftLast     ------/ |   }
8855ffd83dbSDimitry Andric         |               |
8865ffd83dbSDimitry Andric        ...              |
8870b57cec5SDimitry Andric         |               |
8880b57cec5SDimitry Andric     AftFirst            |   }
8895ffd83dbSDimitry Andric      Blocks             |   } AftBlocks of L
8905ffd83dbSDimitry Andric     AftLast     --------/   }
8910b57cec5SDimitry Andric         |
8920b57cec5SDimitry Andric 
8930b57cec5SDimitry Andric     There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
8940b57cec5SDimitry Andric     and AftBlocks, providing that there is one edge from Fores to SubLoops,
8950b57cec5SDimitry Andric     one edge from SubLoops to Afts and a single outer loop exit (from Afts).
8960b57cec5SDimitry Andric     In practice we currently limit Aft blocks to a single block, and limit
8970b57cec5SDimitry Andric     things further in the profitablility checks of the unroll and jam pass.
8980b57cec5SDimitry Andric 
8990b57cec5SDimitry Andric     Because of the way we rearrange basic blocks, we also require that
9005ffd83dbSDimitry Andric     the Fore blocks of L on all unrolled iterations are safe to move before the
9015ffd83dbSDimitry Andric     blocks of the direct child of L of all iterations. So we require that the
9025ffd83dbSDimitry Andric     phi node looping operands of ForeHeader can be moved to at least the end of
9035ffd83dbSDimitry Andric     ForeEnd, so that we can arrange cloned Fore Blocks before the subloop and
9045ffd83dbSDimitry Andric     match up Phi's correctly.
9050b57cec5SDimitry Andric 
9065ffd83dbSDimitry Andric     i.e. The old order of blocks used to be
9075ffd83dbSDimitry Andric            (F1)1 (F2)1 J1_1 J1_2 (A2)1 (A1)1 (F1)2 (F2)2 J2_1 J2_2 (A2)2 (A1)2.
9085ffd83dbSDimitry Andric          It needs to be safe to transform this to
9095ffd83dbSDimitry Andric            (F1)1 (F1)2 (F2)1 (F2)2 J1_1 J1_2 J2_1 J2_2 (A2)1 (A2)2 (A1)1 (A1)2.
9100b57cec5SDimitry Andric 
9110b57cec5SDimitry Andric     There are then a number of checks along the lines of no calls, no
9120b57cec5SDimitry Andric     exceptions, inner loop IV is consistent, etc. Note that for loops requiring
9130b57cec5SDimitry Andric     runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
9140b57cec5SDimitry Andric     UnrollAndJamLoop if the trip count cannot be easily calculated.
9150b57cec5SDimitry Andric   */
9160b57cec5SDimitry Andric 
9170b57cec5SDimitry Andric   // Split blocks into Fore/SubLoop/Aft based on dominators
9185ffd83dbSDimitry Andric   Loop *JamLoop = getInnerMostLoop(L);
9190b57cec5SDimitry Andric   BasicBlockSet SubLoopBlocks;
9205ffd83dbSDimitry Andric   DenseMap<Loop *, BasicBlockSet> ForeBlocksMap;
9215ffd83dbSDimitry Andric   DenseMap<Loop *, BasicBlockSet> AftBlocksMap;
9225ffd83dbSDimitry Andric   if (!partitionOuterLoopBlocks(*L, *JamLoop, SubLoopBlocks, ForeBlocksMap,
9235ffd83dbSDimitry Andric                                 AftBlocksMap, DT)) {
9240b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Incompatible loop layout\n");
9250b57cec5SDimitry Andric     return false;
9260b57cec5SDimitry Andric   }
9270b57cec5SDimitry Andric 
9280b57cec5SDimitry Andric   // Aft blocks may need to move instructions to fore blocks, which becomes more
9290b57cec5SDimitry Andric   // difficult if there are multiple (potentially conditionally executed)
9300b57cec5SDimitry Andric   // blocks. For now we just exclude loops with multiple aft blocks.
9315ffd83dbSDimitry Andric   if (AftBlocksMap[L].size() != 1) {
9320b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Can't currently handle "
9330b57cec5SDimitry Andric                          "multiple blocks after the loop\n");
9340b57cec5SDimitry Andric     return false;
9350b57cec5SDimitry Andric   }
9360b57cec5SDimitry Andric 
9370b57cec5SDimitry Andric   // Check inner loop backedge count is consistent on all iterations of the
9380b57cec5SDimitry Andric   // outer loop
9395ffd83dbSDimitry Andric   if (any_of(L->getLoopsInPreorder(), [&SE](Loop *SubLoop) {
9405ffd83dbSDimitry Andric         return !hasIterationCountInvariantInParent(SubLoop, SE);
9415ffd83dbSDimitry Andric       })) {
9420b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Inner loop iteration count is "
9430b57cec5SDimitry Andric                          "not consistent on each iteration\n");
9440b57cec5SDimitry Andric     return false;
9450b57cec5SDimitry Andric   }
9460b57cec5SDimitry Andric 
9470b57cec5SDimitry Andric   // Check the loop safety info for exceptions.
9480b57cec5SDimitry Andric   SimpleLoopSafetyInfo LSI;
9490b57cec5SDimitry Andric   LSI.computeLoopSafetyInfo(L);
9500b57cec5SDimitry Andric   if (LSI.anyBlockMayThrow()) {
9510b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; Something may throw\n");
9520b57cec5SDimitry Andric     return false;
9530b57cec5SDimitry Andric   }
9540b57cec5SDimitry Andric 
9550b57cec5SDimitry Andric   // We've ruled out the easy stuff and now need to check that there are no
9560b57cec5SDimitry Andric   // interdependencies which may prevent us from moving the:
9570b57cec5SDimitry Andric   //  ForeBlocks before Subloop and AftBlocks.
9580b57cec5SDimitry Andric   //  Subloop before AftBlocks.
9590b57cec5SDimitry Andric   //  ForeBlock phi operands before the subloop
9600b57cec5SDimitry Andric 
9610b57cec5SDimitry Andric   // Make sure we can move all instructions we need to before the subloop
9625ffd83dbSDimitry Andric   BasicBlock *Header = L->getHeader();
9635ffd83dbSDimitry Andric   BasicBlock *Latch = L->getLoopLatch();
9645ffd83dbSDimitry Andric   BasicBlockSet AftBlocks = AftBlocksMap[L];
9655ffd83dbSDimitry Andric   Loop *SubLoop = L->getSubLoops()[0];
9660b57cec5SDimitry Andric   if (!processHeaderPhiOperands(
9670b57cec5SDimitry Andric           Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) {
9680b57cec5SDimitry Andric             if (SubLoop->contains(I->getParent()))
9690b57cec5SDimitry Andric               return false;
9700b57cec5SDimitry Andric             if (AftBlocks.count(I->getParent())) {
9710b57cec5SDimitry Andric               // If we hit a phi node in afts we know we are done (probably
9720b57cec5SDimitry Andric               // LCSSA)
9730b57cec5SDimitry Andric               if (isa<PHINode>(I))
9740b57cec5SDimitry Andric                 return false;
9750b57cec5SDimitry Andric               // Can't move instructions with side effects or memory
9760b57cec5SDimitry Andric               // reads/writes
9770b57cec5SDimitry Andric               if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
9780b57cec5SDimitry Andric                 return false;
9790b57cec5SDimitry Andric             }
9800b57cec5SDimitry Andric             // Keep going
9810b57cec5SDimitry Andric             return true;
9820b57cec5SDimitry Andric           })) {
9830b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't move required "
9840b57cec5SDimitry Andric                          "instructions after subloop to before it\n");
9850b57cec5SDimitry Andric     return false;
9860b57cec5SDimitry Andric   }
9870b57cec5SDimitry Andric 
9880b57cec5SDimitry Andric   // Check for memory dependencies which prohibit the unrolling we are doing.
9890b57cec5SDimitry Andric   // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check
9900b57cec5SDimitry Andric   // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub.
9915ffd83dbSDimitry Andric   if (!checkDependencies(*L, SubLoopBlocks, ForeBlocksMap, AftBlocksMap, DI,
9925ffd83dbSDimitry Andric                          LI)) {
9930b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; failed dependency check\n");
9940b57cec5SDimitry Andric     return false;
9950b57cec5SDimitry Andric   }
9960b57cec5SDimitry Andric 
9970b57cec5SDimitry Andric   return true;
9980b57cec5SDimitry Andric }
999