xref: /openbsd-src/gnu/llvm/llvm/lib/Transforms/Scalar/LoopFuse.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick ///
909467b48Spatrick /// \file
1009467b48Spatrick /// This file implements the loop fusion pass.
1109467b48Spatrick /// The implementation is largely based on the following document:
1209467b48Spatrick ///
1309467b48Spatrick ///       Code Transformations to Augment the Scope of Loop Fusion in a
1409467b48Spatrick ///         Production Compiler
1509467b48Spatrick ///       Christopher Mark Barton
1609467b48Spatrick ///       MSc Thesis
1709467b48Spatrick ///       https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf
1809467b48Spatrick ///
1909467b48Spatrick /// The general approach taken is to collect sets of control flow equivalent
2009467b48Spatrick /// loops and test whether they can be fused. The necessary conditions for
2109467b48Spatrick /// fusion are:
2209467b48Spatrick ///    1. The loops must be adjacent (there cannot be any statements between
2309467b48Spatrick ///       the two loops).
2409467b48Spatrick ///    2. The loops must be conforming (they must execute the same number of
2509467b48Spatrick ///       iterations).
2609467b48Spatrick ///    3. The loops must be control flow equivalent (if one loop executes, the
2709467b48Spatrick ///       other is guaranteed to execute).
2809467b48Spatrick ///    4. There cannot be any negative distance dependencies between the loops.
2909467b48Spatrick /// If all of these conditions are satisfied, it is safe to fuse the loops.
3009467b48Spatrick ///
3109467b48Spatrick /// This implementation creates FusionCandidates that represent the loop and the
3209467b48Spatrick /// necessary information needed by fusion. It then operates on the fusion
3309467b48Spatrick /// candidates, first confirming that the candidate is eligible for fusion. The
3409467b48Spatrick /// candidates are then collected into control flow equivalent sets, sorted in
3509467b48Spatrick /// dominance order. Each set of control flow equivalent candidates is then
3609467b48Spatrick /// traversed, attempting to fuse pairs of candidates in the set. If all
3709467b48Spatrick /// requirements for fusion are met, the two candidates are fused, creating a
3809467b48Spatrick /// new (fused) candidate which is then added back into the set to consider for
3909467b48Spatrick /// additional fusion.
4009467b48Spatrick ///
4109467b48Spatrick /// This implementation currently does not make any modifications to remove
4209467b48Spatrick /// conditions for fusion. Code transformations to make loops conform to each of
4309467b48Spatrick /// the conditions for fusion are discussed in more detail in the document
4409467b48Spatrick /// above. These can be added to the current implementation in the future.
4509467b48Spatrick //===----------------------------------------------------------------------===//
4609467b48Spatrick 
4709467b48Spatrick #include "llvm/Transforms/Scalar/LoopFuse.h"
4809467b48Spatrick #include "llvm/ADT/Statistic.h"
4973471bf0Spatrick #include "llvm/Analysis/AssumptionCache.h"
5009467b48Spatrick #include "llvm/Analysis/DependenceAnalysis.h"
5109467b48Spatrick #include "llvm/Analysis/DomTreeUpdater.h"
5209467b48Spatrick #include "llvm/Analysis/LoopInfo.h"
5309467b48Spatrick #include "llvm/Analysis/OptimizationRemarkEmitter.h"
5409467b48Spatrick #include "llvm/Analysis/PostDominators.h"
5509467b48Spatrick #include "llvm/Analysis/ScalarEvolution.h"
5609467b48Spatrick #include "llvm/Analysis/ScalarEvolutionExpressions.h"
5773471bf0Spatrick #include "llvm/Analysis/TargetTransformInfo.h"
5809467b48Spatrick #include "llvm/IR/Function.h"
5909467b48Spatrick #include "llvm/IR/Verifier.h"
6009467b48Spatrick #include "llvm/InitializePasses.h"
6109467b48Spatrick #include "llvm/Pass.h"
6209467b48Spatrick #include "llvm/Support/CommandLine.h"
6309467b48Spatrick #include "llvm/Support/Debug.h"
6409467b48Spatrick #include "llvm/Support/raw_ostream.h"
6509467b48Spatrick #include "llvm/Transforms/Scalar.h"
6609467b48Spatrick #include "llvm/Transforms/Utils.h"
6709467b48Spatrick #include "llvm/Transforms/Utils/BasicBlockUtils.h"
6809467b48Spatrick #include "llvm/Transforms/Utils/CodeMoverUtils.h"
6973471bf0Spatrick #include "llvm/Transforms/Utils/LoopPeel.h"
70*d415bd75Srobert #include "llvm/Transforms/Utils/LoopSimplify.h"
7109467b48Spatrick 
7209467b48Spatrick using namespace llvm;
7309467b48Spatrick 
7409467b48Spatrick #define DEBUG_TYPE "loop-fusion"
7509467b48Spatrick 
7609467b48Spatrick STATISTIC(FuseCounter, "Loops fused");
7709467b48Spatrick STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
7809467b48Spatrick STATISTIC(InvalidPreheader, "Loop has invalid preheader");
7909467b48Spatrick STATISTIC(InvalidHeader, "Loop has invalid header");
8009467b48Spatrick STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
8109467b48Spatrick STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
8209467b48Spatrick STATISTIC(InvalidLatch, "Loop has invalid latch");
8309467b48Spatrick STATISTIC(InvalidLoop, "Loop is invalid");
8409467b48Spatrick STATISTIC(AddressTakenBB, "Basic block has address taken");
8509467b48Spatrick STATISTIC(MayThrowException, "Loop may throw an exception");
8609467b48Spatrick STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
8709467b48Spatrick STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
8809467b48Spatrick STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
8909467b48Spatrick STATISTIC(UnknownTripCount, "Loop has unknown trip count");
9009467b48Spatrick STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
9109467b48Spatrick STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
9209467b48Spatrick STATISTIC(NonAdjacent, "Loops are not adjacent");
93097a140dSpatrick STATISTIC(
94097a140dSpatrick     NonEmptyPreheader,
95097a140dSpatrick     "Loop has a non-empty preheader with instructions that cannot be moved");
9609467b48Spatrick STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
9709467b48Spatrick STATISTIC(NonIdenticalGuards, "Candidates have different guards");
98097a140dSpatrick STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block with "
99097a140dSpatrick                              "instructions that cannot be moved");
100097a140dSpatrick STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
101097a140dSpatrick                               "instructions that cannot be moved");
10209467b48Spatrick STATISTIC(NotRotated, "Candidate is not rotated");
10373471bf0Spatrick STATISTIC(OnlySecondCandidateIsGuarded,
10473471bf0Spatrick           "The second candidate is guarded while the first one is not");
105*d415bd75Srobert STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
106*d415bd75Srobert STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
10709467b48Spatrick 
10809467b48Spatrick enum FusionDependenceAnalysisChoice {
10909467b48Spatrick   FUSION_DEPENDENCE_ANALYSIS_SCEV,
11009467b48Spatrick   FUSION_DEPENDENCE_ANALYSIS_DA,
11109467b48Spatrick   FUSION_DEPENDENCE_ANALYSIS_ALL,
11209467b48Spatrick };
11309467b48Spatrick 
11409467b48Spatrick static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
11509467b48Spatrick     "loop-fusion-dependence-analysis",
11609467b48Spatrick     cl::desc("Which dependence analysis should loop fusion use?"),
11709467b48Spatrick     cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev",
11809467b48Spatrick                           "Use the scalar evolution interface"),
11909467b48Spatrick                clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da",
12009467b48Spatrick                           "Use the dependence analysis interface"),
12109467b48Spatrick                clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
12209467b48Spatrick                           "Use all available analyses")),
123*d415bd75Srobert     cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL));
12409467b48Spatrick 
12573471bf0Spatrick static cl::opt<unsigned> FusionPeelMaxCount(
12673471bf0Spatrick     "loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
12773471bf0Spatrick     cl::desc("Max number of iterations to be peeled from a loop, such that "
12873471bf0Spatrick              "fusion can take place"));
12973471bf0Spatrick 
13009467b48Spatrick #ifndef NDEBUG
13109467b48Spatrick static cl::opt<bool>
13209467b48Spatrick     VerboseFusionDebugging("loop-fusion-verbose-debug",
13309467b48Spatrick                            cl::desc("Enable verbose debugging for Loop Fusion"),
134*d415bd75Srobert                            cl::Hidden, cl::init(false));
13509467b48Spatrick #endif
13609467b48Spatrick 
13709467b48Spatrick namespace {
13809467b48Spatrick /// This class is used to represent a candidate for loop fusion. When it is
13909467b48Spatrick /// constructed, it checks the conditions for loop fusion to ensure that it
14009467b48Spatrick /// represents a valid candidate. It caches several parts of a loop that are
14109467b48Spatrick /// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead
14209467b48Spatrick /// of continually querying the underlying Loop to retrieve these values. It is
14309467b48Spatrick /// assumed these will not change throughout loop fusion.
14409467b48Spatrick ///
14509467b48Spatrick /// The invalidate method should be used to indicate that the FusionCandidate is
14609467b48Spatrick /// no longer a valid candidate for fusion. Similarly, the isValid() method can
14709467b48Spatrick /// be used to ensure that the FusionCandidate is still valid for fusion.
14809467b48Spatrick struct FusionCandidate {
14909467b48Spatrick   /// Cache of parts of the loop used throughout loop fusion. These should not
15009467b48Spatrick   /// need to change throughout the analysis and transformation.
15109467b48Spatrick   /// These parts are cached to avoid repeatedly looking up in the Loop class.
15209467b48Spatrick 
15309467b48Spatrick   /// Preheader of the loop this candidate represents
15409467b48Spatrick   BasicBlock *Preheader;
15509467b48Spatrick   /// Header of the loop this candidate represents
15609467b48Spatrick   BasicBlock *Header;
15709467b48Spatrick   /// Blocks in the loop that exit the loop
15809467b48Spatrick   BasicBlock *ExitingBlock;
15909467b48Spatrick   /// The successor block of this loop (where the exiting blocks go to)
16009467b48Spatrick   BasicBlock *ExitBlock;
16109467b48Spatrick   /// Latch of the loop
16209467b48Spatrick   BasicBlock *Latch;
16309467b48Spatrick   /// The loop that this fusion candidate represents
16409467b48Spatrick   Loop *L;
16509467b48Spatrick   /// Vector of instructions in this loop that read from memory
16609467b48Spatrick   SmallVector<Instruction *, 16> MemReads;
16709467b48Spatrick   /// Vector of instructions in this loop that write to memory
16809467b48Spatrick   SmallVector<Instruction *, 16> MemWrites;
16909467b48Spatrick   /// Are all of the members of this fusion candidate still valid
17009467b48Spatrick   bool Valid;
17109467b48Spatrick   /// Guard branch of the loop, if it exists
17209467b48Spatrick   BranchInst *GuardBranch;
17373471bf0Spatrick   /// Peeling Paramaters of the Loop.
17473471bf0Spatrick   TTI::PeelingPreferences PP;
17573471bf0Spatrick   /// Can you Peel this Loop?
17673471bf0Spatrick   bool AbleToPeel;
17773471bf0Spatrick   /// Has this loop been Peeled
17873471bf0Spatrick   bool Peeled;
17909467b48Spatrick 
18009467b48Spatrick   /// Dominator and PostDominator trees are needed for the
18109467b48Spatrick   /// FusionCandidateCompare function, required by FusionCandidateSet to
18209467b48Spatrick   /// determine where the FusionCandidate should be inserted into the set. These
18309467b48Spatrick   /// are used to establish ordering of the FusionCandidates based on dominance.
184*d415bd75Srobert   DominatorTree &DT;
18509467b48Spatrick   const PostDominatorTree *PDT;
18609467b48Spatrick 
18709467b48Spatrick   OptimizationRemarkEmitter &ORE;
18809467b48Spatrick 
FusionCandidate__anon227ba0d70111::FusionCandidate189*d415bd75Srobert   FusionCandidate(Loop *L, DominatorTree &DT, const PostDominatorTree *PDT,
190*d415bd75Srobert                   OptimizationRemarkEmitter &ORE, TTI::PeelingPreferences PP)
19109467b48Spatrick       : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
19209467b48Spatrick         ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
19309467b48Spatrick         Latch(L->getLoopLatch()), L(L), Valid(true),
19473471bf0Spatrick         GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
19573471bf0Spatrick         Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
19609467b48Spatrick 
19709467b48Spatrick     // Walk over all blocks in the loop and check for conditions that may
19809467b48Spatrick     // prevent fusion. For each block, walk over all instructions and collect
19909467b48Spatrick     // the memory reads and writes If any instructions that prevent fusion are
20009467b48Spatrick     // found, invalidate this object and return.
20109467b48Spatrick     for (BasicBlock *BB : L->blocks()) {
20209467b48Spatrick       if (BB->hasAddressTaken()) {
20309467b48Spatrick         invalidate();
20409467b48Spatrick         reportInvalidCandidate(AddressTakenBB);
20509467b48Spatrick         return;
20609467b48Spatrick       }
20709467b48Spatrick 
20809467b48Spatrick       for (Instruction &I : *BB) {
20909467b48Spatrick         if (I.mayThrow()) {
21009467b48Spatrick           invalidate();
21109467b48Spatrick           reportInvalidCandidate(MayThrowException);
21209467b48Spatrick           return;
21309467b48Spatrick         }
21409467b48Spatrick         if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
21509467b48Spatrick           if (SI->isVolatile()) {
21609467b48Spatrick             invalidate();
21709467b48Spatrick             reportInvalidCandidate(ContainsVolatileAccess);
21809467b48Spatrick             return;
21909467b48Spatrick           }
22009467b48Spatrick         }
22109467b48Spatrick         if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
22209467b48Spatrick           if (LI->isVolatile()) {
22309467b48Spatrick             invalidate();
22409467b48Spatrick             reportInvalidCandidate(ContainsVolatileAccess);
22509467b48Spatrick             return;
22609467b48Spatrick           }
22709467b48Spatrick         }
22809467b48Spatrick         if (I.mayWriteToMemory())
22909467b48Spatrick           MemWrites.push_back(&I);
23009467b48Spatrick         if (I.mayReadFromMemory())
23109467b48Spatrick           MemReads.push_back(&I);
23209467b48Spatrick       }
23309467b48Spatrick     }
23409467b48Spatrick   }
23509467b48Spatrick 
23609467b48Spatrick   /// Check if all members of the class are valid.
isValid__anon227ba0d70111::FusionCandidate23709467b48Spatrick   bool isValid() const {
23809467b48Spatrick     return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
23909467b48Spatrick            !L->isInvalid() && Valid;
24009467b48Spatrick   }
24109467b48Spatrick 
24209467b48Spatrick   /// Verify that all members are in sync with the Loop object.
verify__anon227ba0d70111::FusionCandidate24309467b48Spatrick   void verify() const {
24409467b48Spatrick     assert(isValid() && "Candidate is not valid!!");
24509467b48Spatrick     assert(!L->isInvalid() && "Loop is invalid!");
24609467b48Spatrick     assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
24709467b48Spatrick     assert(Header == L->getHeader() && "Header is out of sync");
24809467b48Spatrick     assert(ExitingBlock == L->getExitingBlock() &&
24909467b48Spatrick            "Exiting Blocks is out of sync");
25009467b48Spatrick     assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
25109467b48Spatrick     assert(Latch == L->getLoopLatch() && "Latch is out of sync");
25209467b48Spatrick   }
25309467b48Spatrick 
25409467b48Spatrick   /// Get the entry block for this fusion candidate.
25509467b48Spatrick   ///
25609467b48Spatrick   /// If this fusion candidate represents a guarded loop, the entry block is the
25709467b48Spatrick   /// loop guard block. If it represents an unguarded loop, the entry block is
25809467b48Spatrick   /// the preheader of the loop.
getEntryBlock__anon227ba0d70111::FusionCandidate25909467b48Spatrick   BasicBlock *getEntryBlock() const {
26009467b48Spatrick     if (GuardBranch)
26109467b48Spatrick       return GuardBranch->getParent();
26209467b48Spatrick     else
26309467b48Spatrick       return Preheader;
26409467b48Spatrick   }
26509467b48Spatrick 
26673471bf0Spatrick   /// After Peeling the loop is modified quite a bit, hence all of the Blocks
26773471bf0Spatrick   /// need to be updated accordingly.
updateAfterPeeling__anon227ba0d70111::FusionCandidate26873471bf0Spatrick   void updateAfterPeeling() {
26973471bf0Spatrick     Preheader = L->getLoopPreheader();
27073471bf0Spatrick     Header = L->getHeader();
27173471bf0Spatrick     ExitingBlock = L->getExitingBlock();
27273471bf0Spatrick     ExitBlock = L->getExitBlock();
27373471bf0Spatrick     Latch = L->getLoopLatch();
27473471bf0Spatrick     verify();
27573471bf0Spatrick   }
27673471bf0Spatrick 
27709467b48Spatrick   /// Given a guarded loop, get the successor of the guard that is not in the
27809467b48Spatrick   /// loop.
27909467b48Spatrick   ///
28009467b48Spatrick   /// This method returns the successor of the loop guard that is not located
28109467b48Spatrick   /// within the loop (i.e., the successor of the guard that is not the
28209467b48Spatrick   /// preheader).
28309467b48Spatrick   /// This method is only valid for guarded loops.
getNonLoopBlock__anon227ba0d70111::FusionCandidate28409467b48Spatrick   BasicBlock *getNonLoopBlock() const {
28509467b48Spatrick     assert(GuardBranch && "Only valid on guarded loops.");
28609467b48Spatrick     assert(GuardBranch->isConditional() &&
28709467b48Spatrick            "Expecting guard to be a conditional branch.");
28873471bf0Spatrick     if (Peeled)
28973471bf0Spatrick       return GuardBranch->getSuccessor(1);
29009467b48Spatrick     return (GuardBranch->getSuccessor(0) == Preheader)
29109467b48Spatrick                ? GuardBranch->getSuccessor(1)
29209467b48Spatrick                : GuardBranch->getSuccessor(0);
29309467b48Spatrick   }
29409467b48Spatrick 
29509467b48Spatrick #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump__anon227ba0d70111::FusionCandidate29609467b48Spatrick   LLVM_DUMP_METHOD void dump() const {
29709467b48Spatrick     dbgs() << "\tGuardBranch: ";
29809467b48Spatrick     if (GuardBranch)
29909467b48Spatrick       dbgs() << *GuardBranch;
30009467b48Spatrick     else
30109467b48Spatrick       dbgs() << "nullptr";
30209467b48Spatrick     dbgs() << "\n"
30309467b48Spatrick            << (GuardBranch ? GuardBranch->getName() : "nullptr") << "\n"
30409467b48Spatrick            << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
30509467b48Spatrick            << "\n"
30609467b48Spatrick            << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
30709467b48Spatrick            << "\tExitingBB: "
30809467b48Spatrick            << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
30909467b48Spatrick            << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
31009467b48Spatrick            << "\n"
31109467b48Spatrick            << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"
31209467b48Spatrick            << "\tEntryBlock: "
31309467b48Spatrick            << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr")
31409467b48Spatrick            << "\n";
31509467b48Spatrick   }
31609467b48Spatrick #endif
31709467b48Spatrick 
31809467b48Spatrick   /// Determine if a fusion candidate (representing a loop) is eligible for
31909467b48Spatrick   /// fusion. Note that this only checks whether a single loop can be fused - it
32009467b48Spatrick   /// does not check whether it is *legal* to fuse two loops together.
isEligibleForFusion__anon227ba0d70111::FusionCandidate32109467b48Spatrick   bool isEligibleForFusion(ScalarEvolution &SE) const {
32209467b48Spatrick     if (!isValid()) {
32309467b48Spatrick       LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
32409467b48Spatrick       if (!Preheader)
32509467b48Spatrick         ++InvalidPreheader;
32609467b48Spatrick       if (!Header)
32709467b48Spatrick         ++InvalidHeader;
32809467b48Spatrick       if (!ExitingBlock)
32909467b48Spatrick         ++InvalidExitingBlock;
33009467b48Spatrick       if (!ExitBlock)
33109467b48Spatrick         ++InvalidExitBlock;
33209467b48Spatrick       if (!Latch)
33309467b48Spatrick         ++InvalidLatch;
33409467b48Spatrick       if (L->isInvalid())
33509467b48Spatrick         ++InvalidLoop;
33609467b48Spatrick 
33709467b48Spatrick       return false;
33809467b48Spatrick     }
33909467b48Spatrick 
34009467b48Spatrick     // Require ScalarEvolution to be able to determine a trip count.
34109467b48Spatrick     if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
34209467b48Spatrick       LLVM_DEBUG(dbgs() << "Loop " << L->getName()
34309467b48Spatrick                         << " trip count not computable!\n");
34409467b48Spatrick       return reportInvalidCandidate(UnknownTripCount);
34509467b48Spatrick     }
34609467b48Spatrick 
34709467b48Spatrick     if (!L->isLoopSimplifyForm()) {
34809467b48Spatrick       LLVM_DEBUG(dbgs() << "Loop " << L->getName()
34909467b48Spatrick                         << " is not in simplified form!\n");
35009467b48Spatrick       return reportInvalidCandidate(NotSimplifiedForm);
35109467b48Spatrick     }
35209467b48Spatrick 
35309467b48Spatrick     if (!L->isRotatedForm()) {
35409467b48Spatrick       LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n");
35509467b48Spatrick       return reportInvalidCandidate(NotRotated);
35609467b48Spatrick     }
35709467b48Spatrick 
35809467b48Spatrick     return true;
35909467b48Spatrick   }
36009467b48Spatrick 
36109467b48Spatrick private:
36209467b48Spatrick   // This is only used internally for now, to clear the MemWrites and MemReads
36309467b48Spatrick   // list and setting Valid to false. I can't envision other uses of this right
36409467b48Spatrick   // now, since once FusionCandidates are put into the FusionCandidateSet they
36509467b48Spatrick   // are immutable. Thus, any time we need to change/update a FusionCandidate,
36609467b48Spatrick   // we must create a new one and insert it into the FusionCandidateSet to
36709467b48Spatrick   // ensure the FusionCandidateSet remains ordered correctly.
invalidate__anon227ba0d70111::FusionCandidate36809467b48Spatrick   void invalidate() {
36909467b48Spatrick     MemWrites.clear();
37009467b48Spatrick     MemReads.clear();
37109467b48Spatrick     Valid = false;
37209467b48Spatrick   }
37309467b48Spatrick 
reportInvalidCandidate__anon227ba0d70111::FusionCandidate37409467b48Spatrick   bool reportInvalidCandidate(llvm::Statistic &Stat) const {
37509467b48Spatrick     using namespace ore;
37609467b48Spatrick     assert(L && Preheader && "Fusion candidate not initialized properly!");
37773471bf0Spatrick #if LLVM_ENABLE_STATS
37809467b48Spatrick     ++Stat;
37909467b48Spatrick     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(),
38009467b48Spatrick                                         L->getStartLoc(), Preheader)
38109467b48Spatrick              << "[" << Preheader->getParent()->getName() << "]: "
38209467b48Spatrick              << "Loop is not a candidate for fusion: " << Stat.getDesc());
38373471bf0Spatrick #endif
38409467b48Spatrick     return false;
38509467b48Spatrick   }
38609467b48Spatrick };
38709467b48Spatrick 
38809467b48Spatrick struct FusionCandidateCompare {
38909467b48Spatrick   /// Comparison functor to sort two Control Flow Equivalent fusion candidates
39009467b48Spatrick   /// into dominance order.
39109467b48Spatrick   /// If LHS dominates RHS and RHS post-dominates LHS, return true;
392*d415bd75Srobert   /// If RHS dominates LHS and LHS post-dominates RHS, return false;
393*d415bd75Srobert   /// If both LHS and RHS are not dominating each other then, non-strictly
394*d415bd75Srobert   /// post dominate check will decide the order of candidates. If RHS
395*d415bd75Srobert   /// non-strictly post dominates LHS then, return true. If LHS non-strictly
396*d415bd75Srobert   /// post dominates RHS then, return false. If both are non-strictly post
397*d415bd75Srobert   /// dominate each other then, level in the post dominator tree will decide
398*d415bd75Srobert   /// the order of candidates.
operator ()__anon227ba0d70111::FusionCandidateCompare39909467b48Spatrick   bool operator()(const FusionCandidate &LHS,
40009467b48Spatrick                   const FusionCandidate &RHS) const {
401*d415bd75Srobert     const DominatorTree *DT = &(LHS.DT);
40209467b48Spatrick 
40309467b48Spatrick     BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
40409467b48Spatrick     BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
40509467b48Spatrick 
40609467b48Spatrick     // Do not save PDT to local variable as it is only used in asserts and thus
40709467b48Spatrick     // will trigger an unused variable warning if building without asserts.
40809467b48Spatrick     assert(DT && LHS.PDT && "Expecting valid dominator tree");
40909467b48Spatrick 
41009467b48Spatrick     // Do this compare first so if LHS == RHS, function returns false.
41109467b48Spatrick     if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
41209467b48Spatrick       // RHS dominates LHS
41309467b48Spatrick       // Verify LHS post-dominates RHS
41409467b48Spatrick       assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
41509467b48Spatrick       return false;
41609467b48Spatrick     }
41709467b48Spatrick 
41809467b48Spatrick     if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
41909467b48Spatrick       // Verify RHS Postdominates LHS
42009467b48Spatrick       assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
42109467b48Spatrick       return true;
42209467b48Spatrick     }
42309467b48Spatrick 
424*d415bd75Srobert     // If two FusionCandidates are in the same level of dominator tree,
425*d415bd75Srobert     // they will not dominate each other, but may still be control flow
426*d415bd75Srobert     // equivalent. To sort those FusionCandidates, nonStrictlyPostDominate()
427*d415bd75Srobert     // function is needed.
428*d415bd75Srobert     bool WrongOrder =
429*d415bd75Srobert         nonStrictlyPostDominate(LHSEntryBlock, RHSEntryBlock, DT, LHS.PDT);
430*d415bd75Srobert     bool RightOrder =
431*d415bd75Srobert         nonStrictlyPostDominate(RHSEntryBlock, LHSEntryBlock, DT, LHS.PDT);
432*d415bd75Srobert     if (WrongOrder && RightOrder) {
433*d415bd75Srobert       // If common predecessor of LHS and RHS post dominates both
434*d415bd75Srobert       // FusionCandidates then, Order of FusionCandidate can be
435*d415bd75Srobert       // identified by its level in post dominator tree.
436*d415bd75Srobert       DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);
437*d415bd75Srobert       DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);
438*d415bd75Srobert       return LNode->getLevel() > RNode->getLevel();
439*d415bd75Srobert     } else if (WrongOrder)
440*d415bd75Srobert       return false;
441*d415bd75Srobert     else if (RightOrder)
442*d415bd75Srobert       return true;
443*d415bd75Srobert 
444*d415bd75Srobert     // If LHS does not non-strict Postdominate RHS and RHS does not non-strict
445*d415bd75Srobert     // Postdominate LHS then, there is no dominance relationship between the
446*d415bd75Srobert     // two FusionCandidates. Thus, they should not be in the same set together.
44709467b48Spatrick     llvm_unreachable(
44809467b48Spatrick         "No dominance relationship between these fusion candidates!");
44909467b48Spatrick   }
45009467b48Spatrick };
45109467b48Spatrick 
45209467b48Spatrick using LoopVector = SmallVector<Loop *, 4>;
45309467b48Spatrick 
45409467b48Spatrick // Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
45509467b48Spatrick // order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
45609467b48Spatrick // dominates FC1 and FC1 post-dominates FC0.
45709467b48Spatrick // std::set was chosen because we want a sorted data structure with stable
458*d415bd75Srobert // iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
45909467b48Spatrick // loops by moving intervening code around. When this intervening code contains
46009467b48Spatrick // loops, those loops will be moved also. The corresponding FusionCandidates
46109467b48Spatrick // will also need to be moved accordingly. As this is done, having stable
46209467b48Spatrick // iterators will simplify the logic. Similarly, having an efficient insert that
46309467b48Spatrick // keeps the FusionCandidateSet sorted will also simplify the implementation.
46409467b48Spatrick using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
46509467b48Spatrick using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
46609467b48Spatrick 
46709467b48Spatrick #if !defined(NDEBUG)
operator <<(llvm::raw_ostream & OS,const FusionCandidate & FC)46809467b48Spatrick static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
46909467b48Spatrick                                      const FusionCandidate &FC) {
47009467b48Spatrick   if (FC.isValid())
47109467b48Spatrick     OS << FC.Preheader->getName();
47209467b48Spatrick   else
47309467b48Spatrick     OS << "<Invalid>";
47409467b48Spatrick 
47509467b48Spatrick   return OS;
47609467b48Spatrick }
47709467b48Spatrick 
operator <<(llvm::raw_ostream & OS,const FusionCandidateSet & CandSet)47809467b48Spatrick static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
47909467b48Spatrick                                      const FusionCandidateSet &CandSet) {
48009467b48Spatrick   for (const FusionCandidate &FC : CandSet)
48109467b48Spatrick     OS << FC << '\n';
48209467b48Spatrick 
48309467b48Spatrick   return OS;
48409467b48Spatrick }
48509467b48Spatrick 
48609467b48Spatrick static void
printFusionCandidates(const FusionCandidateCollection & FusionCandidates)48709467b48Spatrick printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
48809467b48Spatrick   dbgs() << "Fusion Candidates: \n";
48909467b48Spatrick   for (const auto &CandidateSet : FusionCandidates) {
49009467b48Spatrick     dbgs() << "*** Fusion Candidate Set ***\n";
49109467b48Spatrick     dbgs() << CandidateSet;
49209467b48Spatrick     dbgs() << "****************************\n";
49309467b48Spatrick   }
49409467b48Spatrick }
49509467b48Spatrick #endif
49609467b48Spatrick 
49709467b48Spatrick /// Collect all loops in function at the same nest level, starting at the
49809467b48Spatrick /// outermost level.
49909467b48Spatrick ///
50009467b48Spatrick /// This data structure collects all loops at the same nest level for a
50109467b48Spatrick /// given function (specified by the LoopInfo object). It starts at the
50209467b48Spatrick /// outermost level.
50309467b48Spatrick struct LoopDepthTree {
50409467b48Spatrick   using LoopsOnLevelTy = SmallVector<LoopVector, 4>;
50509467b48Spatrick   using iterator = LoopsOnLevelTy::iterator;
50609467b48Spatrick   using const_iterator = LoopsOnLevelTy::const_iterator;
50709467b48Spatrick 
LoopDepthTree__anon227ba0d70111::LoopDepthTree50809467b48Spatrick   LoopDepthTree(LoopInfo &LI) : Depth(1) {
50909467b48Spatrick     if (!LI.empty())
51009467b48Spatrick       LoopsOnLevel.emplace_back(LoopVector(LI.rbegin(), LI.rend()));
51109467b48Spatrick   }
51209467b48Spatrick 
51309467b48Spatrick   /// Test whether a given loop has been removed from the function, and thus is
51409467b48Spatrick   /// no longer valid.
isRemovedLoop__anon227ba0d70111::LoopDepthTree51509467b48Spatrick   bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }
51609467b48Spatrick 
51709467b48Spatrick   /// Record that a given loop has been removed from the function and is no
51809467b48Spatrick   /// longer valid.
removeLoop__anon227ba0d70111::LoopDepthTree51909467b48Spatrick   void removeLoop(const Loop *L) { RemovedLoops.insert(L); }
52009467b48Spatrick 
52109467b48Spatrick   /// Descend the tree to the next (inner) nesting level
descend__anon227ba0d70111::LoopDepthTree52209467b48Spatrick   void descend() {
52309467b48Spatrick     LoopsOnLevelTy LoopsOnNextLevel;
52409467b48Spatrick 
52509467b48Spatrick     for (const LoopVector &LV : *this)
52609467b48Spatrick       for (Loop *L : LV)
52709467b48Spatrick         if (!isRemovedLoop(L) && L->begin() != L->end())
52809467b48Spatrick           LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));
52909467b48Spatrick 
53009467b48Spatrick     LoopsOnLevel = LoopsOnNextLevel;
53109467b48Spatrick     RemovedLoops.clear();
53209467b48Spatrick     Depth++;
53309467b48Spatrick   }
53409467b48Spatrick 
empty__anon227ba0d70111::LoopDepthTree53509467b48Spatrick   bool empty() const { return size() == 0; }
size__anon227ba0d70111::LoopDepthTree53609467b48Spatrick   size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
getDepth__anon227ba0d70111::LoopDepthTree53709467b48Spatrick   unsigned getDepth() const { return Depth; }
53809467b48Spatrick 
begin__anon227ba0d70111::LoopDepthTree53909467b48Spatrick   iterator begin() { return LoopsOnLevel.begin(); }
end__anon227ba0d70111::LoopDepthTree54009467b48Spatrick   iterator end() { return LoopsOnLevel.end(); }
begin__anon227ba0d70111::LoopDepthTree54109467b48Spatrick   const_iterator begin() const { return LoopsOnLevel.begin(); }
end__anon227ba0d70111::LoopDepthTree54209467b48Spatrick   const_iterator end() const { return LoopsOnLevel.end(); }
54309467b48Spatrick 
54409467b48Spatrick private:
54509467b48Spatrick   /// Set of loops that have been removed from the function and are no longer
54609467b48Spatrick   /// valid.
54709467b48Spatrick   SmallPtrSet<const Loop *, 8> RemovedLoops;
54809467b48Spatrick 
54909467b48Spatrick   /// Depth of the current level, starting at 1 (outermost loops).
55009467b48Spatrick   unsigned Depth;
55109467b48Spatrick 
55209467b48Spatrick   /// Vector of loops at the current depth level that have the same parent loop
55309467b48Spatrick   LoopsOnLevelTy LoopsOnLevel;
55409467b48Spatrick };
55509467b48Spatrick 
55609467b48Spatrick #ifndef NDEBUG
printLoopVector(const LoopVector & LV)55709467b48Spatrick static void printLoopVector(const LoopVector &LV) {
55809467b48Spatrick   dbgs() << "****************************\n";
559*d415bd75Srobert   for (auto *L : LV)
56009467b48Spatrick     printLoop(*L, dbgs());
56109467b48Spatrick   dbgs() << "****************************\n";
56209467b48Spatrick }
56309467b48Spatrick #endif
56409467b48Spatrick 
56509467b48Spatrick struct LoopFuser {
56609467b48Spatrick private:
56709467b48Spatrick   // Sets of control flow equivalent fusion candidates for a given nest level.
56809467b48Spatrick   FusionCandidateCollection FusionCandidates;
56909467b48Spatrick 
57009467b48Spatrick   LoopDepthTree LDT;
57109467b48Spatrick   DomTreeUpdater DTU;
57209467b48Spatrick 
57309467b48Spatrick   LoopInfo &LI;
57409467b48Spatrick   DominatorTree &DT;
57509467b48Spatrick   DependenceInfo &DI;
57609467b48Spatrick   ScalarEvolution &SE;
57709467b48Spatrick   PostDominatorTree &PDT;
57809467b48Spatrick   OptimizationRemarkEmitter &ORE;
57973471bf0Spatrick   AssumptionCache &AC;
58073471bf0Spatrick   const TargetTransformInfo &TTI;
58109467b48Spatrick 
58209467b48Spatrick public:
LoopFuser__anon227ba0d70111::LoopFuser58309467b48Spatrick   LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
58409467b48Spatrick             ScalarEvolution &SE, PostDominatorTree &PDT,
58573471bf0Spatrick             OptimizationRemarkEmitter &ORE, const DataLayout &DL,
58673471bf0Spatrick             AssumptionCache &AC, const TargetTransformInfo &TTI)
58709467b48Spatrick       : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
58873471bf0Spatrick         DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
58909467b48Spatrick 
59009467b48Spatrick   /// This is the main entry point for loop fusion. It will traverse the
59109467b48Spatrick   /// specified function and collect candidate loops to fuse, starting at the
59209467b48Spatrick   /// outermost nesting level and working inwards.
fuseLoops__anon227ba0d70111::LoopFuser59309467b48Spatrick   bool fuseLoops(Function &F) {
59409467b48Spatrick #ifndef NDEBUG
59509467b48Spatrick     if (VerboseFusionDebugging) {
59609467b48Spatrick       LI.print(dbgs());
59709467b48Spatrick     }
59809467b48Spatrick #endif
59909467b48Spatrick 
60009467b48Spatrick     LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
60109467b48Spatrick                       << "\n");
60209467b48Spatrick     bool Changed = false;
60309467b48Spatrick 
60409467b48Spatrick     while (!LDT.empty()) {
60509467b48Spatrick       LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
60609467b48Spatrick                         << LDT.getDepth() << "\n";);
60709467b48Spatrick 
60809467b48Spatrick       for (const LoopVector &LV : LDT) {
60909467b48Spatrick         assert(LV.size() > 0 && "Empty loop set was build!");
61009467b48Spatrick 
61109467b48Spatrick         // Skip singleton loop sets as they do not offer fusion opportunities on
61209467b48Spatrick         // this level.
61309467b48Spatrick         if (LV.size() == 1)
61409467b48Spatrick           continue;
61509467b48Spatrick #ifndef NDEBUG
61609467b48Spatrick         if (VerboseFusionDebugging) {
61709467b48Spatrick           LLVM_DEBUG({
61809467b48Spatrick             dbgs() << "  Visit loop set (#" << LV.size() << "):\n";
61909467b48Spatrick             printLoopVector(LV);
62009467b48Spatrick           });
62109467b48Spatrick         }
62209467b48Spatrick #endif
62309467b48Spatrick 
62409467b48Spatrick         collectFusionCandidates(LV);
62509467b48Spatrick         Changed |= fuseCandidates();
62609467b48Spatrick       }
62709467b48Spatrick 
62809467b48Spatrick       // Finished analyzing candidates at this level.
62909467b48Spatrick       // Descend to the next level and clear all of the candidates currently
63009467b48Spatrick       // collected. Note that it will not be possible to fuse any of the
63109467b48Spatrick       // existing candidates with new candidates because the new candidates will
63209467b48Spatrick       // be at a different nest level and thus not be control flow equivalent
63309467b48Spatrick       // with all of the candidates collected so far.
63409467b48Spatrick       LLVM_DEBUG(dbgs() << "Descend one level!\n");
63509467b48Spatrick       LDT.descend();
63609467b48Spatrick       FusionCandidates.clear();
63709467b48Spatrick     }
63809467b48Spatrick 
63909467b48Spatrick     if (Changed)
64009467b48Spatrick       LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
64109467b48Spatrick 
64209467b48Spatrick #ifndef NDEBUG
64309467b48Spatrick     assert(DT.verify());
64409467b48Spatrick     assert(PDT.verify());
64509467b48Spatrick     LI.verify(DT);
64609467b48Spatrick     SE.verify();
64709467b48Spatrick #endif
64809467b48Spatrick 
64909467b48Spatrick     LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
65009467b48Spatrick     return Changed;
65109467b48Spatrick   }
65209467b48Spatrick 
65309467b48Spatrick private:
65409467b48Spatrick   /// Determine if two fusion candidates are control flow equivalent.
65509467b48Spatrick   ///
65609467b48Spatrick   /// Two fusion candidates are control flow equivalent if when one executes,
65709467b48Spatrick   /// the other is guaranteed to execute. This is determined using dominators
65809467b48Spatrick   /// and post-dominators: if A dominates B and B post-dominates A then A and B
65909467b48Spatrick   /// are control-flow equivalent.
isControlFlowEquivalent__anon227ba0d70111::LoopFuser66009467b48Spatrick   bool isControlFlowEquivalent(const FusionCandidate &FC0,
66109467b48Spatrick                                const FusionCandidate &FC1) const {
66209467b48Spatrick     assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
66309467b48Spatrick 
66409467b48Spatrick     return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
66509467b48Spatrick                                      DT, PDT);
66609467b48Spatrick   }
66709467b48Spatrick 
66809467b48Spatrick   /// Iterate over all loops in the given loop set and identify the loops that
66909467b48Spatrick   /// are eligible for fusion. Place all eligible fusion candidates into Control
67009467b48Spatrick   /// Flow Equivalent sets, sorted by dominance.
collectFusionCandidates__anon227ba0d70111::LoopFuser67109467b48Spatrick   void collectFusionCandidates(const LoopVector &LV) {
67209467b48Spatrick     for (Loop *L : LV) {
67373471bf0Spatrick       TTI::PeelingPreferences PP =
674*d415bd75Srobert           gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt);
675*d415bd75Srobert       FusionCandidate CurrCand(L, DT, &PDT, ORE, PP);
67609467b48Spatrick       if (!CurrCand.isEligibleForFusion(SE))
67709467b48Spatrick         continue;
67809467b48Spatrick 
67909467b48Spatrick       // Go through each list in FusionCandidates and determine if L is control
68009467b48Spatrick       // flow equivalent with the first loop in that list. If it is, append LV.
68109467b48Spatrick       // If not, go to the next list.
68209467b48Spatrick       // If no suitable list is found, start another list and add it to
68309467b48Spatrick       // FusionCandidates.
68409467b48Spatrick       bool FoundSet = false;
68509467b48Spatrick 
68609467b48Spatrick       for (auto &CurrCandSet : FusionCandidates) {
68709467b48Spatrick         if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
68809467b48Spatrick           CurrCandSet.insert(CurrCand);
68909467b48Spatrick           FoundSet = true;
69009467b48Spatrick #ifndef NDEBUG
69109467b48Spatrick           if (VerboseFusionDebugging)
69209467b48Spatrick             LLVM_DEBUG(dbgs() << "Adding " << CurrCand
69309467b48Spatrick                               << " to existing candidate set\n");
69409467b48Spatrick #endif
69509467b48Spatrick           break;
69609467b48Spatrick         }
69709467b48Spatrick       }
69809467b48Spatrick       if (!FoundSet) {
69909467b48Spatrick         // No set was found. Create a new set and add to FusionCandidates
70009467b48Spatrick #ifndef NDEBUG
70109467b48Spatrick         if (VerboseFusionDebugging)
70209467b48Spatrick           LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
70309467b48Spatrick #endif
70409467b48Spatrick         FusionCandidateSet NewCandSet;
70509467b48Spatrick         NewCandSet.insert(CurrCand);
70609467b48Spatrick         FusionCandidates.push_back(NewCandSet);
70709467b48Spatrick       }
70809467b48Spatrick       NumFusionCandidates++;
70909467b48Spatrick     }
71009467b48Spatrick   }
71109467b48Spatrick 
71209467b48Spatrick   /// Determine if it is beneficial to fuse two loops.
71309467b48Spatrick   ///
71409467b48Spatrick   /// For now, this method simply returns true because we want to fuse as much
71509467b48Spatrick   /// as possible (primarily to test the pass). This method will evolve, over
71609467b48Spatrick   /// time, to add heuristics for profitability of fusion.
isBeneficialFusion__anon227ba0d70111::LoopFuser71709467b48Spatrick   bool isBeneficialFusion(const FusionCandidate &FC0,
71809467b48Spatrick                           const FusionCandidate &FC1) {
71909467b48Spatrick     return true;
72009467b48Spatrick   }
72109467b48Spatrick 
72209467b48Spatrick   /// Determine if two fusion candidates have the same trip count (i.e., they
72309467b48Spatrick   /// execute the same number of iterations).
72409467b48Spatrick   ///
72573471bf0Spatrick   /// This function will return a pair of values. The first is a boolean,
72673471bf0Spatrick   /// stating whether or not the two candidates are known at compile time to
72773471bf0Spatrick   /// have the same TripCount. The second is the difference in the two
72873471bf0Spatrick   /// TripCounts. This information can be used later to determine whether or not
729*d415bd75Srobert   /// peeling can be performed on either one of the candidates.
730*d415bd75Srobert   std::pair<bool, std::optional<unsigned>>
haveIdenticalTripCounts__anon227ba0d70111::LoopFuser73173471bf0Spatrick   haveIdenticalTripCounts(const FusionCandidate &FC0,
73209467b48Spatrick                           const FusionCandidate &FC1) const {
73309467b48Spatrick     const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
73409467b48Spatrick     if (isa<SCEVCouldNotCompute>(TripCount0)) {
73509467b48Spatrick       UncomputableTripCount++;
73609467b48Spatrick       LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
737*d415bd75Srobert       return {false, std::nullopt};
73809467b48Spatrick     }
73909467b48Spatrick 
74009467b48Spatrick     const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
74109467b48Spatrick     if (isa<SCEVCouldNotCompute>(TripCount1)) {
74209467b48Spatrick       UncomputableTripCount++;
74309467b48Spatrick       LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
744*d415bd75Srobert       return {false, std::nullopt};
74509467b48Spatrick     }
74673471bf0Spatrick 
74709467b48Spatrick     LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
74809467b48Spatrick                       << *TripCount1 << " are "
74909467b48Spatrick                       << (TripCount0 == TripCount1 ? "identical" : "different")
75009467b48Spatrick                       << "\n");
75109467b48Spatrick 
75273471bf0Spatrick     if (TripCount0 == TripCount1)
75373471bf0Spatrick       return {true, 0};
75473471bf0Spatrick 
75573471bf0Spatrick     LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
75673471bf0Spatrick                          "determining the difference between trip counts\n");
75773471bf0Spatrick 
75873471bf0Spatrick     // Currently only considering loops with a single exit point
75973471bf0Spatrick     // and a non-constant trip count.
76073471bf0Spatrick     const unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
76173471bf0Spatrick     const unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
76273471bf0Spatrick 
76373471bf0Spatrick     // If any of the tripcounts are zero that means that loop(s) do not have
76473471bf0Spatrick     // a single exit or a constant tripcount.
76573471bf0Spatrick     if (TC0 == 0 || TC1 == 0) {
76673471bf0Spatrick       LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
76773471bf0Spatrick                            "have a constant number of iterations. Peeling "
76873471bf0Spatrick                            "is not benefical\n");
769*d415bd75Srobert       return {false, std::nullopt};
77073471bf0Spatrick     }
77173471bf0Spatrick 
772*d415bd75Srobert     std::optional<unsigned> Difference;
77373471bf0Spatrick     int Diff = TC0 - TC1;
77473471bf0Spatrick 
77573471bf0Spatrick     if (Diff > 0)
77673471bf0Spatrick       Difference = Diff;
77773471bf0Spatrick     else {
77873471bf0Spatrick       LLVM_DEBUG(
77973471bf0Spatrick           dbgs() << "Difference is less than 0. FC1 (second loop) has more "
78073471bf0Spatrick                     "iterations than the first one. Currently not supported\n");
78173471bf0Spatrick     }
78273471bf0Spatrick 
78373471bf0Spatrick     LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
78473471bf0Spatrick                       << "\n");
78573471bf0Spatrick 
78673471bf0Spatrick     return {false, Difference};
78773471bf0Spatrick   }
78873471bf0Spatrick 
peelFusionCandidate__anon227ba0d70111::LoopFuser78973471bf0Spatrick   void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
79073471bf0Spatrick                            unsigned PeelCount) {
79173471bf0Spatrick     assert(FC0.AbleToPeel && "Should be able to peel loop");
79273471bf0Spatrick 
79373471bf0Spatrick     LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
79473471bf0Spatrick                       << " iterations of the first loop. \n");
79573471bf0Spatrick 
796*d415bd75Srobert     ValueToValueMapTy VMap;
797*d415bd75Srobert     FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true, VMap);
79873471bf0Spatrick     if (FC0.Peeled) {
79973471bf0Spatrick       LLVM_DEBUG(dbgs() << "Done Peeling\n");
80073471bf0Spatrick 
80173471bf0Spatrick #ifndef NDEBUG
80273471bf0Spatrick       auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
80373471bf0Spatrick 
80473471bf0Spatrick       assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
80573471bf0Spatrick              "Loops should have identical trip counts after peeling");
80673471bf0Spatrick #endif
80773471bf0Spatrick 
80873471bf0Spatrick       FC0.PP.PeelCount += PeelCount;
80973471bf0Spatrick 
81073471bf0Spatrick       // Peeling does not update the PDT
81173471bf0Spatrick       PDT.recalculate(*FC0.Preheader->getParent());
81273471bf0Spatrick 
81373471bf0Spatrick       FC0.updateAfterPeeling();
81473471bf0Spatrick 
81573471bf0Spatrick       // In this case the iterations of the loop are constant, so the first
81673471bf0Spatrick       // loop will execute completely (will not jump from one of
81773471bf0Spatrick       // the peeled blocks to the second loop). Here we are updating the
81873471bf0Spatrick       // branch conditions of each of the peeled blocks, such that it will
81973471bf0Spatrick       // branch to its successor which is not the preheader of the second loop
82073471bf0Spatrick       // in the case of unguarded loops, or the succesors of the exit block of
82173471bf0Spatrick       // the first loop otherwise. Doing this update will ensure that the entry
82273471bf0Spatrick       // block of the first loop dominates the entry block of the second loop.
82373471bf0Spatrick       BasicBlock *BB =
82473471bf0Spatrick           FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
82573471bf0Spatrick       if (BB) {
82673471bf0Spatrick         SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
82773471bf0Spatrick         SmallVector<Instruction *, 8> WorkList;
82873471bf0Spatrick         for (BasicBlock *Pred : predecessors(BB)) {
82973471bf0Spatrick           if (Pred != FC0.ExitBlock) {
83073471bf0Spatrick             WorkList.emplace_back(Pred->getTerminator());
83173471bf0Spatrick             TreeUpdates.emplace_back(
83273471bf0Spatrick                 DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));
83373471bf0Spatrick           }
83473471bf0Spatrick         }
83573471bf0Spatrick         // Cannot modify the predecessors inside the above loop as it will cause
83673471bf0Spatrick         // the iterators to be nullptrs, causing memory errors.
83773471bf0Spatrick         for (Instruction *CurrentBranch : WorkList) {
83873471bf0Spatrick           BasicBlock *Succ = CurrentBranch->getSuccessor(0);
83973471bf0Spatrick           if (Succ == BB)
84073471bf0Spatrick             Succ = CurrentBranch->getSuccessor(1);
84173471bf0Spatrick           ReplaceInstWithInst(CurrentBranch, BranchInst::Create(Succ));
84273471bf0Spatrick         }
84373471bf0Spatrick 
84473471bf0Spatrick         DTU.applyUpdates(TreeUpdates);
84573471bf0Spatrick         DTU.flush();
84673471bf0Spatrick       }
84773471bf0Spatrick       LLVM_DEBUG(
84873471bf0Spatrick           dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
84973471bf0Spatrick                  << " iterations from the first loop.\n"
85073471bf0Spatrick                     "Both Loops have the same number of iterations now.\n");
85173471bf0Spatrick     }
85209467b48Spatrick   }
85309467b48Spatrick 
85409467b48Spatrick   /// Walk each set of control flow equivalent fusion candidates and attempt to
85509467b48Spatrick   /// fuse them. This does a single linear traversal of all candidates in the
85609467b48Spatrick   /// set. The conditions for legal fusion are checked at this point. If a pair
85709467b48Spatrick   /// of fusion candidates passes all legality checks, they are fused together
85809467b48Spatrick   /// and a new fusion candidate is created and added to the FusionCandidateSet.
85909467b48Spatrick   /// The original fusion candidates are then removed, as they are no longer
86009467b48Spatrick   /// valid.
fuseCandidates__anon227ba0d70111::LoopFuser86109467b48Spatrick   bool fuseCandidates() {
86209467b48Spatrick     bool Fused = false;
86309467b48Spatrick     LLVM_DEBUG(printFusionCandidates(FusionCandidates));
86409467b48Spatrick     for (auto &CandidateSet : FusionCandidates) {
86509467b48Spatrick       if (CandidateSet.size() < 2)
86609467b48Spatrick         continue;
86709467b48Spatrick 
86809467b48Spatrick       LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
86909467b48Spatrick                         << CandidateSet << "\n");
87009467b48Spatrick 
87109467b48Spatrick       for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
87209467b48Spatrick         assert(!LDT.isRemovedLoop(FC0->L) &&
87309467b48Spatrick                "Should not have removed loops in CandidateSet!");
87409467b48Spatrick         auto FC1 = FC0;
87509467b48Spatrick         for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
87609467b48Spatrick           assert(!LDT.isRemovedLoop(FC1->L) &&
87709467b48Spatrick                  "Should not have removed loops in CandidateSet!");
87809467b48Spatrick 
87909467b48Spatrick           LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
88009467b48Spatrick                      dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
88109467b48Spatrick 
88209467b48Spatrick           FC0->verify();
88309467b48Spatrick           FC1->verify();
88409467b48Spatrick 
88573471bf0Spatrick           // Check if the candidates have identical tripcounts (first value of
88673471bf0Spatrick           // pair), and if not check the difference in the tripcounts between
88773471bf0Spatrick           // the loops (second value of pair). The difference is not equal to
888*d415bd75Srobert           // std::nullopt iff the loops iterate a constant number of times, and
889*d415bd75Srobert           // have a single exit.
890*d415bd75Srobert           std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
89173471bf0Spatrick               haveIdenticalTripCounts(*FC0, *FC1);
89273471bf0Spatrick           bool SameTripCount = IdenticalTripCountRes.first;
893*d415bd75Srobert           std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
89473471bf0Spatrick 
89573471bf0Spatrick           // Here we are checking that FC0 (the first loop) can be peeled, and
89673471bf0Spatrick           // both loops have different tripcounts.
89773471bf0Spatrick           if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
89873471bf0Spatrick             if (*TCDifference > FusionPeelMaxCount) {
89973471bf0Spatrick               LLVM_DEBUG(dbgs()
90073471bf0Spatrick                          << "Difference in loop trip counts: " << *TCDifference
90173471bf0Spatrick                          << " is greater than maximum peel count specificed: "
90273471bf0Spatrick                          << FusionPeelMaxCount << "\n");
90373471bf0Spatrick             } else {
90473471bf0Spatrick               // Dependent on peeling being performed on the first loop, and
90573471bf0Spatrick               // assuming all other conditions for fusion return true.
90673471bf0Spatrick               SameTripCount = true;
90773471bf0Spatrick             }
90873471bf0Spatrick           }
90973471bf0Spatrick 
91073471bf0Spatrick           if (!SameTripCount) {
91109467b48Spatrick             LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
91209467b48Spatrick                                  "counts. Not fusing.\n");
91309467b48Spatrick             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
91409467b48Spatrick                                                        NonEqualTripCount);
91509467b48Spatrick             continue;
91609467b48Spatrick           }
91709467b48Spatrick 
91809467b48Spatrick           if (!isAdjacent(*FC0, *FC1)) {
91909467b48Spatrick             LLVM_DEBUG(dbgs()
92009467b48Spatrick                        << "Fusion candidates are not adjacent. Not fusing.\n");
92109467b48Spatrick             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
92209467b48Spatrick             continue;
92309467b48Spatrick           }
92409467b48Spatrick 
925*d415bd75Srobert           if ((!FC0->GuardBranch && FC1->GuardBranch) ||
926*d415bd75Srobert               (FC0->GuardBranch && !FC1->GuardBranch)) {
927*d415bd75Srobert             LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
928*d415bd75Srobert                                  "another one is not. Not fusing.\n");
92973471bf0Spatrick             reportLoopFusion<OptimizationRemarkMissed>(
93073471bf0Spatrick                 *FC0, *FC1, OnlySecondCandidateIsGuarded);
93173471bf0Spatrick             continue;
93273471bf0Spatrick           }
93373471bf0Spatrick 
93409467b48Spatrick           // Ensure that FC0 and FC1 have identical guards.
93509467b48Spatrick           // If one (or both) are not guarded, this check is not necessary.
93609467b48Spatrick           if (FC0->GuardBranch && FC1->GuardBranch &&
93773471bf0Spatrick               !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
93809467b48Spatrick             LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
93909467b48Spatrick                                  "guards. Not Fusing.\n");
94009467b48Spatrick             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
94109467b48Spatrick                                                        NonIdenticalGuards);
94209467b48Spatrick             continue;
94309467b48Spatrick           }
94409467b48Spatrick 
945097a140dSpatrick           if (FC0->GuardBranch) {
946097a140dSpatrick             assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
947097a140dSpatrick 
948097a140dSpatrick             if (!isSafeToMoveBefore(*FC0->ExitBlock,
949097a140dSpatrick                                     *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
950097a140dSpatrick                                     &PDT, &DI)) {
951097a140dSpatrick               LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
952097a140dSpatrick                                    "instructions in exit block. Not fusing.\n");
95309467b48Spatrick               reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
95409467b48Spatrick                                                          NonEmptyExitBlock);
95509467b48Spatrick               continue;
95609467b48Spatrick             }
95709467b48Spatrick 
958097a140dSpatrick             if (!isSafeToMoveBefore(
959097a140dSpatrick                     *FC1->GuardBranch->getParent(),
960097a140dSpatrick                     *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
961097a140dSpatrick                     &DI)) {
962097a140dSpatrick               LLVM_DEBUG(dbgs()
963097a140dSpatrick                          << "Fusion candidate contains unsafe "
964097a140dSpatrick                             "instructions in guard block. Not fusing.\n");
96509467b48Spatrick               reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
96609467b48Spatrick                                                          NonEmptyGuardBlock);
96709467b48Spatrick               continue;
96809467b48Spatrick             }
969097a140dSpatrick           }
97009467b48Spatrick 
97109467b48Spatrick           // Check the dependencies across the loops and do not fuse if it would
97209467b48Spatrick           // violate them.
97309467b48Spatrick           if (!dependencesAllowFusion(*FC0, *FC1)) {
97409467b48Spatrick             LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
97509467b48Spatrick             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
97609467b48Spatrick                                                        InvalidDependencies);
97709467b48Spatrick             continue;
97809467b48Spatrick           }
97909467b48Spatrick 
980*d415bd75Srobert           // If the second loop has instructions in the pre-header, attempt to
981*d415bd75Srobert           // hoist them up to the first loop's pre-header or sink them into the
982*d415bd75Srobert           // body of the second loop.
983*d415bd75Srobert           SmallVector<Instruction *, 4> SafeToHoist;
984*d415bd75Srobert           SmallVector<Instruction *, 4> SafeToSink;
985*d415bd75Srobert           // At this point, this is the last remaining legality check.
986*d415bd75Srobert           // Which means if we can make this pre-header empty, we can fuse
987*d415bd75Srobert           // these loops
988*d415bd75Srobert           if (!isEmptyPreheader(*FC1)) {
989*d415bd75Srobert             LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
990*d415bd75Srobert                                  "preheader.\n");
991*d415bd75Srobert 
992*d415bd75Srobert             // If it is not safe to hoist/sink all instructions in the
993*d415bd75Srobert             // pre-header, we cannot fuse these loops.
994*d415bd75Srobert             if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
995*d415bd75Srobert                                               SafeToSink)) {
996*d415bd75Srobert               LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
997*d415bd75Srobert                                    "Fusion Candidate Pre-header.\n"
998*d415bd75Srobert                                 << "Not Fusing.\n");
999*d415bd75Srobert               reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
1000*d415bd75Srobert                                                          NonEmptyPreheader);
1001*d415bd75Srobert               continue;
1002*d415bd75Srobert             }
1003*d415bd75Srobert           }
1004*d415bd75Srobert 
100509467b48Spatrick           bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
100609467b48Spatrick           LLVM_DEBUG(dbgs()
100709467b48Spatrick                      << "\tFusion appears to be "
100809467b48Spatrick                      << (BeneficialToFuse ? "" : "un") << "profitable!\n");
100909467b48Spatrick           if (!BeneficialToFuse) {
101009467b48Spatrick             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
101109467b48Spatrick                                                        FusionNotBeneficial);
101209467b48Spatrick             continue;
101309467b48Spatrick           }
101409467b48Spatrick           // All analysis has completed and has determined that fusion is legal
101509467b48Spatrick           // and profitable. At this point, start transforming the code and
101609467b48Spatrick           // perform fusion.
101709467b48Spatrick 
1018*d415bd75Srobert           // Execute the hoist/sink operations on preheader instructions
1019*d415bd75Srobert           movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
1020*d415bd75Srobert 
102109467b48Spatrick           LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
102209467b48Spatrick                             << *FC1 << "\n");
102309467b48Spatrick 
102473471bf0Spatrick           FusionCandidate FC0Copy = *FC0;
102573471bf0Spatrick           // Peel the loop after determining that fusion is legal. The Loops
102673471bf0Spatrick           // will still be safe to fuse after the peeling is performed.
102773471bf0Spatrick           bool Peel = TCDifference && *TCDifference > 0;
102873471bf0Spatrick           if (Peel)
102973471bf0Spatrick             peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
103073471bf0Spatrick 
103109467b48Spatrick           // Report fusion to the Optimization Remarks.
103209467b48Spatrick           // Note this needs to be done *before* performFusion because
103309467b48Spatrick           // performFusion will change the original loops, making it not
103409467b48Spatrick           // possible to identify them after fusion is complete.
103573471bf0Spatrick           reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
103673471bf0Spatrick                                                FuseCounter);
103709467b48Spatrick 
103873471bf0Spatrick           FusionCandidate FusedCand(
1039*d415bd75Srobert               performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
104073471bf0Spatrick               FC0Copy.PP);
104109467b48Spatrick           FusedCand.verify();
104209467b48Spatrick           assert(FusedCand.isEligibleForFusion(SE) &&
104309467b48Spatrick                  "Fused candidate should be eligible for fusion!");
104409467b48Spatrick 
104509467b48Spatrick           // Notify the loop-depth-tree that these loops are not valid objects
104609467b48Spatrick           LDT.removeLoop(FC1->L);
104709467b48Spatrick 
104809467b48Spatrick           CandidateSet.erase(FC0);
104909467b48Spatrick           CandidateSet.erase(FC1);
105009467b48Spatrick 
105109467b48Spatrick           auto InsertPos = CandidateSet.insert(FusedCand);
105209467b48Spatrick 
105309467b48Spatrick           assert(InsertPos.second &&
105409467b48Spatrick                  "Unable to insert TargetCandidate in CandidateSet!");
105509467b48Spatrick 
105609467b48Spatrick           // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations
105709467b48Spatrick           // of the FC1 loop will attempt to fuse the new (fused) loop with the
105809467b48Spatrick           // remaining candidates in the current candidate set.
105909467b48Spatrick           FC0 = FC1 = InsertPos.first;
106009467b48Spatrick 
106109467b48Spatrick           LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
106209467b48Spatrick                             << "\n");
106309467b48Spatrick 
106409467b48Spatrick           Fused = true;
106509467b48Spatrick         }
106609467b48Spatrick       }
106709467b48Spatrick     }
106809467b48Spatrick     return Fused;
106909467b48Spatrick   }
107009467b48Spatrick 
1071*d415bd75Srobert   // Returns true if the instruction \p I can be hoisted to the end of the
1072*d415bd75Srobert   // preheader of \p FC0. \p SafeToHoist contains the instructions that are
1073*d415bd75Srobert   // known to be safe to hoist. The instructions encountered that cannot be
1074*d415bd75Srobert   // hoisted are in \p NotHoisting.
1075*d415bd75Srobert   // TODO: Move functionality into CodeMoverUtils
canHoistInst__anon227ba0d70111::LoopFuser1076*d415bd75Srobert   bool canHoistInst(Instruction &I,
1077*d415bd75Srobert                     const SmallVector<Instruction *, 4> &SafeToHoist,
1078*d415bd75Srobert                     const SmallVector<Instruction *, 4> &NotHoisting,
1079*d415bd75Srobert                     const FusionCandidate &FC0) const {
1080*d415bd75Srobert     const BasicBlock *FC0PreheaderTarget = FC0.Preheader->getSingleSuccessor();
1081*d415bd75Srobert     assert(FC0PreheaderTarget &&
1082*d415bd75Srobert            "Expected single successor for loop preheader.");
1083*d415bd75Srobert 
1084*d415bd75Srobert     for (Use &Op : I.operands()) {
1085*d415bd75Srobert       if (auto *OpInst = dyn_cast<Instruction>(Op)) {
1086*d415bd75Srobert         bool OpHoisted = is_contained(SafeToHoist, OpInst);
1087*d415bd75Srobert         // Check if we have already decided to hoist this operand. In this
1088*d415bd75Srobert         // case, it does not dominate FC0 *yet*, but will after we hoist it.
1089*d415bd75Srobert         if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
1090*d415bd75Srobert           return false;
1091*d415bd75Srobert         }
1092*d415bd75Srobert       }
1093*d415bd75Srobert     }
1094*d415bd75Srobert 
1095*d415bd75Srobert     // PHIs in FC1's header only have FC0 blocks as predecessors. PHIs
1096*d415bd75Srobert     // cannot be hoisted and should be sunk to the exit of the fused loop.
1097*d415bd75Srobert     if (isa<PHINode>(I))
1098*d415bd75Srobert       return false;
1099*d415bd75Srobert 
1100*d415bd75Srobert     // If this isn't a memory inst, hoisting is safe
1101*d415bd75Srobert     if (!I.mayReadOrWriteMemory())
1102*d415bd75Srobert       return true;
1103*d415bd75Srobert 
1104*d415bd75Srobert     LLVM_DEBUG(dbgs() << "Checking if this mem inst can be hoisted.\n");
1105*d415bd75Srobert     for (Instruction *NotHoistedInst : NotHoisting) {
1106*d415bd75Srobert       if (auto D = DI.depends(&I, NotHoistedInst, true)) {
1107*d415bd75Srobert         // Dependency is not read-before-write, write-before-read or
1108*d415bd75Srobert         // write-before-write
1109*d415bd75Srobert         if (D->isFlow() || D->isAnti() || D->isOutput()) {
1110*d415bd75Srobert           LLVM_DEBUG(dbgs() << "Inst depends on an instruction in FC1's "
1111*d415bd75Srobert                                "preheader that is not being hoisted.\n");
1112*d415bd75Srobert           return false;
1113*d415bd75Srobert         }
1114*d415bd75Srobert       }
1115*d415bd75Srobert     }
1116*d415bd75Srobert 
1117*d415bd75Srobert     for (Instruction *ReadInst : FC0.MemReads) {
1118*d415bd75Srobert       if (auto D = DI.depends(ReadInst, &I, true)) {
1119*d415bd75Srobert         // Dependency is not read-before-write
1120*d415bd75Srobert         if (D->isAnti()) {
1121*d415bd75Srobert           LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC0.\n");
1122*d415bd75Srobert           return false;
1123*d415bd75Srobert         }
1124*d415bd75Srobert       }
1125*d415bd75Srobert     }
1126*d415bd75Srobert 
1127*d415bd75Srobert     for (Instruction *WriteInst : FC0.MemWrites) {
1128*d415bd75Srobert       if (auto D = DI.depends(WriteInst, &I, true)) {
1129*d415bd75Srobert         // Dependency is not write-before-read or write-before-write
1130*d415bd75Srobert         if (D->isFlow() || D->isOutput()) {
1131*d415bd75Srobert           LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC0.\n");
1132*d415bd75Srobert           return false;
1133*d415bd75Srobert         }
1134*d415bd75Srobert       }
1135*d415bd75Srobert     }
1136*d415bd75Srobert     return true;
1137*d415bd75Srobert   }
1138*d415bd75Srobert 
1139*d415bd75Srobert   // Returns true if the instruction \p I can be sunk to the top of the exit
1140*d415bd75Srobert   // block of \p FC1.
1141*d415bd75Srobert   // TODO: Move functionality into CodeMoverUtils
canSinkInst__anon227ba0d70111::LoopFuser1142*d415bd75Srobert   bool canSinkInst(Instruction &I, const FusionCandidate &FC1) const {
1143*d415bd75Srobert     for (User *U : I.users()) {
1144*d415bd75Srobert       if (auto *UI{dyn_cast<Instruction>(U)}) {
1145*d415bd75Srobert         // Cannot sink if user in loop
1146*d415bd75Srobert         // If FC1 has phi users of this value, we cannot sink it into FC1.
1147*d415bd75Srobert         if (FC1.L->contains(UI)) {
1148*d415bd75Srobert           // Cannot hoist or sink this instruction. No hoisting/sinking
1149*d415bd75Srobert           // should take place, loops should not fuse
1150*d415bd75Srobert           return false;
1151*d415bd75Srobert         }
1152*d415bd75Srobert       }
1153*d415bd75Srobert     }
1154*d415bd75Srobert 
1155*d415bd75Srobert     // If this isn't a memory inst, sinking is safe
1156*d415bd75Srobert     if (!I.mayReadOrWriteMemory())
1157*d415bd75Srobert       return true;
1158*d415bd75Srobert 
1159*d415bd75Srobert     for (Instruction *ReadInst : FC1.MemReads) {
1160*d415bd75Srobert       if (auto D = DI.depends(&I, ReadInst, true)) {
1161*d415bd75Srobert         // Dependency is not write-before-read
1162*d415bd75Srobert         if (D->isFlow()) {
1163*d415bd75Srobert           LLVM_DEBUG(dbgs() << "Inst depends on a read instruction in FC1.\n");
1164*d415bd75Srobert           return false;
1165*d415bd75Srobert         }
1166*d415bd75Srobert       }
1167*d415bd75Srobert     }
1168*d415bd75Srobert 
1169*d415bd75Srobert     for (Instruction *WriteInst : FC1.MemWrites) {
1170*d415bd75Srobert       if (auto D = DI.depends(&I, WriteInst, true)) {
1171*d415bd75Srobert         // Dependency is not write-before-write or read-before-write
1172*d415bd75Srobert         if (D->isOutput() || D->isAnti()) {
1173*d415bd75Srobert           LLVM_DEBUG(dbgs() << "Inst depends on a write instruction in FC1.\n");
1174*d415bd75Srobert           return false;
1175*d415bd75Srobert         }
1176*d415bd75Srobert       }
1177*d415bd75Srobert     }
1178*d415bd75Srobert 
1179*d415bd75Srobert     return true;
1180*d415bd75Srobert   }
1181*d415bd75Srobert 
1182*d415bd75Srobert   /// Collect instructions in the \p FC1 Preheader that can be hoisted
1183*d415bd75Srobert   /// to the \p FC0 Preheader or sunk into the \p FC1 Body
collectMovablePreheaderInsts__anon227ba0d70111::LoopFuser1184*d415bd75Srobert   bool collectMovablePreheaderInsts(
1185*d415bd75Srobert       const FusionCandidate &FC0, const FusionCandidate &FC1,
1186*d415bd75Srobert       SmallVector<Instruction *, 4> &SafeToHoist,
1187*d415bd75Srobert       SmallVector<Instruction *, 4> &SafeToSink) const {
1188*d415bd75Srobert     BasicBlock *FC1Preheader = FC1.Preheader;
1189*d415bd75Srobert     // Save the instructions that are not being hoisted, so we know not to hoist
1190*d415bd75Srobert     // mem insts that they dominate.
1191*d415bd75Srobert     SmallVector<Instruction *, 4> NotHoisting;
1192*d415bd75Srobert 
1193*d415bd75Srobert     for (Instruction &I : *FC1Preheader) {
1194*d415bd75Srobert       // Can't move a branch
1195*d415bd75Srobert       if (&I == FC1Preheader->getTerminator())
1196*d415bd75Srobert         continue;
1197*d415bd75Srobert       // If the instruction has side-effects, give up.
1198*d415bd75Srobert       // TODO: The case of mayReadFromMemory we can handle but requires
1199*d415bd75Srobert       // additional work with a dependence analysis so for now we give
1200*d415bd75Srobert       // up on memory reads.
1201*d415bd75Srobert       if (I.mayThrow() || !I.willReturn()) {
1202*d415bd75Srobert         LLVM_DEBUG(dbgs() << "Inst: " << I << " may throw or won't return.\n");
1203*d415bd75Srobert         return false;
1204*d415bd75Srobert       }
1205*d415bd75Srobert 
1206*d415bd75Srobert       LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n");
1207*d415bd75Srobert 
1208*d415bd75Srobert       if (I.isAtomic() || I.isVolatile()) {
1209*d415bd75Srobert         LLVM_DEBUG(
1210*d415bd75Srobert             dbgs() << "\tInstruction is volatile or atomic. Cannot move it.\n");
1211*d415bd75Srobert         return false;
1212*d415bd75Srobert       }
1213*d415bd75Srobert 
1214*d415bd75Srobert       if (canHoistInst(I, SafeToHoist, NotHoisting, FC0)) {
1215*d415bd75Srobert         SafeToHoist.push_back(&I);
1216*d415bd75Srobert         LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n");
1217*d415bd75Srobert       } else {
1218*d415bd75Srobert         LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
1219*d415bd75Srobert         NotHoisting.push_back(&I);
1220*d415bd75Srobert 
1221*d415bd75Srobert         if (canSinkInst(I, FC1)) {
1222*d415bd75Srobert           SafeToSink.push_back(&I);
1223*d415bd75Srobert           LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
1224*d415bd75Srobert         } else {
1225*d415bd75Srobert           LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
1226*d415bd75Srobert           return false;
1227*d415bd75Srobert         }
1228*d415bd75Srobert       }
1229*d415bd75Srobert     }
1230*d415bd75Srobert     LLVM_DEBUG(
1231*d415bd75Srobert         dbgs() << "All preheader instructions could be sunk or hoisted!\n");
1232*d415bd75Srobert     return true;
1233*d415bd75Srobert   }
1234*d415bd75Srobert 
123509467b48Spatrick   /// Rewrite all additive recurrences in a SCEV to use a new loop.
123609467b48Spatrick   class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
123709467b48Spatrick   public:
AddRecLoopReplacer(ScalarEvolution & SE,const Loop & OldL,const Loop & NewL,bool UseMax=true)123809467b48Spatrick     AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
123909467b48Spatrick                        bool UseMax = true)
124009467b48Spatrick         : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),
124109467b48Spatrick           NewL(NewL) {}
124209467b48Spatrick 
visitAddRecExpr(const SCEVAddRecExpr * Expr)124309467b48Spatrick     const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
124409467b48Spatrick       const Loop *ExprL = Expr->getLoop();
124509467b48Spatrick       SmallVector<const SCEV *, 2> Operands;
124609467b48Spatrick       if (ExprL == &OldL) {
1247*d415bd75Srobert         append_range(Operands, Expr->operands());
124809467b48Spatrick         return SE.getAddRecExpr(Operands, &NewL, Expr->getNoWrapFlags());
124909467b48Spatrick       }
125009467b48Spatrick 
125109467b48Spatrick       if (OldL.contains(ExprL)) {
125209467b48Spatrick         bool Pos = SE.isKnownPositive(Expr->getStepRecurrence(SE));
125309467b48Spatrick         if (!UseMax || !Pos || !Expr->isAffine()) {
125409467b48Spatrick           Valid = false;
125509467b48Spatrick           return Expr;
125609467b48Spatrick         }
125709467b48Spatrick         return visit(Expr->getStart());
125809467b48Spatrick       }
125909467b48Spatrick 
126009467b48Spatrick       for (const SCEV *Op : Expr->operands())
126109467b48Spatrick         Operands.push_back(visit(Op));
126209467b48Spatrick       return SE.getAddRecExpr(Operands, ExprL, Expr->getNoWrapFlags());
126309467b48Spatrick     }
126409467b48Spatrick 
wasValidSCEV() const126509467b48Spatrick     bool wasValidSCEV() const { return Valid; }
126609467b48Spatrick 
126709467b48Spatrick   private:
126809467b48Spatrick     bool Valid, UseMax;
126909467b48Spatrick     const Loop &OldL, &NewL;
127009467b48Spatrick   };
127109467b48Spatrick 
127209467b48Spatrick   /// Return false if the access functions of \p I0 and \p I1 could cause
127309467b48Spatrick   /// a negative dependence.
accessDiffIsPositive__anon227ba0d70111::LoopFuser127409467b48Spatrick   bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
127509467b48Spatrick                             Instruction &I1, bool EqualIsInvalid) {
127609467b48Spatrick     Value *Ptr0 = getLoadStorePointerOperand(&I0);
127709467b48Spatrick     Value *Ptr1 = getLoadStorePointerOperand(&I1);
127809467b48Spatrick     if (!Ptr0 || !Ptr1)
127909467b48Spatrick       return false;
128009467b48Spatrick 
128109467b48Spatrick     const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);
128209467b48Spatrick     const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);
128309467b48Spatrick #ifndef NDEBUG
128409467b48Spatrick     if (VerboseFusionDebugging)
128509467b48Spatrick       LLVM_DEBUG(dbgs() << "    Access function check: " << *SCEVPtr0 << " vs "
128609467b48Spatrick                         << *SCEVPtr1 << "\n");
128709467b48Spatrick #endif
128809467b48Spatrick     AddRecLoopReplacer Rewriter(SE, L0, L1);
128909467b48Spatrick     SCEVPtr0 = Rewriter.visit(SCEVPtr0);
129009467b48Spatrick #ifndef NDEBUG
129109467b48Spatrick     if (VerboseFusionDebugging)
129209467b48Spatrick       LLVM_DEBUG(dbgs() << "    Access function after rewrite: " << *SCEVPtr0
129309467b48Spatrick                         << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
129409467b48Spatrick #endif
129509467b48Spatrick     if (!Rewriter.wasValidSCEV())
129609467b48Spatrick       return false;
129709467b48Spatrick 
129809467b48Spatrick     // TODO: isKnownPredicate doesnt work well when one SCEV is loop carried (by
129909467b48Spatrick     //       L0) and the other is not. We could check if it is monotone and test
130009467b48Spatrick     //       the beginning and end value instead.
130109467b48Spatrick 
130209467b48Spatrick     BasicBlock *L0Header = L0.getHeader();
130309467b48Spatrick     auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
130409467b48Spatrick       const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
130509467b48Spatrick       if (!AddRec)
130609467b48Spatrick         return false;
130709467b48Spatrick       return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&
130809467b48Spatrick              !DT.dominates(AddRec->getLoop()->getHeader(), L0Header);
130909467b48Spatrick     };
131009467b48Spatrick     if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))
131109467b48Spatrick       return false;
131209467b48Spatrick 
131309467b48Spatrick     ICmpInst::Predicate Pred =
131409467b48Spatrick         EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
131509467b48Spatrick     bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);
131609467b48Spatrick #ifndef NDEBUG
131709467b48Spatrick     if (VerboseFusionDebugging)
131809467b48Spatrick       LLVM_DEBUG(dbgs() << "    Relation: " << *SCEVPtr0
131909467b48Spatrick                         << (IsAlwaysGE ? "  >=  " : "  may <  ") << *SCEVPtr1
132009467b48Spatrick                         << "\n");
132109467b48Spatrick #endif
132209467b48Spatrick     return IsAlwaysGE;
132309467b48Spatrick   }
132409467b48Spatrick 
132509467b48Spatrick   /// Return true if the dependences between @p I0 (in @p L0) and @p I1 (in
132609467b48Spatrick   /// @p L1) allow loop fusion of @p L0 and @p L1. The dependence analyses
132709467b48Spatrick   /// specified by @p DepChoice are used to determine this.
dependencesAllowFusion__anon227ba0d70111::LoopFuser132809467b48Spatrick   bool dependencesAllowFusion(const FusionCandidate &FC0,
132909467b48Spatrick                               const FusionCandidate &FC1, Instruction &I0,
133009467b48Spatrick                               Instruction &I1, bool AnyDep,
133109467b48Spatrick                               FusionDependenceAnalysisChoice DepChoice) {
133209467b48Spatrick #ifndef NDEBUG
133309467b48Spatrick     if (VerboseFusionDebugging) {
133409467b48Spatrick       LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
133509467b48Spatrick                         << DepChoice << "\n");
133609467b48Spatrick     }
133709467b48Spatrick #endif
133809467b48Spatrick     switch (DepChoice) {
133909467b48Spatrick     case FUSION_DEPENDENCE_ANALYSIS_SCEV:
134009467b48Spatrick       return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);
134109467b48Spatrick     case FUSION_DEPENDENCE_ANALYSIS_DA: {
134209467b48Spatrick       auto DepResult = DI.depends(&I0, &I1, true);
134309467b48Spatrick       if (!DepResult)
134409467b48Spatrick         return true;
134509467b48Spatrick #ifndef NDEBUG
134609467b48Spatrick       if (VerboseFusionDebugging) {
134709467b48Spatrick         LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs());
134809467b48Spatrick                    dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
134909467b48Spatrick                           << (DepResult->isOrdered() ? "true" : "false")
135009467b48Spatrick                           << "]\n");
135109467b48Spatrick         LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
135209467b48Spatrick                           << "\n");
135309467b48Spatrick       }
135409467b48Spatrick #endif
135509467b48Spatrick 
135609467b48Spatrick       if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
135709467b48Spatrick         LLVM_DEBUG(
135809467b48Spatrick             dbgs() << "TODO: Implement pred/succ dependence handling!\n");
135909467b48Spatrick 
136009467b48Spatrick       // TODO: Can we actually use the dependence info analysis here?
136109467b48Spatrick       return false;
136209467b48Spatrick     }
136309467b48Spatrick 
136409467b48Spatrick     case FUSION_DEPENDENCE_ANALYSIS_ALL:
136509467b48Spatrick       return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
136609467b48Spatrick                                     FUSION_DEPENDENCE_ANALYSIS_SCEV) ||
136709467b48Spatrick              dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
136809467b48Spatrick                                     FUSION_DEPENDENCE_ANALYSIS_DA);
136909467b48Spatrick     }
137009467b48Spatrick 
137109467b48Spatrick     llvm_unreachable("Unknown fusion dependence analysis choice!");
137209467b48Spatrick   }
137309467b48Spatrick 
137409467b48Spatrick   /// Perform a dependence check and return if @p FC0 and @p FC1 can be fused.
dependencesAllowFusion__anon227ba0d70111::LoopFuser137509467b48Spatrick   bool dependencesAllowFusion(const FusionCandidate &FC0,
137609467b48Spatrick                               const FusionCandidate &FC1) {
137709467b48Spatrick     LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
137809467b48Spatrick                       << "\n");
137909467b48Spatrick     assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
138009467b48Spatrick     assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()));
138109467b48Spatrick 
138209467b48Spatrick     for (Instruction *WriteL0 : FC0.MemWrites) {
138309467b48Spatrick       for (Instruction *WriteL1 : FC1.MemWrites)
138409467b48Spatrick         if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
138509467b48Spatrick                                     /* AnyDep */ false,
138609467b48Spatrick                                     FusionDependenceAnalysis)) {
138709467b48Spatrick           InvalidDependencies++;
138809467b48Spatrick           return false;
138909467b48Spatrick         }
139009467b48Spatrick       for (Instruction *ReadL1 : FC1.MemReads)
139109467b48Spatrick         if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
139209467b48Spatrick                                     /* AnyDep */ false,
139309467b48Spatrick                                     FusionDependenceAnalysis)) {
139409467b48Spatrick           InvalidDependencies++;
139509467b48Spatrick           return false;
139609467b48Spatrick         }
139709467b48Spatrick     }
139809467b48Spatrick 
139909467b48Spatrick     for (Instruction *WriteL1 : FC1.MemWrites) {
140009467b48Spatrick       for (Instruction *WriteL0 : FC0.MemWrites)
140109467b48Spatrick         if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
140209467b48Spatrick                                     /* AnyDep */ false,
140309467b48Spatrick                                     FusionDependenceAnalysis)) {
140409467b48Spatrick           InvalidDependencies++;
140509467b48Spatrick           return false;
140609467b48Spatrick         }
140709467b48Spatrick       for (Instruction *ReadL0 : FC0.MemReads)
140809467b48Spatrick         if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
140909467b48Spatrick                                     /* AnyDep */ false,
141009467b48Spatrick                                     FusionDependenceAnalysis)) {
141109467b48Spatrick           InvalidDependencies++;
141209467b48Spatrick           return false;
141309467b48Spatrick         }
141409467b48Spatrick     }
141509467b48Spatrick 
141609467b48Spatrick     // Walk through all uses in FC1. For each use, find the reaching def. If the
141709467b48Spatrick     // def is located in FC0 then it is is not safe to fuse.
141809467b48Spatrick     for (BasicBlock *BB : FC1.L->blocks())
141909467b48Spatrick       for (Instruction &I : *BB)
142009467b48Spatrick         for (auto &Op : I.operands())
142109467b48Spatrick           if (Instruction *Def = dyn_cast<Instruction>(Op))
142209467b48Spatrick             if (FC0.L->contains(Def->getParent())) {
142309467b48Spatrick               InvalidDependencies++;
142409467b48Spatrick               return false;
142509467b48Spatrick             }
142609467b48Spatrick 
142709467b48Spatrick     return true;
142809467b48Spatrick   }
142909467b48Spatrick 
143009467b48Spatrick   /// Determine if two fusion candidates are adjacent in the CFG.
143109467b48Spatrick   ///
143209467b48Spatrick   /// This method will determine if there are additional basic blocks in the CFG
143309467b48Spatrick   /// between the exit of \p FC0 and the entry of \p FC1.
143409467b48Spatrick   /// If the two candidates are guarded loops, then it checks whether the
143509467b48Spatrick   /// non-loop successor of the \p FC0 guard branch is the entry block of \p
143609467b48Spatrick   /// FC1. If not, then the loops are not adjacent. If the two candidates are
143709467b48Spatrick   /// not guarded loops, then it checks whether the exit block of \p FC0 is the
143809467b48Spatrick   /// preheader of \p FC1.
isAdjacent__anon227ba0d70111::LoopFuser143909467b48Spatrick   bool isAdjacent(const FusionCandidate &FC0,
144009467b48Spatrick                   const FusionCandidate &FC1) const {
144109467b48Spatrick     // If the successor of the guard branch is FC1, then the loops are adjacent
144209467b48Spatrick     if (FC0.GuardBranch)
144309467b48Spatrick       return FC0.getNonLoopBlock() == FC1.getEntryBlock();
144409467b48Spatrick     else
144509467b48Spatrick       return FC0.ExitBlock == FC1.getEntryBlock();
144609467b48Spatrick   }
144709467b48Spatrick 
isEmptyPreheader__anon227ba0d70111::LoopFuser1448*d415bd75Srobert   bool isEmptyPreheader(const FusionCandidate &FC) const {
1449*d415bd75Srobert     return FC.Preheader->size() == 1;
1450*d415bd75Srobert   }
1451*d415bd75Srobert 
1452*d415bd75Srobert   /// Hoist \p FC1 Preheader instructions to \p FC0 Preheader
1453*d415bd75Srobert   /// and sink others into the body of \p FC1.
movePreheaderInsts__anon227ba0d70111::LoopFuser1454*d415bd75Srobert   void movePreheaderInsts(const FusionCandidate &FC0,
1455*d415bd75Srobert                           const FusionCandidate &FC1,
1456*d415bd75Srobert                           SmallVector<Instruction *, 4> &HoistInsts,
1457*d415bd75Srobert                           SmallVector<Instruction *, 4> &SinkInsts) const {
1458*d415bd75Srobert     // All preheader instructions except the branch must be hoisted or sunk
1459*d415bd75Srobert     assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&
1460*d415bd75Srobert            "Attempting to sink and hoist preheader instructions, but not all "
1461*d415bd75Srobert            "the preheader instructions are accounted for.");
1462*d415bd75Srobert 
1463*d415bd75Srobert     NumHoistedInsts += HoistInsts.size();
1464*d415bd75Srobert     NumSunkInsts += SinkInsts.size();
1465*d415bd75Srobert 
1466*d415bd75Srobert     LLVM_DEBUG(if (VerboseFusionDebugging) {
1467*d415bd75Srobert       if (!HoistInsts.empty())
1468*d415bd75Srobert         dbgs() << "Hoisting: \n";
1469*d415bd75Srobert       for (Instruction *I : HoistInsts)
1470*d415bd75Srobert         dbgs() << *I << "\n";
1471*d415bd75Srobert       if (!SinkInsts.empty())
1472*d415bd75Srobert         dbgs() << "Sinking: \n";
1473*d415bd75Srobert       for (Instruction *I : SinkInsts)
1474*d415bd75Srobert         dbgs() << *I << "\n";
1475*d415bd75Srobert     });
1476*d415bd75Srobert 
1477*d415bd75Srobert     for (Instruction *I : HoistInsts) {
1478*d415bd75Srobert       assert(I->getParent() == FC1.Preheader);
1479*d415bd75Srobert       I->moveBefore(FC0.Preheader->getTerminator());
1480*d415bd75Srobert     }
1481*d415bd75Srobert     // insert instructions in reverse order to maintain dominance relationship
1482*d415bd75Srobert     for (Instruction *I : reverse(SinkInsts)) {
1483*d415bd75Srobert       assert(I->getParent() == FC1.Preheader);
1484*d415bd75Srobert       I->moveBefore(&*FC1.ExitBlock->getFirstInsertionPt());
1485*d415bd75Srobert     }
1486*d415bd75Srobert   }
1487*d415bd75Srobert 
148809467b48Spatrick   /// Determine if two fusion candidates have identical guards
148909467b48Spatrick   ///
149009467b48Spatrick   /// This method will determine if two fusion candidates have the same guards.
149109467b48Spatrick   /// The guards are considered the same if:
149209467b48Spatrick   ///   1. The instructions to compute the condition used in the compare are
149309467b48Spatrick   ///      identical.
149409467b48Spatrick   ///   2. The successors of the guard have the same flow into/around the loop.
149509467b48Spatrick   /// If the compare instructions are identical, then the first successor of the
149609467b48Spatrick   /// guard must go to the same place (either the preheader of the loop or the
149709467b48Spatrick   /// NonLoopBlock). In other words, the the first successor of both loops must
149809467b48Spatrick   /// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
149909467b48Spatrick   /// the NonLoopBlock). The same must be true for the second successor.
haveIdenticalGuards__anon227ba0d70111::LoopFuser150009467b48Spatrick   bool haveIdenticalGuards(const FusionCandidate &FC0,
150109467b48Spatrick                            const FusionCandidate &FC1) const {
150209467b48Spatrick     assert(FC0.GuardBranch && FC1.GuardBranch &&
150309467b48Spatrick            "Expecting FC0 and FC1 to be guarded loops.");
150409467b48Spatrick 
150509467b48Spatrick     if (auto FC0CmpInst =
150609467b48Spatrick             dyn_cast<Instruction>(FC0.GuardBranch->getCondition()))
150709467b48Spatrick       if (auto FC1CmpInst =
150809467b48Spatrick               dyn_cast<Instruction>(FC1.GuardBranch->getCondition()))
150909467b48Spatrick         if (!FC0CmpInst->isIdenticalTo(FC1CmpInst))
151009467b48Spatrick           return false;
151109467b48Spatrick 
151209467b48Spatrick     // The compare instructions are identical.
151309467b48Spatrick     // Now make sure the successor of the guards have the same flow into/around
151409467b48Spatrick     // the loop
151509467b48Spatrick     if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader)
151609467b48Spatrick       return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader);
151709467b48Spatrick     else
151809467b48Spatrick       return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
151909467b48Spatrick   }
152009467b48Spatrick 
152173471bf0Spatrick   /// Modify the latch branch of FC to be unconditional since successors of the
152273471bf0Spatrick   /// branch are the same.
simplifyLatchBranch__anon227ba0d70111::LoopFuser152309467b48Spatrick   void simplifyLatchBranch(const FusionCandidate &FC) const {
152409467b48Spatrick     BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
152509467b48Spatrick     if (FCLatchBranch) {
152609467b48Spatrick       assert(FCLatchBranch->isConditional() &&
152709467b48Spatrick              FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
152809467b48Spatrick              "Expecting the two successors of FCLatchBranch to be the same");
152973471bf0Spatrick       BranchInst *NewBranch =
153073471bf0Spatrick           BranchInst::Create(FCLatchBranch->getSuccessor(0));
153173471bf0Spatrick       ReplaceInstWithInst(FCLatchBranch, NewBranch);
153209467b48Spatrick     }
153309467b48Spatrick   }
153409467b48Spatrick 
153509467b48Spatrick   /// Move instructions from FC0.Latch to FC1.Latch. If FC0.Latch has an unique
153609467b48Spatrick   /// successor, then merge FC0.Latch with its unique successor.
mergeLatch__anon227ba0d70111::LoopFuser153709467b48Spatrick   void mergeLatch(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1538097a140dSpatrick     moveInstructionsToTheBeginning(*FC0.Latch, *FC1.Latch, DT, PDT, DI);
153909467b48Spatrick     if (BasicBlock *Succ = FC0.Latch->getUniqueSuccessor()) {
154009467b48Spatrick       MergeBlockIntoPredecessor(Succ, &DTU, &LI);
154109467b48Spatrick       DTU.flush();
154209467b48Spatrick     }
154309467b48Spatrick   }
154409467b48Spatrick 
154509467b48Spatrick   /// Fuse two fusion candidates, creating a new fused loop.
154609467b48Spatrick   ///
154709467b48Spatrick   /// This method contains the mechanics of fusing two loops, represented by \p
154809467b48Spatrick   /// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1
154909467b48Spatrick   /// postdominates \p FC0 (making them control flow equivalent). It also
155009467b48Spatrick   /// assumes that the other conditions for fusion have been met: adjacent,
155109467b48Spatrick   /// identical trip counts, and no negative distance dependencies exist that
155209467b48Spatrick   /// would prevent fusion. Thus, there is no checking for these conditions in
155309467b48Spatrick   /// this method.
155409467b48Spatrick   ///
155509467b48Spatrick   /// Fusion is performed by rewiring the CFG to update successor blocks of the
155609467b48Spatrick   /// components of tho loop. Specifically, the following changes are done:
155709467b48Spatrick   ///
155809467b48Spatrick   ///   1. The preheader of \p FC1 is removed as it is no longer necessary
155909467b48Spatrick   ///   (because it is currently only a single statement block).
156009467b48Spatrick   ///   2. The latch of \p FC0 is modified to jump to the header of \p FC1.
156109467b48Spatrick   ///   3. The latch of \p FC1 i modified to jump to the header of \p FC0.
156209467b48Spatrick   ///   4. All blocks from \p FC1 are removed from FC1 and added to FC0.
156309467b48Spatrick   ///
156409467b48Spatrick   /// All of these modifications are done with dominator tree updates, thus
156509467b48Spatrick   /// keeping the dominator (and post dominator) information up-to-date.
156609467b48Spatrick   ///
156709467b48Spatrick   /// This can be improved in the future by actually merging blocks during
156809467b48Spatrick   /// fusion. For example, the preheader of \p FC1 can be merged with the
156909467b48Spatrick   /// preheader of \p FC0. This would allow loops with more than a single
157009467b48Spatrick   /// statement in the preheader to be fused. Similarly, the latch blocks of the
157109467b48Spatrick   /// two loops could also be fused into a single block. This will require
157209467b48Spatrick   /// analysis to prove it is safe to move the contents of the block past
157309467b48Spatrick   /// existing code, which currently has not been implemented.
performFusion__anon227ba0d70111::LoopFuser157409467b48Spatrick   Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
157509467b48Spatrick     assert(FC0.isValid() && FC1.isValid() &&
157609467b48Spatrick            "Expecting valid fusion candidates");
157709467b48Spatrick 
157809467b48Spatrick     LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
157909467b48Spatrick                dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
158009467b48Spatrick 
1581097a140dSpatrick     // Move instructions from the preheader of FC1 to the end of the preheader
1582097a140dSpatrick     // of FC0.
1583097a140dSpatrick     moveInstructionsToTheEnd(*FC1.Preheader, *FC0.Preheader, DT, PDT, DI);
1584097a140dSpatrick 
158509467b48Spatrick     // Fusing guarded loops is handled slightly differently than non-guarded
158609467b48Spatrick     // loops and has been broken out into a separate method instead of trying to
158709467b48Spatrick     // intersperse the logic within a single method.
158809467b48Spatrick     if (FC0.GuardBranch)
158909467b48Spatrick       return fuseGuardedLoops(FC0, FC1);
159009467b48Spatrick 
159173471bf0Spatrick     assert(FC1.Preheader ==
159273471bf0Spatrick            (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
159309467b48Spatrick     assert(FC1.Preheader->size() == 1 &&
159409467b48Spatrick            FC1.Preheader->getSingleSuccessor() == FC1.Header);
159509467b48Spatrick 
159609467b48Spatrick     // Remember the phi nodes originally in the header of FC0 in order to rewire
159709467b48Spatrick     // them later. However, this is only necessary if the new loop carried
159809467b48Spatrick     // values might not dominate the exiting branch. While we do not generally
159909467b48Spatrick     // test if this is the case but simply insert intermediate phi nodes, we
160009467b48Spatrick     // need to make sure these intermediate phi nodes have different
160109467b48Spatrick     // predecessors. To this end, we filter the special case where the exiting
160209467b48Spatrick     // block is the latch block of the first loop. Nothing needs to be done
160309467b48Spatrick     // anyway as all loop carried values dominate the latch and thereby also the
160409467b48Spatrick     // exiting branch.
160509467b48Spatrick     SmallVector<PHINode *, 8> OriginalFC0PHIs;
160609467b48Spatrick     if (FC0.ExitingBlock != FC0.Latch)
160709467b48Spatrick       for (PHINode &PHI : FC0.Header->phis())
160809467b48Spatrick         OriginalFC0PHIs.push_back(&PHI);
160909467b48Spatrick 
161009467b48Spatrick     // Replace incoming blocks for header PHIs first.
161109467b48Spatrick     FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
161209467b48Spatrick     FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
161309467b48Spatrick 
161409467b48Spatrick     // Then modify the control flow and update DT and PDT.
161509467b48Spatrick     SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
161609467b48Spatrick 
161709467b48Spatrick     // The old exiting block of the first loop (FC0) has to jump to the header
161809467b48Spatrick     // of the second as we need to execute the code in the second header block
161909467b48Spatrick     // regardless of the trip count. That is, if the trip count is 0, so the
162009467b48Spatrick     // back edge is never taken, we still have to execute both loop headers,
162109467b48Spatrick     // especially (but not only!) if the second is a do-while style loop.
162209467b48Spatrick     // However, doing so might invalidate the phi nodes of the first loop as
162309467b48Spatrick     // the new values do only need to dominate their latch and not the exiting
162409467b48Spatrick     // predicate. To remedy this potential problem we always introduce phi
162509467b48Spatrick     // nodes in the header of the second loop later that select the loop carried
162609467b48Spatrick     // value, if the second header was reached through an old latch of the
162709467b48Spatrick     // first, or undef otherwise. This is sound as exiting the first implies the
162809467b48Spatrick     // second will exit too, __without__ taking the back-edge. [Their
162909467b48Spatrick     // trip-counts are equal after all.
163009467b48Spatrick     // KB: Would this sequence be simpler to just just make FC0.ExitingBlock go
163109467b48Spatrick     // to FC1.Header? I think this is basically what the three sequences are
163209467b48Spatrick     // trying to accomplish; however, doing this directly in the CFG may mean
163309467b48Spatrick     // the DT/PDT becomes invalid
163473471bf0Spatrick     if (!FC0.Peeled) {
163509467b48Spatrick       FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
163609467b48Spatrick                                                            FC1.Header);
163709467b48Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
163809467b48Spatrick           DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
163909467b48Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
164009467b48Spatrick           DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
164173471bf0Spatrick     } else {
164273471bf0Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
164373471bf0Spatrick           DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
164473471bf0Spatrick 
164573471bf0Spatrick       // Remove the ExitBlock of the first Loop (also not needed)
164673471bf0Spatrick       FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
164773471bf0Spatrick                                                            FC1.Header);
164873471bf0Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
164973471bf0Spatrick           DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
165073471bf0Spatrick       FC0.ExitBlock->getTerminator()->eraseFromParent();
165173471bf0Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
165273471bf0Spatrick           DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
165373471bf0Spatrick       new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
165473471bf0Spatrick     }
165509467b48Spatrick 
165609467b48Spatrick     // The pre-header of L1 is not necessary anymore.
165773471bf0Spatrick     assert(pred_empty(FC1.Preheader));
165809467b48Spatrick     FC1.Preheader->getTerminator()->eraseFromParent();
165909467b48Spatrick     new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
166009467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
166109467b48Spatrick         DominatorTree::Delete, FC1.Preheader, FC1.Header));
166209467b48Spatrick 
166309467b48Spatrick     // Moves the phi nodes from the second to the first loops header block.
166409467b48Spatrick     while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
166509467b48Spatrick       if (SE.isSCEVable(PHI->getType()))
166609467b48Spatrick         SE.forgetValue(PHI);
166709467b48Spatrick       if (PHI->hasNUsesOrMore(1))
166809467b48Spatrick         PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
166909467b48Spatrick       else
167009467b48Spatrick         PHI->eraseFromParent();
167109467b48Spatrick     }
167209467b48Spatrick 
167309467b48Spatrick     // Introduce new phi nodes in the second loop header to ensure
167409467b48Spatrick     // exiting the first and jumping to the header of the second does not break
167509467b48Spatrick     // the SSA property of the phis originally in the first loop. See also the
167609467b48Spatrick     // comment above.
167709467b48Spatrick     Instruction *L1HeaderIP = &FC1.Header->front();
167809467b48Spatrick     for (PHINode *LCPHI : OriginalFC0PHIs) {
167909467b48Spatrick       int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
168009467b48Spatrick       assert(L1LatchBBIdx >= 0 &&
168109467b48Spatrick              "Expected loop carried value to be rewired at this point!");
168209467b48Spatrick 
168309467b48Spatrick       Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
168409467b48Spatrick 
168509467b48Spatrick       PHINode *L1HeaderPHI = PHINode::Create(
168609467b48Spatrick           LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
168709467b48Spatrick       L1HeaderPHI->addIncoming(LCV, FC0.Latch);
168809467b48Spatrick       L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
168909467b48Spatrick                                FC0.ExitingBlock);
169009467b48Spatrick 
169109467b48Spatrick       LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
169209467b48Spatrick     }
169309467b48Spatrick 
169409467b48Spatrick     // Replace latch terminator destinations.
169509467b48Spatrick     FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
169609467b48Spatrick     FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
169709467b48Spatrick 
169873471bf0Spatrick     // Modify the latch branch of FC0 to be unconditional as both successors of
169909467b48Spatrick     // the branch are the same.
170009467b48Spatrick     simplifyLatchBranch(FC0);
170109467b48Spatrick 
170209467b48Spatrick     // If FC0.Latch and FC0.ExitingBlock are the same then we have already
170309467b48Spatrick     // performed the updates above.
170409467b48Spatrick     if (FC0.Latch != FC0.ExitingBlock)
170509467b48Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
170609467b48Spatrick           DominatorTree::Insert, FC0.Latch, FC1.Header));
170709467b48Spatrick 
170809467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
170909467b48Spatrick                                                        FC0.Latch, FC0.Header));
171009467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
171109467b48Spatrick                                                        FC1.Latch, FC0.Header));
171209467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
171309467b48Spatrick                                                        FC1.Latch, FC1.Header));
171409467b48Spatrick 
171509467b48Spatrick     // Update DT/PDT
171609467b48Spatrick     DTU.applyUpdates(TreeUpdates);
171709467b48Spatrick 
171809467b48Spatrick     LI.removeBlock(FC1.Preheader);
171909467b48Spatrick     DTU.deleteBB(FC1.Preheader);
172073471bf0Spatrick     if (FC0.Peeled) {
172173471bf0Spatrick       LI.removeBlock(FC0.ExitBlock);
172273471bf0Spatrick       DTU.deleteBB(FC0.ExitBlock);
172373471bf0Spatrick     }
172473471bf0Spatrick 
172509467b48Spatrick     DTU.flush();
172609467b48Spatrick 
172709467b48Spatrick     // Is there a way to keep SE up-to-date so we don't need to forget the loops
172809467b48Spatrick     // and rebuild the information in subsequent passes of fusion?
172909467b48Spatrick     // Note: Need to forget the loops before merging the loop latches, as
173009467b48Spatrick     // mergeLatch may remove the only block in FC1.
173109467b48Spatrick     SE.forgetLoop(FC1.L);
173209467b48Spatrick     SE.forgetLoop(FC0.L);
1733*d415bd75Srobert     SE.forgetLoopDispositions();
173409467b48Spatrick 
173509467b48Spatrick     // Move instructions from FC0.Latch to FC1.Latch.
173609467b48Spatrick     // Note: mergeLatch requires an updated DT.
173709467b48Spatrick     mergeLatch(FC0, FC1);
173809467b48Spatrick 
173909467b48Spatrick     // Merge the loops.
174073471bf0Spatrick     SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
174109467b48Spatrick     for (BasicBlock *BB : Blocks) {
174209467b48Spatrick       FC0.L->addBlockEntry(BB);
174309467b48Spatrick       FC1.L->removeBlockFromLoop(BB);
174409467b48Spatrick       if (LI.getLoopFor(BB) != FC1.L)
174509467b48Spatrick         continue;
174609467b48Spatrick       LI.changeLoopFor(BB, FC0.L);
174709467b48Spatrick     }
174873471bf0Spatrick     while (!FC1.L->isInnermost()) {
174909467b48Spatrick       const auto &ChildLoopIt = FC1.L->begin();
175009467b48Spatrick       Loop *ChildLoop = *ChildLoopIt;
175109467b48Spatrick       FC1.L->removeChildLoop(ChildLoopIt);
175209467b48Spatrick       FC0.L->addChildLoop(ChildLoop);
175309467b48Spatrick     }
175409467b48Spatrick 
175509467b48Spatrick     // Delete the now empty loop L1.
175609467b48Spatrick     LI.erase(FC1.L);
175709467b48Spatrick 
175809467b48Spatrick #ifndef NDEBUG
175909467b48Spatrick     assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
176009467b48Spatrick     assert(DT.verify(DominatorTree::VerificationLevel::Fast));
176109467b48Spatrick     assert(PDT.verify());
176209467b48Spatrick     LI.verify(DT);
176309467b48Spatrick     SE.verify();
176409467b48Spatrick #endif
176509467b48Spatrick 
176609467b48Spatrick     LLVM_DEBUG(dbgs() << "Fusion done:\n");
176709467b48Spatrick 
176809467b48Spatrick     return FC0.L;
176909467b48Spatrick   }
177009467b48Spatrick 
177109467b48Spatrick   /// Report details on loop fusion opportunities.
177209467b48Spatrick   ///
177309467b48Spatrick   /// This template function can be used to report both successful and missed
177409467b48Spatrick   /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should
177509467b48Spatrick   /// be one of:
177609467b48Spatrick   ///   - OptimizationRemarkMissed to report when loop fusion is unsuccessful
177709467b48Spatrick   ///     given two valid fusion candidates.
177809467b48Spatrick   ///   - OptimizationRemark to report successful fusion of two fusion
177909467b48Spatrick   ///     candidates.
178009467b48Spatrick   /// The remarks will be printed using the form:
178109467b48Spatrick   ///    <path/filename>:<line number>:<column number>: [<function name>]:
178209467b48Spatrick   ///       <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
178309467b48Spatrick   template <typename RemarkKind>
reportLoopFusion__anon227ba0d70111::LoopFuser178409467b48Spatrick   void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
178509467b48Spatrick                         llvm::Statistic &Stat) {
178609467b48Spatrick     assert(FC0.Preheader && FC1.Preheader &&
178709467b48Spatrick            "Expecting valid fusion candidates");
178809467b48Spatrick     using namespace ore;
178973471bf0Spatrick #if LLVM_ENABLE_STATS
179009467b48Spatrick     ++Stat;
179109467b48Spatrick     ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
179209467b48Spatrick                         FC0.Preheader)
179309467b48Spatrick              << "[" << FC0.Preheader->getParent()->getName()
179409467b48Spatrick              << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
179509467b48Spatrick              << " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
179609467b48Spatrick              << ": " << Stat.getDesc());
179773471bf0Spatrick #endif
179809467b48Spatrick   }
179909467b48Spatrick 
180009467b48Spatrick   /// Fuse two guarded fusion candidates, creating a new fused loop.
180109467b48Spatrick   ///
180209467b48Spatrick   /// Fusing guarded loops is handled much the same way as fusing non-guarded
180309467b48Spatrick   /// loops. The rewiring of the CFG is slightly different though, because of
180409467b48Spatrick   /// the presence of the guards around the loops and the exit blocks after the
180509467b48Spatrick   /// loop body. As such, the new loop is rewired as follows:
180609467b48Spatrick   ///    1. Keep the guard branch from FC0 and use the non-loop block target
180709467b48Spatrick   /// from the FC1 guard branch.
180809467b48Spatrick   ///    2. Remove the exit block from FC0 (this exit block should be empty
180909467b48Spatrick   /// right now).
181009467b48Spatrick   ///    3. Remove the guard branch for FC1
181109467b48Spatrick   ///    4. Remove the preheader for FC1.
181209467b48Spatrick   /// The exit block successor for the latch of FC0 is updated to be the header
181309467b48Spatrick   /// of FC1 and the non-exit block successor of the latch of FC1 is updated to
181409467b48Spatrick   /// be the header of FC0, thus creating the fused loop.
fuseGuardedLoops__anon227ba0d70111::LoopFuser181509467b48Spatrick   Loop *fuseGuardedLoops(const FusionCandidate &FC0,
181609467b48Spatrick                          const FusionCandidate &FC1) {
181709467b48Spatrick     assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops");
181809467b48Spatrick 
181909467b48Spatrick     BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent();
182009467b48Spatrick     BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
182109467b48Spatrick     BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
182209467b48Spatrick     BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
182373471bf0Spatrick     BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
182409467b48Spatrick 
1825097a140dSpatrick     // Move instructions from the exit block of FC0 to the beginning of the exit
182673471bf0Spatrick     // block of FC1, in the case that the FC0 loop has not been peeled. In the
182773471bf0Spatrick     // case that FC0 loop is peeled, then move the instructions of the successor
182873471bf0Spatrick     // of the FC0 Exit block to the beginning of the exit block of FC1.
182973471bf0Spatrick     moveInstructionsToTheBeginning(
183073471bf0Spatrick         (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
183173471bf0Spatrick         DT, PDT, DI);
1832097a140dSpatrick 
1833097a140dSpatrick     // Move instructions from the guard block of FC1 to the end of the guard
1834097a140dSpatrick     // block of FC0.
1835097a140dSpatrick     moveInstructionsToTheEnd(*FC1GuardBlock, *FC0GuardBlock, DT, PDT, DI);
1836097a140dSpatrick 
183709467b48Spatrick     assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
183809467b48Spatrick 
183909467b48Spatrick     SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
184009467b48Spatrick 
184109467b48Spatrick     ////////////////////////////////////////////////////////////////////////////
184209467b48Spatrick     // Update the Loop Guard
184309467b48Spatrick     ////////////////////////////////////////////////////////////////////////////
184409467b48Spatrick     // The guard for FC0 is updated to guard both FC0 and FC1. This is done by
184509467b48Spatrick     // changing the NonLoopGuardBlock for FC0 to the NonLoopGuardBlock for FC1.
184609467b48Spatrick     // Thus, one path from the guard goes to the preheader for FC0 (and thus
184709467b48Spatrick     // executes the new fused loop) and the other path goes to the NonLoopBlock
184809467b48Spatrick     // for FC1 (where FC1 guard would have gone if FC1 was not executed).
1849097a140dSpatrick     FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock);
185009467b48Spatrick     FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
185173471bf0Spatrick 
185273471bf0Spatrick     BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
185373471bf0Spatrick     BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header);
185409467b48Spatrick 
185509467b48Spatrick     // The guard of FC1 is not necessary anymore.
185609467b48Spatrick     FC1.GuardBranch->eraseFromParent();
185709467b48Spatrick     new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock);
185809467b48Spatrick 
185909467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
186009467b48Spatrick         DominatorTree::Delete, FC1GuardBlock, FC1.Preheader));
186109467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
186209467b48Spatrick         DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));
186309467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
186409467b48Spatrick         DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));
186509467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
186609467b48Spatrick         DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
186709467b48Spatrick 
186873471bf0Spatrick     if (FC0.Peeled) {
186973471bf0Spatrick       // Remove the Block after the ExitBlock of FC0
187073471bf0Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
187173471bf0Spatrick           DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
187273471bf0Spatrick       FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
187373471bf0Spatrick       new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
187473471bf0Spatrick                           FC0ExitBlockSuccessor);
187573471bf0Spatrick     }
187673471bf0Spatrick 
187773471bf0Spatrick     assert(pred_empty(FC1GuardBlock) &&
187809467b48Spatrick            "Expecting guard block to have no predecessors");
187973471bf0Spatrick     assert(succ_empty(FC1GuardBlock) &&
188009467b48Spatrick            "Expecting guard block to have no successors");
188109467b48Spatrick 
188209467b48Spatrick     // Remember the phi nodes originally in the header of FC0 in order to rewire
188309467b48Spatrick     // them later. However, this is only necessary if the new loop carried
188409467b48Spatrick     // values might not dominate the exiting branch. While we do not generally
188509467b48Spatrick     // test if this is the case but simply insert intermediate phi nodes, we
188609467b48Spatrick     // need to make sure these intermediate phi nodes have different
188709467b48Spatrick     // predecessors. To this end, we filter the special case where the exiting
188809467b48Spatrick     // block is the latch block of the first loop. Nothing needs to be done
188909467b48Spatrick     // anyway as all loop carried values dominate the latch and thereby also the
189009467b48Spatrick     // exiting branch.
189109467b48Spatrick     // KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch
189209467b48Spatrick     // (because the loops are rotated. Thus, nothing will ever be added to
189309467b48Spatrick     // OriginalFC0PHIs.
189409467b48Spatrick     SmallVector<PHINode *, 8> OriginalFC0PHIs;
189509467b48Spatrick     if (FC0.ExitingBlock != FC0.Latch)
189609467b48Spatrick       for (PHINode &PHI : FC0.Header->phis())
189709467b48Spatrick         OriginalFC0PHIs.push_back(&PHI);
189809467b48Spatrick 
189909467b48Spatrick     assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!");
190009467b48Spatrick 
190109467b48Spatrick     // Replace incoming blocks for header PHIs first.
190209467b48Spatrick     FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
190309467b48Spatrick     FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
190409467b48Spatrick 
190509467b48Spatrick     // The old exiting block of the first loop (FC0) has to jump to the header
190609467b48Spatrick     // of the second as we need to execute the code in the second header block
190709467b48Spatrick     // regardless of the trip count. That is, if the trip count is 0, so the
190809467b48Spatrick     // back edge is never taken, we still have to execute both loop headers,
190909467b48Spatrick     // especially (but not only!) if the second is a do-while style loop.
191009467b48Spatrick     // However, doing so might invalidate the phi nodes of the first loop as
191109467b48Spatrick     // the new values do only need to dominate their latch and not the exiting
191209467b48Spatrick     // predicate. To remedy this potential problem we always introduce phi
191309467b48Spatrick     // nodes in the header of the second loop later that select the loop carried
191409467b48Spatrick     // value, if the second header was reached through an old latch of the
191509467b48Spatrick     // first, or undef otherwise. This is sound as exiting the first implies the
191609467b48Spatrick     // second will exit too, __without__ taking the back-edge (their
191709467b48Spatrick     // trip-counts are equal after all).
191809467b48Spatrick     FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
191909467b48Spatrick                                                          FC1.Header);
192009467b48Spatrick 
192109467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
192209467b48Spatrick         DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
192309467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
192409467b48Spatrick         DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
192509467b48Spatrick 
192609467b48Spatrick     // Remove FC0 Exit Block
192709467b48Spatrick     // The exit block for FC0 is no longer needed since control will flow
192809467b48Spatrick     // directly to the header of FC1. Since it is an empty block, it can be
192909467b48Spatrick     // removed at this point.
193009467b48Spatrick     // TODO: In the future, we can handle non-empty exit blocks my merging any
193109467b48Spatrick     // instructions from FC0 exit block into FC1 exit block prior to removing
193209467b48Spatrick     // the block.
193373471bf0Spatrick     assert(pred_empty(FC0.ExitBlock) && "Expecting exit block to be empty");
193409467b48Spatrick     FC0.ExitBlock->getTerminator()->eraseFromParent();
193509467b48Spatrick     new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
193609467b48Spatrick 
193709467b48Spatrick     // Remove FC1 Preheader
193809467b48Spatrick     // The pre-header of L1 is not necessary anymore.
193973471bf0Spatrick     assert(pred_empty(FC1.Preheader));
194009467b48Spatrick     FC1.Preheader->getTerminator()->eraseFromParent();
194109467b48Spatrick     new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
194209467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(
194309467b48Spatrick         DominatorTree::Delete, FC1.Preheader, FC1.Header));
194409467b48Spatrick 
194509467b48Spatrick     // Moves the phi nodes from the second to the first loops header block.
194609467b48Spatrick     while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
194709467b48Spatrick       if (SE.isSCEVable(PHI->getType()))
194809467b48Spatrick         SE.forgetValue(PHI);
194909467b48Spatrick       if (PHI->hasNUsesOrMore(1))
195009467b48Spatrick         PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
195109467b48Spatrick       else
195209467b48Spatrick         PHI->eraseFromParent();
195309467b48Spatrick     }
195409467b48Spatrick 
195509467b48Spatrick     // Introduce new phi nodes in the second loop header to ensure
195609467b48Spatrick     // exiting the first and jumping to the header of the second does not break
195709467b48Spatrick     // the SSA property of the phis originally in the first loop. See also the
195809467b48Spatrick     // comment above.
195909467b48Spatrick     Instruction *L1HeaderIP = &FC1.Header->front();
196009467b48Spatrick     for (PHINode *LCPHI : OriginalFC0PHIs) {
196109467b48Spatrick       int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
196209467b48Spatrick       assert(L1LatchBBIdx >= 0 &&
196309467b48Spatrick              "Expected loop carried value to be rewired at this point!");
196409467b48Spatrick 
196509467b48Spatrick       Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
196609467b48Spatrick 
196709467b48Spatrick       PHINode *L1HeaderPHI = PHINode::Create(
196809467b48Spatrick           LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
196909467b48Spatrick       L1HeaderPHI->addIncoming(LCV, FC0.Latch);
197009467b48Spatrick       L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
197109467b48Spatrick                                FC0.ExitingBlock);
197209467b48Spatrick 
197309467b48Spatrick       LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
197409467b48Spatrick     }
197509467b48Spatrick 
197609467b48Spatrick     // Update the latches
197709467b48Spatrick 
197809467b48Spatrick     // Replace latch terminator destinations.
197909467b48Spatrick     FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
198009467b48Spatrick     FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
198109467b48Spatrick 
198273471bf0Spatrick     // Modify the latch branch of FC0 to be unconditional as both successors of
198309467b48Spatrick     // the branch are the same.
198409467b48Spatrick     simplifyLatchBranch(FC0);
198509467b48Spatrick 
198609467b48Spatrick     // If FC0.Latch and FC0.ExitingBlock are the same then we have already
198709467b48Spatrick     // performed the updates above.
198809467b48Spatrick     if (FC0.Latch != FC0.ExitingBlock)
198909467b48Spatrick       TreeUpdates.emplace_back(DominatorTree::UpdateType(
199009467b48Spatrick           DominatorTree::Insert, FC0.Latch, FC1.Header));
199109467b48Spatrick 
199209467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
199309467b48Spatrick                                                        FC0.Latch, FC0.Header));
199409467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
199509467b48Spatrick                                                        FC1.Latch, FC0.Header));
199609467b48Spatrick     TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
199709467b48Spatrick                                                        FC1.Latch, FC1.Header));
199809467b48Spatrick 
199909467b48Spatrick     // All done
200009467b48Spatrick     // Apply the updates to the Dominator Tree and cleanup.
200109467b48Spatrick 
200273471bf0Spatrick     assert(succ_empty(FC1GuardBlock) && "FC1GuardBlock has successors!!");
200373471bf0Spatrick     assert(pred_empty(FC1GuardBlock) && "FC1GuardBlock has predecessors!!");
200409467b48Spatrick 
200509467b48Spatrick     // Update DT/PDT
200609467b48Spatrick     DTU.applyUpdates(TreeUpdates);
200709467b48Spatrick 
2008097a140dSpatrick     LI.removeBlock(FC1GuardBlock);
200909467b48Spatrick     LI.removeBlock(FC1.Preheader);
2010097a140dSpatrick     LI.removeBlock(FC0.ExitBlock);
201173471bf0Spatrick     if (FC0.Peeled) {
201273471bf0Spatrick       LI.removeBlock(FC0ExitBlockSuccessor);
201373471bf0Spatrick       DTU.deleteBB(FC0ExitBlockSuccessor);
201473471bf0Spatrick     }
2015097a140dSpatrick     DTU.deleteBB(FC1GuardBlock);
201609467b48Spatrick     DTU.deleteBB(FC1.Preheader);
201709467b48Spatrick     DTU.deleteBB(FC0.ExitBlock);
201809467b48Spatrick     DTU.flush();
201909467b48Spatrick 
202009467b48Spatrick     // Is there a way to keep SE up-to-date so we don't need to forget the loops
202109467b48Spatrick     // and rebuild the information in subsequent passes of fusion?
202209467b48Spatrick     // Note: Need to forget the loops before merging the loop latches, as
202309467b48Spatrick     // mergeLatch may remove the only block in FC1.
202409467b48Spatrick     SE.forgetLoop(FC1.L);
202509467b48Spatrick     SE.forgetLoop(FC0.L);
2026*d415bd75Srobert     SE.forgetLoopDispositions();
202709467b48Spatrick 
202809467b48Spatrick     // Move instructions from FC0.Latch to FC1.Latch.
202909467b48Spatrick     // Note: mergeLatch requires an updated DT.
203009467b48Spatrick     mergeLatch(FC0, FC1);
203109467b48Spatrick 
203209467b48Spatrick     // Merge the loops.
203373471bf0Spatrick     SmallVector<BasicBlock *, 8> Blocks(FC1.L->blocks());
203409467b48Spatrick     for (BasicBlock *BB : Blocks) {
203509467b48Spatrick       FC0.L->addBlockEntry(BB);
203609467b48Spatrick       FC1.L->removeBlockFromLoop(BB);
203709467b48Spatrick       if (LI.getLoopFor(BB) != FC1.L)
203809467b48Spatrick         continue;
203909467b48Spatrick       LI.changeLoopFor(BB, FC0.L);
204009467b48Spatrick     }
204173471bf0Spatrick     while (!FC1.L->isInnermost()) {
204209467b48Spatrick       const auto &ChildLoopIt = FC1.L->begin();
204309467b48Spatrick       Loop *ChildLoop = *ChildLoopIt;
204409467b48Spatrick       FC1.L->removeChildLoop(ChildLoopIt);
204509467b48Spatrick       FC0.L->addChildLoop(ChildLoop);
204609467b48Spatrick     }
204709467b48Spatrick 
204809467b48Spatrick     // Delete the now empty loop L1.
204909467b48Spatrick     LI.erase(FC1.L);
205009467b48Spatrick 
205109467b48Spatrick #ifndef NDEBUG
205209467b48Spatrick     assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
205309467b48Spatrick     assert(DT.verify(DominatorTree::VerificationLevel::Fast));
205409467b48Spatrick     assert(PDT.verify());
205509467b48Spatrick     LI.verify(DT);
205609467b48Spatrick     SE.verify();
205709467b48Spatrick #endif
205809467b48Spatrick 
205909467b48Spatrick     LLVM_DEBUG(dbgs() << "Fusion done:\n");
206009467b48Spatrick 
206109467b48Spatrick     return FC0.L;
206209467b48Spatrick   }
206309467b48Spatrick };
206409467b48Spatrick 
206509467b48Spatrick struct LoopFuseLegacy : public FunctionPass {
206609467b48Spatrick 
206709467b48Spatrick   static char ID;
206809467b48Spatrick 
LoopFuseLegacy__anon227ba0d70111::LoopFuseLegacy206909467b48Spatrick   LoopFuseLegacy() : FunctionPass(ID) {
207009467b48Spatrick     initializeLoopFuseLegacyPass(*PassRegistry::getPassRegistry());
207109467b48Spatrick   }
207209467b48Spatrick 
getAnalysisUsage__anon227ba0d70111::LoopFuseLegacy207309467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
207409467b48Spatrick     AU.addRequiredID(LoopSimplifyID);
207509467b48Spatrick     AU.addRequired<ScalarEvolutionWrapperPass>();
207609467b48Spatrick     AU.addRequired<LoopInfoWrapperPass>();
207709467b48Spatrick     AU.addRequired<DominatorTreeWrapperPass>();
207809467b48Spatrick     AU.addRequired<PostDominatorTreeWrapperPass>();
207909467b48Spatrick     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
208009467b48Spatrick     AU.addRequired<DependenceAnalysisWrapperPass>();
208173471bf0Spatrick     AU.addRequired<AssumptionCacheTracker>();
208273471bf0Spatrick     AU.addRequired<TargetTransformInfoWrapperPass>();
208309467b48Spatrick 
208409467b48Spatrick     AU.addPreserved<ScalarEvolutionWrapperPass>();
208509467b48Spatrick     AU.addPreserved<LoopInfoWrapperPass>();
208609467b48Spatrick     AU.addPreserved<DominatorTreeWrapperPass>();
208709467b48Spatrick     AU.addPreserved<PostDominatorTreeWrapperPass>();
208809467b48Spatrick   }
208909467b48Spatrick 
runOnFunction__anon227ba0d70111::LoopFuseLegacy209009467b48Spatrick   bool runOnFunction(Function &F) override {
209109467b48Spatrick     if (skipFunction(F))
209209467b48Spatrick       return false;
2093*d415bd75Srobert 
209409467b48Spatrick     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
209509467b48Spatrick     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
209609467b48Spatrick     auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
209709467b48Spatrick     auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
209809467b48Spatrick     auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
209909467b48Spatrick     auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
210073471bf0Spatrick     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
210173471bf0Spatrick     const TargetTransformInfo &TTI =
210273471bf0Spatrick         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
210309467b48Spatrick     const DataLayout &DL = F.getParent()->getDataLayout();
210473471bf0Spatrick 
210573471bf0Spatrick     LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
210609467b48Spatrick     return LF.fuseLoops(F);
210709467b48Spatrick   }
210809467b48Spatrick };
210909467b48Spatrick } // namespace
211009467b48Spatrick 
run(Function & F,FunctionAnalysisManager & AM)211109467b48Spatrick PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
211209467b48Spatrick   auto &LI = AM.getResult<LoopAnalysis>(F);
211309467b48Spatrick   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
211409467b48Spatrick   auto &DI = AM.getResult<DependenceAnalysis>(F);
211509467b48Spatrick   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
211609467b48Spatrick   auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
211709467b48Spatrick   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
211873471bf0Spatrick   auto &AC = AM.getResult<AssumptionAnalysis>(F);
211973471bf0Spatrick   const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
212009467b48Spatrick   const DataLayout &DL = F.getParent()->getDataLayout();
212173471bf0Spatrick 
2122*d415bd75Srobert   // Ensure loops are in simplifed form which is a pre-requisite for loop fusion
2123*d415bd75Srobert   // pass. Added only for new PM since the legacy PM has already added
2124*d415bd75Srobert   // LoopSimplify pass as a dependency.
2125*d415bd75Srobert   bool Changed = false;
2126*d415bd75Srobert   for (auto &L : LI) {
2127*d415bd75Srobert     Changed |=
2128*d415bd75Srobert         simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
2129*d415bd75Srobert   }
2130*d415bd75Srobert   if (Changed)
2131*d415bd75Srobert     PDT.recalculate(F);
2132*d415bd75Srobert 
213373471bf0Spatrick   LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
2134*d415bd75Srobert   Changed |= LF.fuseLoops(F);
213509467b48Spatrick   if (!Changed)
213609467b48Spatrick     return PreservedAnalyses::all();
213709467b48Spatrick 
213809467b48Spatrick   PreservedAnalyses PA;
213909467b48Spatrick   PA.preserve<DominatorTreeAnalysis>();
214009467b48Spatrick   PA.preserve<PostDominatorTreeAnalysis>();
214109467b48Spatrick   PA.preserve<ScalarEvolutionAnalysis>();
214209467b48Spatrick   PA.preserve<LoopAnalysis>();
214309467b48Spatrick   return PA;
214409467b48Spatrick }
214509467b48Spatrick 
214609467b48Spatrick char LoopFuseLegacy::ID = 0;
214709467b48Spatrick 
214809467b48Spatrick INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false,
214909467b48Spatrick                       false)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)215009467b48Spatrick INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
215109467b48Spatrick INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
215209467b48Spatrick INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
215309467b48Spatrick INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
215409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
215509467b48Spatrick INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
215673471bf0Spatrick INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
215773471bf0Spatrick INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
215809467b48Spatrick INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
215909467b48Spatrick 
216009467b48Spatrick FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
2161