xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the Loop Distribution Pass.  Its main focus is to
100b57cec5SDimitry Andric // distribute loops that cannot be vectorized due to dependence cycles.  It
110b57cec5SDimitry Andric // tries to isolate the offending dependences into a new loop allowing
120b57cec5SDimitry Andric // vectorization of the remaining parts.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // For dependence analysis, the pass uses the LoopVectorizer's
150b57cec5SDimitry Andric // LoopAccessAnalysis.  Because this analysis presumes no change in the order of
160b57cec5SDimitry Andric // memory operations, special care is taken to preserve the lexical order of
170b57cec5SDimitry Andric // these operations.
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // Similarly to the Vectorizer, the pass also supports loop versioning to
200b57cec5SDimitry Andric // run-time disambiguate potentially overlapping arrays.
210b57cec5SDimitry Andric //
220b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopDistribute.h"
250b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
260b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
270b57cec5SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h"
280b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
29*0fca6ea1SDimitry Andric #include "llvm/ADT/SetVector.h"
300b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
310b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
320b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
330b57cec5SDimitry Andric #include "llvm/ADT/Twine.h"
340b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
350b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
360b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
370b57cec5SDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h"
380b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
390b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
400b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
410b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
420b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
430b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
440b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
450b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
460b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
470b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
480b57cec5SDimitry Andric #include "llvm/IR/Function.h"
490b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
500b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
510b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
520b57cec5SDimitry Andric #include "llvm/IR/Metadata.h"
530b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
540b57cec5SDimitry Andric #include "llvm/IR/Value.h"
550b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
560b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
570b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
580b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
590b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
600b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
610b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
620b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopVersioning.h"
630b57cec5SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
640b57cec5SDimitry Andric #include <cassert>
650b57cec5SDimitry Andric #include <functional>
660b57cec5SDimitry Andric #include <list>
670b57cec5SDimitry Andric #include <tuple>
680b57cec5SDimitry Andric #include <utility>
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric using namespace llvm;
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric #define LDIST_NAME "loop-distribute"
730b57cec5SDimitry Andric #define DEBUG_TYPE LDIST_NAME
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric /// @{
760b57cec5SDimitry Andric /// Metadata attribute names
770b57cec5SDimitry Andric static const char *const LLVMLoopDistributeFollowupAll =
780b57cec5SDimitry Andric     "llvm.loop.distribute.followup_all";
790b57cec5SDimitry Andric static const char *const LLVMLoopDistributeFollowupCoincident =
800b57cec5SDimitry Andric     "llvm.loop.distribute.followup_coincident";
810b57cec5SDimitry Andric static const char *const LLVMLoopDistributeFollowupSequential =
820b57cec5SDimitry Andric     "llvm.loop.distribute.followup_sequential";
830b57cec5SDimitry Andric static const char *const LLVMLoopDistributeFollowupFallback =
840b57cec5SDimitry Andric     "llvm.loop.distribute.followup_fallback";
850b57cec5SDimitry Andric /// @}
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric static cl::opt<bool>
880b57cec5SDimitry Andric     LDistVerify("loop-distribute-verify", cl::Hidden,
890b57cec5SDimitry Andric                 cl::desc("Turn on DominatorTree and LoopInfo verification "
900b57cec5SDimitry Andric                          "after Loop Distribution"),
910b57cec5SDimitry Andric                 cl::init(false));
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric static cl::opt<bool> DistributeNonIfConvertible(
940b57cec5SDimitry Andric     "loop-distribute-non-if-convertible", cl::Hidden,
950b57cec5SDimitry Andric     cl::desc("Whether to distribute into a loop that may not be "
960b57cec5SDimitry Andric              "if-convertible by the loop vectorizer"),
970b57cec5SDimitry Andric     cl::init(false));
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric static cl::opt<unsigned> DistributeSCEVCheckThreshold(
1000b57cec5SDimitry Andric     "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
1010b57cec5SDimitry Andric     cl::desc("The maximum number of SCEV checks allowed for Loop "
1020b57cec5SDimitry Andric              "Distribution"));
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
1050b57cec5SDimitry Andric     "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
1060b57cec5SDimitry Andric     cl::Hidden,
1075f757f3fSDimitry Andric     cl::desc("The maximum number of SCEV checks allowed for Loop "
1085f757f3fSDimitry Andric              "Distribution for loop marked with #pragma clang loop "
1095f757f3fSDimitry Andric              "distribute(enable)"));
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric static cl::opt<bool> EnableLoopDistribute(
1120b57cec5SDimitry Andric     "enable-loop-distribute", cl::Hidden,
1130b57cec5SDimitry Andric     cl::desc("Enable the new, experimental LoopDistribution Pass"),
1140b57cec5SDimitry Andric     cl::init(false));
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric STATISTIC(NumLoopsDistributed, "Number of loops distributed");
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric namespace {
1190b57cec5SDimitry Andric 
1200b57cec5SDimitry Andric /// Maintains the set of instructions of the loop for a partition before
1210b57cec5SDimitry Andric /// cloning.  After cloning, it hosts the new loop.
1220b57cec5SDimitry Andric class InstPartition {
123*0fca6ea1SDimitry Andric   using InstructionSet = SmallSetVector<Instruction *, 8>;
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric public:
1260b57cec5SDimitry Andric   InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
1270b57cec5SDimitry Andric       : DepCycle(DepCycle), OrigLoop(L) {
1280b57cec5SDimitry Andric     Set.insert(I);
1290b57cec5SDimitry Andric   }
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric   /// Returns whether this partition contains a dependence cycle.
1320b57cec5SDimitry Andric   bool hasDepCycle() const { return DepCycle; }
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric   /// Adds an instruction to this partition.
1350b57cec5SDimitry Andric   void add(Instruction *I) { Set.insert(I); }
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric   /// Collection accessors.
1380b57cec5SDimitry Andric   InstructionSet::iterator begin() { return Set.begin(); }
1390b57cec5SDimitry Andric   InstructionSet::iterator end() { return Set.end(); }
1400b57cec5SDimitry Andric   InstructionSet::const_iterator begin() const { return Set.begin(); }
1410b57cec5SDimitry Andric   InstructionSet::const_iterator end() const { return Set.end(); }
1420b57cec5SDimitry Andric   bool empty() const { return Set.empty(); }
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   /// Moves this partition into \p Other.  This partition becomes empty
1450b57cec5SDimitry Andric   /// after this.
1460b57cec5SDimitry Andric   void moveTo(InstPartition &Other) {
1470b57cec5SDimitry Andric     Other.Set.insert(Set.begin(), Set.end());
1480b57cec5SDimitry Andric     Set.clear();
1490b57cec5SDimitry Andric     Other.DepCycle |= DepCycle;
1500b57cec5SDimitry Andric   }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric   /// Populates the partition with a transitive closure of all the
1530b57cec5SDimitry Andric   /// instructions that the seeded instructions dependent on.
1540b57cec5SDimitry Andric   void populateUsedSet() {
1550b57cec5SDimitry Andric     // FIXME: We currently don't use control-dependence but simply include all
1560b57cec5SDimitry Andric     // blocks (possibly empty at the end) and let simplifycfg mostly clean this
1570b57cec5SDimitry Andric     // up.
1580b57cec5SDimitry Andric     for (auto *B : OrigLoop->getBlocks())
1590b57cec5SDimitry Andric       Set.insert(B->getTerminator());
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric     // Follow the use-def chains to form a transitive closure of all the
1620b57cec5SDimitry Andric     // instructions that the originally seeded instructions depend on.
1630b57cec5SDimitry Andric     SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
1640b57cec5SDimitry Andric     while (!Worklist.empty()) {
1650b57cec5SDimitry Andric       Instruction *I = Worklist.pop_back_val();
1660b57cec5SDimitry Andric       // Insert instructions from the loop that we depend on.
1670b57cec5SDimitry Andric       for (Value *V : I->operand_values()) {
1680b57cec5SDimitry Andric         auto *I = dyn_cast<Instruction>(V);
169*0fca6ea1SDimitry Andric         if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
1700b57cec5SDimitry Andric           Worklist.push_back(I);
1710b57cec5SDimitry Andric       }
1720b57cec5SDimitry Andric     }
1730b57cec5SDimitry Andric   }
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   /// Clones the original loop.
1760b57cec5SDimitry Andric   ///
1770b57cec5SDimitry Andric   /// Updates LoopInfo and DominatorTree using the information that block \p
1780b57cec5SDimitry Andric   /// LoopDomBB dominates the loop.
1790b57cec5SDimitry Andric   Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
1800b57cec5SDimitry Andric                                unsigned Index, LoopInfo *LI,
1810b57cec5SDimitry Andric                                DominatorTree *DT) {
1820b57cec5SDimitry Andric     ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
1830b57cec5SDimitry Andric                                           VMap, Twine(".ldist") + Twine(Index),
1840b57cec5SDimitry Andric                                           LI, DT, ClonedLoopBlocks);
1850b57cec5SDimitry Andric     return ClonedLoop;
1860b57cec5SDimitry Andric   }
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric   /// The cloned loop.  If this partition is mapped to the original loop,
1890b57cec5SDimitry Andric   /// this is null.
1900b57cec5SDimitry Andric   const Loop *getClonedLoop() const { return ClonedLoop; }
1910b57cec5SDimitry Andric 
1920b57cec5SDimitry Andric   /// Returns the loop where this partition ends up after distribution.
1930b57cec5SDimitry Andric   /// If this partition is mapped to the original loop then use the block from
1940b57cec5SDimitry Andric   /// the loop.
1950b57cec5SDimitry Andric   Loop *getDistributedLoop() const {
1960b57cec5SDimitry Andric     return ClonedLoop ? ClonedLoop : OrigLoop;
1970b57cec5SDimitry Andric   }
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   /// The VMap that is populated by cloning and then used in
2000b57cec5SDimitry Andric   /// remapinstruction to remap the cloned instructions.
2010b57cec5SDimitry Andric   ValueToValueMapTy &getVMap() { return VMap; }
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric   /// Remaps the cloned instructions using VMap.
2040b57cec5SDimitry Andric   void remapInstructions() {
2050b57cec5SDimitry Andric     remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
2060b57cec5SDimitry Andric   }
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric   /// Based on the set of instructions selected for this partition,
2090b57cec5SDimitry Andric   /// removes the unnecessary ones.
2100b57cec5SDimitry Andric   void removeUnusedInsts() {
2110b57cec5SDimitry Andric     SmallVector<Instruction *, 8> Unused;
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric     for (auto *Block : OrigLoop->getBlocks())
2140b57cec5SDimitry Andric       for (auto &Inst : *Block)
2150b57cec5SDimitry Andric         if (!Set.count(&Inst)) {
2160b57cec5SDimitry Andric           Instruction *NewInst = &Inst;
2170b57cec5SDimitry Andric           if (!VMap.empty())
2180b57cec5SDimitry Andric             NewInst = cast<Instruction>(VMap[NewInst]);
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric           assert(!isa<BranchInst>(NewInst) &&
2210b57cec5SDimitry Andric                  "Branches are marked used early on");
2220b57cec5SDimitry Andric           Unused.push_back(NewInst);
2230b57cec5SDimitry Andric         }
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric     // Delete the instructions backwards, as it has a reduced likelihood of
2260b57cec5SDimitry Andric     // having to update as many def-use and use-def chains.
2270b57cec5SDimitry Andric     for (auto *Inst : reverse(Unused)) {
2280b57cec5SDimitry Andric       if (!Inst->use_empty())
22981ad6265SDimitry Andric         Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
2300b57cec5SDimitry Andric       Inst->eraseFromParent();
2310b57cec5SDimitry Andric     }
2320b57cec5SDimitry Andric   }
2330b57cec5SDimitry Andric 
234*0fca6ea1SDimitry Andric   void print(raw_ostream &OS) const {
235*0fca6ea1SDimitry Andric     OS << (DepCycle ? " (cycle)\n" : "\n");
2360b57cec5SDimitry Andric     for (auto *I : Set)
2370b57cec5SDimitry Andric       // Prefix with the block name.
238*0fca6ea1SDimitry Andric       OS << "  " << I->getParent()->getName() << ":" << *I << "\n";
2390b57cec5SDimitry Andric   }
2400b57cec5SDimitry Andric 
241*0fca6ea1SDimitry Andric   void printBlocks(raw_ostream &OS) const {
2420b57cec5SDimitry Andric     for (auto *BB : getDistributedLoop()->getBlocks())
243*0fca6ea1SDimitry Andric       OS << *BB;
2440b57cec5SDimitry Andric   }
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric private:
2470b57cec5SDimitry Andric   /// Instructions from OrigLoop selected for this partition.
2480b57cec5SDimitry Andric   InstructionSet Set;
2490b57cec5SDimitry Andric 
2500b57cec5SDimitry Andric   /// Whether this partition contains a dependence cycle.
2510b57cec5SDimitry Andric   bool DepCycle;
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric   /// The original loop.
2540b57cec5SDimitry Andric   Loop *OrigLoop;
2550b57cec5SDimitry Andric 
2560b57cec5SDimitry Andric   /// The cloned loop.  If this partition is mapped to the original loop,
2570b57cec5SDimitry Andric   /// this is null.
2580b57cec5SDimitry Andric   Loop *ClonedLoop = nullptr;
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric   /// The blocks of ClonedLoop including the preheader.  If this
2610b57cec5SDimitry Andric   /// partition is mapped to the original loop, this is empty.
2620b57cec5SDimitry Andric   SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric   /// These gets populated once the set of instructions have been
2650b57cec5SDimitry Andric   /// finalized. If this partition is mapped to the original loop, these are not
2660b57cec5SDimitry Andric   /// set.
2670b57cec5SDimitry Andric   ValueToValueMapTy VMap;
2680b57cec5SDimitry Andric };
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric /// Holds the set of Partitions.  It populates them, merges them and then
2710b57cec5SDimitry Andric /// clones the loops.
2720b57cec5SDimitry Andric class InstPartitionContainer {
2730b57cec5SDimitry Andric   using InstToPartitionIdT = DenseMap<Instruction *, int>;
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric public:
2760b57cec5SDimitry Andric   InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
2770b57cec5SDimitry Andric       : L(L), LI(LI), DT(DT) {}
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   /// Returns the number of partitions.
2800b57cec5SDimitry Andric   unsigned getSize() const { return PartitionContainer.size(); }
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric   /// Adds \p Inst into the current partition if that is marked to
2830b57cec5SDimitry Andric   /// contain cycles.  Otherwise start a new partition for it.
2840b57cec5SDimitry Andric   void addToCyclicPartition(Instruction *Inst) {
2850b57cec5SDimitry Andric     // If the current partition is non-cyclic.  Start a new one.
2860b57cec5SDimitry Andric     if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
2870b57cec5SDimitry Andric       PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
2880b57cec5SDimitry Andric     else
2890b57cec5SDimitry Andric       PartitionContainer.back().add(Inst);
2900b57cec5SDimitry Andric   }
2910b57cec5SDimitry Andric 
2920b57cec5SDimitry Andric   /// Adds \p Inst into a partition that is not marked to contain
2930b57cec5SDimitry Andric   /// dependence cycles.
2940b57cec5SDimitry Andric   ///
2950b57cec5SDimitry Andric   //  Initially we isolate memory instructions into as many partitions as
2960b57cec5SDimitry Andric   //  possible, then later we may merge them back together.
2970b57cec5SDimitry Andric   void addToNewNonCyclicPartition(Instruction *Inst) {
2980b57cec5SDimitry Andric     PartitionContainer.emplace_back(Inst, L);
2990b57cec5SDimitry Andric   }
3000b57cec5SDimitry Andric 
3010b57cec5SDimitry Andric   /// Merges adjacent non-cyclic partitions.
3020b57cec5SDimitry Andric   ///
3030b57cec5SDimitry Andric   /// The idea is that we currently only want to isolate the non-vectorizable
3040b57cec5SDimitry Andric   /// partition.  We could later allow more distribution among these partition
3050b57cec5SDimitry Andric   /// too.
3060b57cec5SDimitry Andric   void mergeAdjacentNonCyclic() {
3070b57cec5SDimitry Andric     mergeAdjacentPartitionsIf(
3080b57cec5SDimitry Andric         [](const InstPartition *P) { return !P->hasDepCycle(); });
3090b57cec5SDimitry Andric   }
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   /// If a partition contains only conditional stores, we won't vectorize
3120b57cec5SDimitry Andric   /// it.  Try to merge it with a previous cyclic partition.
3130b57cec5SDimitry Andric   void mergeNonIfConvertible() {
3140b57cec5SDimitry Andric     mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
3150b57cec5SDimitry Andric       if (Partition->hasDepCycle())
3160b57cec5SDimitry Andric         return true;
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric       // Now, check if all stores are conditional in this partition.
3190b57cec5SDimitry Andric       bool seenStore = false;
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric       for (auto *Inst : *Partition)
3220b57cec5SDimitry Andric         if (isa<StoreInst>(Inst)) {
3230b57cec5SDimitry Andric           seenStore = true;
3240b57cec5SDimitry Andric           if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
3250b57cec5SDimitry Andric             return false;
3260b57cec5SDimitry Andric         }
3270b57cec5SDimitry Andric       return seenStore;
3280b57cec5SDimitry Andric     });
3290b57cec5SDimitry Andric   }
3300b57cec5SDimitry Andric 
3310b57cec5SDimitry Andric   /// Merges the partitions according to various heuristics.
3320b57cec5SDimitry Andric   void mergeBeforePopulating() {
3330b57cec5SDimitry Andric     mergeAdjacentNonCyclic();
3340b57cec5SDimitry Andric     if (!DistributeNonIfConvertible)
3350b57cec5SDimitry Andric       mergeNonIfConvertible();
3360b57cec5SDimitry Andric   }
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric   /// Merges partitions in order to ensure that no loads are duplicated.
3390b57cec5SDimitry Andric   ///
3400b57cec5SDimitry Andric   /// We can't duplicate loads because that could potentially reorder them.
3410b57cec5SDimitry Andric   /// LoopAccessAnalysis provides dependency information with the context that
3420b57cec5SDimitry Andric   /// the order of memory operation is preserved.
3430b57cec5SDimitry Andric   ///
3440b57cec5SDimitry Andric   /// Return if any partitions were merged.
3450b57cec5SDimitry Andric   bool mergeToAvoidDuplicatedLoads() {
3460b57cec5SDimitry Andric     using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
3470b57cec5SDimitry Andric     using ToBeMergedT = EquivalenceClasses<InstPartition *>;
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric     LoadToPartitionT LoadToPartition;
3500b57cec5SDimitry Andric     ToBeMergedT ToBeMerged;
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric     // Step through the partitions and create equivalence between partitions
3530b57cec5SDimitry Andric     // that contain the same load.  Also put partitions in between them in the
3540b57cec5SDimitry Andric     // same equivalence class to avoid reordering of memory operations.
3550b57cec5SDimitry Andric     for (PartitionContainerT::iterator I = PartitionContainer.begin(),
3560b57cec5SDimitry Andric                                        E = PartitionContainer.end();
3570b57cec5SDimitry Andric          I != E; ++I) {
3580b57cec5SDimitry Andric       auto *PartI = &*I;
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric       // If a load occurs in two partitions PartI and PartJ, merge all
3610b57cec5SDimitry Andric       // partitions (PartI, PartJ] into PartI.
3620b57cec5SDimitry Andric       for (Instruction *Inst : *PartI)
3630b57cec5SDimitry Andric         if (isa<LoadInst>(Inst)) {
3640b57cec5SDimitry Andric           bool NewElt;
3650b57cec5SDimitry Andric           LoadToPartitionT::iterator LoadToPart;
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric           std::tie(LoadToPart, NewElt) =
3680b57cec5SDimitry Andric               LoadToPartition.insert(std::make_pair(Inst, PartI));
3690b57cec5SDimitry Andric           if (!NewElt) {
370*0fca6ea1SDimitry Andric             LLVM_DEBUG(
371*0fca6ea1SDimitry Andric                 dbgs()
372*0fca6ea1SDimitry Andric                 << "LDist: Merging partitions due to this load in multiple "
373*0fca6ea1SDimitry Andric                 << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
3740b57cec5SDimitry Andric                 << *Inst << "\n");
3750b57cec5SDimitry Andric 
3760b57cec5SDimitry Andric             auto PartJ = I;
3770b57cec5SDimitry Andric             do {
3780b57cec5SDimitry Andric               --PartJ;
3790b57cec5SDimitry Andric               ToBeMerged.unionSets(PartI, &*PartJ);
3800b57cec5SDimitry Andric             } while (&*PartJ != LoadToPart->second);
3810b57cec5SDimitry Andric           }
3820b57cec5SDimitry Andric         }
3830b57cec5SDimitry Andric     }
3840b57cec5SDimitry Andric     if (ToBeMerged.empty())
3850b57cec5SDimitry Andric       return false;
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric     // Merge the member of an equivalence class into its class leader.  This
3880b57cec5SDimitry Andric     // makes the members empty.
3890b57cec5SDimitry Andric     for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
3900b57cec5SDimitry Andric          I != E; ++I) {
3910b57cec5SDimitry Andric       if (!I->isLeader())
3920b57cec5SDimitry Andric         continue;
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric       auto PartI = I->getData();
395bdd1243dSDimitry Andric       for (auto *PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
3960b57cec5SDimitry Andric                                    ToBeMerged.member_end())) {
3970b57cec5SDimitry Andric         PartJ->moveTo(*PartI);
3980b57cec5SDimitry Andric       }
3990b57cec5SDimitry Andric     }
4000b57cec5SDimitry Andric 
4010b57cec5SDimitry Andric     // Remove the empty partitions.
4020b57cec5SDimitry Andric     PartitionContainer.remove_if(
4030b57cec5SDimitry Andric         [](const InstPartition &P) { return P.empty(); });
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric     return true;
4060b57cec5SDimitry Andric   }
4070b57cec5SDimitry Andric 
4080b57cec5SDimitry Andric   /// Sets up the mapping between instructions to partitions.  If the
4090b57cec5SDimitry Andric   /// instruction is duplicated across multiple partitions, set the entry to -1.
4100b57cec5SDimitry Andric   void setupPartitionIdOnInstructions() {
4110b57cec5SDimitry Andric     int PartitionID = 0;
4120b57cec5SDimitry Andric     for (const auto &Partition : PartitionContainer) {
4130b57cec5SDimitry Andric       for (Instruction *Inst : Partition) {
4140b57cec5SDimitry Andric         bool NewElt;
4150b57cec5SDimitry Andric         InstToPartitionIdT::iterator Iter;
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric         std::tie(Iter, NewElt) =
4180b57cec5SDimitry Andric             InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
4190b57cec5SDimitry Andric         if (!NewElt)
4200b57cec5SDimitry Andric           Iter->second = -1;
4210b57cec5SDimitry Andric       }
4220b57cec5SDimitry Andric       ++PartitionID;
4230b57cec5SDimitry Andric     }
4240b57cec5SDimitry Andric   }
4250b57cec5SDimitry Andric 
4260b57cec5SDimitry Andric   /// Populates the partition with everything that the seeding
4270b57cec5SDimitry Andric   /// instructions require.
4280b57cec5SDimitry Andric   void populateUsedSet() {
4290b57cec5SDimitry Andric     for (auto &P : PartitionContainer)
4300b57cec5SDimitry Andric       P.populateUsedSet();
4310b57cec5SDimitry Andric   }
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric   /// This performs the main chunk of the work of cloning the loops for
4340b57cec5SDimitry Andric   /// the partitions.
4350b57cec5SDimitry Andric   void cloneLoops() {
4360b57cec5SDimitry Andric     BasicBlock *OrigPH = L->getLoopPreheader();
4370b57cec5SDimitry Andric     // At this point the predecessor of the preheader is either the memcheck
4380b57cec5SDimitry Andric     // block or the top part of the original preheader.
4390b57cec5SDimitry Andric     BasicBlock *Pred = OrigPH->getSinglePredecessor();
4400b57cec5SDimitry Andric     assert(Pred && "Preheader does not have a single predecessor");
4410b57cec5SDimitry Andric     BasicBlock *ExitBlock = L->getExitBlock();
4420b57cec5SDimitry Andric     assert(ExitBlock && "No single exit block");
4430b57cec5SDimitry Andric     Loop *NewLoop;
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric     assert(!PartitionContainer.empty() && "at least two partitions expected");
4460b57cec5SDimitry Andric     // We're cloning the preheader along with the loop so we already made sure
4470b57cec5SDimitry Andric     // it was empty.
4480b57cec5SDimitry Andric     assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
4490b57cec5SDimitry Andric            "preheader not empty");
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric     // Preserve the original loop ID for use after the transformation.
4520b57cec5SDimitry Andric     MDNode *OrigLoopID = L->getLoopID();
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric     // Create a loop for each partition except the last.  Clone the original
4550b57cec5SDimitry Andric     // loop before PH along with adding a preheader for the cloned loop.  Then
4560b57cec5SDimitry Andric     // update PH to point to the newly added preheader.
4570b57cec5SDimitry Andric     BasicBlock *TopPH = OrigPH;
4580b57cec5SDimitry Andric     unsigned Index = getSize() - 1;
459bdd1243dSDimitry Andric     for (auto &Part : llvm::drop_begin(llvm::reverse(PartitionContainer))) {
460bdd1243dSDimitry Andric       NewLoop = Part.cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
4610b57cec5SDimitry Andric 
462bdd1243dSDimitry Andric       Part.getVMap()[ExitBlock] = TopPH;
463bdd1243dSDimitry Andric       Part.remapInstructions();
464bdd1243dSDimitry Andric       setNewLoopID(OrigLoopID, &Part);
465bdd1243dSDimitry Andric       --Index;
466bdd1243dSDimitry Andric       TopPH = NewLoop->getLoopPreheader();
4670b57cec5SDimitry Andric     }
4680b57cec5SDimitry Andric     Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric     // Also set a new loop ID for the last loop.
4710b57cec5SDimitry Andric     setNewLoopID(OrigLoopID, &PartitionContainer.back());
4720b57cec5SDimitry Andric 
4730b57cec5SDimitry Andric     // Now go in forward order and update the immediate dominator for the
4740b57cec5SDimitry Andric     // preheaders with the exiting block of the previous loop.  Dominance
4750b57cec5SDimitry Andric     // within the loop is updated in cloneLoopWithPreheader.
4760b57cec5SDimitry Andric     for (auto Curr = PartitionContainer.cbegin(),
4770b57cec5SDimitry Andric               Next = std::next(PartitionContainer.cbegin()),
4780b57cec5SDimitry Andric               E = PartitionContainer.cend();
4790b57cec5SDimitry Andric          Next != E; ++Curr, ++Next)
4800b57cec5SDimitry Andric       DT->changeImmediateDominator(
4810b57cec5SDimitry Andric           Next->getDistributedLoop()->getLoopPreheader(),
4820b57cec5SDimitry Andric           Curr->getDistributedLoop()->getExitingBlock());
4830b57cec5SDimitry Andric   }
4840b57cec5SDimitry Andric 
4850b57cec5SDimitry Andric   /// Removes the dead instructions from the cloned loops.
4860b57cec5SDimitry Andric   void removeUnusedInsts() {
4870b57cec5SDimitry Andric     for (auto &Partition : PartitionContainer)
4880b57cec5SDimitry Andric       Partition.removeUnusedInsts();
4890b57cec5SDimitry Andric   }
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric   /// For each memory pointer, it computes the partitionId the pointer is
4920b57cec5SDimitry Andric   /// used in.
4930b57cec5SDimitry Andric   ///
4940b57cec5SDimitry Andric   /// This returns an array of int where the I-th entry corresponds to I-th
4950b57cec5SDimitry Andric   /// entry in LAI.getRuntimePointerCheck().  If the pointer is used in multiple
4960b57cec5SDimitry Andric   /// partitions its entry is set to -1.
4970b57cec5SDimitry Andric   SmallVector<int, 8>
4980b57cec5SDimitry Andric   computePartitionSetForPointers(const LoopAccessInfo &LAI) {
4990b57cec5SDimitry Andric     const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric     unsigned N = RtPtrCheck->Pointers.size();
5020b57cec5SDimitry Andric     SmallVector<int, 8> PtrToPartitions(N);
5030b57cec5SDimitry Andric     for (unsigned I = 0; I < N; ++I) {
5040b57cec5SDimitry Andric       Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
5050b57cec5SDimitry Andric       auto Instructions =
5060b57cec5SDimitry Andric           LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric       int &Partition = PtrToPartitions[I];
5090b57cec5SDimitry Andric       // First set it to uninitialized.
5100b57cec5SDimitry Andric       Partition = -2;
5110b57cec5SDimitry Andric       for (Instruction *Inst : Instructions) {
5120b57cec5SDimitry Andric         // Note that this could be -1 if Inst is duplicated across multiple
5130b57cec5SDimitry Andric         // partitions.
5140b57cec5SDimitry Andric         int ThisPartition = this->InstToPartitionId[Inst];
5150b57cec5SDimitry Andric         if (Partition == -2)
5160b57cec5SDimitry Andric           Partition = ThisPartition;
5170b57cec5SDimitry Andric         // -1 means belonging to multiple partitions.
5180b57cec5SDimitry Andric         else if (Partition == -1)
5190b57cec5SDimitry Andric           break;
5200b57cec5SDimitry Andric         else if (Partition != (int)ThisPartition)
5210b57cec5SDimitry Andric           Partition = -1;
5220b57cec5SDimitry Andric       }
5230b57cec5SDimitry Andric       assert(Partition != -2 && "Pointer not belonging to any partition");
5240b57cec5SDimitry Andric     }
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric     return PtrToPartitions;
5270b57cec5SDimitry Andric   }
5280b57cec5SDimitry Andric 
5290b57cec5SDimitry Andric   void print(raw_ostream &OS) const {
5300b57cec5SDimitry Andric     unsigned Index = 0;
5310b57cec5SDimitry Andric     for (const auto &P : PartitionContainer) {
532*0fca6ea1SDimitry Andric       OS << "LDist: Partition " << Index++ << ":";
533*0fca6ea1SDimitry Andric       P.print(OS);
5340b57cec5SDimitry Andric     }
5350b57cec5SDimitry Andric   }
5360b57cec5SDimitry Andric 
5370b57cec5SDimitry Andric   void dump() const { print(dbgs()); }
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric #ifndef NDEBUG
5400b57cec5SDimitry Andric   friend raw_ostream &operator<<(raw_ostream &OS,
5410b57cec5SDimitry Andric                                  const InstPartitionContainer &Partitions) {
5420b57cec5SDimitry Andric     Partitions.print(OS);
5430b57cec5SDimitry Andric     return OS;
5440b57cec5SDimitry Andric   }
5450b57cec5SDimitry Andric #endif
5460b57cec5SDimitry Andric 
547*0fca6ea1SDimitry Andric   void printBlocks(raw_ostream &OS) const {
5480b57cec5SDimitry Andric     unsigned Index = 0;
5490b57cec5SDimitry Andric     for (const auto &P : PartitionContainer) {
550*0fca6ea1SDimitry Andric       OS << "LDist: Partition " << Index++ << ":";
551*0fca6ea1SDimitry Andric       P.printBlocks(OS);
5520b57cec5SDimitry Andric     }
5530b57cec5SDimitry Andric   }
5540b57cec5SDimitry Andric 
5550b57cec5SDimitry Andric private:
5560b57cec5SDimitry Andric   using PartitionContainerT = std::list<InstPartition>;
5570b57cec5SDimitry Andric 
5580b57cec5SDimitry Andric   /// List of partitions.
5590b57cec5SDimitry Andric   PartitionContainerT PartitionContainer;
5600b57cec5SDimitry Andric 
5610b57cec5SDimitry Andric   /// Mapping from Instruction to partition Id.  If the instruction
5620b57cec5SDimitry Andric   /// belongs to multiple partitions the entry contains -1.
5630b57cec5SDimitry Andric   InstToPartitionIdT InstToPartitionId;
5640b57cec5SDimitry Andric 
5650b57cec5SDimitry Andric   Loop *L;
5660b57cec5SDimitry Andric   LoopInfo *LI;
5670b57cec5SDimitry Andric   DominatorTree *DT;
5680b57cec5SDimitry Andric 
5690b57cec5SDimitry Andric   /// The control structure to merge adjacent partitions if both satisfy
5700b57cec5SDimitry Andric   /// the \p Predicate.
5710b57cec5SDimitry Andric   template <class UnaryPredicate>
5720b57cec5SDimitry Andric   void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
5730b57cec5SDimitry Andric     InstPartition *PrevMatch = nullptr;
5740b57cec5SDimitry Andric     for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
5750b57cec5SDimitry Andric       auto DoesMatch = Predicate(&*I);
5760b57cec5SDimitry Andric       if (PrevMatch == nullptr && DoesMatch) {
5770b57cec5SDimitry Andric         PrevMatch = &*I;
5780b57cec5SDimitry Andric         ++I;
5790b57cec5SDimitry Andric       } else if (PrevMatch != nullptr && DoesMatch) {
5800b57cec5SDimitry Andric         I->moveTo(*PrevMatch);
5810b57cec5SDimitry Andric         I = PartitionContainer.erase(I);
5820b57cec5SDimitry Andric       } else {
5830b57cec5SDimitry Andric         PrevMatch = nullptr;
5840b57cec5SDimitry Andric         ++I;
5850b57cec5SDimitry Andric       }
5860b57cec5SDimitry Andric     }
5870b57cec5SDimitry Andric   }
5880b57cec5SDimitry Andric 
5890b57cec5SDimitry Andric   /// Assign new LoopIDs for the partition's cloned loop.
5900b57cec5SDimitry Andric   void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
591bdd1243dSDimitry Andric     std::optional<MDNode *> PartitionID = makeFollowupLoopID(
5920b57cec5SDimitry Andric         OrigLoopID,
5930b57cec5SDimitry Andric         {LLVMLoopDistributeFollowupAll,
5940b57cec5SDimitry Andric          Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
5950b57cec5SDimitry Andric                              : LLVMLoopDistributeFollowupCoincident});
59681ad6265SDimitry Andric     if (PartitionID) {
5970b57cec5SDimitry Andric       Loop *NewLoop = Part->getDistributedLoop();
598bdd1243dSDimitry Andric       NewLoop->setLoopID(*PartitionID);
5990b57cec5SDimitry Andric     }
6000b57cec5SDimitry Andric   }
6010b57cec5SDimitry Andric };
6020b57cec5SDimitry Andric 
6030b57cec5SDimitry Andric /// For each memory instruction, this class maintains difference of the
6040b57cec5SDimitry Andric /// number of unsafe dependences that start out from this instruction minus
6050b57cec5SDimitry Andric /// those that end here.
6060b57cec5SDimitry Andric ///
6070b57cec5SDimitry Andric /// By traversing the memory instructions in program order and accumulating this
6080b57cec5SDimitry Andric /// number, we know whether any unsafe dependence crosses over a program point.
6090b57cec5SDimitry Andric class MemoryInstructionDependences {
6100b57cec5SDimitry Andric   using Dependence = MemoryDepChecker::Dependence;
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric public:
6130b57cec5SDimitry Andric   struct Entry {
6140b57cec5SDimitry Andric     Instruction *Inst;
6150b57cec5SDimitry Andric     unsigned NumUnsafeDependencesStartOrEnd = 0;
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric     Entry(Instruction *Inst) : Inst(Inst) {}
6180b57cec5SDimitry Andric   };
6190b57cec5SDimitry Andric 
6200b57cec5SDimitry Andric   using AccessesType = SmallVector<Entry, 8>;
6210b57cec5SDimitry Andric 
6220b57cec5SDimitry Andric   AccessesType::const_iterator begin() const { return Accesses.begin(); }
6230b57cec5SDimitry Andric   AccessesType::const_iterator end() const { return Accesses.end(); }
6240b57cec5SDimitry Andric 
6250b57cec5SDimitry Andric   MemoryInstructionDependences(
6260b57cec5SDimitry Andric       const SmallVectorImpl<Instruction *> &Instructions,
6270b57cec5SDimitry Andric       const SmallVectorImpl<Dependence> &Dependences) {
6280b57cec5SDimitry Andric     Accesses.append(Instructions.begin(), Instructions.end());
6290b57cec5SDimitry Andric 
630*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
631bdd1243dSDimitry Andric     for (const auto &Dep : Dependences)
6320b57cec5SDimitry Andric       if (Dep.isPossiblyBackward()) {
6330b57cec5SDimitry Andric         // Note that the designations source and destination follow the program
6340b57cec5SDimitry Andric         // order, i.e. source is always first.  (The direction is given by the
6350b57cec5SDimitry Andric         // DepType.)
6360b57cec5SDimitry Andric         ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
6370b57cec5SDimitry Andric         --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
6380b57cec5SDimitry Andric 
6390b57cec5SDimitry Andric         LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
6400b57cec5SDimitry Andric       }
6410b57cec5SDimitry Andric   }
6420b57cec5SDimitry Andric 
6430b57cec5SDimitry Andric private:
6440b57cec5SDimitry Andric   AccessesType Accesses;
6450b57cec5SDimitry Andric };
6460b57cec5SDimitry Andric 
6470b57cec5SDimitry Andric /// The actual class performing the per-loop work.
6480b57cec5SDimitry Andric class LoopDistributeForLoop {
6490b57cec5SDimitry Andric public:
6500b57cec5SDimitry Andric   LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
651bdd1243dSDimitry Andric                         ScalarEvolution *SE, LoopAccessInfoManager &LAIs,
652bdd1243dSDimitry Andric                         OptimizationRemarkEmitter *ORE)
653bdd1243dSDimitry Andric       : L(L), F(F), LI(LI), DT(DT), SE(SE), LAIs(LAIs), ORE(ORE) {
6540b57cec5SDimitry Andric     setForced();
6550b57cec5SDimitry Andric   }
6560b57cec5SDimitry Andric 
6570b57cec5SDimitry Andric   /// Try to distribute an inner-most loop.
658bdd1243dSDimitry Andric   bool processLoop() {
659e8d8bef9SDimitry Andric     assert(L->isInnermost() && "Only process inner loops.");
6600b57cec5SDimitry Andric 
661*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
662*0fca6ea1SDimitry Andric                       << L->getHeader()->getParent()->getName() << "' from "
663*0fca6ea1SDimitry Andric                       << L->getLocStr() << "\n");
6640b57cec5SDimitry Andric 
665e8d8bef9SDimitry Andric     // Having a single exit block implies there's also one exiting block.
6660b57cec5SDimitry Andric     if (!L->getExitBlock())
6670b57cec5SDimitry Andric       return fail("MultipleExitBlocks", "multiple exit blocks");
6680b57cec5SDimitry Andric     if (!L->isLoopSimplifyForm())
6690b57cec5SDimitry Andric       return fail("NotLoopSimplifyForm",
6700b57cec5SDimitry Andric                   "loop is not in loop-simplify form");
671e8d8bef9SDimitry Andric     if (!L->isRotatedForm())
672e8d8bef9SDimitry Andric       return fail("NotBottomTested", "loop is not bottom tested");
6730b57cec5SDimitry Andric 
6740b57cec5SDimitry Andric     BasicBlock *PH = L->getLoopPreheader();
6750b57cec5SDimitry Andric 
676bdd1243dSDimitry Andric     LAI = &LAIs.getInfo(*L);
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric     // Currently, we only distribute to isolate the part of the loop with
6790b57cec5SDimitry Andric     // dependence cycles to enable partial vectorization.
6800b57cec5SDimitry Andric     if (LAI->canVectorizeMemory())
6810b57cec5SDimitry Andric       return fail("MemOpsCanBeVectorized",
6820b57cec5SDimitry Andric                   "memory operations are safe for vectorization");
6830b57cec5SDimitry Andric 
6840b57cec5SDimitry Andric     auto *Dependences = LAI->getDepChecker().getDependences();
6850b57cec5SDimitry Andric     if (!Dependences || Dependences->empty())
6860b57cec5SDimitry Andric       return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
6870b57cec5SDimitry Andric 
688*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: Found a candidate loop: "
689*0fca6ea1SDimitry Andric                       << L->getHeader()->getName() << "\n");
690*0fca6ea1SDimitry Andric 
6910b57cec5SDimitry Andric     InstPartitionContainer Partitions(L, LI, DT);
6920b57cec5SDimitry Andric 
6930b57cec5SDimitry Andric     // First, go through each memory operation and assign them to consecutive
6940b57cec5SDimitry Andric     // partitions (the order of partitions follows program order).  Put those
6950b57cec5SDimitry Andric     // with unsafe dependences into "cyclic" partition otherwise put each store
6960b57cec5SDimitry Andric     // in its own "non-cyclic" partition (we'll merge these later).
6970b57cec5SDimitry Andric     //
6980b57cec5SDimitry Andric     // Note that a memory operation (e.g. Load2 below) at a program point that
6990b57cec5SDimitry Andric     // has an unsafe dependence (Store3->Load1) spanning over it must be
7000b57cec5SDimitry Andric     // included in the same cyclic partition as the dependent operations.  This
7010b57cec5SDimitry Andric     // is to preserve the original program order after distribution.  E.g.:
7020b57cec5SDimitry Andric     //
7030b57cec5SDimitry Andric     //                NumUnsafeDependencesStartOrEnd  NumUnsafeDependencesActive
7040b57cec5SDimitry Andric     //  Load1   -.                     1                       0->1
7050b57cec5SDimitry Andric     //  Load2    | /Unsafe/            0                       1
7060b57cec5SDimitry Andric     //  Store3  -'                    -1                       1->0
7070b57cec5SDimitry Andric     //  Load4                          0                       0
7080b57cec5SDimitry Andric     //
7090b57cec5SDimitry Andric     // NumUnsafeDependencesActive > 0 indicates this situation and in this case
7100b57cec5SDimitry Andric     // we just keep assigning to the same cyclic partition until
7110b57cec5SDimitry Andric     // NumUnsafeDependencesActive reaches 0.
7120b57cec5SDimitry Andric     const MemoryDepChecker &DepChecker = LAI->getDepChecker();
7130b57cec5SDimitry Andric     MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
7140b57cec5SDimitry Andric                                      *Dependences);
7150b57cec5SDimitry Andric 
7160b57cec5SDimitry Andric     int NumUnsafeDependencesActive = 0;
717bdd1243dSDimitry Andric     for (const auto &InstDep : MID) {
7180b57cec5SDimitry Andric       Instruction *I = InstDep.Inst;
7190b57cec5SDimitry Andric       // We update NumUnsafeDependencesActive post-instruction, catch the
7200b57cec5SDimitry Andric       // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
7210b57cec5SDimitry Andric       if (NumUnsafeDependencesActive ||
7220b57cec5SDimitry Andric           InstDep.NumUnsafeDependencesStartOrEnd > 0)
7230b57cec5SDimitry Andric         Partitions.addToCyclicPartition(I);
7240b57cec5SDimitry Andric       else
7250b57cec5SDimitry Andric         Partitions.addToNewNonCyclicPartition(I);
7260b57cec5SDimitry Andric       NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
7270b57cec5SDimitry Andric       assert(NumUnsafeDependencesActive >= 0 &&
7280b57cec5SDimitry Andric              "Negative number of dependences active");
7290b57cec5SDimitry Andric     }
7300b57cec5SDimitry Andric 
7310b57cec5SDimitry Andric     // Add partitions for values used outside.  These partitions can be out of
7320b57cec5SDimitry Andric     // order from the original program order.  This is OK because if the
7330b57cec5SDimitry Andric     // partition uses a load we will merge this partition with the original
7340b57cec5SDimitry Andric     // partition of the load that we set up in the previous loop (see
7350b57cec5SDimitry Andric     // mergeToAvoidDuplicatedLoads).
7360b57cec5SDimitry Andric     auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
7370b57cec5SDimitry Andric     for (auto *Inst : DefsUsedOutside)
7380b57cec5SDimitry Andric       Partitions.addToNewNonCyclicPartition(Inst);
7390b57cec5SDimitry Andric 
740*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
7410b57cec5SDimitry Andric     if (Partitions.getSize() < 2)
7420b57cec5SDimitry Andric       return fail("CantIsolateUnsafeDeps",
7430b57cec5SDimitry Andric                   "cannot isolate unsafe dependencies");
7440b57cec5SDimitry Andric 
7450b57cec5SDimitry Andric     // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
7460b57cec5SDimitry Andric     // should be able to vectorize these together.
7470b57cec5SDimitry Andric     Partitions.mergeBeforePopulating();
748*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
7490b57cec5SDimitry Andric     if (Partitions.getSize() < 2)
7500b57cec5SDimitry Andric       return fail("CantIsolateUnsafeDeps",
7510b57cec5SDimitry Andric                   "cannot isolate unsafe dependencies");
7520b57cec5SDimitry Andric 
7530b57cec5SDimitry Andric     // Now, populate the partitions with non-memory operations.
7540b57cec5SDimitry Andric     Partitions.populateUsedSet();
755*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
7560b57cec5SDimitry Andric 
7570b57cec5SDimitry Andric     // In order to preserve original lexical order for loads, keep them in the
7580b57cec5SDimitry Andric     // partition that we set up in the MemoryInstructionDependences loop.
7590b57cec5SDimitry Andric     if (Partitions.mergeToAvoidDuplicatedLoads()) {
760*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
7610b57cec5SDimitry Andric                         << Partitions);
7620b57cec5SDimitry Andric       if (Partitions.getSize() < 2)
7630b57cec5SDimitry Andric         return fail("CantIsolateUnsafeDeps",
7640b57cec5SDimitry Andric                     "cannot isolate unsafe dependencies");
7650b57cec5SDimitry Andric     }
7660b57cec5SDimitry Andric 
7670b57cec5SDimitry Andric     // Don't distribute the loop if we need too many SCEV run-time checks, or
7680b57cec5SDimitry Andric     // any if it's illegal.
76981ad6265SDimitry Andric     const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
7700b57cec5SDimitry Andric     if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
7710b57cec5SDimitry Andric       return fail("RuntimeCheckWithConvergent",
7720b57cec5SDimitry Andric                   "may not insert runtime check with convergent operation");
7730b57cec5SDimitry Andric     }
7740b57cec5SDimitry Andric 
77581ad6265SDimitry Andric     if (Pred.getComplexity() > (IsForced.value_or(false)
7760b57cec5SDimitry Andric                                     ? PragmaDistributeSCEVCheckThreshold
7770b57cec5SDimitry Andric                                     : DistributeSCEVCheckThreshold))
7780b57cec5SDimitry Andric       return fail("TooManySCEVRuntimeChecks",
7790b57cec5SDimitry Andric                   "too many SCEV run-time checks needed.\n");
7800b57cec5SDimitry Andric 
78181ad6265SDimitry Andric     if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
7820b57cec5SDimitry Andric       return fail("HeuristicDisabled", "distribution heuristic disabled");
7830b57cec5SDimitry Andric 
784*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
785*0fca6ea1SDimitry Andric                       << L->getHeader()->getName() << "\n");
7860b57cec5SDimitry Andric     // We're done forming the partitions set up the reverse mapping from
7870b57cec5SDimitry Andric     // instructions to partitions.
7880b57cec5SDimitry Andric     Partitions.setupPartitionIdOnInstructions();
7890b57cec5SDimitry Andric 
7900b57cec5SDimitry Andric     // If we need run-time checks, version the loop now.
7910b57cec5SDimitry Andric     auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
7920b57cec5SDimitry Andric     const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
7930b57cec5SDimitry Andric     const auto &AllChecks = RtPtrChecking->getChecks();
7940b57cec5SDimitry Andric     auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
7950b57cec5SDimitry Andric                                                   RtPtrChecking);
7960b57cec5SDimitry Andric 
7970b57cec5SDimitry Andric     if (LAI->hasConvergentOp() && !Checks.empty()) {
7980b57cec5SDimitry Andric       return fail("RuntimeCheckWithConvergent",
7990b57cec5SDimitry Andric                   "may not insert runtime check with convergent operation");
8000b57cec5SDimitry Andric     }
8010b57cec5SDimitry Andric 
8025ffd83dbSDimitry Andric     // To keep things simple have an empty preheader before we version or clone
8035ffd83dbSDimitry Andric     // the loop.  (Also split if this has no predecessor, i.e. entry, because we
8045ffd83dbSDimitry Andric     // rely on PH having a predecessor.)
8055ffd83dbSDimitry Andric     if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
8065ffd83dbSDimitry Andric       SplitBlock(PH, PH->getTerminator(), DT, LI);
8075ffd83dbSDimitry Andric 
8080b57cec5SDimitry Andric     if (!Pred.isAlwaysTrue() || !Checks.empty()) {
8090b57cec5SDimitry Andric       assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
8100b57cec5SDimitry Andric 
8110b57cec5SDimitry Andric       MDNode *OrigLoopID = L->getLoopID();
8120b57cec5SDimitry Andric 
813*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
8140b57cec5SDimitry Andric       LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
815e8d8bef9SDimitry Andric       LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
8160b57cec5SDimitry Andric       LVer.versionLoop(DefsUsedOutside);
8170b57cec5SDimitry Andric       LVer.annotateLoopWithNoAlias();
8180b57cec5SDimitry Andric 
8190b57cec5SDimitry Andric       // The unversioned loop will not be changed, so we inherit all attributes
8200b57cec5SDimitry Andric       // from the original loop, but remove the loop distribution metadata to
8210b57cec5SDimitry Andric       // avoid to distribute it again.
822bdd1243dSDimitry Andric       MDNode *UnversionedLoopID = *makeFollowupLoopID(
823bdd1243dSDimitry Andric           OrigLoopID,
824bdd1243dSDimitry Andric           {LLVMLoopDistributeFollowupAll, LLVMLoopDistributeFollowupFallback},
825bdd1243dSDimitry Andric           "llvm.loop.distribute.", true);
8260b57cec5SDimitry Andric       LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
8270b57cec5SDimitry Andric     }
8280b57cec5SDimitry Andric 
8290b57cec5SDimitry Andric     // Create identical copies of the original loop for each partition and hook
8300b57cec5SDimitry Andric     // them up sequentially.
8310b57cec5SDimitry Andric     Partitions.cloneLoops();
8320b57cec5SDimitry Andric 
8330b57cec5SDimitry Andric     // Now, we remove the instruction from each loop that don't belong to that
8340b57cec5SDimitry Andric     // partition.
8350b57cec5SDimitry Andric     Partitions.removeUnusedInsts();
836*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
837*0fca6ea1SDimitry Andric     LLVM_DEBUG(Partitions.printBlocks(dbgs()));
8380b57cec5SDimitry Andric 
8390b57cec5SDimitry Andric     if (LDistVerify) {
8400b57cec5SDimitry Andric       LI->verify(*DT);
8410b57cec5SDimitry Andric       assert(DT->verify(DominatorTree::VerificationLevel::Fast));
8420b57cec5SDimitry Andric     }
8430b57cec5SDimitry Andric 
8440b57cec5SDimitry Andric     ++NumLoopsDistributed;
8450b57cec5SDimitry Andric     // Report the success.
8460b57cec5SDimitry Andric     ORE->emit([&]() {
8470b57cec5SDimitry Andric       return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
8480b57cec5SDimitry Andric                                 L->getHeader())
8490b57cec5SDimitry Andric              << "distributed loop";
8500b57cec5SDimitry Andric     });
8510b57cec5SDimitry Andric     return true;
8520b57cec5SDimitry Andric   }
8530b57cec5SDimitry Andric 
8540b57cec5SDimitry Andric   /// Provide diagnostics then \return with false.
8550b57cec5SDimitry Andric   bool fail(StringRef RemarkName, StringRef Message) {
8560b57cec5SDimitry Andric     LLVMContext &Ctx = F->getContext();
85781ad6265SDimitry Andric     bool Forced = isForced().value_or(false);
8580b57cec5SDimitry Andric 
859*0fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
8600b57cec5SDimitry Andric 
8610b57cec5SDimitry Andric     // With Rpass-missed report that distribution failed.
8620b57cec5SDimitry Andric     ORE->emit([&]() {
8630b57cec5SDimitry Andric       return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
8640b57cec5SDimitry Andric                                       L->getStartLoc(), L->getHeader())
8650b57cec5SDimitry Andric              << "loop not distributed: use -Rpass-analysis=loop-distribute for "
8660b57cec5SDimitry Andric                 "more "
8670b57cec5SDimitry Andric                 "info";
8680b57cec5SDimitry Andric     });
8690b57cec5SDimitry Andric 
8700b57cec5SDimitry Andric     // With Rpass-analysis report why.  This is on by default if distribution
8710b57cec5SDimitry Andric     // was requested explicitly.
8720b57cec5SDimitry Andric     ORE->emit(OptimizationRemarkAnalysis(
8730b57cec5SDimitry Andric                   Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
8740b57cec5SDimitry Andric                   RemarkName, L->getStartLoc(), L->getHeader())
8750b57cec5SDimitry Andric               << "loop not distributed: " << Message);
8760b57cec5SDimitry Andric 
8770b57cec5SDimitry Andric     // Also issue a warning if distribution was requested explicitly but it
8780b57cec5SDimitry Andric     // failed.
8790b57cec5SDimitry Andric     if (Forced)
8800b57cec5SDimitry Andric       Ctx.diagnose(DiagnosticInfoOptimizationFailure(
8810b57cec5SDimitry Andric           *F, L->getStartLoc(), "loop not distributed: failed "
8820b57cec5SDimitry Andric                                 "explicitly specified loop distribution"));
8830b57cec5SDimitry Andric 
8840b57cec5SDimitry Andric     return false;
8850b57cec5SDimitry Andric   }
8860b57cec5SDimitry Andric 
8870b57cec5SDimitry Andric   /// Return if distribution forced to be enabled/disabled for the loop.
8880b57cec5SDimitry Andric   ///
8890b57cec5SDimitry Andric   /// If the optional has a value, it indicates whether distribution was forced
8900b57cec5SDimitry Andric   /// to be enabled (true) or disabled (false).  If the optional has no value
8910b57cec5SDimitry Andric   /// distribution was not forced either way.
892bdd1243dSDimitry Andric   const std::optional<bool> &isForced() const { return IsForced; }
8930b57cec5SDimitry Andric 
8940b57cec5SDimitry Andric private:
8950b57cec5SDimitry Andric   /// Filter out checks between pointers from the same partition.
8960b57cec5SDimitry Andric   ///
8970b57cec5SDimitry Andric   /// \p PtrToPartition contains the partition number for pointers.  Partition
8980b57cec5SDimitry Andric   /// number -1 means that the pointer is used in multiple partitions.  In this
8990b57cec5SDimitry Andric   /// case we can't safely omit the check.
9005ffd83dbSDimitry Andric   SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
9015ffd83dbSDimitry Andric       const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
9020b57cec5SDimitry Andric       const SmallVectorImpl<int> &PtrToPartition,
9030b57cec5SDimitry Andric       const RuntimePointerChecking *RtPtrChecking) {
9045ffd83dbSDimitry Andric     SmallVector<RuntimePointerCheck, 4> Checks;
9050b57cec5SDimitry Andric 
9060b57cec5SDimitry Andric     copy_if(AllChecks, std::back_inserter(Checks),
9075ffd83dbSDimitry Andric             [&](const RuntimePointerCheck &Check) {
9080b57cec5SDimitry Andric               for (unsigned PtrIdx1 : Check.first->Members)
9090b57cec5SDimitry Andric                 for (unsigned PtrIdx2 : Check.second->Members)
9100b57cec5SDimitry Andric                   // Only include this check if there is a pair of pointers
9110b57cec5SDimitry Andric                   // that require checking and the pointers fall into
9120b57cec5SDimitry Andric                   // separate partitions.
9130b57cec5SDimitry Andric                   //
9140b57cec5SDimitry Andric                   // (Note that we already know at this point that the two
9150b57cec5SDimitry Andric                   // pointer groups need checking but it doesn't follow
9160b57cec5SDimitry Andric                   // that each pair of pointers within the two groups need
9170b57cec5SDimitry Andric                   // checking as well.
9180b57cec5SDimitry Andric                   //
9190b57cec5SDimitry Andric                   // In other words we don't want to include a check just
9200b57cec5SDimitry Andric                   // because there is a pair of pointers between the two
9210b57cec5SDimitry Andric                   // pointer groups that require checks and a different
9220b57cec5SDimitry Andric                   // pair whose pointers fall into different partitions.)
9230b57cec5SDimitry Andric                   if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
9240b57cec5SDimitry Andric                       !RuntimePointerChecking::arePointersInSamePartition(
9250b57cec5SDimitry Andric                           PtrToPartition, PtrIdx1, PtrIdx2))
9260b57cec5SDimitry Andric                     return true;
9270b57cec5SDimitry Andric               return false;
9280b57cec5SDimitry Andric             });
9290b57cec5SDimitry Andric 
9300b57cec5SDimitry Andric     return Checks;
9310b57cec5SDimitry Andric   }
9320b57cec5SDimitry Andric 
9330b57cec5SDimitry Andric   /// Check whether the loop metadata is forcing distribution to be
9340b57cec5SDimitry Andric   /// enabled/disabled.
9350b57cec5SDimitry Andric   void setForced() {
936bdd1243dSDimitry Andric     std::optional<const MDOperand *> Value =
9370b57cec5SDimitry Andric         findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
9380b57cec5SDimitry Andric     if (!Value)
9390b57cec5SDimitry Andric       return;
9400b57cec5SDimitry Andric 
9410b57cec5SDimitry Andric     const MDOperand *Op = *Value;
9420b57cec5SDimitry Andric     assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
9430b57cec5SDimitry Andric     IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
9440b57cec5SDimitry Andric   }
9450b57cec5SDimitry Andric 
9460b57cec5SDimitry Andric   Loop *L;
9470b57cec5SDimitry Andric   Function *F;
9480b57cec5SDimitry Andric 
9490b57cec5SDimitry Andric   // Analyses used.
9500b57cec5SDimitry Andric   LoopInfo *LI;
9510b57cec5SDimitry Andric   const LoopAccessInfo *LAI = nullptr;
9520b57cec5SDimitry Andric   DominatorTree *DT;
9530b57cec5SDimitry Andric   ScalarEvolution *SE;
954bdd1243dSDimitry Andric   LoopAccessInfoManager &LAIs;
9550b57cec5SDimitry Andric   OptimizationRemarkEmitter *ORE;
9560b57cec5SDimitry Andric 
9570b57cec5SDimitry Andric   /// Indicates whether distribution is forced to be enabled/disabled for
9580b57cec5SDimitry Andric   /// the loop.
9590b57cec5SDimitry Andric   ///
9600b57cec5SDimitry Andric   /// If the optional has a value, it indicates whether distribution was forced
9610b57cec5SDimitry Andric   /// to be enabled (true) or disabled (false).  If the optional has no value
9620b57cec5SDimitry Andric   /// distribution was not forced either way.
963bdd1243dSDimitry Andric   std::optional<bool> IsForced;
9640b57cec5SDimitry Andric };
9650b57cec5SDimitry Andric 
9660b57cec5SDimitry Andric } // end anonymous namespace
9670b57cec5SDimitry Andric 
9680b57cec5SDimitry Andric static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
9690b57cec5SDimitry Andric                     ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
970bdd1243dSDimitry Andric                     LoopAccessInfoManager &LAIs) {
971*0fca6ea1SDimitry Andric   // Build up a worklist of inner-loops to distribute. This is necessary as the
9720b57cec5SDimitry Andric   // act of distributing a loop creates new loops and can invalidate iterators
9730b57cec5SDimitry Andric   // across the loops.
9740b57cec5SDimitry Andric   SmallVector<Loop *, 8> Worklist;
9750b57cec5SDimitry Andric 
9760b57cec5SDimitry Andric   for (Loop *TopLevelLoop : *LI)
9770b57cec5SDimitry Andric     for (Loop *L : depth_first(TopLevelLoop))
9780b57cec5SDimitry Andric       // We only handle inner-most loops.
979e8d8bef9SDimitry Andric       if (L->isInnermost())
9800b57cec5SDimitry Andric         Worklist.push_back(L);
9810b57cec5SDimitry Andric 
9820b57cec5SDimitry Andric   // Now walk the identified inner loops.
9830b57cec5SDimitry Andric   bool Changed = false;
9840b57cec5SDimitry Andric   for (Loop *L : Worklist) {
985bdd1243dSDimitry Andric     LoopDistributeForLoop LDL(L, &F, LI, DT, SE, LAIs, ORE);
9860b57cec5SDimitry Andric 
9870b57cec5SDimitry Andric     // If distribution was forced for the specific loop to be
9880b57cec5SDimitry Andric     // enabled/disabled, follow that.  Otherwise use the global flag.
98981ad6265SDimitry Andric     if (LDL.isForced().value_or(EnableLoopDistribute))
990bdd1243dSDimitry Andric       Changed |= LDL.processLoop();
9910b57cec5SDimitry Andric   }
9920b57cec5SDimitry Andric 
9930b57cec5SDimitry Andric   // Process each loop nest in the function.
9940b57cec5SDimitry Andric   return Changed;
9950b57cec5SDimitry Andric }
9960b57cec5SDimitry Andric 
9970b57cec5SDimitry Andric PreservedAnalyses LoopDistributePass::run(Function &F,
9980b57cec5SDimitry Andric                                           FunctionAnalysisManager &AM) {
9990b57cec5SDimitry Andric   auto &LI = AM.getResult<LoopAnalysis>(F);
10000b57cec5SDimitry Andric   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
10010b57cec5SDimitry Andric   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
10020b57cec5SDimitry Andric   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
10030b57cec5SDimitry Andric 
1004bdd1243dSDimitry Andric   LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
1005bdd1243dSDimitry Andric   bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, LAIs);
10060b57cec5SDimitry Andric   if (!Changed)
10070b57cec5SDimitry Andric     return PreservedAnalyses::all();
10080b57cec5SDimitry Andric   PreservedAnalyses PA;
10090b57cec5SDimitry Andric   PA.preserve<LoopAnalysis>();
10100b57cec5SDimitry Andric   PA.preserve<DominatorTreeAnalysis>();
10110b57cec5SDimitry Andric   return PA;
10120b57cec5SDimitry Andric }
1013