xref: /openbsd-src/gnu/llvm/llvm/lib/Transforms/Scalar/LoopDistribute.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file implements the Loop Distribution Pass.  Its main focus is to
1009467b48Spatrick // distribute loops that cannot be vectorized due to dependence cycles.  It
1109467b48Spatrick // tries to isolate the offending dependences into a new loop allowing
1209467b48Spatrick // vectorization of the remaining parts.
1309467b48Spatrick //
1409467b48Spatrick // For dependence analysis, the pass uses the LoopVectorizer's
1509467b48Spatrick // LoopAccessAnalysis.  Because this analysis presumes no change in the order of
1609467b48Spatrick // memory operations, special care is taken to preserve the lexical order of
1709467b48Spatrick // these operations.
1809467b48Spatrick //
1909467b48Spatrick // Similarly to the Vectorizer, the pass also supports loop versioning to
2009467b48Spatrick // run-time disambiguate potentially overlapping arrays.
2109467b48Spatrick //
2209467b48Spatrick //===----------------------------------------------------------------------===//
2309467b48Spatrick 
2409467b48Spatrick #include "llvm/Transforms/Scalar/LoopDistribute.h"
2509467b48Spatrick #include "llvm/ADT/DenseMap.h"
2609467b48Spatrick #include "llvm/ADT/DepthFirstIterator.h"
2709467b48Spatrick #include "llvm/ADT/EquivalenceClasses.h"
2809467b48Spatrick #include "llvm/ADT/STLExtras.h"
2909467b48Spatrick #include "llvm/ADT/SmallPtrSet.h"
3009467b48Spatrick #include "llvm/ADT/SmallVector.h"
3109467b48Spatrick #include "llvm/ADT/Statistic.h"
3209467b48Spatrick #include "llvm/ADT/StringRef.h"
3309467b48Spatrick #include "llvm/ADT/Twine.h"
3409467b48Spatrick #include "llvm/ADT/iterator_range.h"
3509467b48Spatrick #include "llvm/Analysis/AssumptionCache.h"
3609467b48Spatrick #include "llvm/Analysis/GlobalsModRef.h"
3709467b48Spatrick #include "llvm/Analysis/LoopAccessAnalysis.h"
3809467b48Spatrick #include "llvm/Analysis/LoopAnalysisManager.h"
3909467b48Spatrick #include "llvm/Analysis/LoopInfo.h"
4009467b48Spatrick #include "llvm/Analysis/OptimizationRemarkEmitter.h"
4109467b48Spatrick #include "llvm/Analysis/ScalarEvolution.h"
4209467b48Spatrick #include "llvm/Analysis/TargetLibraryInfo.h"
4309467b48Spatrick #include "llvm/Analysis/TargetTransformInfo.h"
4409467b48Spatrick #include "llvm/IR/BasicBlock.h"
4509467b48Spatrick #include "llvm/IR/Constants.h"
4609467b48Spatrick #include "llvm/IR/DiagnosticInfo.h"
4709467b48Spatrick #include "llvm/IR/Dominators.h"
4809467b48Spatrick #include "llvm/IR/Function.h"
4909467b48Spatrick #include "llvm/IR/Instruction.h"
5009467b48Spatrick #include "llvm/IR/Instructions.h"
5109467b48Spatrick #include "llvm/IR/LLVMContext.h"
5209467b48Spatrick #include "llvm/IR/Metadata.h"
5309467b48Spatrick #include "llvm/IR/PassManager.h"
5409467b48Spatrick #include "llvm/IR/Value.h"
5509467b48Spatrick #include "llvm/InitializePasses.h"
5609467b48Spatrick #include "llvm/Pass.h"
5709467b48Spatrick #include "llvm/Support/Casting.h"
5809467b48Spatrick #include "llvm/Support/CommandLine.h"
5909467b48Spatrick #include "llvm/Support/Debug.h"
6009467b48Spatrick #include "llvm/Support/raw_ostream.h"
6109467b48Spatrick #include "llvm/Transforms/Scalar.h"
6209467b48Spatrick #include "llvm/Transforms/Utils/BasicBlockUtils.h"
6309467b48Spatrick #include "llvm/Transforms/Utils/Cloning.h"
6409467b48Spatrick #include "llvm/Transforms/Utils/LoopUtils.h"
6509467b48Spatrick #include "llvm/Transforms/Utils/LoopVersioning.h"
6609467b48Spatrick #include "llvm/Transforms/Utils/ValueMapper.h"
6709467b48Spatrick #include <cassert>
6809467b48Spatrick #include <functional>
6909467b48Spatrick #include <list>
7009467b48Spatrick #include <tuple>
7109467b48Spatrick #include <utility>
7209467b48Spatrick 
7309467b48Spatrick using namespace llvm;
7409467b48Spatrick 
7509467b48Spatrick #define LDIST_NAME "loop-distribute"
7609467b48Spatrick #define DEBUG_TYPE LDIST_NAME
7709467b48Spatrick 
7809467b48Spatrick /// @{
7909467b48Spatrick /// Metadata attribute names
8009467b48Spatrick static const char *const LLVMLoopDistributeFollowupAll =
8109467b48Spatrick     "llvm.loop.distribute.followup_all";
8209467b48Spatrick static const char *const LLVMLoopDistributeFollowupCoincident =
8309467b48Spatrick     "llvm.loop.distribute.followup_coincident";
8409467b48Spatrick static const char *const LLVMLoopDistributeFollowupSequential =
8509467b48Spatrick     "llvm.loop.distribute.followup_sequential";
8609467b48Spatrick static const char *const LLVMLoopDistributeFollowupFallback =
8709467b48Spatrick     "llvm.loop.distribute.followup_fallback";
8809467b48Spatrick /// @}
8909467b48Spatrick 
9009467b48Spatrick static cl::opt<bool>
9109467b48Spatrick     LDistVerify("loop-distribute-verify", cl::Hidden,
9209467b48Spatrick                 cl::desc("Turn on DominatorTree and LoopInfo verification "
9309467b48Spatrick                          "after Loop Distribution"),
9409467b48Spatrick                 cl::init(false));
9509467b48Spatrick 
9609467b48Spatrick static cl::opt<bool> DistributeNonIfConvertible(
9709467b48Spatrick     "loop-distribute-non-if-convertible", cl::Hidden,
9809467b48Spatrick     cl::desc("Whether to distribute into a loop that may not be "
9909467b48Spatrick              "if-convertible by the loop vectorizer"),
10009467b48Spatrick     cl::init(false));
10109467b48Spatrick 
10209467b48Spatrick static cl::opt<unsigned> DistributeSCEVCheckThreshold(
10309467b48Spatrick     "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden,
10409467b48Spatrick     cl::desc("The maximum number of SCEV checks allowed for Loop "
10509467b48Spatrick              "Distribution"));
10609467b48Spatrick 
10709467b48Spatrick static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
10809467b48Spatrick     "loop-distribute-scev-check-threshold-with-pragma", cl::init(128),
10909467b48Spatrick     cl::Hidden,
11009467b48Spatrick     cl::desc(
11109467b48Spatrick         "The maximum number of SCEV checks allowed for Loop "
11209467b48Spatrick         "Distribution for loop marked with #pragma loop distribute(enable)"));
11309467b48Spatrick 
11409467b48Spatrick static cl::opt<bool> EnableLoopDistribute(
11509467b48Spatrick     "enable-loop-distribute", cl::Hidden,
11609467b48Spatrick     cl::desc("Enable the new, experimental LoopDistribution Pass"),
11709467b48Spatrick     cl::init(false));
11809467b48Spatrick 
11909467b48Spatrick STATISTIC(NumLoopsDistributed, "Number of loops distributed");
12009467b48Spatrick 
12109467b48Spatrick namespace {
12209467b48Spatrick 
12309467b48Spatrick /// Maintains the set of instructions of the loop for a partition before
12409467b48Spatrick /// cloning.  After cloning, it hosts the new loop.
12509467b48Spatrick class InstPartition {
12609467b48Spatrick   using InstructionSet = SmallPtrSet<Instruction *, 8>;
12709467b48Spatrick 
12809467b48Spatrick public:
InstPartition(Instruction * I,Loop * L,bool DepCycle=false)12909467b48Spatrick   InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
13009467b48Spatrick       : DepCycle(DepCycle), OrigLoop(L) {
13109467b48Spatrick     Set.insert(I);
13209467b48Spatrick   }
13309467b48Spatrick 
13409467b48Spatrick   /// Returns whether this partition contains a dependence cycle.
hasDepCycle() const13509467b48Spatrick   bool hasDepCycle() const { return DepCycle; }
13609467b48Spatrick 
13709467b48Spatrick   /// Adds an instruction to this partition.
add(Instruction * I)13809467b48Spatrick   void add(Instruction *I) { Set.insert(I); }
13909467b48Spatrick 
14009467b48Spatrick   /// Collection accessors.
begin()14109467b48Spatrick   InstructionSet::iterator begin() { return Set.begin(); }
end()14209467b48Spatrick   InstructionSet::iterator end() { return Set.end(); }
begin() const14309467b48Spatrick   InstructionSet::const_iterator begin() const { return Set.begin(); }
end() const14409467b48Spatrick   InstructionSet::const_iterator end() const { return Set.end(); }
empty() const14509467b48Spatrick   bool empty() const { return Set.empty(); }
14609467b48Spatrick 
14709467b48Spatrick   /// Moves this partition into \p Other.  This partition becomes empty
14809467b48Spatrick   /// after this.
moveTo(InstPartition & Other)14909467b48Spatrick   void moveTo(InstPartition &Other) {
15009467b48Spatrick     Other.Set.insert(Set.begin(), Set.end());
15109467b48Spatrick     Set.clear();
15209467b48Spatrick     Other.DepCycle |= DepCycle;
15309467b48Spatrick   }
15409467b48Spatrick 
15509467b48Spatrick   /// Populates the partition with a transitive closure of all the
15609467b48Spatrick   /// instructions that the seeded instructions dependent on.
populateUsedSet()15709467b48Spatrick   void populateUsedSet() {
15809467b48Spatrick     // FIXME: We currently don't use control-dependence but simply include all
15909467b48Spatrick     // blocks (possibly empty at the end) and let simplifycfg mostly clean this
16009467b48Spatrick     // up.
16109467b48Spatrick     for (auto *B : OrigLoop->getBlocks())
16209467b48Spatrick       Set.insert(B->getTerminator());
16309467b48Spatrick 
16409467b48Spatrick     // Follow the use-def chains to form a transitive closure of all the
16509467b48Spatrick     // instructions that the originally seeded instructions depend on.
16609467b48Spatrick     SmallVector<Instruction *, 8> Worklist(Set.begin(), Set.end());
16709467b48Spatrick     while (!Worklist.empty()) {
16809467b48Spatrick       Instruction *I = Worklist.pop_back_val();
16909467b48Spatrick       // Insert instructions from the loop that we depend on.
17009467b48Spatrick       for (Value *V : I->operand_values()) {
17109467b48Spatrick         auto *I = dyn_cast<Instruction>(V);
17209467b48Spatrick         if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
17309467b48Spatrick           Worklist.push_back(I);
17409467b48Spatrick       }
17509467b48Spatrick     }
17609467b48Spatrick   }
17709467b48Spatrick 
17809467b48Spatrick   /// Clones the original loop.
17909467b48Spatrick   ///
18009467b48Spatrick   /// Updates LoopInfo and DominatorTree using the information that block \p
18109467b48Spatrick   /// LoopDomBB dominates the loop.
cloneLoopWithPreheader(BasicBlock * InsertBefore,BasicBlock * LoopDomBB,unsigned Index,LoopInfo * LI,DominatorTree * DT)18209467b48Spatrick   Loop *cloneLoopWithPreheader(BasicBlock *InsertBefore, BasicBlock *LoopDomBB,
18309467b48Spatrick                                unsigned Index, LoopInfo *LI,
18409467b48Spatrick                                DominatorTree *DT) {
18509467b48Spatrick     ClonedLoop = ::cloneLoopWithPreheader(InsertBefore, LoopDomBB, OrigLoop,
18609467b48Spatrick                                           VMap, Twine(".ldist") + Twine(Index),
18709467b48Spatrick                                           LI, DT, ClonedLoopBlocks);
18809467b48Spatrick     return ClonedLoop;
18909467b48Spatrick   }
19009467b48Spatrick 
19109467b48Spatrick   /// The cloned loop.  If this partition is mapped to the original loop,
19209467b48Spatrick   /// this is null.
getClonedLoop() const19309467b48Spatrick   const Loop *getClonedLoop() const { return ClonedLoop; }
19409467b48Spatrick 
19509467b48Spatrick   /// Returns the loop where this partition ends up after distribution.
19609467b48Spatrick   /// If this partition is mapped to the original loop then use the block from
19709467b48Spatrick   /// the loop.
getDistributedLoop() const19809467b48Spatrick   Loop *getDistributedLoop() const {
19909467b48Spatrick     return ClonedLoop ? ClonedLoop : OrigLoop;
20009467b48Spatrick   }
20109467b48Spatrick 
20209467b48Spatrick   /// The VMap that is populated by cloning and then used in
20309467b48Spatrick   /// remapinstruction to remap the cloned instructions.
getVMap()20409467b48Spatrick   ValueToValueMapTy &getVMap() { return VMap; }
20509467b48Spatrick 
20609467b48Spatrick   /// Remaps the cloned instructions using VMap.
remapInstructions()20709467b48Spatrick   void remapInstructions() {
20809467b48Spatrick     remapInstructionsInBlocks(ClonedLoopBlocks, VMap);
20909467b48Spatrick   }
21009467b48Spatrick 
21109467b48Spatrick   /// Based on the set of instructions selected for this partition,
21209467b48Spatrick   /// removes the unnecessary ones.
removeUnusedInsts()21309467b48Spatrick   void removeUnusedInsts() {
21409467b48Spatrick     SmallVector<Instruction *, 8> Unused;
21509467b48Spatrick 
21609467b48Spatrick     for (auto *Block : OrigLoop->getBlocks())
21709467b48Spatrick       for (auto &Inst : *Block)
21809467b48Spatrick         if (!Set.count(&Inst)) {
21909467b48Spatrick           Instruction *NewInst = &Inst;
22009467b48Spatrick           if (!VMap.empty())
22109467b48Spatrick             NewInst = cast<Instruction>(VMap[NewInst]);
22209467b48Spatrick 
22309467b48Spatrick           assert(!isa<BranchInst>(NewInst) &&
22409467b48Spatrick                  "Branches are marked used early on");
22509467b48Spatrick           Unused.push_back(NewInst);
22609467b48Spatrick         }
22709467b48Spatrick 
22809467b48Spatrick     // Delete the instructions backwards, as it has a reduced likelihood of
22909467b48Spatrick     // having to update as many def-use and use-def chains.
23009467b48Spatrick     for (auto *Inst : reverse(Unused)) {
23109467b48Spatrick       if (!Inst->use_empty())
232*d415bd75Srobert         Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType()));
23309467b48Spatrick       Inst->eraseFromParent();
23409467b48Spatrick     }
23509467b48Spatrick   }
23609467b48Spatrick 
print() const23709467b48Spatrick   void print() const {
23809467b48Spatrick     if (DepCycle)
23909467b48Spatrick       dbgs() << "  (cycle)\n";
24009467b48Spatrick     for (auto *I : Set)
24109467b48Spatrick       // Prefix with the block name.
24209467b48Spatrick       dbgs() << "  " << I->getParent()->getName() << ":" << *I << "\n";
24309467b48Spatrick   }
24409467b48Spatrick 
printBlocks() const24509467b48Spatrick   void printBlocks() const {
24609467b48Spatrick     for (auto *BB : getDistributedLoop()->getBlocks())
24709467b48Spatrick       dbgs() << *BB;
24809467b48Spatrick   }
24909467b48Spatrick 
25009467b48Spatrick private:
25109467b48Spatrick   /// Instructions from OrigLoop selected for this partition.
25209467b48Spatrick   InstructionSet Set;
25309467b48Spatrick 
25409467b48Spatrick   /// Whether this partition contains a dependence cycle.
25509467b48Spatrick   bool DepCycle;
25609467b48Spatrick 
25709467b48Spatrick   /// The original loop.
25809467b48Spatrick   Loop *OrigLoop;
25909467b48Spatrick 
26009467b48Spatrick   /// The cloned loop.  If this partition is mapped to the original loop,
26109467b48Spatrick   /// this is null.
26209467b48Spatrick   Loop *ClonedLoop = nullptr;
26309467b48Spatrick 
26409467b48Spatrick   /// The blocks of ClonedLoop including the preheader.  If this
26509467b48Spatrick   /// partition is mapped to the original loop, this is empty.
26609467b48Spatrick   SmallVector<BasicBlock *, 8> ClonedLoopBlocks;
26709467b48Spatrick 
26809467b48Spatrick   /// These gets populated once the set of instructions have been
26909467b48Spatrick   /// finalized. If this partition is mapped to the original loop, these are not
27009467b48Spatrick   /// set.
27109467b48Spatrick   ValueToValueMapTy VMap;
27209467b48Spatrick };
27309467b48Spatrick 
27409467b48Spatrick /// Holds the set of Partitions.  It populates them, merges them and then
27509467b48Spatrick /// clones the loops.
27609467b48Spatrick class InstPartitionContainer {
27709467b48Spatrick   using InstToPartitionIdT = DenseMap<Instruction *, int>;
27809467b48Spatrick 
27909467b48Spatrick public:
InstPartitionContainer(Loop * L,LoopInfo * LI,DominatorTree * DT)28009467b48Spatrick   InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
28109467b48Spatrick       : L(L), LI(LI), DT(DT) {}
28209467b48Spatrick 
28309467b48Spatrick   /// Returns the number of partitions.
getSize() const28409467b48Spatrick   unsigned getSize() const { return PartitionContainer.size(); }
28509467b48Spatrick 
28609467b48Spatrick   /// Adds \p Inst into the current partition if that is marked to
28709467b48Spatrick   /// contain cycles.  Otherwise start a new partition for it.
addToCyclicPartition(Instruction * Inst)28809467b48Spatrick   void addToCyclicPartition(Instruction *Inst) {
28909467b48Spatrick     // If the current partition is non-cyclic.  Start a new one.
29009467b48Spatrick     if (PartitionContainer.empty() || !PartitionContainer.back().hasDepCycle())
29109467b48Spatrick       PartitionContainer.emplace_back(Inst, L, /*DepCycle=*/true);
29209467b48Spatrick     else
29309467b48Spatrick       PartitionContainer.back().add(Inst);
29409467b48Spatrick   }
29509467b48Spatrick 
29609467b48Spatrick   /// Adds \p Inst into a partition that is not marked to contain
29709467b48Spatrick   /// dependence cycles.
29809467b48Spatrick   ///
29909467b48Spatrick   //  Initially we isolate memory instructions into as many partitions as
30009467b48Spatrick   //  possible, then later we may merge them back together.
addToNewNonCyclicPartition(Instruction * Inst)30109467b48Spatrick   void addToNewNonCyclicPartition(Instruction *Inst) {
30209467b48Spatrick     PartitionContainer.emplace_back(Inst, L);
30309467b48Spatrick   }
30409467b48Spatrick 
30509467b48Spatrick   /// Merges adjacent non-cyclic partitions.
30609467b48Spatrick   ///
30709467b48Spatrick   /// The idea is that we currently only want to isolate the non-vectorizable
30809467b48Spatrick   /// partition.  We could later allow more distribution among these partition
30909467b48Spatrick   /// too.
mergeAdjacentNonCyclic()31009467b48Spatrick   void mergeAdjacentNonCyclic() {
31109467b48Spatrick     mergeAdjacentPartitionsIf(
31209467b48Spatrick         [](const InstPartition *P) { return !P->hasDepCycle(); });
31309467b48Spatrick   }
31409467b48Spatrick 
31509467b48Spatrick   /// If a partition contains only conditional stores, we won't vectorize
31609467b48Spatrick   /// it.  Try to merge it with a previous cyclic partition.
mergeNonIfConvertible()31709467b48Spatrick   void mergeNonIfConvertible() {
31809467b48Spatrick     mergeAdjacentPartitionsIf([&](const InstPartition *Partition) {
31909467b48Spatrick       if (Partition->hasDepCycle())
32009467b48Spatrick         return true;
32109467b48Spatrick 
32209467b48Spatrick       // Now, check if all stores are conditional in this partition.
32309467b48Spatrick       bool seenStore = false;
32409467b48Spatrick 
32509467b48Spatrick       for (auto *Inst : *Partition)
32609467b48Spatrick         if (isa<StoreInst>(Inst)) {
32709467b48Spatrick           seenStore = true;
32809467b48Spatrick           if (!LoopAccessInfo::blockNeedsPredication(Inst->getParent(), L, DT))
32909467b48Spatrick             return false;
33009467b48Spatrick         }
33109467b48Spatrick       return seenStore;
33209467b48Spatrick     });
33309467b48Spatrick   }
33409467b48Spatrick 
33509467b48Spatrick   /// Merges the partitions according to various heuristics.
mergeBeforePopulating()33609467b48Spatrick   void mergeBeforePopulating() {
33709467b48Spatrick     mergeAdjacentNonCyclic();
33809467b48Spatrick     if (!DistributeNonIfConvertible)
33909467b48Spatrick       mergeNonIfConvertible();
34009467b48Spatrick   }
34109467b48Spatrick 
34209467b48Spatrick   /// Merges partitions in order to ensure that no loads are duplicated.
34309467b48Spatrick   ///
34409467b48Spatrick   /// We can't duplicate loads because that could potentially reorder them.
34509467b48Spatrick   /// LoopAccessAnalysis provides dependency information with the context that
34609467b48Spatrick   /// the order of memory operation is preserved.
34709467b48Spatrick   ///
34809467b48Spatrick   /// Return if any partitions were merged.
mergeToAvoidDuplicatedLoads()34909467b48Spatrick   bool mergeToAvoidDuplicatedLoads() {
35009467b48Spatrick     using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
35109467b48Spatrick     using ToBeMergedT = EquivalenceClasses<InstPartition *>;
35209467b48Spatrick 
35309467b48Spatrick     LoadToPartitionT LoadToPartition;
35409467b48Spatrick     ToBeMergedT ToBeMerged;
35509467b48Spatrick 
35609467b48Spatrick     // Step through the partitions and create equivalence between partitions
35709467b48Spatrick     // that contain the same load.  Also put partitions in between them in the
35809467b48Spatrick     // same equivalence class to avoid reordering of memory operations.
35909467b48Spatrick     for (PartitionContainerT::iterator I = PartitionContainer.begin(),
36009467b48Spatrick                                        E = PartitionContainer.end();
36109467b48Spatrick          I != E; ++I) {
36209467b48Spatrick       auto *PartI = &*I;
36309467b48Spatrick 
36409467b48Spatrick       // If a load occurs in two partitions PartI and PartJ, merge all
36509467b48Spatrick       // partitions (PartI, PartJ] into PartI.
36609467b48Spatrick       for (Instruction *Inst : *PartI)
36709467b48Spatrick         if (isa<LoadInst>(Inst)) {
36809467b48Spatrick           bool NewElt;
36909467b48Spatrick           LoadToPartitionT::iterator LoadToPart;
37009467b48Spatrick 
37109467b48Spatrick           std::tie(LoadToPart, NewElt) =
37209467b48Spatrick               LoadToPartition.insert(std::make_pair(Inst, PartI));
37309467b48Spatrick           if (!NewElt) {
37409467b48Spatrick             LLVM_DEBUG(dbgs()
37509467b48Spatrick                        << "Merging partitions due to this load in multiple "
37609467b48Spatrick                        << "partitions: " << PartI << ", " << LoadToPart->second
37709467b48Spatrick                        << "\n"
37809467b48Spatrick                        << *Inst << "\n");
37909467b48Spatrick 
38009467b48Spatrick             auto PartJ = I;
38109467b48Spatrick             do {
38209467b48Spatrick               --PartJ;
38309467b48Spatrick               ToBeMerged.unionSets(PartI, &*PartJ);
38409467b48Spatrick             } while (&*PartJ != LoadToPart->second);
38509467b48Spatrick           }
38609467b48Spatrick         }
38709467b48Spatrick     }
38809467b48Spatrick     if (ToBeMerged.empty())
38909467b48Spatrick       return false;
39009467b48Spatrick 
39109467b48Spatrick     // Merge the member of an equivalence class into its class leader.  This
39209467b48Spatrick     // makes the members empty.
39309467b48Spatrick     for (ToBeMergedT::iterator I = ToBeMerged.begin(), E = ToBeMerged.end();
39409467b48Spatrick          I != E; ++I) {
39509467b48Spatrick       if (!I->isLeader())
39609467b48Spatrick         continue;
39709467b48Spatrick 
39809467b48Spatrick       auto PartI = I->getData();
399*d415bd75Srobert       for (auto *PartJ : make_range(std::next(ToBeMerged.member_begin(I)),
40009467b48Spatrick                                    ToBeMerged.member_end())) {
40109467b48Spatrick         PartJ->moveTo(*PartI);
40209467b48Spatrick       }
40309467b48Spatrick     }
40409467b48Spatrick 
40509467b48Spatrick     // Remove the empty partitions.
40609467b48Spatrick     PartitionContainer.remove_if(
40709467b48Spatrick         [](const InstPartition &P) { return P.empty(); });
40809467b48Spatrick 
40909467b48Spatrick     return true;
41009467b48Spatrick   }
41109467b48Spatrick 
41209467b48Spatrick   /// Sets up the mapping between instructions to partitions.  If the
41309467b48Spatrick   /// instruction is duplicated across multiple partitions, set the entry to -1.
setupPartitionIdOnInstructions()41409467b48Spatrick   void setupPartitionIdOnInstructions() {
41509467b48Spatrick     int PartitionID = 0;
41609467b48Spatrick     for (const auto &Partition : PartitionContainer) {
41709467b48Spatrick       for (Instruction *Inst : Partition) {
41809467b48Spatrick         bool NewElt;
41909467b48Spatrick         InstToPartitionIdT::iterator Iter;
42009467b48Spatrick 
42109467b48Spatrick         std::tie(Iter, NewElt) =
42209467b48Spatrick             InstToPartitionId.insert(std::make_pair(Inst, PartitionID));
42309467b48Spatrick         if (!NewElt)
42409467b48Spatrick           Iter->second = -1;
42509467b48Spatrick       }
42609467b48Spatrick       ++PartitionID;
42709467b48Spatrick     }
42809467b48Spatrick   }
42909467b48Spatrick 
43009467b48Spatrick   /// Populates the partition with everything that the seeding
43109467b48Spatrick   /// instructions require.
populateUsedSet()43209467b48Spatrick   void populateUsedSet() {
43309467b48Spatrick     for (auto &P : PartitionContainer)
43409467b48Spatrick       P.populateUsedSet();
43509467b48Spatrick   }
43609467b48Spatrick 
43709467b48Spatrick   /// This performs the main chunk of the work of cloning the loops for
43809467b48Spatrick   /// the partitions.
cloneLoops()43909467b48Spatrick   void cloneLoops() {
44009467b48Spatrick     BasicBlock *OrigPH = L->getLoopPreheader();
44109467b48Spatrick     // At this point the predecessor of the preheader is either the memcheck
44209467b48Spatrick     // block or the top part of the original preheader.
44309467b48Spatrick     BasicBlock *Pred = OrigPH->getSinglePredecessor();
44409467b48Spatrick     assert(Pred && "Preheader does not have a single predecessor");
44509467b48Spatrick     BasicBlock *ExitBlock = L->getExitBlock();
44609467b48Spatrick     assert(ExitBlock && "No single exit block");
44709467b48Spatrick     Loop *NewLoop;
44809467b48Spatrick 
44909467b48Spatrick     assert(!PartitionContainer.empty() && "at least two partitions expected");
45009467b48Spatrick     // We're cloning the preheader along with the loop so we already made sure
45109467b48Spatrick     // it was empty.
45209467b48Spatrick     assert(&*OrigPH->begin() == OrigPH->getTerminator() &&
45309467b48Spatrick            "preheader not empty");
45409467b48Spatrick 
45509467b48Spatrick     // Preserve the original loop ID for use after the transformation.
45609467b48Spatrick     MDNode *OrigLoopID = L->getLoopID();
45709467b48Spatrick 
45809467b48Spatrick     // Create a loop for each partition except the last.  Clone the original
45909467b48Spatrick     // loop before PH along with adding a preheader for the cloned loop.  Then
46009467b48Spatrick     // update PH to point to the newly added preheader.
46109467b48Spatrick     BasicBlock *TopPH = OrigPH;
46209467b48Spatrick     unsigned Index = getSize() - 1;
463*d415bd75Srobert     for (auto &Part : llvm::drop_begin(llvm::reverse(PartitionContainer))) {
464*d415bd75Srobert       NewLoop = Part.cloneLoopWithPreheader(TopPH, Pred, Index, LI, DT);
46509467b48Spatrick 
466*d415bd75Srobert       Part.getVMap()[ExitBlock] = TopPH;
467*d415bd75Srobert       Part.remapInstructions();
468*d415bd75Srobert       setNewLoopID(OrigLoopID, &Part);
469*d415bd75Srobert       --Index;
470*d415bd75Srobert       TopPH = NewLoop->getLoopPreheader();
47109467b48Spatrick     }
47209467b48Spatrick     Pred->getTerminator()->replaceUsesOfWith(OrigPH, TopPH);
47309467b48Spatrick 
47409467b48Spatrick     // Also set a new loop ID for the last loop.
47509467b48Spatrick     setNewLoopID(OrigLoopID, &PartitionContainer.back());
47609467b48Spatrick 
47709467b48Spatrick     // Now go in forward order and update the immediate dominator for the
47809467b48Spatrick     // preheaders with the exiting block of the previous loop.  Dominance
47909467b48Spatrick     // within the loop is updated in cloneLoopWithPreheader.
48009467b48Spatrick     for (auto Curr = PartitionContainer.cbegin(),
48109467b48Spatrick               Next = std::next(PartitionContainer.cbegin()),
48209467b48Spatrick               E = PartitionContainer.cend();
48309467b48Spatrick          Next != E; ++Curr, ++Next)
48409467b48Spatrick       DT->changeImmediateDominator(
48509467b48Spatrick           Next->getDistributedLoop()->getLoopPreheader(),
48609467b48Spatrick           Curr->getDistributedLoop()->getExitingBlock());
48709467b48Spatrick   }
48809467b48Spatrick 
48909467b48Spatrick   /// Removes the dead instructions from the cloned loops.
removeUnusedInsts()49009467b48Spatrick   void removeUnusedInsts() {
49109467b48Spatrick     for (auto &Partition : PartitionContainer)
49209467b48Spatrick       Partition.removeUnusedInsts();
49309467b48Spatrick   }
49409467b48Spatrick 
49509467b48Spatrick   /// For each memory pointer, it computes the partitionId the pointer is
49609467b48Spatrick   /// used in.
49709467b48Spatrick   ///
49809467b48Spatrick   /// This returns an array of int where the I-th entry corresponds to I-th
49909467b48Spatrick   /// entry in LAI.getRuntimePointerCheck().  If the pointer is used in multiple
50009467b48Spatrick   /// partitions its entry is set to -1.
50109467b48Spatrick   SmallVector<int, 8>
computePartitionSetForPointers(const LoopAccessInfo & LAI)50209467b48Spatrick   computePartitionSetForPointers(const LoopAccessInfo &LAI) {
50309467b48Spatrick     const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking();
50409467b48Spatrick 
50509467b48Spatrick     unsigned N = RtPtrCheck->Pointers.size();
50609467b48Spatrick     SmallVector<int, 8> PtrToPartitions(N);
50709467b48Spatrick     for (unsigned I = 0; I < N; ++I) {
50809467b48Spatrick       Value *Ptr = RtPtrCheck->Pointers[I].PointerValue;
50909467b48Spatrick       auto Instructions =
51009467b48Spatrick           LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr);
51109467b48Spatrick 
51209467b48Spatrick       int &Partition = PtrToPartitions[I];
51309467b48Spatrick       // First set it to uninitialized.
51409467b48Spatrick       Partition = -2;
51509467b48Spatrick       for (Instruction *Inst : Instructions) {
51609467b48Spatrick         // Note that this could be -1 if Inst is duplicated across multiple
51709467b48Spatrick         // partitions.
51809467b48Spatrick         int ThisPartition = this->InstToPartitionId[Inst];
51909467b48Spatrick         if (Partition == -2)
52009467b48Spatrick           Partition = ThisPartition;
52109467b48Spatrick         // -1 means belonging to multiple partitions.
52209467b48Spatrick         else if (Partition == -1)
52309467b48Spatrick           break;
52409467b48Spatrick         else if (Partition != (int)ThisPartition)
52509467b48Spatrick           Partition = -1;
52609467b48Spatrick       }
52709467b48Spatrick       assert(Partition != -2 && "Pointer not belonging to any partition");
52809467b48Spatrick     }
52909467b48Spatrick 
53009467b48Spatrick     return PtrToPartitions;
53109467b48Spatrick   }
53209467b48Spatrick 
print(raw_ostream & OS) const53309467b48Spatrick   void print(raw_ostream &OS) const {
53409467b48Spatrick     unsigned Index = 0;
53509467b48Spatrick     for (const auto &P : PartitionContainer) {
53609467b48Spatrick       OS << "Partition " << Index++ << " (" << &P << "):\n";
53709467b48Spatrick       P.print();
53809467b48Spatrick     }
53909467b48Spatrick   }
54009467b48Spatrick 
dump() const54109467b48Spatrick   void dump() const { print(dbgs()); }
54209467b48Spatrick 
54309467b48Spatrick #ifndef NDEBUG
operator <<(raw_ostream & OS,const InstPartitionContainer & Partitions)54409467b48Spatrick   friend raw_ostream &operator<<(raw_ostream &OS,
54509467b48Spatrick                                  const InstPartitionContainer &Partitions) {
54609467b48Spatrick     Partitions.print(OS);
54709467b48Spatrick     return OS;
54809467b48Spatrick   }
54909467b48Spatrick #endif
55009467b48Spatrick 
printBlocks() const55109467b48Spatrick   void printBlocks() const {
55209467b48Spatrick     unsigned Index = 0;
55309467b48Spatrick     for (const auto &P : PartitionContainer) {
55409467b48Spatrick       dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
55509467b48Spatrick       P.printBlocks();
55609467b48Spatrick     }
55709467b48Spatrick   }
55809467b48Spatrick 
55909467b48Spatrick private:
56009467b48Spatrick   using PartitionContainerT = std::list<InstPartition>;
56109467b48Spatrick 
56209467b48Spatrick   /// List of partitions.
56309467b48Spatrick   PartitionContainerT PartitionContainer;
56409467b48Spatrick 
56509467b48Spatrick   /// Mapping from Instruction to partition Id.  If the instruction
56609467b48Spatrick   /// belongs to multiple partitions the entry contains -1.
56709467b48Spatrick   InstToPartitionIdT InstToPartitionId;
56809467b48Spatrick 
56909467b48Spatrick   Loop *L;
57009467b48Spatrick   LoopInfo *LI;
57109467b48Spatrick   DominatorTree *DT;
57209467b48Spatrick 
57309467b48Spatrick   /// The control structure to merge adjacent partitions if both satisfy
57409467b48Spatrick   /// the \p Predicate.
57509467b48Spatrick   template <class UnaryPredicate>
mergeAdjacentPartitionsIf(UnaryPredicate Predicate)57609467b48Spatrick   void mergeAdjacentPartitionsIf(UnaryPredicate Predicate) {
57709467b48Spatrick     InstPartition *PrevMatch = nullptr;
57809467b48Spatrick     for (auto I = PartitionContainer.begin(); I != PartitionContainer.end();) {
57909467b48Spatrick       auto DoesMatch = Predicate(&*I);
58009467b48Spatrick       if (PrevMatch == nullptr && DoesMatch) {
58109467b48Spatrick         PrevMatch = &*I;
58209467b48Spatrick         ++I;
58309467b48Spatrick       } else if (PrevMatch != nullptr && DoesMatch) {
58409467b48Spatrick         I->moveTo(*PrevMatch);
58509467b48Spatrick         I = PartitionContainer.erase(I);
58609467b48Spatrick       } else {
58709467b48Spatrick         PrevMatch = nullptr;
58809467b48Spatrick         ++I;
58909467b48Spatrick       }
59009467b48Spatrick     }
59109467b48Spatrick   }
59209467b48Spatrick 
59309467b48Spatrick   /// Assign new LoopIDs for the partition's cloned loop.
setNewLoopID(MDNode * OrigLoopID,InstPartition * Part)59409467b48Spatrick   void setNewLoopID(MDNode *OrigLoopID, InstPartition *Part) {
595*d415bd75Srobert     std::optional<MDNode *> PartitionID = makeFollowupLoopID(
59609467b48Spatrick         OrigLoopID,
59709467b48Spatrick         {LLVMLoopDistributeFollowupAll,
59809467b48Spatrick          Part->hasDepCycle() ? LLVMLoopDistributeFollowupSequential
59909467b48Spatrick                              : LLVMLoopDistributeFollowupCoincident});
600*d415bd75Srobert     if (PartitionID) {
60109467b48Spatrick       Loop *NewLoop = Part->getDistributedLoop();
602*d415bd75Srobert       NewLoop->setLoopID(*PartitionID);
60309467b48Spatrick     }
60409467b48Spatrick   }
60509467b48Spatrick };
60609467b48Spatrick 
60709467b48Spatrick /// For each memory instruction, this class maintains difference of the
60809467b48Spatrick /// number of unsafe dependences that start out from this instruction minus
60909467b48Spatrick /// those that end here.
61009467b48Spatrick ///
61109467b48Spatrick /// By traversing the memory instructions in program order and accumulating this
61209467b48Spatrick /// number, we know whether any unsafe dependence crosses over a program point.
61309467b48Spatrick class MemoryInstructionDependences {
61409467b48Spatrick   using Dependence = MemoryDepChecker::Dependence;
61509467b48Spatrick 
61609467b48Spatrick public:
61709467b48Spatrick   struct Entry {
61809467b48Spatrick     Instruction *Inst;
61909467b48Spatrick     unsigned NumUnsafeDependencesStartOrEnd = 0;
62009467b48Spatrick 
Entry__anonf92e1f230111::MemoryInstructionDependences::Entry62109467b48Spatrick     Entry(Instruction *Inst) : Inst(Inst) {}
62209467b48Spatrick   };
62309467b48Spatrick 
62409467b48Spatrick   using AccessesType = SmallVector<Entry, 8>;
62509467b48Spatrick 
begin() const62609467b48Spatrick   AccessesType::const_iterator begin() const { return Accesses.begin(); }
end() const62709467b48Spatrick   AccessesType::const_iterator end() const { return Accesses.end(); }
62809467b48Spatrick 
MemoryInstructionDependences(const SmallVectorImpl<Instruction * > & Instructions,const SmallVectorImpl<Dependence> & Dependences)62909467b48Spatrick   MemoryInstructionDependences(
63009467b48Spatrick       const SmallVectorImpl<Instruction *> &Instructions,
63109467b48Spatrick       const SmallVectorImpl<Dependence> &Dependences) {
63209467b48Spatrick     Accesses.append(Instructions.begin(), Instructions.end());
63309467b48Spatrick 
63409467b48Spatrick     LLVM_DEBUG(dbgs() << "Backward dependences:\n");
635*d415bd75Srobert     for (const auto &Dep : Dependences)
63609467b48Spatrick       if (Dep.isPossiblyBackward()) {
63709467b48Spatrick         // Note that the designations source and destination follow the program
63809467b48Spatrick         // order, i.e. source is always first.  (The direction is given by the
63909467b48Spatrick         // DepType.)
64009467b48Spatrick         ++Accesses[Dep.Source].NumUnsafeDependencesStartOrEnd;
64109467b48Spatrick         --Accesses[Dep.Destination].NumUnsafeDependencesStartOrEnd;
64209467b48Spatrick 
64309467b48Spatrick         LLVM_DEBUG(Dep.print(dbgs(), 2, Instructions));
64409467b48Spatrick       }
64509467b48Spatrick   }
64609467b48Spatrick 
64709467b48Spatrick private:
64809467b48Spatrick   AccessesType Accesses;
64909467b48Spatrick };
65009467b48Spatrick 
65109467b48Spatrick /// The actual class performing the per-loop work.
65209467b48Spatrick class LoopDistributeForLoop {
65309467b48Spatrick public:
LoopDistributeForLoop(Loop * L,Function * F,LoopInfo * LI,DominatorTree * DT,ScalarEvolution * SE,LoopAccessInfoManager & LAIs,OptimizationRemarkEmitter * ORE)65409467b48Spatrick   LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
655*d415bd75Srobert                         ScalarEvolution *SE, LoopAccessInfoManager &LAIs,
656*d415bd75Srobert                         OptimizationRemarkEmitter *ORE)
657*d415bd75Srobert       : L(L), F(F), LI(LI), DT(DT), SE(SE), LAIs(LAIs), ORE(ORE) {
65809467b48Spatrick     setForced();
65909467b48Spatrick   }
66009467b48Spatrick 
66109467b48Spatrick   /// Try to distribute an inner-most loop.
processLoop()662*d415bd75Srobert   bool processLoop() {
66373471bf0Spatrick     assert(L->isInnermost() && "Only process inner loops.");
66409467b48Spatrick 
66509467b48Spatrick     LLVM_DEBUG(dbgs() << "\nLDist: In \""
66609467b48Spatrick                       << L->getHeader()->getParent()->getName()
66709467b48Spatrick                       << "\" checking " << *L << "\n");
66809467b48Spatrick 
66973471bf0Spatrick     // Having a single exit block implies there's also one exiting block.
67009467b48Spatrick     if (!L->getExitBlock())
67109467b48Spatrick       return fail("MultipleExitBlocks", "multiple exit blocks");
67209467b48Spatrick     if (!L->isLoopSimplifyForm())
67309467b48Spatrick       return fail("NotLoopSimplifyForm",
67409467b48Spatrick                   "loop is not in loop-simplify form");
67573471bf0Spatrick     if (!L->isRotatedForm())
67673471bf0Spatrick       return fail("NotBottomTested", "loop is not bottom tested");
67709467b48Spatrick 
67809467b48Spatrick     BasicBlock *PH = L->getLoopPreheader();
67909467b48Spatrick 
680*d415bd75Srobert     LAI = &LAIs.getInfo(*L);
68109467b48Spatrick 
68209467b48Spatrick     // Currently, we only distribute to isolate the part of the loop with
68309467b48Spatrick     // dependence cycles to enable partial vectorization.
68409467b48Spatrick     if (LAI->canVectorizeMemory())
68509467b48Spatrick       return fail("MemOpsCanBeVectorized",
68609467b48Spatrick                   "memory operations are safe for vectorization");
68709467b48Spatrick 
68809467b48Spatrick     auto *Dependences = LAI->getDepChecker().getDependences();
68909467b48Spatrick     if (!Dependences || Dependences->empty())
69009467b48Spatrick       return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
69109467b48Spatrick 
69209467b48Spatrick     InstPartitionContainer Partitions(L, LI, DT);
69309467b48Spatrick 
69409467b48Spatrick     // First, go through each memory operation and assign them to consecutive
69509467b48Spatrick     // partitions (the order of partitions follows program order).  Put those
69609467b48Spatrick     // with unsafe dependences into "cyclic" partition otherwise put each store
69709467b48Spatrick     // in its own "non-cyclic" partition (we'll merge these later).
69809467b48Spatrick     //
69909467b48Spatrick     // Note that a memory operation (e.g. Load2 below) at a program point that
70009467b48Spatrick     // has an unsafe dependence (Store3->Load1) spanning over it must be
70109467b48Spatrick     // included in the same cyclic partition as the dependent operations.  This
70209467b48Spatrick     // is to preserve the original program order after distribution.  E.g.:
70309467b48Spatrick     //
70409467b48Spatrick     //                NumUnsafeDependencesStartOrEnd  NumUnsafeDependencesActive
70509467b48Spatrick     //  Load1   -.                     1                       0->1
70609467b48Spatrick     //  Load2    | /Unsafe/            0                       1
70709467b48Spatrick     //  Store3  -'                    -1                       1->0
70809467b48Spatrick     //  Load4                          0                       0
70909467b48Spatrick     //
71009467b48Spatrick     // NumUnsafeDependencesActive > 0 indicates this situation and in this case
71109467b48Spatrick     // we just keep assigning to the same cyclic partition until
71209467b48Spatrick     // NumUnsafeDependencesActive reaches 0.
71309467b48Spatrick     const MemoryDepChecker &DepChecker = LAI->getDepChecker();
71409467b48Spatrick     MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(),
71509467b48Spatrick                                      *Dependences);
71609467b48Spatrick 
71709467b48Spatrick     int NumUnsafeDependencesActive = 0;
718*d415bd75Srobert     for (const auto &InstDep : MID) {
71909467b48Spatrick       Instruction *I = InstDep.Inst;
72009467b48Spatrick       // We update NumUnsafeDependencesActive post-instruction, catch the
72109467b48Spatrick       // start of a dependence directly via NumUnsafeDependencesStartOrEnd.
72209467b48Spatrick       if (NumUnsafeDependencesActive ||
72309467b48Spatrick           InstDep.NumUnsafeDependencesStartOrEnd > 0)
72409467b48Spatrick         Partitions.addToCyclicPartition(I);
72509467b48Spatrick       else
72609467b48Spatrick         Partitions.addToNewNonCyclicPartition(I);
72709467b48Spatrick       NumUnsafeDependencesActive += InstDep.NumUnsafeDependencesStartOrEnd;
72809467b48Spatrick       assert(NumUnsafeDependencesActive >= 0 &&
72909467b48Spatrick              "Negative number of dependences active");
73009467b48Spatrick     }
73109467b48Spatrick 
73209467b48Spatrick     // Add partitions for values used outside.  These partitions can be out of
73309467b48Spatrick     // order from the original program order.  This is OK because if the
73409467b48Spatrick     // partition uses a load we will merge this partition with the original
73509467b48Spatrick     // partition of the load that we set up in the previous loop (see
73609467b48Spatrick     // mergeToAvoidDuplicatedLoads).
73709467b48Spatrick     auto DefsUsedOutside = findDefsUsedOutsideOfLoop(L);
73809467b48Spatrick     for (auto *Inst : DefsUsedOutside)
73909467b48Spatrick       Partitions.addToNewNonCyclicPartition(Inst);
74009467b48Spatrick 
74109467b48Spatrick     LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
74209467b48Spatrick     if (Partitions.getSize() < 2)
74309467b48Spatrick       return fail("CantIsolateUnsafeDeps",
74409467b48Spatrick                   "cannot isolate unsafe dependencies");
74509467b48Spatrick 
74609467b48Spatrick     // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
74709467b48Spatrick     // should be able to vectorize these together.
74809467b48Spatrick     Partitions.mergeBeforePopulating();
74909467b48Spatrick     LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
75009467b48Spatrick     if (Partitions.getSize() < 2)
75109467b48Spatrick       return fail("CantIsolateUnsafeDeps",
75209467b48Spatrick                   "cannot isolate unsafe dependencies");
75309467b48Spatrick 
75409467b48Spatrick     // Now, populate the partitions with non-memory operations.
75509467b48Spatrick     Partitions.populateUsedSet();
75609467b48Spatrick     LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
75709467b48Spatrick 
75809467b48Spatrick     // In order to preserve original lexical order for loads, keep them in the
75909467b48Spatrick     // partition that we set up in the MemoryInstructionDependences loop.
76009467b48Spatrick     if (Partitions.mergeToAvoidDuplicatedLoads()) {
76109467b48Spatrick       LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
76209467b48Spatrick                         << Partitions);
76309467b48Spatrick       if (Partitions.getSize() < 2)
76409467b48Spatrick         return fail("CantIsolateUnsafeDeps",
76509467b48Spatrick                     "cannot isolate unsafe dependencies");
76609467b48Spatrick     }
76709467b48Spatrick 
76809467b48Spatrick     // Don't distribute the loop if we need too many SCEV run-time checks, or
76909467b48Spatrick     // any if it's illegal.
770*d415bd75Srobert     const SCEVPredicate &Pred = LAI->getPSE().getPredicate();
77109467b48Spatrick     if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
77209467b48Spatrick       return fail("RuntimeCheckWithConvergent",
77309467b48Spatrick                   "may not insert runtime check with convergent operation");
77409467b48Spatrick     }
77509467b48Spatrick 
776*d415bd75Srobert     if (Pred.getComplexity() > (IsForced.value_or(false)
77709467b48Spatrick                                     ? PragmaDistributeSCEVCheckThreshold
77809467b48Spatrick                                     : DistributeSCEVCheckThreshold))
77909467b48Spatrick       return fail("TooManySCEVRuntimeChecks",
78009467b48Spatrick                   "too many SCEV run-time checks needed.\n");
78109467b48Spatrick 
782*d415bd75Srobert     if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
78309467b48Spatrick       return fail("HeuristicDisabled", "distribution heuristic disabled");
78409467b48Spatrick 
78509467b48Spatrick     LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
78609467b48Spatrick     // We're done forming the partitions set up the reverse mapping from
78709467b48Spatrick     // instructions to partitions.
78809467b48Spatrick     Partitions.setupPartitionIdOnInstructions();
78909467b48Spatrick 
79009467b48Spatrick     // If we need run-time checks, version the loop now.
79109467b48Spatrick     auto PtrToPartition = Partitions.computePartitionSetForPointers(*LAI);
79209467b48Spatrick     const auto *RtPtrChecking = LAI->getRuntimePointerChecking();
79309467b48Spatrick     const auto &AllChecks = RtPtrChecking->getChecks();
79409467b48Spatrick     auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
79509467b48Spatrick                                                   RtPtrChecking);
79609467b48Spatrick 
79709467b48Spatrick     if (LAI->hasConvergentOp() && !Checks.empty()) {
79809467b48Spatrick       return fail("RuntimeCheckWithConvergent",
79909467b48Spatrick                   "may not insert runtime check with convergent operation");
80009467b48Spatrick     }
80109467b48Spatrick 
802097a140dSpatrick     // To keep things simple have an empty preheader before we version or clone
803097a140dSpatrick     // the loop.  (Also split if this has no predecessor, i.e. entry, because we
804097a140dSpatrick     // rely on PH having a predecessor.)
805097a140dSpatrick     if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator())
806097a140dSpatrick       SplitBlock(PH, PH->getTerminator(), DT, LI);
807097a140dSpatrick 
80809467b48Spatrick     if (!Pred.isAlwaysTrue() || !Checks.empty()) {
80909467b48Spatrick       assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
81009467b48Spatrick 
81109467b48Spatrick       MDNode *OrigLoopID = L->getLoopID();
81209467b48Spatrick 
81309467b48Spatrick       LLVM_DEBUG(dbgs() << "\nPointers:\n");
81409467b48Spatrick       LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
81573471bf0Spatrick       LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
81609467b48Spatrick       LVer.versionLoop(DefsUsedOutside);
81709467b48Spatrick       LVer.annotateLoopWithNoAlias();
81809467b48Spatrick 
81909467b48Spatrick       // The unversioned loop will not be changed, so we inherit all attributes
82009467b48Spatrick       // from the original loop, but remove the loop distribution metadata to
82109467b48Spatrick       // avoid to distribute it again.
822*d415bd75Srobert       MDNode *UnversionedLoopID = *makeFollowupLoopID(
823*d415bd75Srobert           OrigLoopID,
824*d415bd75Srobert           {LLVMLoopDistributeFollowupAll, LLVMLoopDistributeFollowupFallback},
825*d415bd75Srobert           "llvm.loop.distribute.", true);
82609467b48Spatrick       LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
82709467b48Spatrick     }
82809467b48Spatrick 
82909467b48Spatrick     // Create identical copies of the original loop for each partition and hook
83009467b48Spatrick     // them up sequentially.
83109467b48Spatrick     Partitions.cloneLoops();
83209467b48Spatrick 
83309467b48Spatrick     // Now, we remove the instruction from each loop that don't belong to that
83409467b48Spatrick     // partition.
83509467b48Spatrick     Partitions.removeUnusedInsts();
83609467b48Spatrick     LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
83709467b48Spatrick     LLVM_DEBUG(Partitions.printBlocks());
83809467b48Spatrick 
83909467b48Spatrick     if (LDistVerify) {
84009467b48Spatrick       LI->verify(*DT);
84109467b48Spatrick       assert(DT->verify(DominatorTree::VerificationLevel::Fast));
84209467b48Spatrick     }
84309467b48Spatrick 
84409467b48Spatrick     ++NumLoopsDistributed;
84509467b48Spatrick     // Report the success.
84609467b48Spatrick     ORE->emit([&]() {
84709467b48Spatrick       return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
84809467b48Spatrick                                 L->getHeader())
84909467b48Spatrick              << "distributed loop";
85009467b48Spatrick     });
85109467b48Spatrick     return true;
85209467b48Spatrick   }
85309467b48Spatrick 
85409467b48Spatrick   /// Provide diagnostics then \return with false.
fail(StringRef RemarkName,StringRef Message)85509467b48Spatrick   bool fail(StringRef RemarkName, StringRef Message) {
85609467b48Spatrick     LLVMContext &Ctx = F->getContext();
857*d415bd75Srobert     bool Forced = isForced().value_or(false);
85809467b48Spatrick 
85909467b48Spatrick     LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
86009467b48Spatrick 
86109467b48Spatrick     // With Rpass-missed report that distribution failed.
86209467b48Spatrick     ORE->emit([&]() {
86309467b48Spatrick       return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
86409467b48Spatrick                                       L->getStartLoc(), L->getHeader())
86509467b48Spatrick              << "loop not distributed: use -Rpass-analysis=loop-distribute for "
86609467b48Spatrick                 "more "
86709467b48Spatrick                 "info";
86809467b48Spatrick     });
86909467b48Spatrick 
87009467b48Spatrick     // With Rpass-analysis report why.  This is on by default if distribution
87109467b48Spatrick     // was requested explicitly.
87209467b48Spatrick     ORE->emit(OptimizationRemarkAnalysis(
87309467b48Spatrick                   Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
87409467b48Spatrick                   RemarkName, L->getStartLoc(), L->getHeader())
87509467b48Spatrick               << "loop not distributed: " << Message);
87609467b48Spatrick 
87709467b48Spatrick     // Also issue a warning if distribution was requested explicitly but it
87809467b48Spatrick     // failed.
87909467b48Spatrick     if (Forced)
88009467b48Spatrick       Ctx.diagnose(DiagnosticInfoOptimizationFailure(
88109467b48Spatrick           *F, L->getStartLoc(), "loop not distributed: failed "
88209467b48Spatrick                                 "explicitly specified loop distribution"));
88309467b48Spatrick 
88409467b48Spatrick     return false;
88509467b48Spatrick   }
88609467b48Spatrick 
88709467b48Spatrick   /// Return if distribution forced to be enabled/disabled for the loop.
88809467b48Spatrick   ///
88909467b48Spatrick   /// If the optional has a value, it indicates whether distribution was forced
89009467b48Spatrick   /// to be enabled (true) or disabled (false).  If the optional has no value
89109467b48Spatrick   /// distribution was not forced either way.
isForced() const892*d415bd75Srobert   const std::optional<bool> &isForced() const { return IsForced; }
89309467b48Spatrick 
89409467b48Spatrick private:
89509467b48Spatrick   /// Filter out checks between pointers from the same partition.
89609467b48Spatrick   ///
89709467b48Spatrick   /// \p PtrToPartition contains the partition number for pointers.  Partition
89809467b48Spatrick   /// number -1 means that the pointer is used in multiple partitions.  In this
89909467b48Spatrick   /// case we can't safely omit the check.
includeOnlyCrossPartitionChecks(const SmallVectorImpl<RuntimePointerCheck> & AllChecks,const SmallVectorImpl<int> & PtrToPartition,const RuntimePointerChecking * RtPtrChecking)900097a140dSpatrick   SmallVector<RuntimePointerCheck, 4> includeOnlyCrossPartitionChecks(
901097a140dSpatrick       const SmallVectorImpl<RuntimePointerCheck> &AllChecks,
90209467b48Spatrick       const SmallVectorImpl<int> &PtrToPartition,
90309467b48Spatrick       const RuntimePointerChecking *RtPtrChecking) {
904097a140dSpatrick     SmallVector<RuntimePointerCheck, 4> Checks;
90509467b48Spatrick 
90609467b48Spatrick     copy_if(AllChecks, std::back_inserter(Checks),
907097a140dSpatrick             [&](const RuntimePointerCheck &Check) {
90809467b48Spatrick               for (unsigned PtrIdx1 : Check.first->Members)
90909467b48Spatrick                 for (unsigned PtrIdx2 : Check.second->Members)
91009467b48Spatrick                   // Only include this check if there is a pair of pointers
91109467b48Spatrick                   // that require checking and the pointers fall into
91209467b48Spatrick                   // separate partitions.
91309467b48Spatrick                   //
91409467b48Spatrick                   // (Note that we already know at this point that the two
91509467b48Spatrick                   // pointer groups need checking but it doesn't follow
91609467b48Spatrick                   // that each pair of pointers within the two groups need
91709467b48Spatrick                   // checking as well.
91809467b48Spatrick                   //
91909467b48Spatrick                   // In other words we don't want to include a check just
92009467b48Spatrick                   // because there is a pair of pointers between the two
92109467b48Spatrick                   // pointer groups that require checks and a different
92209467b48Spatrick                   // pair whose pointers fall into different partitions.)
92309467b48Spatrick                   if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
92409467b48Spatrick                       !RuntimePointerChecking::arePointersInSamePartition(
92509467b48Spatrick                           PtrToPartition, PtrIdx1, PtrIdx2))
92609467b48Spatrick                     return true;
92709467b48Spatrick               return false;
92809467b48Spatrick             });
92909467b48Spatrick 
93009467b48Spatrick     return Checks;
93109467b48Spatrick   }
93209467b48Spatrick 
93309467b48Spatrick   /// Check whether the loop metadata is forcing distribution to be
93409467b48Spatrick   /// enabled/disabled.
setForced()93509467b48Spatrick   void setForced() {
936*d415bd75Srobert     std::optional<const MDOperand *> Value =
93709467b48Spatrick         findStringMetadataForLoop(L, "llvm.loop.distribute.enable");
93809467b48Spatrick     if (!Value)
93909467b48Spatrick       return;
94009467b48Spatrick 
94109467b48Spatrick     const MDOperand *Op = *Value;
94209467b48Spatrick     assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
94309467b48Spatrick     IsForced = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
94409467b48Spatrick   }
94509467b48Spatrick 
94609467b48Spatrick   Loop *L;
94709467b48Spatrick   Function *F;
94809467b48Spatrick 
94909467b48Spatrick   // Analyses used.
95009467b48Spatrick   LoopInfo *LI;
95109467b48Spatrick   const LoopAccessInfo *LAI = nullptr;
95209467b48Spatrick   DominatorTree *DT;
95309467b48Spatrick   ScalarEvolution *SE;
954*d415bd75Srobert   LoopAccessInfoManager &LAIs;
95509467b48Spatrick   OptimizationRemarkEmitter *ORE;
95609467b48Spatrick 
95709467b48Spatrick   /// Indicates whether distribution is forced to be enabled/disabled for
95809467b48Spatrick   /// the loop.
95909467b48Spatrick   ///
96009467b48Spatrick   /// If the optional has a value, it indicates whether distribution was forced
96109467b48Spatrick   /// to be enabled (true) or disabled (false).  If the optional has no value
96209467b48Spatrick   /// distribution was not forced either way.
963*d415bd75Srobert   std::optional<bool> IsForced;
96409467b48Spatrick };
96509467b48Spatrick 
96609467b48Spatrick } // end anonymous namespace
96709467b48Spatrick 
96809467b48Spatrick /// Shared implementation between new and old PMs.
runImpl(Function & F,LoopInfo * LI,DominatorTree * DT,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE,LoopAccessInfoManager & LAIs)96909467b48Spatrick static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
97009467b48Spatrick                     ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
971*d415bd75Srobert                     LoopAccessInfoManager &LAIs) {
97209467b48Spatrick   // Build up a worklist of inner-loops to vectorize. This is necessary as the
97309467b48Spatrick   // act of distributing a loop creates new loops and can invalidate iterators
97409467b48Spatrick   // across the loops.
97509467b48Spatrick   SmallVector<Loop *, 8> Worklist;
97609467b48Spatrick 
97709467b48Spatrick   for (Loop *TopLevelLoop : *LI)
97809467b48Spatrick     for (Loop *L : depth_first(TopLevelLoop))
97909467b48Spatrick       // We only handle inner-most loops.
98073471bf0Spatrick       if (L->isInnermost())
98109467b48Spatrick         Worklist.push_back(L);
98209467b48Spatrick 
98309467b48Spatrick   // Now walk the identified inner loops.
98409467b48Spatrick   bool Changed = false;
98509467b48Spatrick   for (Loop *L : Worklist) {
986*d415bd75Srobert     LoopDistributeForLoop LDL(L, &F, LI, DT, SE, LAIs, ORE);
98709467b48Spatrick 
98809467b48Spatrick     // If distribution was forced for the specific loop to be
98909467b48Spatrick     // enabled/disabled, follow that.  Otherwise use the global flag.
990*d415bd75Srobert     if (LDL.isForced().value_or(EnableLoopDistribute))
991*d415bd75Srobert       Changed |= LDL.processLoop();
99209467b48Spatrick   }
99309467b48Spatrick 
99409467b48Spatrick   // Process each loop nest in the function.
99509467b48Spatrick   return Changed;
99609467b48Spatrick }
99709467b48Spatrick 
99809467b48Spatrick namespace {
99909467b48Spatrick 
100009467b48Spatrick /// The pass class.
100109467b48Spatrick class LoopDistributeLegacy : public FunctionPass {
100209467b48Spatrick public:
100309467b48Spatrick   static char ID;
100409467b48Spatrick 
LoopDistributeLegacy()100509467b48Spatrick   LoopDistributeLegacy() : FunctionPass(ID) {
100609467b48Spatrick     // The default is set by the caller.
100709467b48Spatrick     initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
100809467b48Spatrick   }
100909467b48Spatrick 
runOnFunction(Function & F)101009467b48Spatrick   bool runOnFunction(Function &F) override {
101109467b48Spatrick     if (skipFunction(F))
101209467b48Spatrick       return false;
101309467b48Spatrick 
101409467b48Spatrick     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
101509467b48Spatrick     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
101609467b48Spatrick     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
101709467b48Spatrick     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1018*d415bd75Srobert     auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
101909467b48Spatrick 
1020*d415bd75Srobert     return runImpl(F, LI, DT, SE, ORE, LAIs);
102109467b48Spatrick   }
102209467b48Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const102309467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
102409467b48Spatrick     AU.addRequired<ScalarEvolutionWrapperPass>();
102509467b48Spatrick     AU.addRequired<LoopInfoWrapperPass>();
102609467b48Spatrick     AU.addPreserved<LoopInfoWrapperPass>();
102709467b48Spatrick     AU.addRequired<LoopAccessLegacyAnalysis>();
102809467b48Spatrick     AU.addRequired<DominatorTreeWrapperPass>();
102909467b48Spatrick     AU.addPreserved<DominatorTreeWrapperPass>();
103009467b48Spatrick     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
103109467b48Spatrick     AU.addPreserved<GlobalsAAWrapperPass>();
103209467b48Spatrick   }
103309467b48Spatrick };
103409467b48Spatrick 
103509467b48Spatrick } // end anonymous namespace
103609467b48Spatrick 
run(Function & F,FunctionAnalysisManager & AM)103709467b48Spatrick PreservedAnalyses LoopDistributePass::run(Function &F,
103809467b48Spatrick                                           FunctionAnalysisManager &AM) {
103909467b48Spatrick   auto &LI = AM.getResult<LoopAnalysis>(F);
104009467b48Spatrick   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
104109467b48Spatrick   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
104209467b48Spatrick   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
104309467b48Spatrick 
1044*d415bd75Srobert   LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
1045*d415bd75Srobert   bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, LAIs);
104609467b48Spatrick   if (!Changed)
104709467b48Spatrick     return PreservedAnalyses::all();
104809467b48Spatrick   PreservedAnalyses PA;
104909467b48Spatrick   PA.preserve<LoopAnalysis>();
105009467b48Spatrick   PA.preserve<DominatorTreeAnalysis>();
105109467b48Spatrick   return PA;
105209467b48Spatrick }
105309467b48Spatrick 
105409467b48Spatrick char LoopDistributeLegacy::ID;
105509467b48Spatrick 
105609467b48Spatrick static const char ldist_name[] = "Loop Distribution";
105709467b48Spatrick 
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy,LDIST_NAME,ldist_name,false,false)105809467b48Spatrick INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
105909467b48Spatrick                       false)
106009467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
106109467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
106209467b48Spatrick INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
106309467b48Spatrick INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
106409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
106509467b48Spatrick INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
106609467b48Spatrick 
106709467b48Spatrick FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
1068