10b57cec5SDimitry Andric //===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implement a loop-aware load elimination pass. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // It uses LoopAccessAnalysis to identify loop-carried dependences with a 120b57cec5SDimitry Andric // distance of one between stores and loads. These form the candidates for the 130b57cec5SDimitry Andric // transformation. The source value of each store then propagated to the user 140b57cec5SDimitry Andric // of the corresponding load. This makes the load dead. 150b57cec5SDimitry Andric // 160b57cec5SDimitry Andric // The pass can also version the loop and add memchecks in order to prove that 170b57cec5SDimitry Andric // may-aliasing stores can't change the value in memory before it's read by the 180b57cec5SDimitry Andric // load. 190b57cec5SDimitry Andric // 200b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopLoadElimination.h" 230b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 240b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 250b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h" 260b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 270b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 280b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 290b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 300b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 310b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h" 320b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h" 330b57cec5SDimitry Andric #include "llvm/Analysis/LazyBlockFrequencyInfo.h" 340b57cec5SDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h" 350b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h" 360b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 370b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 380b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 390b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 400b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 410b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 420b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 430b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 440b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 450b57cec5SDimitry Andric #include "llvm/IR/Module.h" 460b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 470b57cec5SDimitry Andric #include "llvm/IR/Type.h" 480b57cec5SDimitry Andric #include "llvm/IR/Value.h" 490b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 500b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 510b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 520b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 530b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h" 54e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/LoopSimplify.h" 550b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopVersioning.h" 565ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 570b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h" 580b57cec5SDimitry Andric #include <algorithm> 590b57cec5SDimitry Andric #include <cassert> 600b57cec5SDimitry Andric #include <forward_list> 610b57cec5SDimitry Andric #include <tuple> 620b57cec5SDimitry Andric #include <utility> 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric using namespace llvm; 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric #define LLE_OPTION "loop-load-elim" 670b57cec5SDimitry Andric #define DEBUG_TYPE LLE_OPTION 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric static cl::opt<unsigned> CheckPerElim( 700b57cec5SDimitry Andric "runtime-check-per-loop-load-elim", cl::Hidden, 710b57cec5SDimitry Andric cl::desc("Max number of memchecks allowed per eliminated load on average"), 720b57cec5SDimitry Andric cl::init(1)); 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric static cl::opt<unsigned> LoadElimSCEVCheckThreshold( 750b57cec5SDimitry Andric "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, 760b57cec5SDimitry Andric cl::desc("The maximum number of SCEV checks allowed for Loop " 770b57cec5SDimitry Andric "Load Elimination")); 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE"); 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric namespace { 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric /// Represent a store-to-forwarding candidate. 840b57cec5SDimitry Andric struct StoreToLoadForwardingCandidate { 850b57cec5SDimitry Andric LoadInst *Load; 860b57cec5SDimitry Andric StoreInst *Store; 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store) 890b57cec5SDimitry Andric : Load(Load), Store(Store) {} 900b57cec5SDimitry Andric 9106c3fb27SDimitry Andric /// Return true if the dependence from the store to the load has an 9206c3fb27SDimitry Andric /// absolute distance of one. 9306c3fb27SDimitry Andric /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop) 940b57cec5SDimitry Andric bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, 950b57cec5SDimitry Andric Loop *L) const { 960b57cec5SDimitry Andric Value *LoadPtr = Load->getPointerOperand(); 970b57cec5SDimitry Andric Value *StorePtr = Store->getPointerOperand(); 98fe6060f1SDimitry Andric Type *LoadType = getLoadStoreType(Load); 99*0fca6ea1SDimitry Andric auto &DL = Load->getDataLayout(); 1000b57cec5SDimitry Andric 101fe6060f1SDimitry Andric assert(LoadPtr->getType()->getPointerAddressSpace() == 1020b57cec5SDimitry Andric StorePtr->getType()->getPointerAddressSpace() && 103bdd1243dSDimitry Andric DL.getTypeSizeInBits(LoadType) == 104bdd1243dSDimitry Andric DL.getTypeSizeInBits(getLoadStoreType(Store)) && 1050b57cec5SDimitry Andric "Should be a known dependence"); 1060b57cec5SDimitry Andric 10706c3fb27SDimitry Andric int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0); 10806c3fb27SDimitry Andric int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0); 10906c3fb27SDimitry Andric if (!StrideLoad || !StrideStore || StrideLoad != StrideStore) 11006c3fb27SDimitry Andric return false; 11106c3fb27SDimitry Andric 11206c3fb27SDimitry Andric // TODO: This check for stride values other than 1 and -1 can be eliminated. 11306c3fb27SDimitry Andric // However, doing so may cause the LoopAccessAnalysis to overcompensate, 11406c3fb27SDimitry Andric // generating numerous non-wrap runtime checks that may undermine the 11506c3fb27SDimitry Andric // benefits of load elimination. To safely implement support for non-unit 11606c3fb27SDimitry Andric // strides, we would need to ensure either that the processed case does not 11706c3fb27SDimitry Andric // require these additional checks, or improve the LAA to handle them more 11806c3fb27SDimitry Andric // efficiently, or potentially both. 11906c3fb27SDimitry Andric if (std::abs(StrideLoad) != 1) 1200b57cec5SDimitry Andric return false; 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType)); 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric auto *LoadPtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(LoadPtr)); 1250b57cec5SDimitry Andric auto *StorePtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(StorePtr)); 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric // We don't need to check non-wrapping here because forward/backward 1280b57cec5SDimitry Andric // dependence wouldn't be valid if these weren't monotonic accesses. 129*0fca6ea1SDimitry Andric auto *Dist = dyn_cast<SCEVConstant>( 1300b57cec5SDimitry Andric PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); 131*0fca6ea1SDimitry Andric if (!Dist) 132*0fca6ea1SDimitry Andric return false; 1330b57cec5SDimitry Andric const APInt &Val = Dist->getAPInt(); 13406c3fb27SDimitry Andric return Val == TypeByteSize * StrideLoad; 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric Value *getLoadPtr() const { return Load->getPointerOperand(); } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric #ifndef NDEBUG 1400b57cec5SDimitry Andric friend raw_ostream &operator<<(raw_ostream &OS, 1410b57cec5SDimitry Andric const StoreToLoadForwardingCandidate &Cand) { 1420b57cec5SDimitry Andric OS << *Cand.Store << " -->\n"; 1430b57cec5SDimitry Andric OS.indent(2) << *Cand.Load << "\n"; 1440b57cec5SDimitry Andric return OS; 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric #endif 1470b57cec5SDimitry Andric }; 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric } // end anonymous namespace 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric /// Check if the store dominates all latches, so as long as there is no 1520b57cec5SDimitry Andric /// intervening store this value will be loaded in the next iteration. 1530b57cec5SDimitry Andric static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, 1540b57cec5SDimitry Andric DominatorTree *DT) { 1550b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> Latches; 1560b57cec5SDimitry Andric L->getLoopLatches(Latches); 1570b57cec5SDimitry Andric return llvm::all_of(Latches, [&](const BasicBlock *Latch) { 1580b57cec5SDimitry Andric return DT->dominates(StoreBlock, Latch); 1590b57cec5SDimitry Andric }); 1600b57cec5SDimitry Andric } 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric /// Return true if the load is not executed on all paths in the loop. 1630b57cec5SDimitry Andric static bool isLoadConditional(LoadInst *Load, Loop *L) { 1640b57cec5SDimitry Andric return Load->getParent() != L->getHeader(); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric namespace { 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric /// The per-loop class that does most of the work. 1700b57cec5SDimitry Andric class LoadEliminationForLoop { 1710b57cec5SDimitry Andric public: 1720b57cec5SDimitry Andric LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, 1730b57cec5SDimitry Andric DominatorTree *DT, BlockFrequencyInfo *BFI, 1740b57cec5SDimitry Andric ProfileSummaryInfo* PSI) 1750b57cec5SDimitry Andric : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {} 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric /// Look through the loop-carried and loop-independent dependences in 1780b57cec5SDimitry Andric /// this loop and find store->load dependences. 1790b57cec5SDimitry Andric /// 1800b57cec5SDimitry Andric /// Note that no candidate is returned if LAA has failed to analyze the loop 1810b57cec5SDimitry Andric /// (e.g. if it's not bottom-tested, contains volatile memops, etc.) 1820b57cec5SDimitry Andric std::forward_list<StoreToLoadForwardingCandidate> 1830b57cec5SDimitry Andric findStoreToLoadDependences(const LoopAccessInfo &LAI) { 1840b57cec5SDimitry Andric std::forward_list<StoreToLoadForwardingCandidate> Candidates; 1850b57cec5SDimitry Andric 186*0fca6ea1SDimitry Andric const auto &DepChecker = LAI.getDepChecker(); 187*0fca6ea1SDimitry Andric const auto *Deps = DepChecker.getDependences(); 1880b57cec5SDimitry Andric if (!Deps) 1890b57cec5SDimitry Andric return Candidates; 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric // Find store->load dependences (consequently true dep). Both lexically 1920b57cec5SDimitry Andric // forward and backward dependences qualify. Disqualify loads that have 1930b57cec5SDimitry Andric // other unknown dependences. 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric SmallPtrSet<Instruction *, 4> LoadsWithUnknownDepedence; 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric for (const auto &Dep : *Deps) { 198*0fca6ea1SDimitry Andric Instruction *Source = Dep.getSource(DepChecker); 199*0fca6ea1SDimitry Andric Instruction *Destination = Dep.getDestination(DepChecker); 2000b57cec5SDimitry Andric 2015f757f3fSDimitry Andric if (Dep.Type == MemoryDepChecker::Dependence::Unknown || 2025f757f3fSDimitry Andric Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) { 2030b57cec5SDimitry Andric if (isa<LoadInst>(Source)) 2040b57cec5SDimitry Andric LoadsWithUnknownDepedence.insert(Source); 2050b57cec5SDimitry Andric if (isa<LoadInst>(Destination)) 2060b57cec5SDimitry Andric LoadsWithUnknownDepedence.insert(Destination); 2070b57cec5SDimitry Andric continue; 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric if (Dep.isBackward()) 2110b57cec5SDimitry Andric // Note that the designations source and destination follow the program 2120b57cec5SDimitry Andric // order, i.e. source is always first. (The direction is given by the 2130b57cec5SDimitry Andric // DepType.) 2140b57cec5SDimitry Andric std::swap(Source, Destination); 2150b57cec5SDimitry Andric else 2160b57cec5SDimitry Andric assert(Dep.isForward() && "Needs to be a forward dependence"); 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric auto *Store = dyn_cast<StoreInst>(Source); 2190b57cec5SDimitry Andric if (!Store) 2200b57cec5SDimitry Andric continue; 2210b57cec5SDimitry Andric auto *Load = dyn_cast<LoadInst>(Destination); 2220b57cec5SDimitry Andric if (!Load) 2230b57cec5SDimitry Andric continue; 2240b57cec5SDimitry Andric 225bdd1243dSDimitry Andric // Only propagate if the stored values are bit/pointer castable. 226bdd1243dSDimitry Andric if (!CastInst::isBitOrNoopPointerCastable( 227bdd1243dSDimitry Andric getLoadStoreType(Store), getLoadStoreType(Load), 228*0fca6ea1SDimitry Andric Store->getDataLayout())) 2290b57cec5SDimitry Andric continue; 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric Candidates.emplace_front(Load, Store); 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric if (!LoadsWithUnknownDepedence.empty()) 2350b57cec5SDimitry Andric Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) { 2360b57cec5SDimitry Andric return LoadsWithUnknownDepedence.count(C.Load); 2370b57cec5SDimitry Andric }); 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric return Candidates; 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric /// Return the index of the instruction according to program order. 2430b57cec5SDimitry Andric unsigned getInstrIndex(Instruction *Inst) { 2440b57cec5SDimitry Andric auto I = InstOrder.find(Inst); 2450b57cec5SDimitry Andric assert(I != InstOrder.end() && "No index for instruction"); 2460b57cec5SDimitry Andric return I->second; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric /// If a load has multiple candidates associated (i.e. different 2500b57cec5SDimitry Andric /// stores), it means that it could be forwarding from multiple stores 2510b57cec5SDimitry Andric /// depending on control flow. Remove these candidates. 2520b57cec5SDimitry Andric /// 2530b57cec5SDimitry Andric /// Here, we rely on LAA to include the relevant loop-independent dependences. 2540b57cec5SDimitry Andric /// LAA is known to omit these in the very simple case when the read and the 2550b57cec5SDimitry Andric /// write within an alias set always takes place using the *same* pointer. 2560b57cec5SDimitry Andric /// 2570b57cec5SDimitry Andric /// However, we know that this is not the case here, i.e. we can rely on LAA 2580b57cec5SDimitry Andric /// to provide us with loop-independent dependences for the cases we're 2590b57cec5SDimitry Andric /// interested. Consider the case for example where a loop-independent 2600b57cec5SDimitry Andric /// dependece S1->S2 invalidates the forwarding S3->S2. 2610b57cec5SDimitry Andric /// 2620b57cec5SDimitry Andric /// A[i] = ... (S1) 2630b57cec5SDimitry Andric /// ... = A[i] (S2) 2640b57cec5SDimitry Andric /// A[i+1] = ... (S3) 2650b57cec5SDimitry Andric /// 2660b57cec5SDimitry Andric /// LAA will perform dependence analysis here because there are two 2670b57cec5SDimitry Andric /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]). 2680b57cec5SDimitry Andric void removeDependencesFromMultipleStores( 2690b57cec5SDimitry Andric std::forward_list<StoreToLoadForwardingCandidate> &Candidates) { 2700b57cec5SDimitry Andric // If Store is nullptr it means that we have multiple stores forwarding to 2710b57cec5SDimitry Andric // this store. 2720b57cec5SDimitry Andric using LoadToSingleCandT = 2730b57cec5SDimitry Andric DenseMap<LoadInst *, const StoreToLoadForwardingCandidate *>; 2740b57cec5SDimitry Andric LoadToSingleCandT LoadToSingleCand; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric for (const auto &Cand : Candidates) { 2770b57cec5SDimitry Andric bool NewElt; 2780b57cec5SDimitry Andric LoadToSingleCandT::iterator Iter; 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric std::tie(Iter, NewElt) = 2810b57cec5SDimitry Andric LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand)); 2820b57cec5SDimitry Andric if (!NewElt) { 2830b57cec5SDimitry Andric const StoreToLoadForwardingCandidate *&OtherCand = Iter->second; 2840b57cec5SDimitry Andric // Already multiple stores forward to this load. 2850b57cec5SDimitry Andric if (OtherCand == nullptr) 2860b57cec5SDimitry Andric continue; 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric // Handle the very basic case when the two stores are in the same block 2890b57cec5SDimitry Andric // so deciding which one forwards is easy. The later one forwards as 2900b57cec5SDimitry Andric // long as they both have a dependence distance of one to the load. 2910b57cec5SDimitry Andric if (Cand.Store->getParent() == OtherCand->Store->getParent() && 2920b57cec5SDimitry Andric Cand.isDependenceDistanceOfOne(PSE, L) && 2930b57cec5SDimitry Andric OtherCand->isDependenceDistanceOfOne(PSE, L)) { 2940b57cec5SDimitry Andric // They are in the same block, the later one will forward to the load. 2950b57cec5SDimitry Andric if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store)) 2960b57cec5SDimitry Andric OtherCand = &Cand; 2970b57cec5SDimitry Andric } else 2980b57cec5SDimitry Andric OtherCand = nullptr; 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) { 3030b57cec5SDimitry Andric if (LoadToSingleCand[Cand.Load] != &Cand) { 3040b57cec5SDimitry Andric LLVM_DEBUG( 3050b57cec5SDimitry Andric dbgs() << "Removing from candidates: \n" 3060b57cec5SDimitry Andric << Cand 3070b57cec5SDimitry Andric << " The load may have multiple stores forwarding to " 3080b57cec5SDimitry Andric << "it\n"); 3090b57cec5SDimitry Andric return true; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric return false; 3120b57cec5SDimitry Andric }); 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric /// Given two pointers operations by their RuntimePointerChecking 3160b57cec5SDimitry Andric /// indices, return true if they require an alias check. 3170b57cec5SDimitry Andric /// 3180b57cec5SDimitry Andric /// We need a check if one is a pointer for a candidate load and the other is 3190b57cec5SDimitry Andric /// a pointer for a possibly intervening store. 3200b57cec5SDimitry Andric bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2, 321e8d8bef9SDimitry Andric const SmallPtrSetImpl<Value *> &PtrsWrittenOnFwdingPath, 322e8d8bef9SDimitry Andric const SmallPtrSetImpl<Value *> &CandLoadPtrs) { 3230b57cec5SDimitry Andric Value *Ptr1 = 3240b57cec5SDimitry Andric LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue; 3250b57cec5SDimitry Andric Value *Ptr2 = 3260b57cec5SDimitry Andric LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue; 3270b57cec5SDimitry Andric return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) || 3280b57cec5SDimitry Andric (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1))); 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric /// Return pointers that are possibly written to on the path from a 3320b57cec5SDimitry Andric /// forwarding store to a load. 3330b57cec5SDimitry Andric /// 3340b57cec5SDimitry Andric /// These pointers need to be alias-checked against the forwarding candidates. 3350b57cec5SDimitry Andric SmallPtrSet<Value *, 4> findPointersWrittenOnForwardingPath( 3360b57cec5SDimitry Andric const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) { 3370b57cec5SDimitry Andric // From FirstStore to LastLoad neither of the elimination candidate loads 3380b57cec5SDimitry Andric // should overlap with any of the stores. 3390b57cec5SDimitry Andric // 3400b57cec5SDimitry Andric // E.g.: 3410b57cec5SDimitry Andric // 3420b57cec5SDimitry Andric // st1 C[i] 3430b57cec5SDimitry Andric // ld1 B[i] <-------, 3440b57cec5SDimitry Andric // ld0 A[i] <----, | * LastLoad 3450b57cec5SDimitry Andric // ... | | 3460b57cec5SDimitry Andric // st2 E[i] | | 3470b57cec5SDimitry Andric // st3 B[i+1] -- | -' * FirstStore 3480b57cec5SDimitry Andric // st0 A[i+1] ---' 3490b57cec5SDimitry Andric // st4 D[i] 3500b57cec5SDimitry Andric // 3510b57cec5SDimitry Andric // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with 3520b57cec5SDimitry Andric // ld0. 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric LoadInst *LastLoad = 355*0fca6ea1SDimitry Andric llvm::max_element(Candidates, 3560b57cec5SDimitry Andric [&](const StoreToLoadForwardingCandidate &A, 3570b57cec5SDimitry Andric const StoreToLoadForwardingCandidate &B) { 358*0fca6ea1SDimitry Andric return getInstrIndex(A.Load) < 359*0fca6ea1SDimitry Andric getInstrIndex(B.Load); 3600b57cec5SDimitry Andric }) 3610b57cec5SDimitry Andric ->Load; 3620b57cec5SDimitry Andric StoreInst *FirstStore = 363*0fca6ea1SDimitry Andric llvm::min_element(Candidates, 3640b57cec5SDimitry Andric [&](const StoreToLoadForwardingCandidate &A, 3650b57cec5SDimitry Andric const StoreToLoadForwardingCandidate &B) { 3660b57cec5SDimitry Andric return getInstrIndex(A.Store) < 3670b57cec5SDimitry Andric getInstrIndex(B.Store); 3680b57cec5SDimitry Andric }) 3690b57cec5SDimitry Andric ->Store; 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric // We're looking for stores after the first forwarding store until the end 3720b57cec5SDimitry Andric // of the loop, then from the beginning of the loop until the last 3730b57cec5SDimitry Andric // forwarded-to load. Collect the pointer for the stores. 3740b57cec5SDimitry Andric SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath; 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric auto InsertStorePtr = [&](Instruction *I) { 3770b57cec5SDimitry Andric if (auto *S = dyn_cast<StoreInst>(I)) 3780b57cec5SDimitry Andric PtrsWrittenOnFwdingPath.insert(S->getPointerOperand()); 3790b57cec5SDimitry Andric }; 3800b57cec5SDimitry Andric const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions(); 3810b57cec5SDimitry Andric std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1, 3820b57cec5SDimitry Andric MemInstrs.end(), InsertStorePtr); 3830b57cec5SDimitry Andric std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)], 3840b57cec5SDimitry Andric InsertStorePtr); 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric return PtrsWrittenOnFwdingPath; 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric /// Determine the pointer alias checks to prove that there are no 3900b57cec5SDimitry Andric /// intervening stores. 3915ffd83dbSDimitry Andric SmallVector<RuntimePointerCheck, 4> collectMemchecks( 3920b57cec5SDimitry Andric const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) { 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric SmallPtrSet<Value *, 4> PtrsWrittenOnFwdingPath = 3950b57cec5SDimitry Andric findPointersWrittenOnForwardingPath(Candidates); 3960b57cec5SDimitry Andric 3970b57cec5SDimitry Andric // Collect the pointers of the candidate loads. 398e8d8bef9SDimitry Andric SmallPtrSet<Value *, 4> CandLoadPtrs; 399e8d8bef9SDimitry Andric for (const auto &Candidate : Candidates) 400e8d8bef9SDimitry Andric CandLoadPtrs.insert(Candidate.getLoadPtr()); 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks(); 4035ffd83dbSDimitry Andric SmallVector<RuntimePointerCheck, 4> Checks; 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric copy_if(AllChecks, std::back_inserter(Checks), 4065ffd83dbSDimitry Andric [&](const RuntimePointerCheck &Check) { 4070b57cec5SDimitry Andric for (auto PtrIdx1 : Check.first->Members) 4080b57cec5SDimitry Andric for (auto PtrIdx2 : Check.second->Members) 4090b57cec5SDimitry Andric if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath, 4100b57cec5SDimitry Andric CandLoadPtrs)) 4110b57cec5SDimitry Andric return true; 4120b57cec5SDimitry Andric return false; 4130b57cec5SDimitry Andric }); 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() 4160b57cec5SDimitry Andric << "):\n"); 4170b57cec5SDimitry Andric LLVM_DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks)); 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric return Checks; 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric /// Perform the transformation for a candidate. 4230b57cec5SDimitry Andric void 4240b57cec5SDimitry Andric propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand, 4250b57cec5SDimitry Andric SCEVExpander &SEE) { 4260b57cec5SDimitry Andric // loop: 4270b57cec5SDimitry Andric // %x = load %gep_i 4280b57cec5SDimitry Andric // = ... %x 4290b57cec5SDimitry Andric // store %y, %gep_i_plus_1 4300b57cec5SDimitry Andric // 4310b57cec5SDimitry Andric // => 4320b57cec5SDimitry Andric // 4330b57cec5SDimitry Andric // ph: 4340b57cec5SDimitry Andric // %x.initial = load %gep_0 4350b57cec5SDimitry Andric // loop: 4360b57cec5SDimitry Andric // %x.storeforward = phi [%x.initial, %ph] [%y, %loop] 4370b57cec5SDimitry Andric // %x = load %gep_i <---- now dead 4380b57cec5SDimitry Andric // = ... %x.storeforward 4390b57cec5SDimitry Andric // store %y, %gep_i_plus_1 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric Value *Ptr = Cand.Load->getPointerOperand(); 4420b57cec5SDimitry Andric auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr)); 4430b57cec5SDimitry Andric auto *PH = L->getLoopPreheader(); 4445ffd83dbSDimitry Andric assert(PH && "Preheader should exist!"); 4450b57cec5SDimitry Andric Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(), 4460b57cec5SDimitry Andric PH->getTerminator()); 447*0fca6ea1SDimitry Andric Value *Initial = 448*0fca6ea1SDimitry Andric new LoadInst(Cand.Load->getType(), InitialPtr, "load_initial", 449*0fca6ea1SDimitry Andric /* isVolatile */ false, Cand.Load->getAlign(), 450*0fca6ea1SDimitry Andric PH->getTerminator()->getIterator()); 451*0fca6ea1SDimitry Andric // We don't give any debug location to Initial, because it is inserted 452*0fca6ea1SDimitry Andric // into the loop's preheader. A debug location inside the loop will cause 453*0fca6ea1SDimitry Andric // a misleading stepping when debugging. The test update-debugloc-store 454*0fca6ea1SDimitry Andric // -forwarded.ll checks this. 4550b57cec5SDimitry Andric 4565f757f3fSDimitry Andric PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded"); 4575f757f3fSDimitry Andric PHI->insertBefore(L->getHeader()->begin()); 4580b57cec5SDimitry Andric PHI->addIncoming(Initial, PH); 459bdd1243dSDimitry Andric 460bdd1243dSDimitry Andric Type *LoadType = Initial->getType(); 461bdd1243dSDimitry Andric Type *StoreType = Cand.Store->getValueOperand()->getType(); 462*0fca6ea1SDimitry Andric auto &DL = Cand.Load->getDataLayout(); 463bdd1243dSDimitry Andric (void)DL; 464bdd1243dSDimitry Andric 465bdd1243dSDimitry Andric assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) && 466bdd1243dSDimitry Andric "The type sizes should match!"); 467bdd1243dSDimitry Andric 468bdd1243dSDimitry Andric Value *StoreValue = Cand.Store->getValueOperand(); 469*0fca6ea1SDimitry Andric if (LoadType != StoreType) { 470*0fca6ea1SDimitry Andric StoreValue = CastInst::CreateBitOrPointerCast(StoreValue, LoadType, 471*0fca6ea1SDimitry Andric "store_forward_cast", 472*0fca6ea1SDimitry Andric Cand.Store->getIterator()); 473*0fca6ea1SDimitry Andric // Because it casts the old `load` value and is used by the new `phi` 474*0fca6ea1SDimitry Andric // which replaces the old `load`, we give the `load`'s debug location 475*0fca6ea1SDimitry Andric // to it. 476*0fca6ea1SDimitry Andric cast<Instruction>(StoreValue)->setDebugLoc(Cand.Load->getDebugLoc()); 477*0fca6ea1SDimitry Andric } 478bdd1243dSDimitry Andric 479bdd1243dSDimitry Andric PHI->addIncoming(StoreValue, L->getLoopLatch()); 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric Cand.Load->replaceAllUsesWith(PHI); 482*0fca6ea1SDimitry Andric PHI->setDebugLoc(Cand.Load->getDebugLoc()); 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric /// Top-level driver for each loop: find store->load forwarding 4860b57cec5SDimitry Andric /// candidates, add run-time checks and perform transformation. 4870b57cec5SDimitry Andric bool processLoop() { 4880b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName() 4890b57cec5SDimitry Andric << "\" checking " << *L << "\n"); 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric // Look for store-to-load forwarding cases across the 4920b57cec5SDimitry Andric // backedge. E.g.: 4930b57cec5SDimitry Andric // 4940b57cec5SDimitry Andric // loop: 4950b57cec5SDimitry Andric // %x = load %gep_i 4960b57cec5SDimitry Andric // = ... %x 4970b57cec5SDimitry Andric // store %y, %gep_i_plus_1 4980b57cec5SDimitry Andric // 4990b57cec5SDimitry Andric // => 5000b57cec5SDimitry Andric // 5010b57cec5SDimitry Andric // ph: 5020b57cec5SDimitry Andric // %x.initial = load %gep_0 5030b57cec5SDimitry Andric // loop: 5040b57cec5SDimitry Andric // %x.storeforward = phi [%x.initial, %ph] [%y, %loop] 5050b57cec5SDimitry Andric // %x = load %gep_i <---- now dead 5060b57cec5SDimitry Andric // = ... %x.storeforward 5070b57cec5SDimitry Andric // store %y, %gep_i_plus_1 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric // First start with store->load dependences. 5100b57cec5SDimitry Andric auto StoreToLoadDependences = findStoreToLoadDependences(LAI); 5110b57cec5SDimitry Andric if (StoreToLoadDependences.empty()) 5120b57cec5SDimitry Andric return false; 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric // Generate an index for each load and store according to the original 5150b57cec5SDimitry Andric // program order. This will be used later. 5160b57cec5SDimitry Andric InstOrder = LAI.getDepChecker().generateInstructionOrderMap(); 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric // To keep things simple for now, remove those where the load is potentially 5190b57cec5SDimitry Andric // fed by multiple stores. 5200b57cec5SDimitry Andric removeDependencesFromMultipleStores(StoreToLoadDependences); 5210b57cec5SDimitry Andric if (StoreToLoadDependences.empty()) 5220b57cec5SDimitry Andric return false; 5230b57cec5SDimitry Andric 5240b57cec5SDimitry Andric // Filter the candidates further. 5250b57cec5SDimitry Andric SmallVector<StoreToLoadForwardingCandidate, 4> Candidates; 526480093f4SDimitry Andric for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) { 5270b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Candidate " << Cand); 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric // Make sure that the stored values is available everywhere in the loop in 5300b57cec5SDimitry Andric // the next iteration. 5310b57cec5SDimitry Andric if (!doesStoreDominatesAllLatches(Cand.Store->getParent(), L, DT)) 5320b57cec5SDimitry Andric continue; 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric // If the load is conditional we can't hoist its 0-iteration instance to 5350b57cec5SDimitry Andric // the preheader because that would make it unconditional. Thus we would 5360b57cec5SDimitry Andric // access a memory location that the original loop did not access. 5370b57cec5SDimitry Andric if (isLoadConditional(Cand.Load, L)) 5380b57cec5SDimitry Andric continue; 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric // Check whether the SCEV difference is the same as the induction step, 5410b57cec5SDimitry Andric // thus we load the value in the next iteration. 5420b57cec5SDimitry Andric if (!Cand.isDependenceDistanceOfOne(PSE, L)) 5430b57cec5SDimitry Andric continue; 5440b57cec5SDimitry Andric 545e8d8bef9SDimitry Andric assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) && 546e8d8bef9SDimitry Andric "Loading from something other than indvar?"); 547e8d8bef9SDimitry Andric assert( 548e8d8bef9SDimitry Andric isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Store->getPointerOperand())) && 549e8d8bef9SDimitry Andric "Storing to something other than indvar?"); 550e8d8bef9SDimitry Andric 551e8d8bef9SDimitry Andric Candidates.push_back(Cand); 5520b57cec5SDimitry Andric LLVM_DEBUG( 5530b57cec5SDimitry Andric dbgs() 554e8d8bef9SDimitry Andric << Candidates.size() 5550b57cec5SDimitry Andric << ". Valid store-to-load forwarding across the loop backedge\n"); 5560b57cec5SDimitry Andric } 5570b57cec5SDimitry Andric if (Candidates.empty()) 5580b57cec5SDimitry Andric return false; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric // Check intervening may-alias stores. These need runtime checks for alias 5610b57cec5SDimitry Andric // disambiguation. 5625ffd83dbSDimitry Andric SmallVector<RuntimePointerCheck, 4> Checks = collectMemchecks(Candidates); 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric // Too many checks are likely to outweigh the benefits of forwarding. 5650b57cec5SDimitry Andric if (Checks.size() > Candidates.size() * CheckPerElim) { 5660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Too many run-time checks needed.\n"); 5670b57cec5SDimitry Andric return false; 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 57081ad6265SDimitry Andric if (LAI.getPSE().getPredicate().getComplexity() > 5710b57cec5SDimitry Andric LoadElimSCEVCheckThreshold) { 5720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); 5730b57cec5SDimitry Andric return false; 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric 5765ffd83dbSDimitry Andric if (!L->isLoopSimplifyForm()) { 5775ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Loop is not is loop-simplify form"); 5785ffd83dbSDimitry Andric return false; 5795ffd83dbSDimitry Andric } 5805ffd83dbSDimitry Andric 58181ad6265SDimitry Andric if (!Checks.empty() || !LAI.getPSE().getPredicate().isAlwaysTrue()) { 5820b57cec5SDimitry Andric if (LAI.hasConvergentOp()) { 5830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with " 5840b57cec5SDimitry Andric "convergent calls\n"); 5850b57cec5SDimitry Andric return false; 5860b57cec5SDimitry Andric } 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric auto *HeaderBB = L->getHeader(); 5890b57cec5SDimitry Andric auto *F = HeaderBB->getParent(); 5900b57cec5SDimitry Andric bool OptForSize = F->hasOptSize() || 591480093f4SDimitry Andric llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI, 592480093f4SDimitry Andric PGSOQueryType::IRPass); 5930b57cec5SDimitry Andric if (OptForSize) { 5940b57cec5SDimitry Andric LLVM_DEBUG( 5950b57cec5SDimitry Andric dbgs() << "Versioning is needed but not allowed when optimizing " 5960b57cec5SDimitry Andric "for size.\n"); 5970b57cec5SDimitry Andric return false; 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric // Point of no-return, start the transformation. First, version the loop 6010b57cec5SDimitry Andric // if necessary. 6020b57cec5SDimitry Andric 603e8d8bef9SDimitry Andric LoopVersioning LV(LAI, Checks, L, LI, DT, PSE.getSE()); 6040b57cec5SDimitry Andric LV.versionLoop(); 605e8d8bef9SDimitry Andric 606e8d8bef9SDimitry Andric // After versioning, some of the candidates' pointers could stop being 607e8d8bef9SDimitry Andric // SCEVAddRecs. We need to filter them out. 608e8d8bef9SDimitry Andric auto NoLongerGoodCandidate = [this]( 609e8d8bef9SDimitry Andric const StoreToLoadForwardingCandidate &Cand) { 610e8d8bef9SDimitry Andric return !isa<SCEVAddRecExpr>( 611e8d8bef9SDimitry Andric PSE.getSCEV(Cand.Load->getPointerOperand())) || 612e8d8bef9SDimitry Andric !isa<SCEVAddRecExpr>( 613e8d8bef9SDimitry Andric PSE.getSCEV(Cand.Store->getPointerOperand())); 614e8d8bef9SDimitry Andric }; 615e8d8bef9SDimitry Andric llvm::erase_if(Candidates, NoLongerGoodCandidate); 6160b57cec5SDimitry Andric } 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric // Next, propagate the value stored by the store to the users of the load. 6190b57cec5SDimitry Andric // Also for the first iteration, generate the initial value of the load. 620*0fca6ea1SDimitry Andric SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getDataLayout(), 6210b57cec5SDimitry Andric "storeforward"); 6220b57cec5SDimitry Andric for (const auto &Cand : Candidates) 6230b57cec5SDimitry Andric propagateStoredValueToLoadUsers(Cand, SEE); 624e8d8bef9SDimitry Andric NumLoopLoadEliminted += Candidates.size(); 6250b57cec5SDimitry Andric 6260b57cec5SDimitry Andric return true; 6270b57cec5SDimitry Andric } 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric private: 6300b57cec5SDimitry Andric Loop *L; 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric /// Maps the load/store instructions to their index according to 6330b57cec5SDimitry Andric /// program order. 6340b57cec5SDimitry Andric DenseMap<Instruction *, unsigned> InstOrder; 6350b57cec5SDimitry Andric 6360b57cec5SDimitry Andric // Analyses used. 6370b57cec5SDimitry Andric LoopInfo *LI; 6380b57cec5SDimitry Andric const LoopAccessInfo &LAI; 6390b57cec5SDimitry Andric DominatorTree *DT; 6400b57cec5SDimitry Andric BlockFrequencyInfo *BFI; 6410b57cec5SDimitry Andric ProfileSummaryInfo *PSI; 6420b57cec5SDimitry Andric PredicatedScalarEvolution PSE; 6430b57cec5SDimitry Andric }; 6440b57cec5SDimitry Andric 6450b57cec5SDimitry Andric } // end anonymous namespace 6460b57cec5SDimitry Andric 647bdd1243dSDimitry Andric static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, 648bdd1243dSDimitry Andric DominatorTree &DT, 649bdd1243dSDimitry Andric BlockFrequencyInfo *BFI, 650bdd1243dSDimitry Andric ProfileSummaryInfo *PSI, 651e8d8bef9SDimitry Andric ScalarEvolution *SE, AssumptionCache *AC, 652bdd1243dSDimitry Andric LoopAccessInfoManager &LAIs) { 6530b57cec5SDimitry Andric // Build up a worklist of inner-loops to transform to avoid iterator 6540b57cec5SDimitry Andric // invalidation. 6550b57cec5SDimitry Andric // FIXME: This logic comes from other passes that actually change the loop 6560b57cec5SDimitry Andric // nest structure. It isn't clear this is necessary (or useful) for a pass 6570b57cec5SDimitry Andric // which merely optimizes the use of loads in a loop. 6580b57cec5SDimitry Andric SmallVector<Loop *, 8> Worklist; 6590b57cec5SDimitry Andric 660e8d8bef9SDimitry Andric bool Changed = false; 661e8d8bef9SDimitry Andric 6620b57cec5SDimitry Andric for (Loop *TopLevelLoop : LI) 663e8d8bef9SDimitry Andric for (Loop *L : depth_first(TopLevelLoop)) { 664e8d8bef9SDimitry Andric Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false); 6650b57cec5SDimitry Andric // We only handle inner-most loops. 666e8d8bef9SDimitry Andric if (L->isInnermost()) 6670b57cec5SDimitry Andric Worklist.push_back(L); 668e8d8bef9SDimitry Andric } 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric // Now walk the identified inner loops. 6710b57cec5SDimitry Andric for (Loop *L : Worklist) { 672e8d8bef9SDimitry Andric // Match historical behavior 673e8d8bef9SDimitry Andric if (!L->isRotatedForm() || !L->getExitingBlock()) 674e8d8bef9SDimitry Andric continue; 6750b57cec5SDimitry Andric // The actual work is performed by LoadEliminationForLoop. 676bdd1243dSDimitry Andric LoadEliminationForLoop LEL(L, &LI, LAIs.getInfo(*L), &DT, BFI, PSI); 6770b57cec5SDimitry Andric Changed |= LEL.processLoop(); 678bdd1243dSDimitry Andric if (Changed) 679bdd1243dSDimitry Andric LAIs.clear(); 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric return Changed; 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric 6840b57cec5SDimitry Andric PreservedAnalyses LoopLoadEliminationPass::run(Function &F, 6850b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 6860b57cec5SDimitry Andric auto &LI = AM.getResult<LoopAnalysis>(F); 68781ad6265SDimitry Andric // There are no loops in the function. Return before computing other expensive 68881ad6265SDimitry Andric // analyses. 68981ad6265SDimitry Andric if (LI.empty()) 69081ad6265SDimitry Andric return PreservedAnalyses::all(); 69181ad6265SDimitry Andric auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); 6920b57cec5SDimitry Andric auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 6930b57cec5SDimitry Andric auto &AC = AM.getResult<AssumptionAnalysis>(F); 6945ffd83dbSDimitry Andric auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 6955ffd83dbSDimitry Andric auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 6960b57cec5SDimitry Andric auto *BFI = (PSI && PSI->hasProfileSummary()) ? 6970b57cec5SDimitry Andric &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; 698bdd1243dSDimitry Andric LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F); 6990b57cec5SDimitry Andric 700bdd1243dSDimitry Andric bool Changed = eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, &SE, &AC, LAIs); 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric if (!Changed) 7030b57cec5SDimitry Andric return PreservedAnalyses::all(); 7040b57cec5SDimitry Andric 7050b57cec5SDimitry Andric PreservedAnalyses PA; 70606c3fb27SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 70706c3fb27SDimitry Andric PA.preserve<LoopAnalysis>(); 7080b57cec5SDimitry Andric return PA; 7090b57cec5SDimitry Andric } 710